### **0. Set-up**

In [1]:
# Import libraries and utils
%run '../../utils.ipynb'

In [2]:
# Get api key
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Set client
client = OpenAI()

In [3]:
# Load dataframe
cleaned_nl_simlex = pd.read_csv("../../../data/dataset/cleaned-nl-simlex-999.csv")

# Remove first three word pairs
cleaned_nl_simlex = cleaned_nl_simlex.iloc[0:331]

# Select subset
# cleaned_nl_simlex = cleaned_nl_simlex.head(150)

# Convert to tuple
tuples_list = list(zip(cleaned_nl_simlex['word1'], cleaned_nl_simlex['word2']))

In [4]:
# Show results
tuples_list

[('oud', 'nieuw'),
 ('slim', 'intelligent'),
 ('hard', 'moeilijk'),
 ('gelukkig', 'vrolijk'),
 ('hard', 'stoer'),
 ('snel', 'razendsnel'),
 ('gelukkig', 'blij'),
 ('kort', 'lang'),
 ('dom', 'stom'),
 ('vreemd', 'eigenaardig'),
 ('breed', 'smal'),
 ('makkelijk', 'moeilijk'),
 ('moeilijk', 'gemakkelijk'),
 ('slim', 'dom'),
 ('krankzinnig', 'gek'),
 ('gelukkig', 'kwaad'),
 ('uitgebreid', 'groot'),
 ('moeilijk', 'simpel'),
 ('nieuw', 'vers'),
 ('scherp', 'saai'),
 ('vlug', 'snel'),
 ('dom', 'dwaas'),
 ('prachtig', 'fantastisch'),
 ('eigenaardig', 'vreemd'),
 ('gelukkig', 'boos'),
 ('smal', 'breed'),
 ('eenvoudig', 'gemakkelijk'),
 ('oud', 'vers'),
 ('kennelijk', 'duidelijk'),
 ('betaalbaar', 'goedkoop'),
 ('leuk', 'grootmoedig'),
 ('raar', 'vreemd'),
 ('vreemd', 'normaal'),
 ('slecht', 'immoreel'),
 ('verdrietig', 'grappig'),
 ('prachtig', 'geweldig'),
 ('schuldig', 'beschaamd'),
 ('mooi', 'prachtig'),
 ('zelfverzekerd', 'zeker'),
 ('dom', 'onderontwikkeld'),
 ('groot', 'flexibel'),
 ('aar

### **1. Define and Evaluate Parameters**

In [9]:
# Define prompt
prompt = ("Beoordeel de semantische gelijkenis van het woordpaar: [('{word1}'), ('{word2}')] op een schaal van 0 tot 10, "
          "waarbij 0 geen semantische gelijkenis vertegenwoordigt, en 10 perfecte semantische gelijkenis. "
          "Gebruik twee decimalen. Het antwoord moet strikt voldoen aan de structuur: [('woord1', 'woord2', <score>)]. "
          "Geef geen extra uitleg of context.")

In [10]:
# Define model
model = "gpt-3.5-turbo-0125"

# Set sample size
# sample_size = 5
sample_size = 15

# Delay between individual API calls
delay = 3.0

# Define number of sublists
# n_sublists = 997
n_sublists = 331

In [11]:
# Split list
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count number of lists
print(len(chunks))

331


In [12]:
# Print prompts for each chunk
print_prompts_single(chunks, sample_size, prompt)

Beoordeel de semantische gelijkenis van het woordpaar: [('oud'), ('nieuw')] op een schaal van 0 tot 10, waarbij 0 geen semantische gelijkenis vertegenwoordigt, en 10 perfecte semantische gelijkenis. Gebruik twee decimalen. Het antwoord moet strikt voldoen aan de structuur: [('woord1', 'woord2', <score>)]. Geef geen extra uitleg of context.
Beoordeel de semantische gelijkenis van het woordpaar: [('oud'), ('nieuw')] op een schaal van 0 tot 10, waarbij 0 geen semantische gelijkenis vertegenwoordigt, en 10 perfecte semantische gelijkenis. Gebruik twee decimalen. Het antwoord moet strikt voldoen aan de structuur: [('woord1', 'woord2', <score>)]. Geef geen extra uitleg of context.
Beoordeel de semantische gelijkenis van het woordpaar: [('oud'), ('nieuw')] op een schaal van 0 tot 10, waarbij 0 geen semantische gelijkenis vertegenwoordigt, en 10 perfecte semantische gelijkenis. Gebruik twee decimalen. Het antwoord moet strikt voldoen aan de structuur: [('woord1', 'woord2', <score>)]. Geef geen

In [13]:
# Load encoding
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk
token_counts = count_tokens_with_tiktoken_single(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [107, 108, 109, 110, 107, 109, 110, 107, 107, 111, 108, 111, 112, 107, 111, 111, 110, 110, 107, 109, 108, 108, 110, 111, 110, 108, 112, 106, 110, 111, 111, 108, 108, 110, 111, 109, 110, 110, 110, 110, 108, 109, 111, 108, 108, 112, 108, 107, 109, 112, 112, 112, 108, 109, 110, 109, 108, 113, 107, 108, 111, 107, 111, 110, 111, 108, 107, 109, 108, 108, 110, 110, 106, 107, 109, 109, 109, 109, 108, 109, 109, 111, 107, 110, 111, 108, 108, 108, 109, 112, 111, 108, 111, 109, 110, 109, 107, 108, 109, 108, 108, 109, 109, 110, 108, 107, 110, 108, 108, 112, 108, 109, 107, 108, 113, 107, 108, 111, 108, 108, 109, 106, 116, 107, 108, 108, 109, 107, 110, 109, 109, 107, 108, 107, 109, 109, 112, 108, 108, 109, 107, 108, 108, 108, 109, 109, 108, 108, 109, 106, 108, 107, 108, 109, 107, 107, 109, 107, 108, 108, 106, 107, 107, 109, 109, 109, 108, 106, 107, 108, 109, 109, 110, 107, 109, 109, 107, 106, 107, 108, 110, 107, 108, 109, 109, 107, 108, 108, 109, 109, 109, 112,

In [13]:
# Max RPD = 10.000
len(token_counts*15)

4965

In [10]:
# Number of total tokens
sum(token_counts)

35947

### **2. Extract and Process Data**

In [12]:
# Get results from API
# response = get_responses_single(prompt, chunks, model, sample_size, delay)

Processing: 100%|██████████| 4965/4965 [11:32:32<00:00,  8.37s/chunk]    

Total time taken: 41552.14 seconds





In [14]:
# Define filepath
file_path = '../../../data/response/nl/gpt-3.5-turbo-0125/f9-1.json'

# Check if file already exists
if not os.path.exists(file_path):
    with open(file_path, 'w') as f:
        json.dump(response, f)
        print("File saved successfully.")
else:
    print("File already exists. JSON was not saved to prevent overwriting.")

File saved successfully.


In [25]:
# Process data into dictionary
data_dict = process_responses(response)

# Check for values higher/lower then sample size
higher_lower_samples = {key: value for key, value in data_dict.items() if len(value) < sample_size or len(value) > sample_size}

# Show results
print(higher_lower_samples)

{('sluw', 'koppig'): [0.1, 2.5, 0.2, 0.1, 0.2, 0.1, 0.11, 0.0, 0.1, 1.25, 0.2, 0.1, 0.2, 0.1]}


In [26]:
# Print duplicate word pairs
print_duplicate_word_pairs(cleaned_nl_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [32]:
# Convert dict to Pandas DataFrame
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,oud,nieuw,0.10,1.91,1.00,0.14,2.00,0.25,1.00,0.10,1.00,1.00,1.00,0.10,1.00,1.00,0.20
1,slim,intelligent,8.50,9.00,8.70,9.00,9.00,9.50,8.50,9.00,9.00,9.50,8.50,8.50,8.50,8.50,8.50
2,hard,moeilijk,7.50,7.20,7.50,8.00,9.50,8.00,8.50,8.50,8.50,8.50,8.50,7.50,6.50,7.50,9.00
3,gelukkig,vrolijk,5.80,6.50,7.50,8.50,0.65,7.50,7.50,9.00,7.50,7.50,6.50,7.50,7.50,7.50,7.50
4,hard,stoer,6.00,0.30,6.00,6.50,7.50,7.50,0.60,5.50,5.65,0.30,6.50,0.20,5.00,0.40,6.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,hart,operatie,1.75,2.56,1.50,2.80,0.33,2.36,1.67,0.18,0.23,2.56,2.34,3.12,2.34,4.22,2.55
327,vrouw,secretaresse,6.75,7.85,8.75,7.50,7.89,6.82,7.80,7.89,7.25,8.25,6.80,8.75,7.80,7.80,5.32
328,man,vader,7.20,6.50,6.50,8.50,8.40,7.50,8.50,7.50,7.50,7.80,6.75,6.50,6.75,7.50,7.50
329,strand,eiland,2.50,3.50,1.50,0.00,2.50,3.20,0.10,1.20,3.50,3.50,2.50,2.50,2.50,2.50,1.50


In [28]:
# Count null values
count_null_values = df.isnull().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                  0
word2                  0
similarity_score_1     0
similarity_score_2     0
similarity_score_3     0
similarity_score_4     0
similarity_score_5     0
similarity_score_6     0
similarity_score_7     0
similarity_score_8     0
similarity_score_9     0
similarity_score_10    0
similarity_score_11    0
similarity_score_12    0
similarity_score_13    0
similarity_score_14    0
similarity_score_15    1
dtype: int64


In [45]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
102,sluw,koppig,0.1,2.5,0.2,0.1,0.2,0.1,0.11,0.0,0.1,1.25,0.2,0.1,0.2,0.1,


In [46]:
# Custom prompt
custom_prompt = ("Beoordeel de semantische gelijkenis van het woordpaar: [('sluw'), ('koppig')] op een schaal van 0 tot 10, "
                 "waarbij 0 geen semantische gelijkenis vertegenwoordigt, en 10 perfecte semantische gelijkenis. "
                 "Gebruik twee decimalen. Het antwoord moet strikt voldoen aan de structuur: [('woord1', 'woord2', <score>)]. "
                 "Geef geen extra uitleg of context.")


# Make API call
messages = [{"role": "user", "content": custom_prompt}]
# completion = client.chat.completions.create(
#     model=model,
#     messages=messages,
#     n=1)


# Show results
print(completion.choices[0].message.content)

[('sluw', 'koppig', 0.10)]


In [47]:
# Manually fix inconsistencies
df.loc[(df['word1'] == 'sluw') & (df['word2'] == 'koppig'), 'similarity_score_15'] = 0.10

# Show results
df.loc[(df['word1'] == 'sluw') & (df['word2'] == 'koppig')]

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
102,sluw,koppig,0.1,2.5,0.2,0.1,0.2,0.1,0.11,0.0,0.1,1.25,0.2,0.1,0.2,0.1,0.1


In [79]:
# Drop faulty row
df = df[~((df['word1'] == 'winter') & (df['word2'] == 'seaso'))]

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.05,0.10,1.00,1.0,0.20,0.10,0.15,1.5,0.10,0.15,3.50,0.01,0.20,1.00,0.10
1,smart,intelligent,0.90,0.95,1.00,1.0,0.96,0.95,0.95,9.5,0.95,0.90,9.50,0.93,0.95,0.94,1.00
2,hard,difficult,0.90,0.80,0.71,1.0,0.92,0.90,0.90,8.0,0.85,0.80,7.00,0.85,0.85,0.97,0.85
3,happy,cheerful,0.80,0.80,0.90,0.0,0.70,0.75,0.80,8.5,0.85,0.80,8.00,0.81,0.70,0.86,0.90
4,hard,easy,0.10,0.10,0.10,0.0,0.16,0.05,0.20,2.0,0.20,0.20,2.00,0.15,0.10,0.22,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,join,acquire,2.00,4.00,3.50,0.3,0.60,0.75,3.00,2.5,4.50,0.50,0.45,0.70,5.00,0.50,0.31
996,send,attend,1.00,1.00,1.00,0.2,0.30,0.00,2.00,2.0,3.50,0.00,0.21,0.40,3.00,0.20,0.29
997,gather,attend,2.00,2.00,2.00,0.3,0.45,0.50,2.00,3.0,3.00,0.00,0.32,0.70,4.00,0.30,0.37
998,absorb,withdraw,1.00,1.00,1.50,0.1,0.20,0.00,1.00,1.5,1.50,0.00,0.20,0.30,2.00,0.10,0.16


In [48]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [81]:
# Extract missing word pairs
missing_word_pair_list = list(zip(rows_with_null['word1'], rows_with_null['word2']))

# Show results
missing_word_pair_list

[('fee', 'payment'),
 ('bible', 'hymn'),
 ('exit', 'doorway'),
 ('man', 'sentry'),
 ('aisle', 'hall'),
 ('whiskey', 'gin'),
 ('blood', 'marrow'),
 ('oil', 'mink'),
 ('floor', 'deck'),
 ('roof', 'floor'),
 ('door', 'floor'),
 ('shoulder', 'head'),
 ('wagon', 'carriage'),
 ('car', 'carriage'),
 ('elbow', 'ankle')]

In [82]:
# Format the message
formatted_prompt = format_prompt(missing_word_pair_list, prompt)
messages = [{"role": "user", "content": formatted_prompt}]

# Make API call
# completion = client.chat.completions.create(
#     model=model,
#     messages=messages,
#     n=1)

# Store the response content
missing_word_pairs_response = [completion.choices[0].message.content]

In [83]:
# Process data into dictionary
missing_word_pairs_dict = process_responses(missing_word_pairs_response)

# Show results
missing_word_pairs_dict

{('fee', 'payment'): [9.0],
 ('bible', 'hymn'): [2.0],
 ('exit', 'doorway'): [1.0],
 ('man', 'sentry'): [1.0],
 ('aisle', 'hall'): [5.0],
 ('whiskey', 'gin'): [4.0],
 ('blood', 'marrow'): [1.0],
 ('oil', 'mink'): [1.0],
 ('floor', 'deck'): [2.0],
 ('roof', 'floor'): [1.0],
 ('door', 'floor'): [1.0],
 ('shoulder', 'head'): [2.0],
 ('wagon', 'carriage'): [8.0],
 ('car', 'carriage'): [8.0],
 ('elbow', 'ankle'): [1.0]}

In [84]:
# Iterate over DataFrame and replace missing values
for index, row in df.iterrows():
    word_pair = (row['word1'], row['word2'])

    # Check if current value is NaN
    if pd.isna(row['similarity_score_15']):
        if word_pair in missing_word_pairs_dict:
            
            # Extract first element from list
            df.at[index, 'similarity_score_15'] = missing_word_pairs_dict[word_pair][0]

# Check if any NaN values left
print(df[df['similarity_score_15'].isna()])

Empty DataFrame
Columns: [word1, word2, similarity_score_1, similarity_score_2, similarity_score_3, similarity_score_4, similarity_score_5, similarity_score_6, similarity_score_7, similarity_score_8, similarity_score_9, similarity_score_10, similarity_score_11, similarity_score_12, similarity_score_13, similarity_score_14, similarity_score_15]
Index: []


In [85]:
# Filter the DataFrame based on pairs
filtered_df = df[df.apply(lambda row: (row['word1'], row['word2']) in missing_word_pair_list, axis=1)]

# Select column
result = filtered_df[['word1', 'word2', 'similarity_score_15']]

# Show results
print(result)

        word1     word2  similarity_score_15
486       fee   payment                  9.0
487     bible      hymn                  2.0
488      exit   doorway                  1.0
489       man    sentry                  1.0
490     aisle      hall                  5.0
491   whiskey       gin                  4.0
492     blood    marrow                  1.0
493       oil      mink                  1.0
494     floor      deck                  2.0
495      roof     floor                  1.0
496      door     floor                  1.0
497  shoulder      head                  2.0
498     wagon  carriage                  8.0
499       car  carriage                  8.0
500     elbow     ankle                  1.0


In [86]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [49]:
# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,oud,nieuw,0.10,1.91,1.00,0.14,2.00,0.25,1.00,0.10,1.00,1.00,1.00,0.10,1.00,1.00,0.20
1,slim,intelligent,8.50,9.00,8.70,9.00,9.00,9.50,8.50,9.00,9.00,9.50,8.50,8.50,8.50,8.50,8.50
2,hard,moeilijk,7.50,7.20,7.50,8.00,9.50,8.00,8.50,8.50,8.50,8.50,8.50,7.50,6.50,7.50,9.00
3,gelukkig,vrolijk,5.80,6.50,7.50,8.50,0.65,7.50,7.50,9.00,7.50,7.50,6.50,7.50,7.50,7.50,7.50
4,hard,stoer,6.00,0.30,6.00,6.50,7.50,7.50,0.60,5.50,5.65,0.30,6.50,0.20,5.00,0.40,6.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,hart,operatie,1.75,2.56,1.50,2.80,0.33,2.36,1.67,0.18,0.23,2.56,2.34,3.12,2.34,4.22,2.55
327,vrouw,secretaresse,6.75,7.85,8.75,7.50,7.89,6.82,7.80,7.89,7.25,8.25,6.80,8.75,7.80,7.80,5.32
328,man,vader,7.20,6.50,6.50,8.50,8.40,7.50,8.50,7.50,7.50,7.80,6.75,6.50,6.75,7.50,7.50
329,strand,eiland,2.50,3.50,1.50,0.00,2.50,3.20,0.10,1.20,3.50,3.50,2.50,2.50,2.50,2.50,1.50


In [50]:
# Define file_path
file_path = '../../../data/prompt/nl/gpt-3.5-turbo-0125/f9-1.csv'

# Check if file already exists
if not os.path.exists(file_path):
    df.to_csv(file_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Dataframe was not saved to prevent overwriting.")

File saved successfully.
