### **0. Set-up**

In [6]:
# Import libraries and utils
%run '../../utils.ipynb'

In [7]:
# Load variables from a .env file into the environment, then read the API key
load_dotenv()
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Create the API client
# NOTE(review): OPENAI_API_KEY is not passed to the constructor; presumably
# OpenAI() picks the key up from the environment on its own - confirm.
client = OpenAI()

In [8]:
# Read the cleaned SimLex-999 word pairs and drop the first three rows
# NOTE(review): presumably the dropped pairs are the three used as examples
# in the prompt - verify against the dataset.
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv").iloc[3:]

# Optional while debugging: work on a subset only
# en_simlex = en_simlex.head(160)

# Pair up the two word columns as a list of (word1, word2) tuples
tuples_list = [
    (first_word, second_word)
    for first_word, second_word in zip(en_simlex['word1'], en_simlex['word2'])
]

In [9]:
# Show results: the full list of (word1, word2) tuples built from en_simlex
tuples_list

[('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),
 ('strong', 'proud'),
 ('unnecessary', 'necessary'),
 ('restless', 

### **1. Define and Evaluate Parameters**

In [5]:
# Instruction text that precedes every chunk of word pairs.
# It fixes the 0-10 scale, the two-decimal format, the exact response
# structure, and gives three scored example pairs.
prompt = "".join([
    "Rate the semantic similarity of each word pair on a scale from 0 to 10, ",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. ",
    "Use two decimals. The response should strictly adhere to the structure: ",
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. ",
    "Do not provide additional explanations or context. ",
    "Examples of word pairs and their semantic similarity scores are: "
    "[('old', 'new', 1.58), ('smart', 'intelligent', 9.20), ('hard', 'difficult', 8.77)].",
])

In [6]:
# Run configuration
model = "gpt-3.5-turbo-0125"  # model name passed to the API calls
n_sublists = 25               # number of sublists the word-pair list is split into
sample_size = 15              # expected number of scores per word pair
delay = 15.0                  # delay between individual API calls (presumably seconds - confirm in utils)

In [7]:
# Split the list of word-pair tuples into n_sublists chunks
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count the number of lists (should match n_sublists)
print(len(chunks))

25


In [8]:
# Print the prompts for each chunk, to check what will be sent to the model.
# Going by the captured output, each one is the instruction text followed by
# '---' and that chunk's word pairs.
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. Examples of word pairs and their semantic similarity scores are: [('old', 'new', 1.58), ('smart', 'intelligent', 9.20), ('hard', 'difficult', 8.77)]. --- ["('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 

In [9]:
# Load the encoding
# NOTE(review): `encoding` is not passed to count_tokens_with_tiktoken below;
# presumably the helper reads it as a global - confirm before removing it.
encoding = tiktoken.get_encoding("cl100k_base")

# Count the tokens per chunk (one count per formatted prompt)
token_counts = count_tokens_with_tiktoken(chunks, prompt)
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [413, 417, 418, 402, 406, 401, 396, 411, 410, 411, 415, 407, 404, 401, 412, 414, 405, 398, 407, 402, 400, 397, 393, 405, 391]


### **2. Extract and Process Data**

In [10]:
# Process each chunk and get results using the OpenAI API.
# The recorded run made 375 calls and took about 2.5 hours, so the call used
# to be commented out after running - which left `response` undefined on a
# fresh kernel. Instead, reuse the responses saved by an earlier run when they
# exist and only call the API when they do not.
response_path = '../../../data/response/en/gpt-3.5-turbo-0125/f3.json'

if os.path.exists(response_path):
    # NOTE(review): a JSON round-trip turns tuples into lists and non-string
    # keys into strings - confirm process_responses accepts the reloaded data.
    with open(response_path) as f:
        response = json.load(f)
else:
    response = get_responses(chunks, prompt, model, sample_size, delay)

Processing:   0%|          | 0/375 [00:00<?, ?chunk/s]

Processing: 100%|██████████| 375/375 [2:31:43<00:00, 24.28s/chunk]  

Total time taken: 9103.87 seconds





In [11]:
# Where the raw API responses for this run are stored
file_path = '../../../data/response/en/gpt-3.5-turbo-0125/f3.json'

# Write the responses only once; an existing file is never overwritten
if os.path.exists(file_path):
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    with open(file_path, 'w') as f:
        json.dump(response, f)
        print("File saved successfully.")

File saved successfully.


In [12]:
# Parse the raw responses into a dictionary keyed by word pair
# (the captured output shows the form {(word1, word2): [score, ...]})
data_dict = process_responses(response)

# Flag word pairs whose number of scores is higher or lower than sample_size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}
print(higher_lower_samples)

{('give', 'allow'): [4.65, 2.01, 4.21, 5.14, 4.33, 3.08, 3.89, 5.68]}


In [13]:
# Process data and print duplicate word pairs.
# In the recorded run this printed two empty DataFrames, i.e. no duplicates.
# NOTE(review): presumably one frame is for the dataset and one for the
# parsed responses - confirm in utils.
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [14]:
# Convert dict to Pandas DataFrame.
# The captured output shows one row per word pair with columns word1, word2
# and similarity_score_1 ... similarity_score_15.
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,happy,cheerful,9.30,8.60,9.50,9.20,9.50,8.40,8.00,9.40,7.50,8.84,7.50,7.50,8.31,8.82,3.33
1,hard,easy,1.20,3.44,2.00,0.36,2.50,1.72,1.00,1.85,2.50,2.63,3.00,3.50,2.59,1.20,1.29
2,fast,rapid,9.80,9.80,9.50,9.20,9.80,9.40,9.50,9.90,8.50,9.58,9.50,9.00,9.68,9.20,9.68
3,happy,glad,8.90,9.20,8.50,9.20,8.50,7.20,7.00,9.60,8.00,9.50,8.00,8.50,8.14,8.82,6.00
4,short,long,3.40,2.00,2.00,8.77,1.50,3.12,3.00,8.92,2.50,5.69,2.00,5.00,1.73,7.00,5.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,join,acquire,3.40,3.16,0.30,3.05,3.00,5.78,2.88,3.08,3.31,2.69,7.07,0.52,2.40,2.49,2.00
992,send,attend,2.92,1.41,0.10,2.35,2.00,2.00,2.05,0.77,2.19,1.78,1.46,0.20,1.56,2.51,0.00
993,gather,attend,2.96,1.41,0.08,3.32,2.09,3.00,2.25,1.58,2.33,2.46,1.12,0.24,1.77,1.69,1.00
994,absorb,withdraw,1.63,1.00,0.05,1.13,1.76,2.00,1.68,0.77,1.63,2.04,2.93,0.13,1.07,1.39,1.00


In [15]:
# Tally the missing values in every column
count_null_values = df.isna().sum()
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                  0
word2                  0
similarity_score_1     0
similarity_score_2     0
similarity_score_3     0
similarity_score_4     0
similarity_score_5     0
similarity_score_6     0
similarity_score_7     0
similarity_score_8     0
similarity_score_9     1
similarity_score_10    1
similarity_score_11    1
similarity_score_12    1
similarity_score_13    1
similarity_score_14    1
similarity_score_15    1
dtype: int64


In [16]:
# Keep only the rows in which at least one column is missing
rows_with_null = df.loc[df.isna().any(axis="columns")]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
956,give,allow,4.65,2.01,4.21,5.14,4.33,3.08,3.89,5.68,,,,,,,


In [17]:
# Custom prompt for the one word pair that came back with too few scores
custom_prompt = ("Rate the semantic similarity of the word pair [('give', 'allow')] on a scale from 0 to 10, "
                 "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. "
                 "Use two decimals. The response should strictly adhere to the structure: "
                 "[('word1', 'word2', <score>)]. "
                 "Do not provide additional explanations or context. "
                 "An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)]")

# The request body is the same for every call, so build it once
messages = [{"role": "user", "content": custom_prompt}]

# ('give', 'allow') has 8 of the 15 expected scores
# (similarity_score_9 ... similarity_score_15 are null), so 7 more are needed
n_missing_scores = 7

for _ in range(n_missing_scores):
    # Make an extra API call for ('give', 'allow'), one completion per call
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        n=1)

    # Show results
    print(completion.choices[0].message.content)

[('give', 'allow', 2.50)]
[('give', 'allow', 4.52)]
[('give', 'allow', 0.75)]
[('give', 'allow', 2.00)]
[('give', 'allow', 2.25)]
[('give', 'allow', 3.00)]
[('give', 'allow', 4.12)]


In [19]:
# Manually fix inconsistencies: fill the missing scores of ('give', 'allow')
# with the values returned by the extra API calls.

# Row selector for the affected word pair (computed once, reused below)
pair_mask = (df['word1'] == 'give') & (df['word2'] == 'allow')

# Scores in the order they were returned; they fill
# similarity_score_9 ... similarity_score_15
extra_scores = [2.50, 4.52, 0.75, 2.00, 2.25, 3.00, 4.12]

for sample_number, score in enumerate(extra_scores, start=9):
    df.loc[pair_mask, f'similarity_score_{sample_number}'] = score

# Check value
df.loc[pair_mask]

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
956,give,allow,4.65,2.01,4.21,5.14,4.33,3.08,3.89,5.68,2.5,4.52,0.75,2.0,2.25,3.0,4.12


In [21]:
# Recount the missing values per column after the manual fix
count_null_values = df.isna().sum()
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                  0
word2                  0
similarity_score_1     0
similarity_score_2     0
similarity_score_3     0
similarity_score_4     0
similarity_score_5     0
similarity_score_6     0
similarity_score_7     0
similarity_score_8     0
similarity_score_9     0
similarity_score_10    0
similarity_score_11    0
similarity_score_12    0
similarity_score_13    0
similarity_score_14    0
similarity_score_15    0
dtype: int64


In [22]:
# Confirm that no row still has a missing value
rows_with_null = df.loc[df.isna().any(axis="columns")]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [24]:
# Show the dataframe that is about to be saved
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,happy,cheerful,9.30,8.60,9.50,9.20,9.50,8.40,8.00,9.40,7.50,8.84,7.50,7.50,8.31,8.82,3.33
1,hard,easy,1.20,3.44,2.00,0.36,2.50,1.72,1.00,1.85,2.50,2.63,3.00,3.50,2.59,1.20,1.29
2,fast,rapid,9.80,9.80,9.50,9.20,9.80,9.40,9.50,9.90,8.50,9.58,9.50,9.00,9.68,9.20,9.68
3,happy,glad,8.90,9.20,8.50,9.20,8.50,7.20,7.00,9.60,8.00,9.50,8.00,8.50,8.14,8.82,6.00
4,short,long,3.40,2.00,2.00,8.77,1.50,3.12,3.00,8.92,2.50,5.69,2.00,5.00,1.73,7.00,5.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,join,acquire,3.40,3.16,0.30,3.05,3.00,5.78,2.88,3.08,3.31,2.69,7.07,0.52,2.40,2.49,2.00
992,send,attend,2.92,1.41,0.10,2.35,2.00,2.00,2.05,0.77,2.19,1.78,1.46,0.20,1.56,2.51,0.00
993,gather,attend,2.96,1.41,0.08,3.32,2.09,3.00,2.25,1.58,2.33,2.46,1.12,0.24,1.77,1.69,1.00
994,absorb,withdraw,1.63,1.00,0.05,1.13,1.76,2.00,1.68,0.77,1.63,2.04,2.93,0.13,1.07,1.39,1.00


In [23]:
# Destination of the processed similarity scores for this run
file_path = '../../../data/prompt/en/gpt-3.5-turbo-0125/f3.csv'

# Write the dataframe only once; an existing file is never overwritten
if os.path.exists(file_path):
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")

File saved successfully.
