### **0. Set-up**

In [5]:
# Import libraries and utils
%run '../../utils.ipynb'

In [6]:
#  Get api key
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Set client
client = OpenAI()

In [7]:
# Load dataframe
en_simlex = pd.read_csv("../../../data/dataset/en-simlex-999.txt", delimiter='\t')

# Select subset
en_simlex = en_simlex.iloc[0:333]
# en_simlex = en_simlex.iloc[333:666]
# en_simlex = en_simlex.iloc[666:999]

# Convert to tuple
tuples_list = list(zip(en_simlex['word1'], en_simlex['word2']))

In [8]:
en_simlex

Unnamed: 0,word1,word2,POS,SimLex999,conc(w1),conc(w2),concQ,Assoc(USF),SimAssoc333,SD(SimLex)
0,old,new,A,1.58,2.72,2.81,2,7.25,1,0.41
1,smart,intelligent,A,9.20,1.75,2.46,1,7.11,1,0.67
2,hard,difficult,A,8.77,3.76,2.21,2,5.94,1,1.19
3,happy,cheerful,A,9.55,2.56,2.34,1,5.85,1,2.18
4,hard,easy,A,0.95,3.76,2.07,2,5.82,1,0.93
...,...,...,...,...,...,...,...,...,...,...
328,heart,surgery,N,1.08,4.52,4.34,3,0.48,1,1.07
329,woman,secretary,N,1.98,4.46,4.54,3,0.48,1,1.60
330,man,father,N,4.83,4.79,4.52,3,0.48,1,1.62
331,beach,island,N,5.60,4.79,4.96,4,0.48,1,0.39


### **1. Define and Evaluate Parameters**

In [9]:
# Define the prompt
prompt = ("Rate the semantic similarity of the word pair: [('{word1}'), ('{word2}')] on a scale from 0 to 10, "
          "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. "
          "Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. "
          "Do not provide additional explanations or context.")

In [10]:
# Define model
model = "gpt-3.5-turbo-0125"

# Set sample size
# sample_size = 5
sample_size = 15

# Delay between individual API calls
delay = 3.0

# Define number of sublists
# n_sublists = 999
n_sublists = 333

In [11]:
# Split the list
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count the number of lists
print(len(chunks))

333


In [12]:
# Print the prompts for each chunk
print_prompts_single(chunks, sample_size, prompt)

Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or cont

In [13]:
# Load the encoding
encoding = tiktoken.get_encoding("cl100k_base")

# Count the tokens per chunk
token_counts = count_tokens_with_tiktoken_single(chunks, prompt)
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [74, 75, 75, 76, 74, 74, 75, 74, 76, 76, 75, 75, 75, 75, 74, 75, 76, 74, 74, 75, 74, 75, 74, 77, 77, 75, 75, 76, 74, 74, 76, 76, 75, 75, 75, 75, 75, 76, 76, 76, 75, 75, 74, 76, 76, 75, 75, 75, 75, 76, 74, 75, 74, 75, 75, 76, 75, 75, 75, 74, 74, 76, 76, 77, 77, 76, 75, 76, 75, 74, 75, 76, 75, 75, 75, 75, 76, 74, 74, 75, 74, 75, 76, 76, 76, 78, 76, 76, 75, 76, 78, 75, 75, 76, 75, 74, 74, 75, 74, 74, 75, 76, 76, 76, 76, 75, 76, 74, 75, 75, 75, 75, 74, 76, 74, 74, 74, 76, 74, 74, 75, 75, 75, 74, 74, 74, 75, 75, 75, 74, 74, 74, 74, 74, 74, 76, 75, 76, 76, 74, 75, 74, 74, 75, 75, 75, 75, 76, 75, 76, 77, 74, 76, 75, 74, 75, 74, 75, 74, 74, 74, 76, 76, 75, 74, 75, 75, 75, 75, 74, 76, 74, 75, 75, 75, 77, 75, 74, 74, 74, 74, 74, 74, 75, 75, 76, 77, 75, 75, 75, 75, 76, 75, 76, 75, 74, 74, 76, 75, 74, 74, 75, 75, 75, 75, 74, 76, 75, 75, 75, 76, 75, 75, 74, 74, 76, 74, 75, 75, 74, 74, 74, 74, 75, 75, 74, 75, 76, 75, 74, 75, 74, 74, 75, 76, 74, 74, 75, 77, 74,

In [14]:
# Max RPD = 10.000
len(token_counts*15)

4995

In [15]:
# Number of total tokens
print(sum(token_counts))

# Runtime = 6.5 hours with 5 seconds delay

24955


### **2. Extract and Process Data**

In [16]:
# Process each chunk and get results using the OpenAI API
response = get_responses_single(prompt, chunks, model, sample_size, delay)

Processing:  10%|▉         | 499/4995 [32:50<11:08:51,  8.93s/chunk]

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-n1XNQqWDo5G9Gym9DwHfeBHM on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}

In [None]:
# RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-n1XNQqWDo5G9Gym9DwHfeBHM on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit

In [None]:
# Define filepath
file_path = '../../../data/response/en/gpt-3.5-turbo-0125/f9-1.json'

# Check if the file already exists
if not os.path.exists(file_path):
    with open(file_path, 'w') as f:
        json.dump(response, f)
        print("File saved successfully.")
else:
    print("File already exists. JSON was not saved to prevent overwriting.")

File saved successfully.


In [59]:
# Extract data with regular expressions into dictionary
data_dict = process_responses(response)

# Check for values higher/lower then sample size
higher_lower_samples = {key: value for key, value in data_dict.items() if len(value) < sample_size or len(value) > sample_size}
print(higher_lower_samples)

{('old', 'new'): [0.0, 0.0, 1.0, 1.14, 2.0], ('smart', 'intelligent'): [9.5, 9.0, 9.2, 9.0, 9.5], ('hard', 'difficult'): [9.0, 9.0, 9.0, 9.0, 9.0], ('happy', 'cheerful'): [9.8, 8.75, 9.5, 8.5, 0.88], ('hard', 'easy'): [1.19, 1.0, 1.0, 2.5, 1.0], ('fast', 'rapid'): [9.0, 9.5, 9.0, 8.5, 9.5], ('happy', 'glad'): [9.5, 9.5, 9.0, 9.5, 8.5], ('short', 'long'): [1.0, 1.0, 1.0, 1.0, 2.5], ('stupid', 'dumb'): [9.0, 9.65, 8.5, 9.0, 8.5], ('weird', 'strange'): [8.5, 8.5, 8.0, 9.75, 8.5], ('wide', 'narrow'): [1.0, 1.0, 1.0, 1.0, 2.0], ('bad', 'awful'): [9.0, 8.5, 9.0, 9.5, 9.0], ('easy', 'difficult'): [1.0, 1.0, 1.0, 1.17, 1.0], ('bad', 'terrible'): [9.5, 8.5, 9.0, 8.5, 9.2], ('hard', 'simple'): [1.5, 2.0, 2.5, 1.5, 1.8], ('smart', 'dumb'): [1.0, 2.0, 1.0, 2.0, 2.5], ('insane', 'crazy'): [8.5, 8.0, 8.0, 8.5, 8.5], ('happy', 'mad'): [0.19, 1.9, 0.14, 2.75, 2.33], ('large', 'huge'): [9.0, 9.0, 8.0, 8.5, 8.8], ('hard', 'tough'): [7.0, 8.5, 6.5, 8.5, 8.0], ('new', 'fresh'): [7.5, 7.8, 6.0, 7.5, 6.0], 

In [60]:
# Process data and print duplicate word pairs
print_duplicate_word_pairs(en_simlex, data_dict)

        Combined_Columns
50             bad_great
51      difficult_simple
52   necessary_important
53          bad_terrific
54              mad_glad
..                   ...
994         join_acquire
995          send_attend
996        gather_attend
997      absorb_withdraw
998        attend_arrive

[949 rows x 1 columns]
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [61]:
# Convert dict to Pandas DataFrame
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5
0,old,new,0.0,0.0,1.0,1.14,2.0
1,smart,intelligent,9.5,9.0,9.2,9.0,9.5
2,hard,difficult,9.0,9.0,9.0,9.0,9.0
3,happy,cheerful,9.8,8.75,9.5,8.5,0.88
4,hard,easy,1.19,1.0,1.0,2.5,1.0
5,fast,rapid,9.0,9.5,9.0,8.5,9.5
6,happy,glad,9.5,9.5,9.0,9.5,8.5
7,short,long,1.0,1.0,1.0,1.0,2.5
8,stupid,dumb,9.0,9.65,8.5,9.0,8.5
9,weird,strange,8.5,8.5,8.0,9.75,8.5


In [62]:
# Count null values
count_null_values = df.isnull().sum()
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                 0
word2                 0
similarity_score_1    0
similarity_score_2    0
similarity_score_3    0
similarity_score_4    0
similarity_score_5    0
dtype: int64


In [63]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5


In [65]:
# Manually fix inconsistencies
df.loc[(df['word1'] == 'winter') & (df['word2'] == 'season'), 'similarity_score_15'] = 8.00

# Check value
df.loc[(df['word1'] == 'winter') & (df['word2'] == 'season')]

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_15


In [66]:
# Drop faulty row
df = df[~((df['word1'] == 'winter') & (df['word2'] == 'seaso'))]
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_15
0,old,new,0.0,0.0,1.0,1.14,2.0,
1,smart,intelligent,9.5,9.0,9.2,9.0,9.5,
2,hard,difficult,9.0,9.0,9.0,9.0,9.0,
3,happy,cheerful,9.8,8.75,9.5,8.5,0.88,
4,hard,easy,1.19,1.0,1.0,2.5,1.0,
5,fast,rapid,9.0,9.5,9.0,8.5,9.5,
6,happy,glad,9.5,9.5,9.0,9.5,8.5,
7,short,long,1.0,1.0,1.0,1.0,2.5,
8,stupid,dumb,9.0,9.65,8.5,9.0,8.5,
9,weird,strange,8.5,8.5,8.0,9.75,8.5,


In [67]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_15
0,old,new,0.0,0.0,1.0,1.14,2.0,
1,smart,intelligent,9.5,9.0,9.2,9.0,9.5,
2,hard,difficult,9.0,9.0,9.0,9.0,9.0,
3,happy,cheerful,9.8,8.75,9.5,8.5,0.88,
4,hard,easy,1.19,1.0,1.0,2.5,1.0,
5,fast,rapid,9.0,9.5,9.0,8.5,9.5,
6,happy,glad,9.5,9.5,9.0,9.5,8.5,
7,short,long,1.0,1.0,1.0,1.0,2.5,
8,stupid,dumb,9.0,9.65,8.5,9.0,8.5,
9,weird,strange,8.5,8.5,8.0,9.75,8.5,


In [68]:
# Extract missing word pairs
missing_word_pair_list = list(zip(rows_with_null['word1'], rows_with_null['word2']))
missing_word_pair_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

In [70]:
# Make extra API call
formatted_prompt = format_prompt(missing_word_pair_list, prompt)
messages = [{"role": "user", "content": formatted_prompt}]

# Call the OpenAI API
# completion = client.chat.completions.create(
#     model=model,
#     messages=messages,
#     n=1)

# Store the response content
missing_word_pairs_response = [completion.choices[0].message.content]

In [None]:
# Extract data with regular expressions into dictionary
missing_word_pairs_dict = process_responses(missing_word_pairs_response)
missing_word_pairs_dict

{('fee', 'payment'): [9.0],
 ('bible', 'hymn'): [2.0],
 ('exit', 'doorway'): [1.0],
 ('man', 'sentry'): [1.0],
 ('aisle', 'hall'): [5.0],
 ('whiskey', 'gin'): [4.0],
 ('blood', 'marrow'): [1.0],
 ('oil', 'mink'): [1.0],
 ('floor', 'deck'): [2.0],
 ('roof', 'floor'): [1.0],
 ('door', 'floor'): [1.0],
 ('shoulder', 'head'): [2.0],
 ('wagon', 'carriage'): [8.0],
 ('car', 'carriage'): [8.0],
 ('elbow', 'ankle'): [1.0]}

In [None]:
# Iterate over the DataFrame and replace missing values
for index, row in df.iterrows():
    word_pair = (row['word1'], row['word2'])
    # Check if the current value is NaN
    if pd.isna(row['similarity_score_15']):
        if word_pair in missing_word_pairs_dict:
            # Extract the first element from the list to get the scalar value
            df.at[index, 'similarity_score_15'] = missing_word_pairs_dict[word_pair][0]

# Print to check if there are any NaN values left in 'similarity_score_15'
print(df[df['similarity_score_15'].isna()])

Empty DataFrame
Columns: [word1, word2, similarity_score_1, similarity_score_2, similarity_score_3, similarity_score_4, similarity_score_5, similarity_score_6, similarity_score_7, similarity_score_8, similarity_score_9, similarity_score_10, similarity_score_11, similarity_score_12, similarity_score_13, similarity_score_14, similarity_score_15]
Index: []


In [None]:
# Filter the DataFrame based on pairs
filtered_df = df[df.apply(lambda row: (row['word1'], row['word2']) in missing_word_pair_list, axis=1)]

# Select only the similarity_score_15 column
result = filtered_df[['word1', 'word2', 'similarity_score_15']]

# Print or output the result
print(result)

        word1     word2  similarity_score_15
486       fee   payment                  9.0
487     bible      hymn                  2.0
488      exit   doorway                  1.0
489       man    sentry                  1.0
490     aisle      hall                  5.0
491   whiskey       gin                  4.0
492     blood    marrow                  1.0
493       oil      mink                  1.0
494     floor      deck                  2.0
495      roof     floor                  1.0
496      door     floor                  1.0
497  shoulder      head                  2.0
498     wagon  carriage                  8.0
499       car  carriage                  8.0
500     elbow     ankle                  1.0


In [None]:
# Check for rows with at least one null value
rows_with_null = df[df.isnull().any(axis=1)]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [None]:
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.05,0.10,1.00,1.0,0.20,0.10,0.15,1.5,0.10,0.15,3.50,0.01,0.20,1.00,0.10
1,smart,intelligent,0.90,0.95,1.00,1.0,0.96,0.95,0.95,9.5,0.95,0.90,9.50,0.93,0.95,0.94,1.00
2,hard,difficult,0.90,0.80,0.71,1.0,0.92,0.90,0.90,8.0,0.85,0.80,7.00,0.85,0.85,0.97,0.85
3,happy,cheerful,0.80,0.80,0.90,0.0,0.70,0.75,0.80,8.5,0.85,0.80,8.00,0.81,0.70,0.86,0.90
4,hard,easy,0.10,0.10,0.10,0.0,0.16,0.05,0.20,2.0,0.20,0.20,2.00,0.15,0.10,0.22,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,join,acquire,2.00,4.00,3.50,0.3,0.60,0.75,3.00,2.5,4.50,0.50,0.45,0.70,5.00,0.50,0.31
996,send,attend,1.00,1.00,1.00,0.2,0.30,0.00,2.00,2.0,3.50,0.00,0.21,0.40,3.00,0.20,0.29
997,gather,attend,2.00,2.00,2.00,0.3,0.45,0.50,2.00,3.0,3.00,0.00,0.32,0.70,4.00,0.30,0.37
998,absorb,withdraw,1.00,1.00,1.50,0.1,0.20,0.00,1.00,1.5,1.50,0.00,0.20,0.30,2.00,0.10,0.16


In [None]:
# Define file_path
file_path = '../../../data/prompt/en/gpt-3.5-turbo-0125/f9-1.csv'

# Check if the file already exists
if not os.path.exists(file_path):
    df.to_csv(file_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Dataframe was not saved to prevent overwriting.")

File saved successfully.
