### **0. Set-up**

In [10]:
# Import libraries and utils
%run '../../utils.ipynb'

In [11]:
# Get api key: load_dotenv() reads a local .env file (if any) into the process
# environment, then the key is looked up under OPENROUTER_API_KEY.
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast with a clear message. os.getenv returns None when the variable is
# missing; passing api_key=None to the OpenAI client would not fail here but
# later (and, depending on the client version, may fall back to another
# environment key such as OPENAI_API_KEY — see the client documentation).
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to your .env file or environment "
        "before running this notebook."
    )

# Set client: an OpenAI-compatible client pointed at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [12]:
# Read the cleaned SimLex-999 word pairs and drop the first three rows in one step
# (presumably the three pairs quoted as examples in the prompt — TODO confirm)
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv").iloc[3:]

# Optional: restrict to a smaller subset while experimenting
# en_simlex = en_simlex.head(160)

# Pair the two word columns row by row into a list of (word1, word2) tuples
tuples_list = [
    (first_word, second_word)
    for first_word, second_word in zip(en_simlex['word1'], en_simlex['word2'])
]

In [13]:
# Show results: display every (word1, word2) pair built in the previous cell.
# NOTE(review): this renders the complete list; slicing (e.g. the first few
# pairs) plus len() would keep the saved notebook output short.
tuples_list

[('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),
 ('strong', 'proud'),
 ('unnecessary', 'necessary'),
 ('restless', 

### **1. Define and Evaluate Parameters**

In [14]:
# Define prompt: instruction text sent with every chunk of word pairs.
# The fragments are joined with single spaces, so none of them carries a
# trailing space; the final fragment holds the three worked examples.
prompt = " ".join([
    "Rate the semantic similarity of each word pair on a scale from 0 to 10,",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals. The response should strictly adhere to the structure:",
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...].",
    "Do not provide additional explanations or context.",
    "Examples of word pairs and their semantic similarity scores are: "
    "[('old', 'new', 1.58), ('smart', 'intelligent', 9.20), ('hard', 'difficult', 8.77)].",
])

In [15]:
# --- Run parameters (everything a reader might tune) ---

# Number of sublists the word pairs are split into
n_sublists = 25

# Sample size handed to get_responses
# (presumably the number of repeated ratings collected per chunk — confirm in utils)
sample_size = 15

# Pause, in seconds, between individual API calls
delay = 15.0

# Model identifier used for the API requests
model = "meta-llama/llama-3.3-70b-instruct"

In [16]:
# Split list: partition the word pairs into n_sublists chunks using
# split_into_n_lists from the utils notebook (how the pairs are distributed
# across chunks is decided there — not visible in this notebook).
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count number of lists: sanity check that the split produced n_sublists chunks
print(len(chunks))

25


In [17]:
# Print prompts for each chunk: preview the text built from `prompt` and each
# chunk before any API call is made (formatting is done by print_prompts in
# the utils notebook).
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. Examples of word pairs and their semantic similarity scores are: [('old', 'new', 1.58), ('smart', 'intelligent', 9.20), ('hard', 'difficult', 8.77)]. --- ["('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 

In [18]:
# Load encoding
# NOTE(review): `encoding` is not passed to count_tokens_with_tiktoken below, so
# the helper presumably reads it as a global from the shared notebook namespace
# — confirm in utils before renaming or removing this variable.
# NOTE(review): cl100k_base is a tiktoken encoding, while `model` is a Llama
# model; the counts are presumably only an estimate of the real prompt size.
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk: one count per formatted prompt (prompt + chunk)
token_counts = count_tokens_with_tiktoken(chunks, prompt)
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [413, 417, 418, 402, 406, 401, 396, 411, 410, 411, 415, 407, 404, 401, 412, 414, 405, 398, 407, 402, 400, 397, 393, 405, 391]


### **2. Extract and Process Data**

In [19]:
# Get results from API: send the prompt with every chunk to `model` via
# get_responses (utils notebook), passing the configured sample_size and delay.
# This is the expensive step of the notebook (the recorded run took over three
# hours), so persist `response` right after it finishes — the next cell does so.
response = get_responses(chunks, prompt, model, sample_size, delay)

Processing: 100%|██████████| 375/375 [3:15:25<00:00, 31.27s/chunk]  

Total time taken: 11725.93 seconds





In [20]:
# Define filepath
file_path = '../../../data/llama-3.3-70b/response/en/f3.json'

# Serialize before touching the filesystem: if `response` contains something
# that is not JSON-serializable, this fails without leaving a partial file
# behind (a partial file would make every later run report "already exists").
serialized_response = json.dumps(response)

# Make sure the target directory exists so the save cannot fail on a missing
# folder after the long-running API cell has completed.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Open in exclusive-creation mode ('x'): the existence check and the creation
# happen atomically, so an existing file is never overwritten.
try:
    with open(file_path, 'x', encoding='utf-8') as f:
        f.write(serialized_response)
except FileExistsError:
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    print("File saved successfully.")

File saved successfully.


In [31]:
# Turn the raw API responses into a dictionary of scores per word pair
data_dict = process_responses(response)

# Keep only the word pairs whose number of collected scores differs from
# sample_size (i.e. pairs with more or fewer scores than expected)
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('music', 'melody'): [9.13, 9.45, 8.92, 8.89, 9.4, 9.15, 8.59, 9.5, 8.59, 9.41, 9.01, 9.01, 9.15, 8.95], ('flower', 'violet'): [8.19, 8.2, 8.58, 8.23, 8.95, 8.65, 8.15, 8.95, 8.21, 8.21, 8.15, 8.69, 8.69, 8.8], ('paper', 'cardboard'): [8.01, 7.95, 8.13, 8.51, 8.2, 8.21, 7.91, 8.21, 7.41, 7.85, 8.52, 8.22, 8.25, 8.38], ('motel', 'inn'): [8.49, 8.6, 9.04, 8.83, 8.65, 8.96, 8.71, 9.23, 8.46, 8.44, 8.91, 8.95, 8.96, 8.95], ('clothes', 'cloth'): [8.74, 7.7, 8.69, 8.17, 8.4, 8.59, 8.23, 8.76, 7.83, 7.93, 8.39, 8.39, 8.49, 8.49], ('book', 'bible'): [7.91, 7.1, 8.46, 7.93, 8.05, 7.69, 7.67, 8.53, 7.53, 7.49, 7.92, 7.93, 8.17, 7.85], ('alcohol', 'gin'): [9.29, 9.9, 9.53, 9.63, 9.55, 9.28, 9.65, 9.94, 9.58, 9.39, 9.59, 9.36, 9.39, 9.5], ('bread', 'flour'): [8.21, 8.18, 8.19, 8.16, 8.01, 7.49, 6.49, 8.21, 8.15, 8.12, 6.49, 7.14, 8.1, 8.13], ('metal', 'aluminum'): [8.98, 8.96, 8.93, 8.67, 8.67, 8.23, 8.39, 8.56, 8.66, 8.95, 8.85, 8.51, 8.7, 8.98], ('cloud', 'fog'): [8.28, 8.73, 8.21, 8.25, 8.21, 

In [32]:
# Print duplicate word pairs: run the duplicate check from the utils notebook
# on the source dataset and on the processed responses (the exact duplicate
# criterion lives in print_duplicate_word_pairs — not visible here).
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [33]:
# Convert dict to Pandas DataFrame via create_dataframe (utils notebook).
# Judging by the displayed result, each row is one word pair with columns
# word1, word2 and similarity_score_1 ... similarity_score_15.
df = create_dataframe(data_dict)

# Show results (rich display of the frame as the cell's last expression)
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,happy,cheerful,8.95,8.45,9.25,8.45,8.95,9.45,8.94,8.95,9.45,8.45,8.95,8.95,8.95,8.95,8.95
1,hard,easy,1.21,1.22,1.29,1.22,1.29,1.23,1.29,1.29,1.29,1.21,1.22,1.29,1.21,1.22,1.22
2,fast,rapid,9.45,9.50,9.45,9.50,9.45,9.81,9.41,9.41,9.89,9.50,9.43,9.41,9.45,9.45,9.45
3,happy,glad,9.20,9.17,9.38,9.21,9.18,9.58,9.18,9.23,9.51,9.10,9.15,9.23,9.28,9.18,9.25
4,short,long,1.09,1.04,1.22,1.11,1.41,1.15,1.38,1.42,1.12,1.01,1.41,1.18,1.05,1.58,1.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,join,acquire,8.23,8.19,8.39,8.53,7.95,7.56,7.94,8.04,7.65,8.55,7.51,7.58,7.91,8.44,8.22
1005,send,attend,6.51,6.83,6.79,6.26,5.67,5.49,5.46,5.83,5.76,6.29,5.81,5.84,6.27,6.21,6.17
1006,gather,attend,8.17,8.13,7.96,8.17,7.41,7.11,7.62,7.17,7.22,7.82,7.26,8.26,7.74,8.16,7.39
1007,absorb,withdraw,3.16,3.67,3.19,4.14,3.14,3.46,3.48,2.41,3.15,4.18,3.48,4.48,2.96,4.51,3.41


In [34]:
# Tally the missing entries in each column of the score table
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2     13
similarity_score_3     13
similarity_score_4     13
similarity_score_5     13
similarity_score_6     13
similarity_score_7     13
similarity_score_8     13
similarity_score_9     13
similarity_score_10    13
similarity_score_11    13
similarity_score_12    13
similarity_score_13    13
similarity_score_14    15
similarity_score_15    63
dtype: int64


In [35]:
# Flag every row that is missing at least one value, then keep just those rows
has_missing_value = df.isna().any(axis="columns")
rows_with_null = df[has_missing_value]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
193,music,melody,9.13,9.45,8.92,8.89,9.40,9.15,8.59,9.50,8.59,9.41,9.01,9.01,9.15,8.95,
194,flower,violet,8.19,8.20,8.58,8.23,8.95,8.65,8.15,8.95,8.21,8.21,8.15,8.69,8.69,8.80,
195,paper,cardboard,8.01,7.95,8.13,8.51,8.20,8.21,7.91,8.21,7.41,7.85,8.52,8.22,8.25,8.38,
196,motel,inn,8.49,8.60,9.04,8.83,8.65,8.96,8.71,9.23,8.46,8.44,8.91,8.95,8.96,8.95,
197,clothes,cloth,8.74,7.70,8.69,8.17,8.40,8.59,8.23,8.76,7.83,7.93,8.39,8.39,8.49,8.49,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
936,discuss,explain,8.19,8.23,8.19,8.23,8.15,8.15,8.32,8.15,8.15,8.29,8.19,8.16,8.23,8.19,
938,explore,discover,8.92,8.91,8.51,8.91,8.93,8.92,8.95,8.69,8.69,8.93,8.95,8.94,8.59,8.94,
944,reduce,shrink,8.51,9.25,8.63,8.69,8.67,8.63,8.59,9.25,8.92,8.51,8.51,8.59,8.94,8.51,
955,contemplate,think,9.04,9.01,8.93,9.25,8.24,8.94,9.14,8.58,9.24,9.04,8.94,9.18,8.81,8.92,


In [36]:
# Collect the (word1, word2) pair of every row that has a missing score.
# NOTE(review): the recorded output contains pairs with a truncated second
# word (e.g. ('boy', 'oldier'), ('tin', 'etal')); these look like artifacts of
# malformed model responses rather than real dataset pairs — verify before
# using this list to re-query the model.
missing_word_pair_list = [
    (first_word, second_word)
    for first_word, second_word in zip(rows_with_null['word1'], rows_with_null['word2'])
]

# Show results
missing_word_pair_list

[('music', 'melody'),
 ('flower', 'violet'),
 ('paper', 'cardboard'),
 ('motel', 'inn'),
 ('clothes', 'cloth'),
 ('book', 'bible'),
 ('alcohol', 'gin'),
 ('bread', 'flour'),
 ('metal', 'aluminum'),
 ('cloud', 'fog'),
 ('clothes', 'fabric'),
 ('woman', 'secretary'),
 ('pain', 'harm'),
 ('lover', 'companion'),
 ('wine', 'liquor'),
 ('cow', 'goat'),
 ('seed', 'mustard'),
 ('diet', 'salad'),
 ('science', 'psychology'),
 ('men', 'fraternity'),
 ('journey', 'conquest'),
 ('boy', 'soldier'),
 ('tin', 'metal'),
 ('neck', 'spine'),
 ('dinner', 'soup'),
 ('boy', 'oldier'),
 ('tin', 'etal'),
 ('neck', 'pine'),
 ('dinner', 'oup'),
 ('advise', 'recommend'),
 ('win', 'succeed'),
 ('greet', 'meet'),
 ('destroy', 'make'),
 ('take', 'receive'),
 ('spend', 'save'),
 ('gather', 'meet'),
 ('send', 'receive'),
 ('leave', 'remain'),
 ('advise', 'ecommend'),
 ('win', 'ucceed'),
 ('greet', 'eet'),
 ('destroy', 'ake'),
 ('take', 'eceive'),
 ('spend', 'ave'),
 ('gather', 'eet'),
 ('send', 'eceive'),
 ('leave', 

In [None]:
# Define file_path
# NOTE(review): the raw responses are stored under
# 'data/llama-3.3-70b/response/en/' while this file goes to
# 'data/prompt/en/llama-3.3-70b/' — confirm the differing layout is intended.
file_path = '../../../data/prompt/en/llama-3.3-70b/f3.csv'

# Make sure the target directory exists; otherwise the write fails on a
# missing folder.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Open in exclusive-creation mode ('x') so the existence check and the file
# creation are a single atomic step and an existing file is never overwritten.
# newline='' lets pandas control the line endings of the CSV itself.
# NOTE(review): df is written as-is, including any rows that still contain
# missing scores — confirm that is what downstream steps expect.
try:
    with open(file_path, 'x', newline='', encoding='utf-8') as f:
        df.to_csv(f, index=False)
except FileExistsError:
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    print("File saved successfully.")