### **0. Set-up**

In [1]:
# Import libraries and utils
%run '../../utils.ipynb'

In [2]:
# Read the OpenRouter API key from the environment
# (load_dotenv presumably pulls variables from a local .env file first — provided via utils, confirm)
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast when the key is missing: os.getenv returns None in that case, and an
# OpenAI client built with api_key=None falls back to the OPENAI_API_KEY variable
# (or raises an unrelated-looking error), which would either send the wrong
# credential to OpenRouter or hide the real cause.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to the environment or to the .env file."
    )

# Set client: OpenAI-compatible client pointed at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [3]:
# Read the cleaned English SimLex-999 word pairs from disk
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv")

# Optional: keep only the first rows for a quick trial run
# en_simlex = en_simlex.head(150)

# Build one plain (word1, word2) tuple per dataset row
tuples_list = list(
    en_simlex[['word1', 'word2']].itertuples(index=False, name=None)
)

In [4]:
# Display the (word1, word2) pairs built in the previous cell as this cell's output.
# NOTE(review): this prints every pair; a slice such as tuples_list[:10] would keep the output short.
tuples_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

### **1. Define and Evaluate Parameters**

In [5]:
# Instruction text sent with every chunk of word pairs: fixes the rating scale
# (0 to 5, two decimals) and the exact list-of-tuples format expected in the reply.
# The fragments are joined with single spaces into one line of text.
prompt = " ".join((
    "Rate the semantic similarity of each word pair on a scale from 0 to 5,",
    "where 0 represents no semantic similarity, and 5 represents perfect semantic similarity.",
    "Use two decimals. The response should strictly adhere to the structure:",
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...].",
    "Do not provide additional explanations or context.",
))

In [6]:
# --- Run configuration ---

# Model identifier passed to the API helper
model = "meta-llama/llama-4-scout"

# Number of sublists the word-pair list is split into
n_sublists = 20

# Sample size: the number of scores each word pair is later expected to have
sample_size = 15

# Delay between individual API calls (presumably seconds — confirm in utils)
delay = 15.0

In [7]:
# Split the word-pair list into `n_sublists` parts with the shared helper
# (split_into_n_lists is defined outside this notebook — presumably in utils; confirm its balancing rule there)
chunks = split_into_n_lists(tuples_list, n_sublists)

# Sanity check: the number of sublists produced should equal n_sublists
print(len(chunks))

20


In [8]:
# Print the formatted prompt for each chunk so the exact text sent to the model can be inspected
# (print_prompts is defined outside this notebook; the recorded output shows the prompt, a '---' separator, then the pairs)
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 5, where 0 represents no semantic similarity, and 5 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. --- ["('old', 'new'), ('smart', 'intelligent'), ('hard', 'difficult'), ('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 'broad'), ('simple', 'easy'), ('old', 'fresh'), ('apparent', 'obvious'), ('inexpensive'

In [9]:
# Load the tiktoken "cl100k_base" encoding.
# NOTE(review): `encoding` is not passed to the helper below, so the helper presumably reads
# this global — keep this assignment before the call and confirm in utils.
# NOTE(review): the model used in this notebook is a Llama model; counts from this encoding
# are presumably only an approximation of that model's own tokenization.
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk (one count per formatted prompt, judging by the message printed below)
token_counts = count_tokens_with_tiktoken(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [439, 446, 427, 430, 421, 419, 434, 441, 440, 426, 426, 433, 436, 423, 429, 425, 416, 431, 437, 407]


### **2. Extract and Process Data**

In [10]:
# Query the model through the API for all chunks (get_responses is defined outside this notebook).
# NOTE(review): the recorded run shows 300 progress steps and about 8,800 seconds in total,
# so this cell is expensive — persist `response` (next cell) before doing anything else with it.
response = get_responses(chunks, prompt, model, sample_size, delay)

Processing: 100%|██████████| 300/300 [2:26:37<00:00, 29.33s/chunk]   

Total time taken: 8797.91 seconds





In [11]:
# Define filepath
file_path = '../../../data/llama-4-scout/response/en/f4.json'

# Persist the raw API responses exactly once: an existing file is never
# overwritten, so re-running this cell cannot clobber an earlier run.
if not os.path.exists(file_path):
    # Create the target directory first; otherwise open() raises
    # FileNotFoundError and the responses collected by the long-running
    # API cell would exist only in kernel memory.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(response, f)
    # Report success only after the file handle has been closed
    print("File saved successfully.")
else:
    print("File already exists. JSON was not saved to prevent overwriting.")

File saved successfully.


In [12]:
# Parse the raw responses into a dictionary (iterated below as key -> collection of values)
data_dict = process_responses(response)

# Keep every entry whose number of values differs from the sample size
# (i.e. is either higher or lower than expected)
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('old', 'new'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('smart', 'intelligent'): [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0], ('hard', 'difficult'): [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0], ('happy', 'cheerful'): [4.8, 4.5, 4.8, 4.8, 4.8, 4.8, 4.8], ('hard', 'easy'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('fast', 'rapid'): [4.9, 4.8, 4.9, 4.9, 4.9, 4.9, 4.9], ('happy', 'glad'): [4.8, 4.6, 4.8, 4.8, 4.7, 4.7, 4.9], ('short', 'long'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('stupid', 'dumb'): [4.9, 4.8, 4.9, 4.9, 4.9, 4.9, 4.9], ('weird', 'strange'): [4.7, 4.8, 4.7, 4.8, 4.8, 4.8, 4.8], ('wide', 'narrow'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('bad', 'awful'): [4.8, 4.5, 4.6, 4.8, 4.8, 4.7, 4.7], ('easy', 'difficult'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('bad', 'terrible'): [4.9, 4.6, 4.8, 4.8, 4.9, 4.8, 4.8], ('hard', 'simple'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('smart', 'dumb'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('insane', 'crazy'): [4.8, 4.8, 4.8, 4.8, 4.8, 4.9, 4.8], ('happy', 'mad'): [0.0,

In [13]:
# Report duplicate word pairs via the shared helper, which receives both the source
# dataset and the processed responses (helper defined outside this notebook;
# presumably it checks each of the two inputs — confirm in utils)
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [14]:
# Convert the processed-responses dictionary into a pandas DataFrame
# (create_dataframe is defined outside this notebook; the recorded output shows
# word1/word2 columns followed by numbered similarity_score_* columns)
df = create_dataframe(data_dict)

# Display the DataFrame as the cell's output
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,...,similarity_score_13,similarity_score_14,similarity_score_15,similarity_score_16,similarity_score_17,similarity_score_18,similarity_score_19,similarity_score_20,similarity_score_21,similarity_score_22
0,old,new,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
1,smart,intelligent,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,...,,,,,,,,,,
2,hard,difficult,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,...,,,,,,,,,,
3,happy,cheerful,4.8,4.5,4.8,4.8,4.8,4.8,4.8,,...,,,,,,,,,,
4,hard,easy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1082,join,acquire,2.5,2.5,2.0,2.0,2.5,2.5,2.5,2.5,...,,,,,,,,,,
1083,send,attend,2.0,2.0,1.5,1.5,2.0,2.0,2.0,2.0,...,,,,,,,,,,
1084,gather,attend,3.5,3.0,2.5,2.5,3.0,3.0,3.0,3.5,...,,,,,,,,,,
1085,absorb,withdraw,1.0,1.0,0.5,0.5,0.5,1.0,1.0,1.0,...,,,,,,,,,,


In [15]:
# Tally the missing entries in every column of the score table
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                     0
word2                     0
similarity_score_1        0
similarity_score_2       80
similarity_score_3       88
similarity_score_4       88
similarity_score_5       88
similarity_score_6      138
similarity_score_7      288
similarity_score_8      438
similarity_score_9      687
similarity_score_10     887
similarity_score_11     888
similarity_score_12     987
similarity_score_13    1037
similarity_score_14    1037
similarity_score_15    1037
similarity_score_16    1037
similarity_score_17    1037
similarity_score_18    1037
similarity_score_19    1037
similarity_score_20    1037
similarity_score_21    1037
similarity_score_22    1085
dtype: int64


In [16]:
# Select every row in which at least one column is missing
rows_with_null = df.loc[df.isna().any(axis="columns")]

# Display the selection with a fresh 0-based index (rows_with_null itself keeps its original index)
rows_with_null.reset_index(drop=True)

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,...,similarity_score_13,similarity_score_14,similarity_score_15,similarity_score_16,similarity_score_17,similarity_score_18,similarity_score_19,similarity_score_20,similarity_score_21,similarity_score_22
0,old,new,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
1,smart,intelligent,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,...,,,,,,,,,,
2,hard,difficult,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,...,,,,,,,,,,
3,happy,cheerful,4.8,4.5,4.8,4.8,4.8,4.8,4.8,,...,,,,,,,,,,
4,hard,easy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1080,join,acquire,2.5,2.5,2.0,2.0,2.5,2.5,2.5,2.5,...,,,,,,,,,,
1081,send,attend,2.0,2.0,1.5,1.5,2.0,2.0,2.0,2.0,...,,,,,,,,,,
1082,gather,attend,3.5,3.0,2.5,2.5,3.0,3.0,3.0,3.5,...,,,,,,,,,,
1083,absorb,withdraw,1.0,1.0,0.5,0.5,0.5,1.0,1.0,1.0,...,,,,,,,,,,


In [17]:
# Collect the (word1, word2) pair of every row that has at least one missing value
missing_word_pair_list = list(
    rows_with_null[['word1', 'word2']].itertuples(index=False, name=None)
)

# Show results
missing_word_pair_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

In [None]:
# Define file_path.
# The directory must match the model evaluated in this notebook
# ("meta-llama/llama-4-scout", whose raw responses are stored under data/llama-4-scout/);
# the earlier 'llama-3.3-70b' path would have filed these results under a different model
# or, if that file already existed, silently skipped saving.
file_path = '../../../data/llama-4-scout/processed/en/f4.csv'

# Save the processed scores exactly once; an existing file is never overwritten
if not os.path.exists(file_path):
    # Create the target directory if needed so to_csv does not fail on a missing folder
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    df.to_csv(file_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Dataframe was not saved to prevent overwriting.")