### **0. Set-up**

In [7]:
# Execute the shared utils notebook in this kernel.
# NOTE(review): no imports or helper definitions are visible in this notebook, so names used
# below (os, json, pd, tiktoken, load_dotenv, OpenAI, get_responses, ...) presumably come
# from utils.ipynb — confirm before moving or renaming that file.
%run '../../utils.ipynb'

In [8]:
# Get api key (load_dotenv presumably reads a local .env file into the environment)
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast when the key is missing or empty.
# NOTE(review): if OpenAI is the official openai client, api_key=None makes it fall back to
# the OPENAI_API_KEY environment variable, which would send the wrong credential to
# OpenRouter — stopping here avoids that and gives a clear error before any API call.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to your .env file or environment "
        "before creating the client."
    )

# Set client pointing at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [9]:
# Read the word-pair dataset from the cleaned-en-simlex-999 CSV
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv")

# Turn the two word columns into a list of plain (word1, word2) tuples, in row order
word_columns = en_simlex[['word1', 'word2']]
tuples_list = list(word_columns.itertuples(index=False, name=None))

In [10]:
# Show the full list of (word1, word2) tuples built from en_simlex
tuples_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

### **1. Define and Evaluate Parameters**

In [11]:
# Instruction text for the rating task; the pieces are joined with single spaces
prompt = " ".join([
    "Rate the semantic similarity of each word pair on a scale from 0 to 10,",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals.",
    "The response should strictly adhere to the structure:",
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...].",
    "Do not provide additional explanations or context.",
])

In [12]:
# Number of sublists the word-pair list is split into
n_sublists = 20

# Sample size handed to get_responses and used to check the number of scores per pair
sample_size = 15

# Delay between individual API calls (presumably seconds — confirm in get_responses)
delay = 15.0

# Identifier of the model queried through the client
model = "meta-llama/llama-4-scout"

In [13]:
# Split the word-pair list into sublists using the split_into_n_lists helper
# (defined outside this notebook; n_sublists is set in the parameter cell)
chunks = split_into_n_lists(tuples_list, n_sublists)

# Sanity check: print how many sublists were produced
print(len(chunks))

20


In [14]:
# Print prompts for each chunk so the exact text can be inspected before any API call
# (print_prompts is a helper defined outside this notebook)
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. --- ["('old', 'new'), ('smart', 'intelligent'), ('hard', 'difficult'), ('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 'broad'), ('simple', 'easy'), ('old', 'fresh'), ('apparent', 'obvious'), ('inexpensiv

In [15]:
# Load encoding
# NOTE(review): cl100k_base is a tiktoken encoding for OpenAI models, while `model` is a
# Llama model — the counts below are presumably only an approximation; confirm if exact
# limits matter.
# NOTE(review): `encoding` is not passed to the helper below; presumably
# count_tokens_with_tiktoken reads it as a global — verify before renaming or removing it.
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk
token_counts = count_tokens_with_tiktoken(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [439, 446, 427, 430, 421, 419, 434, 441, 440, 426, 426, 433, 436, 423, 429, 425, 416, 431, 437, 407]


### **2. Extract and Process Data**

In [16]:
# Get results from API via the get_responses helper (defined outside this notebook).
# Long-running: the recorded run processed 300 requests and took 7931.83 seconds (~2 h 12 min),
# so avoid re-running this cell unless a fresh sample is really needed.
response = get_responses(chunks, prompt, model, sample_size, delay)

Processing: 100%|██████████| 300/300 [2:12:11<00:00, 26.44s/chunk]  

Total time taken: 7931.83 seconds





In [17]:
# Define filepath for the raw API response
file_path = '../../../data/llama-4-scout/response/en/f1.json'

# Serialize before touching the disk: if the response is not JSON-serializable the error is
# raised here, instead of leaving a truncated file that would block every later save attempt
serialized_response = json.dumps(response)

try:
    # Mode 'x' creates the file exclusively and raises FileExistsError if it is already
    # there, so the existence check and the creation happen in a single step
    with open(file_path, 'x', encoding='utf-8') as f:
        f.write(serialized_response)
    print("File saved successfully.")
except FileExistsError:
    print("File already exists. JSON was not saved to prevent overwriting.")

File saved successfully.


In [18]:
# Parse the raw responses into a dictionary (process_responses is defined outside this notebook)
data_dict = process_responses(response)

# Keep only the entries whose number of values differs from the sample size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('old', 'new'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('smart', 'intelligent'): [9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5], ('hard', 'difficult'): [9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5], ('happy', 'cheerful'): [8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5], ('hard', 'easy'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('fast', 'rapid'): [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0], ('happy', 'glad'): [8.0, 8.5, 8.5, 8.5, 8.5, 8.5, 8.0], ('short', 'long'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('stupid', 'dumb'): [8.5, 9.0, 8.0, 8.0, 8.0, 8.0, 9.0], ('weird', 'strange'): [8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.5], ('wide', 'narrow'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('bad', 'awful'): [8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 7.5], ('easy', 'difficult'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('bad', 'terrible'): [8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.0], ('hard', 'simple'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('smart', 'dumb'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('insane', 'crazy'): [8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5], ('happy', 'mad'): [0.0,

In [19]:
# Print duplicate word pairs using the print_duplicate_word_pairs helper (defined outside
# this notebook), given the source dataframe and the processed dictionary
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [20]:
# Convert dict to Pandas DataFrame via the create_dataframe helper (defined outside this notebook)
df = create_dataframe(data_dict)

# Show results (rich display of the dataframe as the cell output)
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14
0,old,new,0.0,0.0,0.0,0.0,0.0,0.00,0.0,,,,,,,
1,smart,intelligent,9.5,9.5,9.5,9.5,9.5,9.50,9.5,,,,,,,
2,hard,difficult,9.5,9.5,9.5,9.5,9.5,9.50,9.5,,,,,,,
3,happy,cheerful,8.5,8.5,8.5,8.5,8.5,8.50,8.5,,,,,,,
4,hard,easy,0.0,0.0,0.0,0.0,0.0,0.00,0.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,join,acquire,5.5,4.0,4.0,5.0,5.0,5.67,4.6,,,,,,,
995,send,attend,5.0,3.5,4.0,4.1,4.0,4.23,4.1,,,,,,,
996,gather,attend,5.5,4.0,6.0,5.6,6.0,6.78,6.3,,,,,,,
997,absorb,withdraw,4.0,2.0,0.0,2.3,2.5,2.34,2.1,,,,,,,


In [21]:
# Tally the missing entries column by column (isna is the pandas alias of isnull)
null_mask = df.isna()
count_null_values = null_mask.sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                    0
word2                    0
similarity_score_1       0
similarity_score_2       0
similarity_score_3       0
similarity_score_4       0
similarity_score_5       1
similarity_score_6      50
similarity_score_7     200
similarity_score_8     549
similarity_score_9     749
similarity_score_10    799
similarity_score_11    849
similarity_score_12    949
similarity_score_13    949
similarity_score_14    949
dtype: int64


In [22]:
# Flag every row that has a missing value in at least one column
has_null = df.isna().any(axis=1)
rows_with_null = df.loc[has_null]

# Show results with a fresh 0..n-1 index (display only; rows_with_null keeps its original index)
rows_with_null.reset_index(drop=True)

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14
0,old,new,0.0,0.0,0.0,0.0,0.0,0.00,0.0,,,,,,,
1,smart,intelligent,9.5,9.5,9.5,9.5,9.5,9.50,9.5,,,,,,,
2,hard,difficult,9.5,9.5,9.5,9.5,9.5,9.50,9.5,,,,,,,
3,happy,cheerful,8.5,8.5,8.5,8.5,8.5,8.50,8.5,,,,,,,
4,hard,easy,0.0,0.0,0.0,0.0,0.0,0.00,0.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,join,acquire,5.5,4.0,4.0,5.0,5.0,5.67,4.6,,,,,,,
945,send,attend,5.0,3.5,4.0,4.1,4.0,4.23,4.1,,,,,,,
946,gather,attend,5.5,4.0,6.0,5.6,6.0,6.78,6.3,,,,,,,
947,absorb,withdraw,4.0,2.0,0.0,2.3,2.5,2.34,2.1,,,,,,,


In [23]:
# Collect the (word1, word2) tuples of the rows that have at least one missing score
incomplete_pairs = rows_with_null[['word1', 'word2']]
missing_word_pair_list = list(incomplete_pairs.itertuples(index=False, name=None))

# Show results
missing_word_pair_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

In [None]:
# Define file_path for the processed scores.
# The folder must match the model queried in this notebook ("meta-llama/llama-4-scout");
# it previously pointed at the llama-3.3-70b folder, which would have mixed this run's
# results into (or been blocked by) another model's data.
file_path = '../../../data/llama-4-scout/processed/en/f1.csv'

# Only write when the file does not exist yet, so earlier results are never overwritten
if not os.path.exists(file_path):
    df.to_csv(file_path, index=False)
    print("File saved successfully.")
else:
    print("File already exists. Dataframe was not saved to prevent overwriting.")