### **0. Set-up**

In [10]:
# Import libraries and utils
%run '../../utils.ipynb'

In [11]:
# Get api key
# load_dotenv() comes from utils.ipynb (presumably python-dotenv: reads a local
# .env file into the process environment — confirm).
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast when the key is missing or empty. Passing api_key=None would let the
# OpenAI client fall back to the OPENAI_API_KEY environment variable, which
# could send a key for a different provider to the OpenRouter endpoint.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Define it in the environment or in a .env file."
    )

# Set client
# The OpenAI-compatible client is pointed at OpenRouter's API base URL.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [12]:
# Read the cleaned SimLex-999 word pairs and drop the first row in one step.
# The dropped row is presumably ('old', 'new'), the pair the prompt uses as its
# worked example — confirm against the CSV.
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv").iloc[1:]

# For a quick trial run, restrict the frame here, e.g. with en_simlex.head(100).

# Pair up the two word columns row by row as (word1, word2) tuples
tuples_list = [
    (first_word, second_word)
    for first_word, second_word in zip(en_simlex['word1'], en_simlex['word2'])
]

In [13]:
# Show results
# Displays every (word1, word2) pair that is later split into chunks and sent
# to the model.
tuples_list

[('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),
 ('strong', 'pro

### **1. Define and Evaluate Parameters**

In [14]:
# Define prompt
# The instructions are written one sentence per entry and joined with single
# spaces, so the final prompt is one line of text. It fixes the 0-10 scale,
# the two-decimal precision, the exact list-of-tuples response format, and
# gives one worked example pair.
prompt = " ".join((
    "Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents "
    "no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals.",
    "The response should strictly adhere to the structure: "
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...].",
    "Do not provide additional explanations or context.",
    "An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)]",
))

In [15]:
# Model identifier passed to get_responses
model = "meta-llama/llama-3.3-70b-instruct"

# Number of sublists the word pairs are divided into
n_sublists = 25

# Expected number of scores per word pair; the recorded run made
# n_sublists * sample_size = 375 requests, so presumably each sublist is
# queried sample_size times — confirm in get_responses.
sample_size = 15

# Delay between individual API calls (presumably in seconds — confirm in get_responses)
delay = 15.0

In [16]:
# Split list
# split_into_n_lists comes from utils.ipynb; it is asked for n_sublists
# sublists of word pairs (how leftover pairs are distributed is not visible
# here — check the helper).
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count the number of lists
# Sanity check: the printed value should equal n_sublists.
print(len(chunks))

25


In [17]:
# Print prompts for each chunk
# Preview of what will be sent before any API call is made. The recorded output
# shows the instruction text followed by '---' and the chunk's word pairs;
# the exact formatting lives in print_prompts (utils.ipynb).
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)] --- ["('smart', 'intelligent'), ('hard', 'difficult'), ('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 'broad'), ('si

In [19]:
# Load encoding
# NOTE(review): `encoding` is not passed to the helper below; presumably
# count_tokens_with_tiktoken resolves its own encoding — confirm whether this
# assignment is actually needed.
# NOTE(review): "cl100k_base" is a tiktoken encoding name, while `model` is a
# Llama model, so these counts are presumably only an estimate of the real
# prompt size — confirm.
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk
# Produces one count per formatted prompt (the recorded output lists 25 values).
token_counts = count_tokens_with_tiktoken(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [392, 393, 397, 378, 386, 379, 376, 386, 387, 391, 390, 390, 380, 379, 391, 392, 382, 377, 386, 379, 379, 380, 378, 383, 369]


### **2. Extract and Process Data**

In [20]:
# Get results from API
# Long-running: the recorded run issued 375 requests and took about 3h15m
# (11723.88 seconds). Re-running this cell repeats all of those requests, so
# save `response` to disk right after it finishes.
# get_responses is defined in utils.ipynb; how it uses `client` and handles
# failed requests is not visible here.
response = get_responses(chunks, prompt, model, sample_size, delay)

Processing: 100%|██████████| 375/375 [3:15:23<00:00, 31.26s/chunk]  

Total time taken: 11723.88 seconds





In [22]:
# Define filepath
file_path = '../../../data/llama-3.3-70b/response/en/f2.json'

# Serialize before touching the disk: if the response cannot be encoded, no
# empty or partial file is left behind that would block later save attempts.
serialized_response = json.dumps(response)

# Make sure the target folder exists so hours of API results are not lost to a
# missing directory.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save without overwriting: mode 'x' creates the file only if it does not yet
# exist and raises FileExistsError otherwise. This makes the existence check
# and the creation a single step instead of a separate check followed by a write.
try:
    with open(file_path, 'x') as f:
        f.write(serialized_response)
except FileExistsError:
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    print("File saved successfully.")

File saved successfully.


In [41]:
# Turn the raw API responses into a dictionary. Judging by the output, the keys
# are (word1, word2) tuples and the values are lists of scores.
data_dict = process_responses(response)

# Keep only the word pairs whose number of scores differs from sample_size
# (either fewer or more scores than expected)
higher_lower_samples = {
    word_pair: scores
    for word_pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show the offending pairs together with the scores they did receive
print(higher_lower_samples)

{('hard', 'difficult'): [9.23, 9.25, 8.97, 9.25, 9.23, 8.92, 9.28, 8.95, 9.23, 8.45, 8.92, 9.15, 8.5, 8.42], ('bad', 'awful'): [8.95, 9.45, 8.95, 9.15, 8.58, 9.13, 9.55, 8.9, 8.85, 8.85, 9.29, 9.51, 8.8, 8.15], ('happy', 'angry'): [2.11, 1.85, 1.41, 1.6, 1.67, 2.15, 1.51, 1.8, 1.85, 1.85, 1.86, 1.49, 1.2, 1.25], ('bad', 'immoral'): [8.73, 9.25, 9.27, 8.8, 8.76, 8.69, 9.17, 8.25, 8.69, 8.65, 8.85, 9.17, 8.6, 8.48], ('confident', 'sure'): [9.24, 9.85, 9.51, 9.4, 9.43, 9.17, 9.39, 9.15, 9.49, 9.08, 9.01, 9.13, 9.1, 9.05], ('steak', 'meat'): [8.36, 8.46, 8.93, 8.95, 9.11, 8.85, 8.83, 8.96, 9.24, 9.4, 9.0, 8.93, 9.21, 8.76], ('nail', 'thumb'): [4.58, 3.98, 4.86, 5.26, 4.59, 4.91, 3.69, 4.67, 5.18, 4.8, 4.5, 3.68, 4.38, 3.92], ('band', 'orchestra'): [8.99, 8.65, 9.13, 8.83, 8.69, 9.04, 8.55, 8.53, 9.14, 9.1, 8.25, 9.02, 9.14, 8.65], ('book', 'bible'): [7.49, 7.68, 7.65, 7.56, 7.83, 8.26, 7.65, 7.71, 8.56, 8.2, 7.5, 7.44, 8.85, 7.57], ('business', 'industry'): [9.01, 8.91, 9.04, 9.07, 9.21, 9

In [42]:
# Print duplicate word pairs
# Helper from utils.ipynb. The recorded output is two empty DataFrames,
# presumably one check for the dataset and one for the responses — confirm in
# the helper. Empty frames mean no duplicates were reported.
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [43]:
# Convert dict to Pandas DataFrame
# The resulting frame has one row per word pair and columns word1, word2,
# similarity_score_1 ... similarity_score_15 (see the recorded output).
# NOTE(review): the recorded output has row labels up to 1008 and contains a
# 0.45 score for ('smart', 'intelligent') next to values around 9 — both are
# worth checking against the input pairs and the raw responses.
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,smart,intelligent,9.45,9.85,9.45,8.95,8.95,9.45,9.45,0.45,9.45,8.95,8.92,9.45,9.42,9.20,8.95
1,hard,difficult,9.23,9.25,8.97,9.25,9.23,8.92,9.28,8.95,9.23,8.45,8.92,9.15,8.50,8.42,
2,happy,cheerful,9.81,9.40,9.21,9.50,9.45,9.21,9.81,9.00,9.20,9.41,9.21,9.21,9.28,9.80,9.21
3,hard,easy,1.25,1.20,1.29,1.20,1.21,1.23,1.22,1.23,1.40,1.29,1.17,1.23,1.05,1.20,1.39
4,fast,rapid,9.58,9.80,9.56,9.80,9.67,9.56,9.63,9.56,9.50,9.58,9.50,9.56,9.58,9.50,9.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005,join,acquire,8.51,8.59,8.46,8.23,8.72,8.16,8.26,8.56,8.46,7.91,7.85,7.90,7.93,8.46,8.46
1006,send,attend,7.67,6.49,6.67,6.59,6.49,6.94,5.83,7.34,6.93,5.74,5.67,6.00,6.26,6.81,6.75
1007,gather,attend,8.23,7.39,8.19,7.85,7.94,8.49,7.42,8.23,8.19,7.43,7.19,7.50,7.18,8.19,8.13
1008,absorb,withdraw,5.29,4.11,3.85,4.17,5.11,3.19,3.19,3.15,4.67,3.59,3.56,4.60,4.21,4.13,3.51


In [44]:
# Count null values
# Per-column count of missing cells; a missing similarity_score_k means the
# pair has fewer than k scores in the frame.
count_null_values = df.isnull().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2     10
similarity_score_3     11
similarity_score_4     12
similarity_score_5     12
similarity_score_6     12
similarity_score_7     12
similarity_score_8     12
similarity_score_9     13
similarity_score_10    13
similarity_score_11    13
similarity_score_12    13
similarity_score_13    13
similarity_score_14    17
similarity_score_15    88
dtype: int64


In [45]:
# Flag every row that is missing at least one value, then keep only those rows
has_null = df.isnull().any(axis=1)
rows_with_null = df.loc[has_null]

# Display the incomplete rows
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
1,hard,difficult,9.23,9.25,8.97,9.25,9.23,8.92,9.28,8.95,9.23,8.45,8.92,9.15,8.50,8.42,
10,bad,awful,8.95,9.45,8.95,9.15,8.58,9.13,9.55,8.90,8.85,8.85,9.29,9.51,8.80,8.15,
25,happy,angry,2.11,1.85,1.41,1.60,1.67,2.15,1.51,1.80,1.85,1.85,1.86,1.49,1.20,1.25,
34,bad,immoral,8.73,9.25,9.27,8.80,8.76,8.69,9.17,8.25,8.69,8.65,8.85,9.17,8.60,8.48,
39,confident,sure,9.24,9.85,9.51,9.40,9.43,9.17,9.39,9.15,9.49,9.08,9.01,9.13,9.10,9.05,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
966,leave,ignore,5.17,5.48,6.19,5.91,6.49,5.64,5.35,5.67,6.43,5.83,5.46,6.53,5.43,5.83,
967,accept,recommend,7.41,7.44,7.94,7.85,8.58,7.26,7.29,7.22,8.11,7.85,7.35,8.13,7.67,7.19,
968,leave,wander,6.28,6.17,6.67,6.49,6.95,6.18,6.49,6.34,6.89,6.45,6.17,6.85,6.35,6.67,
969,keep,give,5.53,5.92,5.49,5.63,5.93,5.49,5.92,5.92,5.39,5.19,5.69,5.93,5.91,5.46,


In [46]:
# Collect the (word1, word2) pair of every row that lacks at least one score
missing_word_pair_list = [
    (first_word, second_word)
    for first_word, second_word in zip(rows_with_null['word1'], rows_with_null['word2'])
]

# Display the pairs that need attention.
# NOTE(review): the recorded output contains pairs such as ('cent', 'ize'),
# ('mouse', 'anagement') and ('task', 'traw') right after ('cent', 'size'),
# ('mouse', 'management') and ('task', 'straw'). They look like truncated
# duplicates, presumably introduced while parsing the responses — confirm.
missing_word_pair_list

[('hard', 'difficult'),
 ('bad', 'awful'),
 ('happy', 'angry'),
 ('bad', 'immoral'),
 ('confident', 'sure'),
 ('steak', 'meat'),
 ('nail', 'thumb'),
 ('band', 'orchestra'),
 ('book', 'bible'),
 ('business', 'industry'),
 ('winter', 'season'),
 ('decade', 'century'),
 ('alcohol', 'gin'),
 ('hat', 'coat'),
 ('window', 'door'),
 ('house', 'apartment'),
 ('bread', 'flour'),
 ('church', 'choir'),
 ('metal', 'aluminum'),
 ('cloud', 'fog'),
 ('bed', 'blanket'),
 ('clothes', 'fabric'),
 ('communication', 'language'),
 ('shore', 'coast'),
 ('bed', 'crib'),
 ('competence', 'ability'),
 ('cloud', 'haze'),
 ('grass', 'moss'),
 ('newspaper', 'magazine'),
 ('word', 'newspaper'),
 ('newspaper', 'journal'),
 ('newspaper', 'writer'),
 ('newspaper', 'news'),
 ('cent', 'size'),
 ('mouse', 'management'),
 ('task', 'straw'),
 ('course', 'stomach'),
 ('hymn', 'straw'),
 ('wife', 'straw'),
 ('apple', 'sunshine'),
 ('stomach', 'vein'),
 ('cent', 'ize'),
 ('mouse', 'anagement'),
 ('task', 'traw'),
 ('course', 

In [None]:
# Define file_path
# NOTE(review): this folder layout (data/prompt/en/<model>/) differs from the
# one used for the raw JSON responses (data/<model>/response/en/) — confirm the
# intended location.
file_path = '../../../data/prompt/en/llama-3.3-70b/f2.csv'

# Render the CSV text first so a serialization error cannot leave a partial
# file behind that would block later save attempts.
csv_text = df.to_csv(index=False)

# Save without overwriting: mode 'x' creates the file only if it does not yet
# exist and raises FileExistsError otherwise, which makes the existence check
# and the creation a single step. newline='' keeps the line endings produced by
# pandas unchanged, and UTF-8 matches the to_csv default encoding.
try:
    with open(file_path, 'x', encoding='utf-8', newline='') as f:
        f.write(csv_text)
except FileExistsError:
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    print("File saved successfully.")