### **0. Set-up**

In [58]:
# Import libraries and utils
%run '../../utils.ipynb'

In [59]:
# Read the OpenRouter API key from the environment
# (load_dotenv() presumably populates it from a local .env file first)
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast on a missing or empty key: with api_key=None the OpenAI client
# falls back to the OPENAI_API_KEY environment variable, which would either
# raise a misleading error or send an unrelated key to OpenRouter.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; add it to the environment or to a .env file."
    )

# Client pointed at the OpenRouter endpoint
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [60]:
# Read the cleaned SimLex-999 file and keep only positional rows 666-998
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv").iloc[666:999]

# Pair up the two word columns as plain (word1, word2) tuples
tuples_list = list(
    en_simlex[['word1', 'word2']].itertuples(index=False, name=None)
)

In [61]:
# Display the (word1, word2) tuples built from the selected subset
tuples_list

[('boy', 'soldier'),
 ('belly', 'abdomen'),
 ('guy', 'girl'),
 ('bed', 'chair'),
 ('clothes', 'jacket'),
 ('gun', 'knife'),
 ('tin', 'metal'),
 ('bottle', 'container'),
 ('hen', 'turkey'),
 ('meat', 'bread'),
 ('arm', 'bone'),
 ('neck', 'spine'),
 ('apple', 'lemon'),
 ('agony', 'grief'),
 ('assignment', 'task'),
 ('night', 'dawn'),
 ('dinner', 'soup'),
 ('calf', 'bull'),
 ('snow', 'storm'),
 ('nail', 'hand'),
 ('dog', 'horse'),
 ('arm', 'neck'),
 ('ball', 'glove'),
 ('flu', 'fever'),
 ('fee', 'salary'),
 ('nerve', 'brain'),
 ('beast', 'animal'),
 ('dinner', 'chicken'),
 ('girl', 'maid'),
 ('child', 'boy'),
 ('alcohol', 'wine'),
 ('nose', 'mouth'),
 ('street', 'car'),
 ('bell', 'door'),
 ('box', 'hat'),
 ('belief', 'impression'),
 ('bias', 'opinion'),
 ('attention', 'awareness'),
 ('anger', 'mood'),
 ('elegance', 'style'),
 ('beauty', 'age'),
 ('book', 'theme'),
 ('friend', 'mother'),
 ('vitamin', 'iron'),
 ('car', 'factory'),
 ('pact', 'condition'),
 ('chapter', 'choice'),
 ('arithmeti

### **1. Define and Evaluate Parameters**

In [62]:
# Prompt template: the {word1} and {word2} placeholders are filled in per word pair
prompt = " ".join([
    "Rate the semantic similarity of the word pair: [('{word1}'), ('{word2}')] on a scale from 0 to 10,",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)].",
    "Do not provide additional explanations or context.",
])

In [63]:
# Number of sublists the word pairs are split into
n_sublists = 333

# Pause between individual API calls (unit not shown here; presumably seconds)
delay = 1

# Number of ratings requested per word pair
sample_size = 15

# Model identifier handed to the API helper
model = "openai/gpt-oss-20b"

In [64]:
# Split the list of word pairs into n_sublists chunks
# (split_into_n_lists is not defined in this notebook; presumably it comes from utils.ipynb)
chunks = split_into_n_lists(tuples_list, n_sublists)

# Sanity check: print how many chunks were produced
print(len(chunks))

333


In [65]:
# Print the formatted prompts for the chunks
# NOTE(review): the recorded output repeats the same prompt several times,
# presumably once per sample (sample_size) — confirm in the helper's definition
print_prompts_single(chunks, sample_size, prompt)

Rate the semantic similarity of the word pair: [('boy'), ('soldier')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('boy'), ('soldier')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('boy'), ('soldier')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanat

In [66]:
# Load the tiktoken encoding
# NOTE(review): `encoding` is not referenced again in this cell; presumably the
# counting helper below reads it as a global — confirm in the helper's definition
# NOTE(review): cl100k_base may not be the tokenizer of the gpt-oss model configured
# above, so treat these counts as estimates — confirm
encoding = tiktoken.get_encoding("cl100k_base")

# Count the tokens of each formatted prompt
token_counts = count_tokens_with_tiktoken_single(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [75, 77, 75, 74, 76, 74, 74, 75, 75, 75, 74, 75, 75, 76, 74, 75, 75, 75, 74, 75, 74, 74, 75, 75, 74, 75, 75, 76, 74, 74, 75, 75, 74, 74, 74, 75, 75, 75, 75, 76, 75, 74, 74, 75, 74, 75, 74, 76, 74, 74, 74, 75, 75, 77, 75, 75, 74, 75, 75, 75, 75, 74, 76, 74, 74, 74, 75, 76, 75, 75, 74, 76, 75, 76, 74, 74, 74, 76, 77, 76, 75, 74, 76, 75, 74, 74, 74, 75, 75, 75, 75, 75, 77, 74, 76, 74, 74, 74, 74, 75, 75, 76, 74, 74, 75, 75, 76, 77, 76, 75, 75, 74, 75, 74, 76, 76, 76, 74, 75, 75, 75, 74, 74, 74, 74, 74, 74, 74, 74, 76, 77, 74, 75, 74, 74, 74, 75, 75, 76, 75, 75, 75, 75, 76, 74, 74, 75, 75, 74, 75, 75, 75, 75, 76, 75, 75, 74, 74, 75, 74, 74, 74, 74, 75, 75, 75, 74, 75, 75, 74, 75, 74, 75, 75, 76, 74, 74, 74, 74, 74, 75, 74, 74, 74, 75, 74, 77, 74, 74, 77, 74, 75, 74, 74, 74, 75, 74, 74, 75, 75, 76, 74, 75, 74, 74, 74, 76, 76, 76, 75, 76, 76, 76, 77, 75, 75, 74, 76, 74, 74, 76, 74, 75, 76, 76, 75, 75, 75, 75, 75, 74, 74, 74, 75, 75, 75, 76, 74, 75, 74,

In [67]:
# Total number of API requests: one per formatted prompt per sample.
# Compare against the limit noted here: max RPD (presumably requests per day) = 10,000.
# Uses sample_size instead of a hard-coded 15 and avoids building a repeated list
# just to measure its length.
len(token_counts) * sample_size

4995

In [68]:
# Total tokens summed over token_counts
# (each entry is counted once; the sum is not multiplied by sample_size)
print(sum(token_counts))

24915


### **2. Extract Data**

In [12]:
# Query the API for every chunk (get_responses_single is not defined in this notebook)
# NOTE: long-running — the recorded run processed 4995 calls in about 4 h 12 min (15094 s)
response = get_responses_single(prompt, chunks, model, sample_size, delay)

Processing: 100%|██████████| 4995/4995 [4:11:34<00:00,  3.02s/chunk]  

Total time taken: 15094.46 seconds





In [13]:
# Target location for the raw API responses
file_path = '../../../data/gpt-oss-20b/response/en/f9-3.json'

# Create the parent folder when it is missing
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Never overwrite an earlier export: write only when no file is present yet
if os.path.exists(file_path):
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    with open(file_path, 'w') as json_file:
        json.dump(response, json_file)
        print("File saved successfully.")

File saved successfully.


### **3. Process and Inspect Data**

In [70]:
# Parse the raw responses into a {(word1, word2): [scores]} dictionary
data_dict = process_responses(response)

# Collect the pairs whose number of scores differs from the sample size
# (either fewer or more than expected)
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('night', 'dawn'): [2.0, 3.0, 1.0, 2.5, 2.0, 4.5, 2.5, 3.0, 0.2, 2.5, 0.0, 0.1, 2.5, 3.25], ('beast', 'animal'): [9.2, 8.5, 9.0, 9.0, 8.0, 8.0, 8.0, 8.7, 8.5, 8.5, 8.33, 8.0, 8.5, 8.0], ('jail', 'choice'): [0.2, 2.5, 1.0, 1.5, 0.05, 0.15, 1.2, 1.5, 0.1, 0.5, 1.0, 0.1, 0.0, 0.1], ('task', 'woman'): [0.0, 0.05, 0.0, 0.1, 0.0, 0.0, 0.05, 0.02, 0.05, 0.1, 0.0, 0.0, 0.0, 0.0], ('hole', 'wife'): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ('deck', 'mouse'): [0.1, 0.1, 0.0, 0.0, 0.15, 0.0, 0.1, 0.5, 0.0, 0.05, 0.0, 0.3, 0.0, 0.0], ('princess', 'island'): [0.0, 0.05, 0.2, 0.25, 0.1, 0.2, 0.1, 0.15, 0.0, 0.0, 1.0, 0.0, 0.2, 0.05], ('inform', 'notify'): [8.75, 8.5, 8.75, 8.25, 8.5, 9.5, 8.5, 8.5, 8.67, 9.0, 8.5, 8.5, 8.5, 8.5], ('enjoy', 'entertain'): [7.25, 6.5, 8.0, 7.0, 7.5, 6.0, 7.5, 7.2, 7.5, 6.8, 4.5, 6.7, 7.0, 6.25], ('roam', 'wander'): [9.5, 9.5, 9.0, 7.5, 8.5, 8.5, 8.5, 9.25, 8.5, 9.5, 9.5, 9.5, 9.8, 9.5], ('come', 'attend'): [6.8, 4.75, 3.5, 5.5, 6.5, 6.0, 

In [71]:
# Report duplicate word pairs (helper not defined in this notebook).
# The recorded run printed two empty DataFrames, i.e. nothing was reported.
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [72]:
# Convert the dictionary to a pandas DataFrame; the recorded output shows one row per
# word pair with columns word1, word2 and similarity_score_1 ... similarity_score_15
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,boy,soldier,2.5,3.75,2.0,2.00,2.00,4.0,4.00,1.8,3.0,3.00,2.0,3.00,3.00,2.50,2.00
1,belly,abdomen,8.5,9.00,9.5,8.90,7.50,10.0,9.30,9.5,9.5,9.00,8.5,9.00,8.30,8.50,9.50
2,guy,girl,4.5,4.25,3.5,4.50,4.00,2.5,3.00,2.0,2.3,6.00,4.0,4.50,6.00,2.50,2.70
3,bed,chair,5.3,3.25,6.0,4.50,4.50,3.5,2.50,5.0,5.0,4.50,5.5,3.50,4.50,4.70,5.50
4,clothes,jacket,8.7,7.50,8.5,8.75,8.50,8.0,8.00,8.5,7.5,8.00,8.5,8.00,7.50,8.00,9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,join,acquire,4.0,2.75,4.5,6.50,4.00,4.5,6.25,3.5,3.5,4.00,6.5,4.00,2.50,2.50,7.50
332,send,attend,1.0,0.15,1.3,0.10,0.75,1.5,2.50,0.2,1.5,1.00,2.0,0.05,0.05,0.00,2.50
333,gather,attend,5.5,6.00,7.5,5.00,5.00,4.5,4.50,7.5,7.0,6.25,5.5,7.20,7.50,6.50,6.50
334,absorb,withdraw,2.5,0.20,0.1,0.20,0.05,0.0,1.50,1.5,0.1,0.10,0.1,1.00,0.00,0.50,1.00


In [73]:
# Number of missing values in each column
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2      3
similarity_score_3      3
similarity_score_4      3
similarity_score_5      3
similarity_score_6      3
similarity_score_7      3
similarity_score_8      3
similarity_score_9      3
similarity_score_10     3
similarity_score_11     3
similarity_score_12     3
similarity_score_13     3
similarity_score_14     4
similarity_score_15    25
dtype: int64


In [74]:
# Keep only the rows in which one or more columns are missing
rows_with_null = df.loc[df.isna().any(axis="columns")]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
15,night,dawn,2.0,3.0,1.0,2.5,2.0,4.5,2.5,3.0,0.2,2.5,0.0,0.1,2.5,3.25,
26,beast,animal,9.2,8.5,9.0,9.0,8.0,8.0,8.0,8.7,8.5,8.5,8.33,8.0,8.5,8.0,
55,jail,choice,0.2,2.5,1.0,1.5,0.05,0.15,1.2,1.5,0.1,0.5,1.0,0.1,0.0,0.1,
61,task,woman,0.0,0.05,0.0,0.1,0.0,0.0,0.05,0.02,0.05,0.1,0.0,0.0,0.0,0.0,
81,hole,wife,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
96,deck,mouse,0.1,0.1,0.0,0.0,0.15,0.0,0.1,0.5,0.0,0.05,0.0,0.3,0.0,0.0,
101,princess,island,0.0,0.05,0.2,0.25,0.1,0.2,0.1,0.15,0.0,0.0,1.0,0.0,0.2,0.05,
133,inform,notify,8.75,8.5,8.75,8.25,8.5,9.5,8.5,8.5,8.67,9.0,8.5,8.5,8.5,8.5,
143,enjoy,entertain,7.25,6.5,8.0,7.0,7.5,6.0,7.5,7.2,7.5,6.8,4.5,6.7,7.0,6.25,
153,roam,wander,9.5,9.5,9.0,7.5,8.5,8.5,8.5,9.25,8.5,9.5,9.5,9.5,9.8,9.5,


### **4. Fix Faulty Word Pairs**

In [75]:
# Drop every row that has at least one missing score, then renumber the index.
# The affected pairs are discarded here rather than re-queried, so they are absent from the export.
# In the recorded run this reduced the frame from 335 rows (index 0-334) to 310 rows (index 0-309).
# NOTE(review): 335 rows exceed the 333 pairs that were sent — presumably some responses
# came back with altered word pairs and produced extra keys; confirm before relying on the counts.
df = df.dropna().reset_index(drop=True)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,boy,soldier,2.5,3.75,2.0,2.00,2.00,4.0,4.00,1.8,3.0,3.00,2.0,3.00,3.00,2.50,2.00
1,belly,abdomen,8.5,9.00,9.5,8.90,7.50,10.0,9.30,9.5,9.5,9.00,8.5,9.00,8.30,8.50,9.50
2,guy,girl,4.5,4.25,3.5,4.50,4.00,2.5,3.00,2.0,2.3,6.00,4.0,4.50,6.00,2.50,2.70
3,bed,chair,5.3,3.25,6.0,4.50,4.50,3.5,2.50,5.0,5.0,4.50,5.5,3.50,4.50,4.70,5.50
4,clothes,jacket,8.7,7.50,8.5,8.75,8.50,8.0,8.00,8.5,7.5,8.00,8.5,8.00,7.50,8.00,9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306,join,acquire,4.0,2.75,4.5,6.50,4.00,4.5,6.25,3.5,3.5,4.00,6.5,4.00,2.50,2.50,7.50
307,send,attend,1.0,0.15,1.3,0.10,0.75,1.5,2.50,0.2,1.5,1.00,2.0,0.05,0.05,0.00,2.50
308,gather,attend,5.5,6.00,7.5,5.00,5.00,4.5,4.50,7.5,7.0,6.25,5.5,7.20,7.50,6.50,6.50
309,absorb,withdraw,2.5,0.20,0.1,0.20,0.05,0.0,1.50,1.5,0.1,0.10,0.1,1.00,0.00,0.50,1.00


### **5. Export Data**

In [76]:
# Target location for the processed scores
file_path = '../../../data/gpt-oss-20b/processed/en/f9-3.csv'

# Create the parent folder when it is missing
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Never overwrite an earlier export: write only when no file is present yet
if os.path.exists(file_path):
    print("File already exists. DataFrame was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")


File saved successfully.
