### **0. Set-up**

In [1]:
# Import libraries and utils
%run '../../utils.ipynb'

In [2]:
# Load environment variables (e.g. from a .env file) and read the OpenRouter key
load_dotenv()
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')

# Fail fast when the key is missing: passing api_key=None would let the
# OpenAI client fall back to the OPENAI_API_KEY environment variable, which
# must not be sent to the OpenRouter endpoint.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to the environment or to a .env file."
    )

# Set client (OpenAI-compatible client pointed at the OpenRouter endpoint)
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

In [3]:
# Read the cleaned SimLex-999 word pairs from disk
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv")

# Keep only the first 333 rows
en_simlex = en_simlex.iloc[:333]

# Build one (word1, word2) tuple per remaining row
tuples_list = [
    (first, second)
    for first, second in zip(en_simlex['word1'], en_simlex['word2'])
]

In [4]:
# Display the (word1, word2) pairs built from the selected subset
tuples_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

### **1. Define and Evaluate Parameters**

In [5]:
# Prompt template for a single word pair. `{word1}` and `{word2}` are
# placeholders that get replaced by the actual words of each pair; the model
# is asked for a 0-10 score in the form [('word1', 'word2', <score>)].
prompt = ("Rate the semantic similarity of the word pair: [('{word1}'), ('{word2}')] on a scale from 0 to 10, "
          "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. "
          "Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. "
          "Do not provide additional explanations or context.")

In [6]:
# Define model
model = "openai/gpt-oss-20b"

# Set sample size (number of scores collected per word pair)
sample_size = 15

# Delay between individual API calls
delay = 1.0

# Define number of sublists: one per word pair, derived from the data so it
# stays correct if the selected subset changes
n_sublists = len(tuples_list)

In [7]:
# Split the word pairs into `n_sublists` chunks
chunks = split_into_n_lists(tuples_list, n_sublists)

# Sanity check: number of chunks produced
print(len(chunks))

333


In [8]:
# Preview the formatted prompts for each chunk before calling the API
print_prompts_single(chunks, sample_size, prompt)

Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('old'), ('new')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or cont

In [9]:
# Load the cl100k_base tiktoken encoding
# NOTE(review): `encoding` is not passed to the helper below; presumably the
# helper reads it from the notebook namespace — confirm against utils.ipynb.
encoding = tiktoken.get_encoding("cl100k_base")

# Count the tokens of the formatted prompt for each chunk
token_counts = count_tokens_with_tiktoken_single(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [74, 75, 75, 76, 74, 74, 75, 74, 76, 76, 75, 75, 75, 75, 74, 75, 76, 74, 74, 75, 74, 75, 74, 77, 77, 75, 75, 76, 74, 74, 76, 76, 75, 75, 75, 75, 75, 76, 76, 76, 75, 75, 74, 76, 76, 75, 75, 75, 75, 76, 74, 75, 74, 75, 75, 76, 75, 75, 75, 74, 74, 76, 76, 77, 77, 76, 75, 76, 75, 74, 75, 76, 75, 75, 75, 75, 76, 74, 74, 75, 74, 75, 76, 76, 76, 78, 76, 76, 75, 76, 78, 75, 75, 76, 75, 74, 74, 75, 74, 74, 75, 76, 76, 76, 76, 75, 76, 74, 75, 75, 75, 75, 74, 76, 74, 74, 74, 76, 74, 74, 75, 75, 75, 74, 74, 74, 75, 75, 75, 74, 74, 74, 74, 74, 74, 76, 75, 76, 76, 74, 75, 74, 74, 75, 75, 75, 75, 76, 75, 76, 77, 74, 76, 75, 74, 75, 74, 75, 74, 74, 74, 76, 76, 75, 74, 75, 75, 75, 75, 74, 76, 74, 75, 75, 75, 77, 75, 74, 74, 74, 74, 74, 74, 75, 75, 76, 77, 75, 75, 75, 75, 76, 75, 76, 75, 74, 74, 76, 75, 74, 74, 75, 75, 75, 75, 74, 76, 75, 75, 75, 76, 75, 75, 74, 74, 76, 74, 75, 75, 74, 74, 74, 74, 75, 75, 74, 75, 76, 75, 74, 75, 74, 74, 75, 76, 74, 74, 75, 77, 74,

In [10]:
# Total number of API requests: one per formatted prompt, repeated
# `sample_size` times. Must stay below the limit (Max RPD = 10.000).
len(token_counts) * sample_size

4995

In [11]:
# Total prompt tokens summed over all chunks, each prompt counted once
# (this figure is not multiplied by the number of samples per pair)
print(sum(token_counts))

24955


### **2. Extract Data**

In [12]:
# Get results from API
# NOTE: long-running cell — the recorded run issued 4995 requests and took
# roughly 4 h 19 min, so avoid re-running it unnecessarily.
response = get_responses_single(prompt, chunks, model, sample_size, delay)

Processing: 100%|██████████| 4995/4995 [4:18:46<00:00,  3.11s/chunk]   

Total time taken: 15526.95 seconds





In [13]:
# Destination of the raw API responses
file_path = '../../../data/gpt-oss-20b/response/en/f9-1.json'

# Create the parent folder when it is missing
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Never overwrite a response file that is already on disk
if os.path.exists(file_path):
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    with open(file_path, 'w') as f:
        json.dump(response, f)
        print("File saved successfully.")

File saved successfully.


### **3. Process and Inspect Data**

In [14]:
# Parse the raw responses into a dictionary keyed by word pair
data_dict = process_responses(response)

# Keep the pairs whose number of scores differs from the sample size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('new', 'fresh'): [8.5, 8.5, 8.5, 8.5, 7.8, 7.5, 7.5, 8.5, 8.0, 7.5, 8.75, 8.25, 7.5, 8.5], ('sad', 'funny'): [0.2, 0.2, 0.2, 1.5, 2.5, 0.2, 0.0, 0.1, 0.0, 1.0, 1.0, 1.5, 1.75, 0.2], ('harsh', 'cruel'): [8.5, 7.5, 8.5, 8.0, 7.25, 7.75, 8.5, 7.5, 7.5, 7.5, 7.5, 7.6, 7.5, 8.5], ('rough', 'frigid'): [0.2, 0.0, 0.0, 0.0, 2.0, 0.05, 0.5, 0.1, 1.0, 0.0, 0.2, 0.0, 0.05, 0.1], ('rough', ',frigid'): [0.02], ('bad', 'guilty'): [7.0, 6.75, 5.5, 6.0, 6.0, 5.5, 6.5, 7.1, 7.5, 5.5, 4.75, 6.5, 4.5, 5.2], ('bottom', 'top'): [0.05, 1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.15, 0.0, 0.05, 1.0, 0.1, 0.0], ('student', 'pupil'): [9.5, 9.5, 9.75, 9.0, 9.5, 9.5, 9.0, 8.75, 9.5, 9.0, 9.0, 9.5, 9.0, 9.0], ('leg', 'arm'): [8.0, 7.0, 7.0, 6.5, 7.33, 5.0, 7.5, 7.5, 6.75, 6.5, 7.5, 6.5, 7.5, 6.75], ('actress', 'actor'): [9.0, 9.0, 9.0, 8.5, 8.5, 8.5, 8.5, 9.0, 9.5, 8.5, 8.0, 9.0, 8.5, 9.0], ('sunset', 'sunrise'): [7.5, 6.75, 7.5, 7.0, 7.5, 6.7, 6.0, 6.8, 6.5, 6.0, 5.5, 5.5, 7.5, 4.5], ('roof', 'ceiling'): [7.5, 6.0, 7

In [15]:
# Print duplicate word pairs, as reported by the helper for the source
# dataset and the parsed response dictionary
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [16]:
# Convert the parsed response dictionary to a pandas DataFrame
# (the output below shows one row per word pair and one column per score)
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.00,0.00,0.75,2.00,0.00,0.0,0.1,0.05,1.00,0.00,0.00,0.00,0.10,0.00,0.0
1,smart,intelligent,9.00,8.75,9.50,9.50,9.50,9.5,9.2,8.50,9.50,9.75,9.50,9.00,8.50,9.00,9.0
2,hard,difficult,9.00,9.50,9.75,8.75,9.50,9.0,9.5,9.75,8.75,9.20,9.80,9.50,9.00,9.50,9.5
3,happy,cheerful,8.75,9.00,9.20,8.00,9.00,9.5,9.3,9.80,9.00,8.75,8.50,9.45,8.50,9.00,8.5
4,hard,easy,0.00,0.00,0.00,0.05,0.00,0.0,0.0,0.00,0.10,0.00,0.10,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,heart,surgery,4.50,5.00,4.50,4.00,4.50,4.5,4.5,5.00,4.50,4.50,6.50,5.50,5.50,2.50,4.0
334,woman,secretary,1.75,2.50,2.00,2.00,4.50,4.0,2.0,4.50,2.50,2.50,3.25,3.50,2.50,3.25,2.0
335,man,father,8.50,7.50,8.45,8.50,7.50,6.5,7.5,8.50,6.50,8.00,7.50,7.50,6.00,8.00,0.5
336,beach,island,3.50,5.43,4.00,4.50,7.00,6.0,5.0,4.50,6.00,6.75,4.50,5.00,4.25,4.00,3.5


In [17]:
# Number of missing entries in every column
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2      5
similarity_score_3      5
similarity_score_4      5
similarity_score_5      5
similarity_score_6      5
similarity_score_7      5
similarity_score_8      5
similarity_score_9      5
similarity_score_10     5
similarity_score_11     5
similarity_score_12     5
similarity_score_13     5
similarity_score_14     5
similarity_score_15    27
dtype: int64


In [18]:
# Select every row that has a missing value in any of its columns
has_missing_value = df.isna().any(axis="columns")
rows_with_null = df[has_missing_value]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
20,new,fresh,8.5,8.5,8.5,8.5,7.8,7.5,7.5,8.5,8.0,7.5,8.75,8.25,7.5,8.5,
36,sad,funny,0.2,0.2,0.2,1.5,2.5,0.2,0.0,0.1,0.0,1.0,1.0,1.5,1.75,0.2,
63,harsh,cruel,8.5,7.5,8.5,8.0,7.25,7.75,8.5,7.5,7.5,7.5,7.5,7.6,7.5,8.5,
70,rough,frigid,0.2,0.0,0.0,0.0,2.0,0.05,0.5,0.1,1.0,0.0,0.2,0.0,0.05,0.1,
71,rough,",frigid",0.02,,,,,,,,,,,,,,
74,bad,guilty,7.0,6.75,5.5,6.0,6.0,5.5,6.5,7.1,7.5,5.5,4.75,6.5,4.5,5.2,
120,bottom,top,0.05,1.0,1.0,0.0,0.0,0.0,0.1,0.0,0.15,0.0,0.05,1.0,0.1,0.0,
122,student,pupil,9.5,9.5,9.75,9.0,9.5,9.5,9.0,8.75,9.5,9.0,9.0,9.5,9.0,9.0,
124,leg,arm,8.0,7.0,7.0,6.5,7.33,5.0,7.5,7.5,6.75,6.5,7.5,6.5,7.5,6.75,
128,actress,actor,9.0,9.0,9.0,8.5,8.5,8.5,8.5,9.0,9.5,8.5,8.0,9.0,8.5,9.0,


### **4. Fix Faulty Word Pairs**

In [None]:
# Drop every row whose word pair does not exist in the source dataset.
# Malformed pairs parsed from badly formatted responses (such as a word with
# a stray leading comma) are removed regardless of which words are affected,
# instead of relying on a single hard-coded pair.
valid_pairs = set(tuples_list)
is_valid_pair = [pair in valid_pairs for pair in zip(df['word1'], df['word2'])]

# .copy() so later assignments through df.loc work on an independent frame
df = df.loc[is_valid_pair].copy()

In [None]:
# Select every row that has a missing value in any of its columns
has_missing_value = df.isna().any(axis="columns")
rows_with_null = df[has_missing_value]

# Show results
rows_with_null

In [None]:
# Collect the (word1, word2) pairs of the incomplete rows as plain tuples
missing_word_pair_list = list(
    rows_with_null[['word1', 'word2']].itertuples(index=False, name=None)
)

# Show results
missing_word_pair_list

In [None]:
# Re-use the sample size of the main run so that each re-queried pair yields
# as many scores as the main run collected per pair
missing_sample_size = sample_size

# Delay between individual API calls (same as the main run)
missing_delay = delay

In [None]:
# Define number of sublists: as many sublists as there are missing word pairs
missing_n_sublists = len(missing_word_pair_list)

# Split list
# NOTE(review): when no pairs are missing, `missing_n_sublists` is 0 — confirm
# that split_into_n_lists handles n = 0, or skip this section in that case.
missing_chunks = split_into_n_lists(missing_word_pair_list, missing_n_sublists)

# Count number of lists
print(len(missing_chunks))

In [None]:
# Preview the formatted prompts for the missing word pairs before calling the API
print_prompts_single(missing_chunks, missing_sample_size, prompt)

In [None]:
# Get results from API for the missing word pairs only
# NOTE(review): this call passes `client` as a sixth argument, whereas the
# main extraction call in this notebook passes five arguments — confirm
# which form matches the current get_responses_single signature in utils.ipynb.
missing_response = get_responses_single(prompt, missing_chunks, model, missing_sample_size, missing_delay, client)

In [None]:
# Parse the re-queried responses into a dictionary keyed by word pair
missing_data_dict = process_responses(missing_response)

# Show results
missing_data_dict

In [None]:
# Similarity score columns
score_cols = [c for c in df.columns if "similarity_score" in c]

# Replace rows in the DataFrame using the dict
for (w1, w2), values in missing_data_dict.items():
    # A re-queried pair can again come back with too few / too many scores;
    # assigning it would raise or misalign, so report it and leave the row as is
    # (it stays visible in the null checks that follow).
    if len(values) != len(score_cols):
        print(f"Skipped ({w1!r}, {w2!r}): got {len(values)} scores, expected {len(score_cols)}.")
        continue

    mask = (df["word1"] == w1) & (df["word2"] == w2)

    # A key that matches no row (e.g. a malformed pair) cannot be written back
    if not mask.any():
        print(f"Skipped ({w1!r}, {w2!r}): no matching row in the DataFrame.")
        continue

    df.loc[mask, score_cols] = values

In [None]:
# Display the DataFrame after the missing scores have been written back
df

In [None]:
# Print duplicate word pairs
# NOTE(review): this checks `data_dict`, the dictionary parsed from the
# original responses, which the fixes in this section do not modify — so the
# result does not reflect the repaired `df`. Confirm whether that is intended.
print_duplicate_word_pairs(en_simlex, data_dict)

In [None]:
# Number of missing entries in every column
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

In [None]:
# Select every row that has a missing value in any of its columns
has_missing_value = df.isna().any(axis="columns")
rows_with_null = df[has_missing_value]

# Show results
rows_with_null

### **5. Export Data**

In [None]:
# Destination of the processed scores
file_path = '../../../data/gpt-oss-20b/processed/en/f9-1.csv'

# Create the parent folder when it is missing
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Never overwrite a CSV that is already on disk
if os.path.exists(file_path):
    print("File already exists. DataFrame was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")
