### **0. Set-up**

In [12]:
# Import libraries and utils
%run '../../utils.ipynb'

In [13]:
# Load variables from a local .env file (if one exists) into the process environment
load_dotenv()

# Read the API key and fail early with a clear message when it is missing,
# instead of hitting an authentication error deep inside the long API run
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in a .env file.")

# Set client, passing the key explicitly so the value read above is the one actually used
client = OpenAI(api_key=OPENAI_API_KEY)

In [14]:
# Read the tab-separated SimLex-999 file and keep only positions 333..665
# (the slice handled by this notebook)
en_simlex = pd.read_csv("../../../data/dataset/en-simlex-999.txt", sep='\t').iloc[333:666]

# Build the list of (word1, word2) pairs as plain tuples
tuples_list = list(
    en_simlex[['word1', 'word2']].itertuples(index=False, name=None)
)

In [15]:
# Display the selected slice of the SimLex-999 frame
en_simlex

Unnamed: 0,word1,word2,POS,SimLex999,conc(w1),conc(w2),concQ,Assoc(USF),SimAssoc333,SD(SimLex)
333,game,fun,N,3.42,4.50,1.97,2,0.47,1,2.07
334,weekend,week,N,4.00,3.83,3.48,2,0.47,1,1.57
335,couple,pair,N,8.33,3.93,3.88,3,0.47,1,1.50
336,woman,wife,N,5.72,4.46,4.13,3,0.47,1,1.35
337,sheep,cattle,N,4.77,4.90,4.64,4,0.47,1,0.47
...,...,...,...,...,...,...,...,...,...,...
661,letter,paragraph,N,4.00,4.70,3.96,3,0.13,0,1.12
662,page,paragraph,N,3.03,4.90,3.96,3,0.13,0,1.37
663,steeple,chapel,N,7.08,4.74,4.60,3,0.13,0,1.03
664,muscle,bone,N,3.65,4.50,4.90,3,0.13,0,1.68


### **1. Define and Evaluate Parameters**

In [16]:
# Prompt template: {word1} and {word2} are the placeholders filled in for each word pair.
# The fragments are concatenated without a separator; each fragment carries its own trailing space.
prompt = "".join([
    "Rate the semantic similarity of the word pair: [('{word1}'), ('{word2}')] on a scale from 0 to 10, ",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. ",
    "Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. ",
    "Do not provide additional explanations or context.",
])

In [17]:
# Model identifier passed to the API helper
model = "gpt-3.5-turbo-0125"

# Number of sublists the word pairs are split into
# (alternative value kept from an earlier configuration: 999)
n_sublists = 333

# Sample size; the recorded results hold this many scores per word pair
# (alternative value kept from an earlier configuration: 5)
sample_size = 15

# Delay between individual API calls
delay = 5.0

In [18]:
# Split the list of word pairs into n_sublists chunks
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count the number of lists (the recorded run printed 333, matching n_sublists)
print(len(chunks))

333


In [22]:
# Print the prompts for each chunk
# NOTE(review): the recorded output repeats the same formatted prompt several times,
# presumably once per sample (sample_size) — confirm against print_prompts_single in utils.
print_prompts_single(chunks, sample_size, prompt)

Rate the semantic similarity of the word pair: [('game'), ('fun')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('game'), ('fun')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or context.
Rate the semantic similarity of the word pair: [('game'), ('fun')] on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>)]. Do not provide additional explanations or c

In [23]:
# Load the encoding that matches the configured model, so the token counts stay
# correct if `model` is changed; fall back to the previously hard-coded encoding
# when tiktoken does not recognise the model name.
try:
    encoding = tiktoken.encoding_for_model(model)
except KeyError:
    encoding = tiktoken.get_encoding("cl100k_base")

# Count the tokens per chunk
# NOTE(review): `encoding` is not passed to the helper, so it is presumably read
# as a global inside count_tokens_with_tiktoken_single — confirm in utils.
token_counts = count_tokens_with_tiktoken_single(chunks, prompt)
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [74, 75, 75, 74, 76, 75, 77, 74, 74, 74, 75, 75, 75, 76, 74, 75, 75, 75, 75, 76, 75, 75, 75, 74, 76, 75, 75, 77, 76, 76, 76, 74, 75, 74, 75, 76, 74, 75, 77, 75, 74, 74, 75, 77, 75, 76, 76, 75, 76, 75, 75, 75, 76, 74, 75, 75, 75, 74, 74, 75, 74, 76, 74, 75, 75, 76, 75, 74, 75, 75, 75, 74, 75, 76, 75, 74, 75, 74, 74, 77, 74, 76, 74, 74, 75, 76, 75, 76, 76, 74, 75, 75, 77, 74, 75, 76, 75, 74, 75, 75, 76, 74, 76, 76, 75, 74, 76, 75, 76, 77, 77, 75, 75, 76, 75, 74, 75, 75, 75, 75, 76, 76, 75, 76, 75, 74, 75, 76, 75, 74, 75, 74, 75, 75, 74, 75, 75, 75, 74, 76, 74, 76, 74, 75, 74, 75, 74, 75, 74, 74, 74, 74, 74, 77, 75, 75, 75, 75, 75, 75, 74, 74, 74, 75, 75, 75, 76, 75, 76, 74, 74, 77, 75, 74, 74, 75, 74, 74, 75, 74, 74, 75, 76, 74, 76, 76, 75, 75, 75, 75, 77, 74, 76, 74, 75, 76, 75, 74, 75, 75, 75, 75, 75, 75, 75, 74, 76, 74, 74, 75, 74, 75, 74, 74, 75, 74, 75, 75, 74, 74, 74, 75, 75, 74, 75, 75, 74, 76, 76, 74, 75, 76, 76, 76, 75, 74, 75, 76, 75, 74,

In [24]:
# Total number of API requests: one per formatted prompt per sample.
# Compare against the rate limit noted for this run: max RPD = 10,000
# (RPD presumably means requests per day).
# Uses sample_size instead of a repeated literal and avoids building a
# replicated list just to measure its length.
len(token_counts) * sample_size

4995

In [25]:
# Sum of the per-prompt token counts, i.e. the prompt tokens for one pass over all chunks.
# NOTE(review): this is not multiplied by sample_size, so it presumably understates the
# total tokens sent when every prompt is issued sample_size times — confirm.
print(sum(token_counts))

24968


### **2. Extract and Process Data**

In [26]:
# Process each chunk and get results using the OpenAI API.
# The recorded full run took about 11 hours, so reuse the saved responses when
# they exist and only call the API when no cached file is found. This also keeps
# `response` defined for the cells below after a kernel restart.
response_cache_path = '../../../data/response/en/gpt-3.5-turbo-0125/f9-2.json'

if os.path.exists(response_cache_path):
    # NOTE(review): assumes the JSON round-trip preserves the structure that
    # process_responses expects (e.g. no tuple or non-string keys) — confirm.
    with open(response_cache_path, 'r') as f:
        response = json.load(f)
    print("Loaded cached responses from file.")
else:
    response = get_responses_single(prompt, chunks, model, sample_size, delay)

Processing: 100%|██████████| 4995/4995 [11:01:14<00:00,  7.94s/chunk]

Total time taken: 39674.49 seconds





In [27]:
# Destination of the raw API responses
file_path = '../../../data/response/en/gpt-3.5-turbo-0125/f9-2.json'

# Never overwrite an existing file; write the responses only when the path is free
if os.path.exists(file_path):
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    with open(file_path, 'w') as f:
        json.dump(response, f)
        print("File saved successfully.")

File saved successfully.


In [28]:
# Parse the raw responses into a dictionary (regular-expression based extraction)
data_dict = process_responses(response)

# Collect entries whose number of values is higher or lower than the sample size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}
print(higher_lower_samples)

{}


In [29]:
# Process data and print duplicate word pairs
# (the recorded run printed two empty DataFrames, i.e. no duplicates were reported)
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [30]:
# Convert dict to Pandas DataFrame
# (the recorded output has word1/word2 columns plus one similarity_score_<k> column per sample)
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,game,fun,1.50,3.77,4.50,0.15,0.42,3.50,3.60,2.50,2.50,0.49,2.50,3.2,4.50,2.12,3.50
1,weekend,week,0.75,8.00,7.00,8.00,7.50,8.50,7.50,7.00,8.50,7.00,1.50,7.5,8.00,8.50,8.50
2,couple,pair,8.50,8.50,8.50,8.50,8.50,7.50,8.00,8.00,8.00,8.50,8.00,8.0,8.00,7.50,9.00
3,woman,wife,8.50,9.50,8.50,8.50,7.50,8.50,9.00,8.75,8.50,8.50,8.50,9.5,8.50,8.75,9.50
4,sheep,cattle,0.15,2.20,0.20,2.00,0.20,0.13,3.75,0.23,2.50,6.00,1.00,3.0,0.10,1.50,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
328,letter,paragraph,1.23,3.00,2.50,1.00,4.20,2.50,2.80,2.85,3.00,2.00,0.28,3.0,3.00,1.20,2.50
329,page,paragraph,4.50,3.00,3.00,2.50,6.50,6.00,3.00,5.00,2.00,6.00,3.00,5.0,1.50,5.00,6.00
330,steeple,chapel,4.50,3.25,6.50,4.50,4.50,2.50,3.00,6.50,2.50,1.50,3.50,2.5,4.50,3.50,3.50
331,muscle,bone,2.50,1.35,2.67,3.20,1.20,2.50,2.71,2.50,2.50,2.00,2.50,1.5,2.38,1.20,1.50


In [31]:
# Tally the missing values in every column
count_null_values = df.isna().sum()
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                  0
word2                  0
similarity_score_1     0
similarity_score_2     0
similarity_score_3     0
similarity_score_4     0
similarity_score_5     0
similarity_score_6     0
similarity_score_7     0
similarity_score_8     0
similarity_score_9     0
similarity_score_10    0
similarity_score_11    0
similarity_score_12    0
similarity_score_13    0
similarity_score_14    0
similarity_score_15    0
dtype: int64


In [63]:
# Keep only the rows that have a missing value in at least one column
rows_with_null = df.loc[df.isna().any(axis="columns")]
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5


In [32]:
# Destination of the processed similarity scores
file_path = '../../../data/prompt/en/gpt-3.5-turbo-0125/f9-2.csv'

# Never overwrite an existing file; write the dataframe only when the path is free
if os.path.exists(file_path):
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")

File saved successfully.
