### **0. Set-up**

In [1]:
# Import libraries and utils
%run '../../utils.ipynb'

In [2]:
# Load environment variables and read the OpenAI API key from them
load_dotenv()
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Create the client used for the API calls further down
# NOTE(review): OPENAI_API_KEY is not passed to OpenAI(); presumably the client
# picks the key up from the environment on its own - confirm
client = OpenAI()

In [3]:
# Read the cleaned SimLex-999 word pairs and drop the first row by position
# NOTE(review): presumably the first pair is dropped because ('old', 'new') is
# used as the worked example inside the prompt - confirm against the CSV
en_simlex = pd.read_csv("../../../data/dataset/cleaned-en-simlex-999.csv").iloc[1:]

# Optional: work on a subset only
# en_simlex = en_simlex.head(100)

# Pair the two word columns up as (word1, word2) tuples
tuples_list = [
    (first_word, second_word)
    for first_word, second_word in zip(en_simlex['word1'], en_simlex['word2'])
]

In [6]:
# Display the complete list of (word1, word2) tuples built from en_simlex
tuples_list

[('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),
 ('strong', 'pro

### **1. Define and Evaluate Parameters**

In [6]:
# Instruction sent with every chunk of word pairs: it fixes the 0-10 scale,
# the two-decimal precision, the exact response structure and one worked example
prompt = " ".join([
    "Rate the semantic similarity of each word pair on a scale from 0 to 10,",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals. The response should strictly adhere to the structure:",
    "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...].",
    "Do not provide additional explanations or context.",
    "An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)]",
])

In [7]:
# Model that is asked for the similarity ratings
model = "gpt-3.5-turbo-0125"

# Number of sublists the word pairs are split into
n_sublists = 25

# Sample size; later compared against the number of scores collected per word pair
sample_size = 15

# Delay between individual API calls
delay = 15.0

In [8]:
# Split the list of word pairs into n_sublists chunks
chunks = split_into_n_lists(tuples_list, n_sublists)

# Sanity check: print how many chunks were produced
print(len(chunks))

25


In [9]:
# Print the prompt for each chunk so the text sent to the model can be inspected
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)] --- ["('smart', 'intelligent'), ('hard', 'difficult'), ('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 'broad'), ('si

In [10]:
# Load the cl100k_base tiktoken encoding
# NOTE(review): `encoding` is not passed to the helper below - presumably
# count_tokens_with_tiktoken reads it as a global; confirm in utils.ipynb
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk (one count per formatted prompt)
token_counts = count_tokens_with_tiktoken(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [392, 393, 397, 378, 386, 379, 376, 386, 387, 391, 390, 390, 380, 379, 391, 392, 382, 377, 386, 379, 379, 380, 378, 383, 369]


### **2. Extract and Process Data**

In [11]:
# Get results from API, reusing the responses saved by an earlier run if present.
# `response` is read by the following cells, so it must always be assigned here;
# the cache check keeps a "Restart & Run All" from repeating the long API run
# once the JSON export exists.
response_cache_path = '../../../data/response/en/gpt-3.5-turbo-0125/f2.json'

if os.path.exists(response_cache_path):
    # Responses were already collected and saved: load them instead of re-querying
    with open(response_cache_path) as cached_file:
        response = json.load(cached_file)
else:
    # No saved responses yet: query the API
    response = get_responses(chunks, prompt, model, sample_size, delay)

Processing: 100%|██████████| 375/375 [2:30:55<00:00, 24.15s/chunk]  

Total time taken: 9055.70 seconds





In [12]:
# Destination of the raw API responses
file_path = '../../../data/response/en/gpt-3.5-turbo-0125/f2.json'

# Never overwrite an earlier export: only write when nothing is there yet
if os.path.exists(file_path):
    print("File already exists. JSON was not saved to prevent overwriting.")
else:
    with open(file_path, 'w') as json_file:
        json.dump(response, json_file)
        print("File saved successfully.")

File saved successfully.


In [13]:
# Parse the raw responses into a dictionary keyed by word pair
data_dict = process_responses(response)

# Keep only the word pairs whose number of scores differs from the sample size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('large', 'huge'): [8.0, 8.5, 9.0, 8.0, 8.0, 8.0, 9.0, 9.5, 9.0, 8.5, 9.0, 9.0, 8.5, 8.0], ('hard', 'tough'): [7.0, 8.0, 8.0, 7.5, 7.5, 7.0, 8.5, 8.0, 8.75, 8.0, 8.0, 8.0, 8.0, 8.0], ('new', 'fresh'): [8.0, 8.5, 8.5, 8.5, 7.5, 6.0, 8.5, 7.5, 9.0, 8.5, 8.5, 8.0, 7.5, 8.0], ('sharp', 'dull'): [6.5, 6.5, 7.5, 6.5, 7.0, 7.5, 7.0, 7.5, 8.5, 7.5, 7.5, 7.5, 7.5, 6.5], ('quick', 'rapid'): [8.0, 9.0, 9.0, 9.0, 8.5, 8.0, 9.5, 9.5, 9.5, 9.0, 9.0, 9.0, 8.5, 8.0], ('dumb', 'foolish'): [8.25, 8.0, 7.0, 8.5, 7.5, 7.5, 8.0, 8.0, 9.0, 8.5, 8.0, 8.5, 8.0, 7.5], ('wonderful', 'terrific'): [8.25, 9.5, 8.5, 9.5, 8.5, 8.5, 8.0, 9.0, 9.5, 9.0, 8.5, 8.0, 8.5, 8.5], ('strange', 'odd'): [8.0, 8.5, 8.5, 8.5, 8.5, 8.0, 9.0, 9.0, 9.25, 8.5, 8.5, 8.5, 8.0, 8.5], ('happy', 'angry'): [6.0, 4.0, 5.0, 4.0, 6.0, 4.5, 5.5, 5.5, 6.5, 6.0, 7.5, 6.0, 5.5, 4.5], ('narrow', 'broad'): [6.5, 6.0, 7.0, 7.5, 7.0, 7.0, 7.5, 6.0, 7.75, 6.0, 7.5, 6.5, 6.0, 7.0], ('simple', 'easy'): [6.75, 6.0, 7.5, 7.5, 6.5, 7.0, 8.0, 8.0, 9.0, 8.5

In [14]:
# Print duplicate word pairs found in the source data and the parsed responses
# NOTE(review): exact comparison is defined in utils.ipynb - confirm
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [15]:
# Convert the parsed responses into a Pandas DataFrame
# (the displayed frame has word1, word2 and similarity_score_1..15 columns)
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,smart,intelligent,8.50,7.00,8.50,8.50,8.50,8.50,9.50,9.5,9.00,8.75,9.50,8.50,9.50,9.50,8.50
1,hard,difficult,7.50,6.00,8.00,8.00,7.00,6.50,7.00,8.5,8.50,8.25,8.00,8.60,8.00,7.50,7.50
2,happy,cheerful,6.75,8.50,9.00,8.00,8.50,7.50,8.50,7.5,8.00,8.50,8.50,8.70,7.00,8.50,7.50
3,hard,easy,2.00,2.00,3.00,2.00,4.00,2.50,4.00,1.0,1.50,1.50,4.00,2.00,2.00,3.00,1.50
4,fast,rapid,8.50,9.00,9.50,9.50,9.00,9.00,9.50,9.5,9.50,9.00,9.50,9.00,9.50,8.00,9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,join,acquire,2.33,0.60,0.41,0.45,0.81,0.22,2.18,1.9,3.64,2.30,2.00,0.52,1.66,0.49,5.14
994,send,attend,1.51,0.35,0.38,0.43,0.93,0.16,0.88,1.0,2.63,1.58,1.57,0.51,1.62,0.17,2.00
995,gather,attend,2.00,0.48,0.44,0.39,0.85,0.17,1.65,1.5,2.60,1.75,1.64,0.44,1.42,0.46,2.00
996,absorb,withdraw,1.52,0.17,0.10,0.09,0.77,0.15,0.45,0.2,1.87,0.85,1.07,0.29,1.52,0.14,1.14


In [16]:
# Display the raw response strings to inspect where scores went missing
response

["[('smart', 'intelligent', 8.50), ('hard', 'difficult', 7.50), ('happy', 'cheerful', 6.75), ('hard', 'easy', 2.00), ('fast', 'rapid', 8.50), ('happy', 'glad', 7.25), ('short', 'long', 6.00), ('stupid', 'dumb', 8.00), ('weird', 'strange', 7.50), ('wide', 'narrow', 6.75), ('bad', 'awful', 4.50), ('easy', 'difficult', 5.75), ('bad', 'terrible', 4.75), ('hard', 'simple', 2.75), ('smart', 'dumb', 7.25), ('insane', 'crazy', 8.50), ('happy', 'mad', 5.75), ('large', 'huge', 8.00), ('hard', 'tough', 7.00), ('new', 'fresh', 8.00), ('sharp', 'dull', 6.50), ('quick', 'rapid', 8.00), ('dumb', 'foolish', 8.25), ('wonderful', 'terrific', 8.25), ('strange', 'odd', 8.00), ('happy', 'angry', 6.00), ('narrow', 'broad', 6.50), ('simple', 'easy', 6.75), ('old', 'fresh', 5.75), ('apparent', 'obvious', 8.50), ('inexpensive', 'cheap', 8.75), ('nice', 'generous', 7.00), ('weird', 'normal', 6.25), ('weird', 'odd', 7.75), ('bad', 'immoral', 6.25), ('sad', 'funny', 5.50), ('wonderful', 'great', 8.25), ('guilty',

In [17]:
# Mark missing cells, then total them per column
null_mask = df.isnull()
count_null_values = null_mask.sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2      0
similarity_score_3      0
similarity_score_4      0
similarity_score_5      0
similarity_score_6      0
similarity_score_7      0
similarity_score_8      0
similarity_score_9      0
similarity_score_10     0
similarity_score_11     1
similarity_score_12     1
similarity_score_13     1
similarity_score_14     1
similarity_score_15    81
dtype: int64


In [18]:
# Flag every row that has a missing value in at least one column
has_null = df.isnull().any(axis=1)
rows_with_null = df[has_null]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
17,large,huge,8.00,8.50,9.00,8.00,8.00,8.00,9.00,9.50,9.00,8.50,9.00,9.00,8.50,8.00,
18,hard,tough,7.00,8.00,8.00,7.50,7.50,7.00,8.50,8.00,8.75,8.00,8.00,8.00,8.00,8.00,
19,new,fresh,8.00,8.50,8.50,8.50,7.50,6.00,8.50,7.50,9.00,8.50,8.50,8.00,7.50,8.00,
20,sharp,dull,6.50,6.50,7.50,6.50,7.00,7.50,7.00,7.50,8.50,7.50,7.50,7.50,7.50,6.50,
21,quick,rapid,8.00,9.00,9.00,9.00,8.50,8.00,9.50,9.50,9.50,9.00,9.00,9.00,8.50,8.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
678,agony,grief,4.77,4.50,3.00,2.73,4.00,3.75,4.25,5.43,5.25,3.50,0.80,3.33,0.75,5.00,
679,assignment,task,5.52,4.50,1.25,3.45,4.67,4.25,5.50,7.65,6.84,7.00,0.68,3.33,0.90,5.38,
956,leave,wander,1.97,2.13,2.06,1.00,0.75,1.00,1.50,1.49,1.50,4.03,0.35,2.75,0.52,0.36,
957,keep,give,1.83,1.14,3.46,2.00,1.08,1.67,2.50,2.33,2.40,2.05,0.22,2.07,0.19,0.58,


In [20]:
# Single-pair variant of the prompt, used to re-query ('give', 'allow') on its own
custom_prompt = " ".join([
    "Rate the semantic similarity of the word pair [('give', 'allow')] on a scale from 0 to 10,",
    "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity.",
    "Use two decimals. The response should strictly adhere to the structure:",
    "[('word1', 'word2', <score>)].",
    "Do not provide additional explanations or context.",
    "An example of a word pair and its semantic similarity score is: [('old', 'new', 1.58)]",
])

# API loop, currently disabled (the output recorded for this cell shows four responses):
# for _ in range(4):
#     messages = [{"role": "user", "content": custom_prompt}]
#     completion = client.chat.completions.create(
#         model=model,
#         messages=messages,
#         n=1)
#
#     # Show results
#     print(completion.choices[0].message.content)

[('give', 'allow', 1.21)]
[('give', 'allow', 5.00)]
[('give', 'allow', 0.75)]
[('give', 'allow', 8.50)]


In [22]:
# Manually fix inconsistencies: scores for ('give', 'allow'), keyed by the
# column each one is written to
manual_scores = {
    'similarity_score_11': 1.21,
    'similarity_score_12': 5.00,
    'similarity_score_13': 0.75,
    'similarity_score_14': 8.50,
}

# Rows holding the ('give', 'allow') word pair
give_allow_mask = (df['word1'] == 'give') & (df['word2'] == 'allow')

for score_column, score in manual_scores.items():
    df.loc[give_allow_mask, score_column] = score

# Show results
df.loc[give_allow_mask]

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
958,give,allow,1.4,1.52,2.9,1.67,2.27,5.69,0.25,2.41,0.34,0.58,1.21,5.0,0.75,8.5,


In [23]:
# Recompute which rows still have a missing value in at least one column
still_has_null = df.isnull().any(axis=1)
rows_with_null = df[still_has_null]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
17,large,huge,8.00,8.50,9.00,8.00,8.00,8.00,9.00,9.50,9.00,8.50,9.00,9.00,8.50,8.00,
18,hard,tough,7.00,8.00,8.00,7.50,7.50,7.00,8.50,8.00,8.75,8.00,8.00,8.00,8.00,8.00,
19,new,fresh,8.00,8.50,8.50,8.50,7.50,6.00,8.50,7.50,9.00,8.50,8.50,8.00,7.50,8.00,
20,sharp,dull,6.50,6.50,7.50,6.50,7.00,7.50,7.00,7.50,8.50,7.50,7.50,7.50,7.50,6.50,
21,quick,rapid,8.00,9.00,9.00,9.00,8.50,8.00,9.50,9.50,9.50,9.00,9.00,9.00,8.50,8.00,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
678,agony,grief,4.77,4.50,3.00,2.73,4.00,3.75,4.25,5.43,5.25,3.50,0.80,3.33,0.75,5.00,
679,assignment,task,5.52,4.50,1.25,3.45,4.67,4.25,5.50,7.65,6.84,7.00,0.68,3.33,0.90,5.38,
956,leave,wander,1.97,2.13,2.06,1.00,0.75,1.00,1.50,1.49,1.50,4.03,0.35,2.75,0.52,0.36,
957,keep,give,1.83,1.14,3.46,2.00,1.08,1.67,2.50,2.33,2.40,2.05,0.22,2.07,0.19,0.58,


In [24]:
# Collect the (word1, word2) tuples of every row that still has a missing value
first_words = rows_with_null['word1']
second_words = rows_with_null['word2']
missing_word_pair_list = [pair for pair in zip(first_words, second_words)]

# Show results
missing_word_pair_list

[('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('school', 'grade'),
 ('parent', 'adult'),
 ('bar', 'jail'),
 ('car', 'highway'),
 ('dictionary', 'definition'),
 ('door', 'cellar'),
 ('army', 'legion'),
 ('metal', 'aluminum'),
 ('chair', 'bench'),
 ('cloud', 'fog'),
 ('boy', 'son'),
 ('water', 'ice'),
 ('bed', 'blanket'),
 ('attorney', 'lawyer'),
 ('area', 'zone'),
 ('business', 'company'),
 ('clothes', 'fabric'),
 ('sweater', 'jacket'),
 ('money', 'capital'),
 ('hand', 'foot'),
 ('apartment', 'hotel'),
 ('cup', 'cone'),
 ('hors

In [25]:
# Format a single message for the word pairs that still have missing scores
formatted_prompt = format_prompt(missing_word_pair_list, prompt)
messages = [{"role": "user", "content": formatted_prompt}]

# Call the API.
# This call has to stay enabled: the statement below reads `completion`, and no
# other live code assigns it, so a fresh "Restart & Run All" would otherwise
# stop here with a NameError.
completion = client.chat.completions.create(
    model=model,
    messages=messages,
    n=1)

# Store the response content (wrapped in a list, like the main `response`)
missing_word_pairs_response = [completion.choices[0].message.content]

In [26]:
# Parse the re-query response into a dictionary keyed by word pair
# (the displayed result maps each (word1, word2) tuple to a one-element score list)
missing_word_pairs_dict = process_responses(missing_word_pairs_response)

# Show results
missing_word_pairs_dict

{('large', 'huge'): [8.0],
 ('hard', 'tough'): [9.5],
 ('new', 'fresh'): [6.75],
 ('sharp', 'dull'): [6.75],
 ('quick', 'rapid'): [8.5],
 ('dumb', 'foolish'): [7.5],
 ('wonderful', 'terrific'): [9.0],
 ('strange', 'odd'): [8.5],
 ('happy', 'angry'): [5.25],
 ('narrow', 'broad'): [7.75],
 ('simple', 'easy'): [8.0],
 ('old', 'fresh'): [4.5],
 ('apparent', 'obvious'): [9.5],
 ('inexpensive', 'cheap'): [8.75],
 ('nice', 'generous'): [6.0],
 ('weird', 'normal'): [3.5],
 ('weird', 'odd'): [8.75],
 ('bad', 'immoral'): [7.0],
 ('sad', 'funny'): [2.5],
 ('wonderful', 'great'): [8.0],
 ('guilty', 'ashamed'): [5.5],
 ('beautiful', 'wonderful'): [8.25],
 ('confident', 'sure'): [9.0],
 ('school', 'grade'): [2.5],
 ('parent', 'adult'): [5.5],
 ('bar', 'jail'): [6.0],
 ('car', 'highway'): [4.0],
 ('dictionary', 'definition'): [9.0],
 ('door', 'cellar'): [4.5],
 ('army', 'legion'): [7.0],
 ('metal', 'aluminum'): [8.5],
 ('chair', 'bench'): [7.5],
 ('cloud', 'fog'): [6.5],
 ('boy', 'son'): [5.0],
 ('wa

In [27]:
# Fill the gaps in the last score column with the re-queried values
score_col = 'similarity_score_15'

for row_label, row in df.iterrows():
    # Rows that already have a score are left untouched
    if not pd.isna(row[score_col]):
        continue

    # Pairs without a re-queried score cannot be filled
    pair = (row['word1'], row['word2'])
    if pair not in missing_word_pairs_dict:
        continue

    # The dictionary stores a list of scores per pair; take the first one
    df.at[row_label, score_col] = missing_word_pairs_dict[pair][0]

# Check whether any NaN values are left
print(df[df[score_col].isna()])

Empty DataFrame
Columns: [word1, word2, similarity_score_1, similarity_score_2, similarity_score_3, similarity_score_4, similarity_score_5, similarity_score_6, similarity_score_7, similarity_score_8, similarity_score_9, similarity_score_10, similarity_score_11, similarity_score_12, similarity_score_13, similarity_score_14, similarity_score_15]
Index: []


In [28]:
def _is_missing_pair(row):
    """Return True when the row's (word1, word2) pair is in missing_word_pair_list."""
    return (row['word1'], row['word2']) in missing_word_pair_list


# Keep only the rows whose word pair had to be re-queried
filtered_df = df[df.apply(_is_missing_pair, axis=1)]

# Narrow the view to the word pair and the column that was filled in
shown_columns = ['word1', 'word2', 'similarity_score_15']
result = filtered_df[shown_columns]

# Show results
print(result)

          word1   word2  similarity_score_15
17        large    huge                 8.00
18         hard   tough                 9.50
19          new   fresh                 6.75
20        sharp    dull                 6.75
21        quick   rapid                 8.50
..          ...     ...                  ...
678       agony   grief                 7.00
679  assignment    task                 8.00
956       leave  wander                 7.00
957        keep    give                 6.00
958        give   allow                 6.75

[81 rows x 3 columns]


In [29]:
# Final check: rows with a missing value in at least one column
remaining_null = df.isnull().any(axis=1)
rows_with_null = df[remaining_null]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [30]:
# Display the DataFrame after the missing scores have been filled in
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,smart,intelligent,8.50,7.00,8.50,8.50,8.50,8.50,9.50,9.5,9.00,8.75,9.50,8.50,9.50,9.50,8.50
1,hard,difficult,7.50,6.00,8.00,8.00,7.00,6.50,7.00,8.5,8.50,8.25,8.00,8.60,8.00,7.50,7.50
2,happy,cheerful,6.75,8.50,9.00,8.00,8.50,7.50,8.50,7.5,8.00,8.50,8.50,8.70,7.00,8.50,7.50
3,hard,easy,2.00,2.00,3.00,2.00,4.00,2.50,4.00,1.0,1.50,1.50,4.00,2.00,2.00,3.00,1.50
4,fast,rapid,8.50,9.00,9.50,9.50,9.00,9.00,9.50,9.5,9.50,9.00,9.50,9.00,9.50,8.00,9.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,join,acquire,2.33,0.60,0.41,0.45,0.81,0.22,2.18,1.9,3.64,2.30,2.00,0.52,1.66,0.49,5.14
994,send,attend,1.51,0.35,0.38,0.43,0.93,0.16,0.88,1.0,2.63,1.58,1.57,0.51,1.62,0.17,2.00
995,gather,attend,2.00,0.48,0.44,0.39,0.85,0.17,1.65,1.5,2.60,1.75,1.64,0.44,1.42,0.46,2.00
996,absorb,withdraw,1.52,0.17,0.10,0.09,0.77,0.15,0.45,0.2,1.87,0.85,1.07,0.29,1.52,0.14,1.14


In [31]:
# Destination of the completed score table
file_path = '../../../data/prompt/en/gpt-3.5-turbo-0125/f2.csv'

# Never overwrite an earlier export: only write when nothing is there yet
if os.path.exists(file_path):
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")

File saved successfully.
