In [1]:
import asyncio
import random
from pathlib import Path
from time import sleep

import numpy as np
import pandas as pd
from openai import AsyncOpenAI
from tqdm import tqdm

from private_data import API_KEY

In [22]:
# Loading Dataset
# Build one (birthDate, name) row per person from the space-separated CSV.
df = (
    pd.read_csv('../data/famous-birthdates.csv', sep = ' ')[['firstname', 'lastname', 'birthDate']]
    .assign(name=lambda frame: frame['firstname'] + ' ' + frame['lastname'])
    .drop(columns=["firstname", "lastname"])
    # Drop incomplete rows BEFORE renumbering: resetting the index first and
    # dropping afterwards leaves gaps in the index, and the later cells treat
    # the index label of a row as its position in parallel lists.
    .dropna()
    # Shuffle and resize the data
    .sample(frac=1)
    .head(1200)
    # Renumber last so the index is exactly 0..len(df)-1.
    .reset_index(drop=True)
)

# Subject and Object labels
sub_label = 'name'
obj_label = 'birthDate'

df.describe()

Unnamed: 0,birthDate,name
count,1200,1200
unique,1140,1200
top,1943-01-01,Bulent Ecevit
freq,6,1


In [23]:
## Query function
CHOICE_COUNT = 4  # number of options listed in every multiple-choice prompt

# Shared async client for all requests; the key is passed explicitly from the
# local private_data module rather than relying on the client's default lookup.
client = AsyncOpenAI(api_key=API_KEY)
async def multiple_choice_query(query, model="gpt-4"):
    """Send one multiple-choice prompt to the chat-completions API.

    Args:
        query: Text of the user message (question plus numbered options).
        model: Name of the chat model to call. Defaults to "gpt-4", the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The content of the first returned choice, passed through str()
        (a missing content therefore comes back as the string "None").
    """
    messages = [
        {"role": "system", "content": "Choose the correct option."},
        {"role": "user", "content": query},
    ]
    completion = await client.chat.completions.create(
        model=model, messages=messages
    )
    return str(completion.choices[0].message.content)

In [27]:
def generate_options(column, correct_obj, choice_count=None):
    """Draw a random option list that is guaranteed to contain the correct answer.

    Args:
        column: Sequence of candidate objects to sample options from.
            Entries are assumed to be distinct; duplicates in `column` can
            produce duplicate options.
        correct_obj: The object that must appear among the options.
        choice_count: Number of options to return. When omitted, the
            module-level CHOICE_COUNT is used.

    Returns:
        A tuple (choices, correct_index): the list of options and the position
        of `correct_obj` inside it.

    Raises:
        ValueError: Propagated from the random module when `column` has fewer
            than `choice_count` entries (or `choice_count` is not positive).
    """
    if choice_count is None:
        choice_count = CHOICE_COUNT

    # Deliberately no retry loop: a sampling failure (e.g. too few candidates)
    # is not transient, so retrying it would spin forever. Let it propagate.
    choices = random.sample(column, choice_count)
    if correct_obj not in choices:
        # Overwrite a random slot so the answer's position stays uniform.
        choices[random.randint(0, choice_count - 1)] = correct_obj

    return choices, choices.index(correct_obj)

In [28]:
def create_query(subject, choices):  # object is a property of the subject
    """Format a birth-date question followed by its numbered options.

    The options are numbered from 1 in the order given. The function formats
    exactly the options it receives instead of reading a global option count,
    so a list of any length (including an empty one) is handled.

    Args:
        subject: Text placed in front of " was born on".
        choices: Iterable of options, one per numbered line.

    Returns:
        The prompt string: the question line, then one "\\nN) option" line
        per entry in `choices`.
    """
    option_lines = [
        f"\n{number}) {choice}" for number, choice in enumerate(choices, start=1)
    ]
    return f"{subject} was born on" + "".join(option_lines)
        


# Seconds to wait before the next retry. Kept at module level so the delay
# learned from one batch carries over to the following batches.
backoff_time = 10
async def fetch_batch(queries, retries=7):
    """Send a batch of queries concurrently and return their replies in order.

    Queries that fail are retried after a growing delay; queries that already
    succeeded are NOT sent again, so a single failure does not cause duplicate
    requests for the rest of the batch.

    Args:
        queries: List of prompt strings; each is passed to multiple_choice_query.
        retries: Maximum number of attempts per batch (must be >= 1).

    Returns:
        List of reply strings, where result[k] is the reply to queries[k].

    Raises:
        ValueError: If `retries` is smaller than 1.
        Exception: The last error seen, once `retries` attempts have been
            used and some query is still failing.
    """
    global backoff_time
    if retries < 1:
        raise ValueError("retries must be at least 1")

    results = [None] * len(queries)
    pending = list(range(len(queries)))    # positions that still need a reply

    for attempt in range(retries):
        print("Batch Sent!") #DEBUG
        # return_exceptions=True waits for every request, so none is left
        # running unobserved and the successful replies can be kept.
        outcomes = await asyncio.gather(
            *(multiple_choice_query(queries[pos]) for pos in pending),
            return_exceptions=True,
        )

        failed = []
        last_error = None
        for pos, outcome in zip(pending, outcomes):
            if isinstance(outcome, Exception):
                failed.append(pos)
                last_error = outcome
            elif isinstance(outcome, BaseException):
                # Cancellation and similar signals are not retryable.
                raise outcome
            else:
                results[pos] = outcome

        if not failed:
            # Relax the delay again after a fully successful attempt.
            if backoff_time>=20:
                backoff_time/=2
            return results

        pending = failed
        if attempt < retries - 1:
            backoff_time = 2.5*backoff_time + 2
            print(f"Retrying in {backoff_time:.2f} seconds...")
            print("My-ERROR: ", last_error)
            await asyncio.sleep(backoff_time)
        else:
            print(f"Request failed after {retries} retries")
            raise last_error
    


BATCH_SIZE = 8    # each batch shouldn't have more than 10,000 tokens
all_objects = df[obj_label].unique().tolist()
# Module-level so later cells can inspect them; preprocess() resets all three
# at the start of every call.
correct_rows = []
expected_responses = []
responses = []
async def preprocess(df):
    """Ask one multiple-choice question per row and keep the rows answered correctly.

    For every row a prompt is built from the subject column and a random
    option list containing the row's object, the prompts are sent in batches of
    BATCH_SIZE, and a row counts as correct when its object value occurs
    (case-insensitively) in the reply text.

    Rows, expected values and replies are paired by position, so the function
    does not depend on what the DataFrame index looks like.

    Args:
        df: DataFrame containing the `sub_label` and `obj_label` columns.

    Returns:
        A new DataFrame built from the rows that were answered correctly.

    Side effects:
        Clears and refills the module-level lists `correct_rows`,
        `expected_responses` and `responses`, so repeated calls do not
        accumulate results from earlier runs.
    """
    correct_rows.clear()
    expected_responses.clear()
    responses.clear()

    queries = []
    for _, row in df.iterrows():
        choices, _ = generate_options(all_objects, row[obj_label])
        queries.append(create_query(row[sub_label], choices))
        expected_responses.append(row[obj_label])

        if len(queries) == BATCH_SIZE:
            responses.extend(await fetch_batch(queries))
            queries = []
            print("Fetched a batch!") #DEBUG

    # Flush the final, partially filled batch (if any).
    if queries:
        responses.extend(await fetch_batch(queries))
        print("Fetched a batch!") #DEBUG

    ## check if the responses are correct
    paired = zip(df.iterrows(), expected_responses, responses)
    for position, ((_, row), expected, reply) in enumerate(paired):
        try:
            if expected.lower() in reply.lower():
                correct_rows.append(row)
        except Exception as e:
            # e.g. a non-string object value; report it and keep going.
            print(f"Error at index {position}, Error:\n{e}")
            continue

    new_df = pd.DataFrame(correct_rows)
    return new_df
        
        

In [31]:
# Number of rows whose reply contained the expected object. `correct_rows` is
# filled by preprocess(), so this is only meaningful after that call has run.
len(correct_rows)

887

In [29]:
# Query the model once per row of df and keep only the rows it answered correctly.
known_df = await preprocess(df)
# known_df    #DEBUG

Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fe

In [30]:
# Summary statistics for the rows kept by preprocess().
known_df.describe()

Unnamed: 0,birthDate,name
count,887,887
unique,847,887
top,1943-01-01,Denzel Washington
freq,4,1


In [41]:
BATCH_SIZE = 8
# Module-level so they can be inspected after a run; analogy_testing() resets
# both at the start of every call.
expected_responses = []
responses = []
async def analogy_testing(df):
    """Test the model with analogy prompts built from consecutive rows.

    For each pair of neighbouring rows (i-1, i) the prompt
    "sub1:(obj1)::sub2:X; X is" is followed by CHOICE_COUNT numbered options
    that include obj2. Prompts are sent in batches of BATCH_SIZE, and a reply
    counts as correct when obj2 occurs (case-insensitively) in its text.

    Args:
        df: DataFrame containing the `sub_label` and `obj_label` columns.
            Fewer than two rows yields an empty result.

    Returns:
        List of dicts with the keys 'Prompt', 'Correct Object', 'GPT-reply'
        and 'Status', one per evaluated prompt.

    Side effects:
        Clears and refills the module-level lists `expected_responses` and
        `responses`, so calling the function again does not mix replies from
        an earlier run into the new result.
    """
    expected_responses.clear()
    responses.clear()

    prompts = []
    prompt_batch = []

    for i in range(1, len(df)):
        previous_row, current_row = df.iloc[i-1], df.iloc[i]
        sub1, obj1 = previous_row[sub_label], previous_row[obj_label]
        sub2, obj2 = current_row[sub_label], current_row[obj_label]

        choices, _ = generate_options(all_objects, obj2)

        prompt = f"{sub1}:({obj1})::{sub2}:X; X is"
        for j in range(CHOICE_COUNT):
            prompt += f"\n{j+1}) {choices[j]}"

        prompt_batch.append(prompt)
        expected_responses.append(obj2)
        prompts.append(prompt)

        if len(prompt_batch) == BATCH_SIZE:
            responses.extend(await fetch_batch(prompt_batch))
            prompt_batch = []
            print("Fetched a batch!")

    # Flush the final, partially filled batch (if any).
    if prompt_batch:
        responses.extend(await fetch_batch(prompt_batch))
        print("Fetched a batch!")

    result = []
    # zip pairs each prompt with its own expected value and reply by position.
    for i, (prompt, answer, response) in enumerate(zip(prompts, expected_responses, responses)):
        try:
            status = answer.lower() in response.lower()
        except Exception as e:
            # e.g. a non-string object value; report it and keep going.
            print(f"Error at index {i}, Error:\n{e}")
            continue
        result.append({'Prompt': prompt, 'Correct Object': answer, 'GPT-reply': response, 'Status': status})

    return result

In [42]:
# Run the analogy prompts over consecutive rows of known_df.
result = await analogy_testing(known_df)

Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Retrying in 27.61 seconds...
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Ba

In [43]:
# Collect the per-prompt records into a table and save it next to the notebook.
result_df = pd.DataFrame(data=result)
# to_csv does not create missing directories, so make sure the target exists.
Path('results').mkdir(parents=True, exist_ok=True)
# NOTE(review): ':' is not allowed in file names on Windows; consider renaming
# the file if the notebook has to run there.
result_df.to_csv('results/result: person-birthdate.csv')
result_df.describe()

Unnamed: 0,Prompt,Correct Object,GPT-reply,Status
count,886,886,886,886
unique,886,846,844,2
top,Denzel Washington:(1954-12-28)::Mike Figgis:X;...,1943-01-01,1) 1943-02-22,True
freq,1,4,3,625


In [44]:
# Share of prompts whose reply contained the expected object (Status is True).
accuracy = result_df['Status'].sum()/len(result_df)
print(f"Accuracy: {accuracy*100}%")

Accuracy: 70.54176072234763%
