In [1]:
import pandas as pd
import numpy as np
from time import sleep
from tqdm import tqdm
import random

import asyncio
from openai import AsyncOpenAI
from private_data import API_KEY

In [2]:
# Loading Dataset 
df = pd.read_csv('../data/celebrity_deaths_4.csv', encoding = 'MacRoman')
df['death_date'] = df['death_month'].astype(str) + ' ' + df['death_year'].astype(str)
df = df[['name', 'death_date']]
# # Shuffle and resize the data
df = df.sample(frac=1).reset_index(drop=True).dropna().head(1200)



# Subject and Object labels
sub_label = 'name'
obj_label = 'death_date'

df.head(10)

Unnamed: 0,name,death_date
0,RentarÅç Mikuni,April 2013
1,Lyle Williams,November 2008
2,Eko Maulana Ali,July 2013
3,Arsenio Chirinos,October 2015
4,Reid Patterson,January 2014
5,John Cloake,July 2014
6,Cara McCollum,February 2016
7,LÃ¡zaro Blanco,May 2011
8,Raul Solnado,August 2009
9,Ken Orsatti,August 2010


In [3]:
## Query function



client = AsyncOpenAI(
    # This is the default and can be omitted
    api_key=API_KEY,
)
async def true_false_query(query):
    my_query = [ {"role": "system", "content": "Answer directly only as True or False"},
        {"role": "user", "content": query} ] 
    completion = await client.chat.completions.create(
        model="gpt-4", messages=my_query
    )
    return str(completion.choices[0].message.content)

In [4]:
def random_incorrect_object(column, correct_obj):
    while 1:    # assuming all objects are not equal
        try:
            guess = random.choice(column)
            if guess != correct_obj:
                return guess
        except Exception as e:
            print(f"An error occurred: {e}, obj: {correct_obj}")
            continue

In [6]:
def create_query(subject, object):  # object is a property of the subject
    return f"{subject} died in {object}."


backoff_time = 10
async def fetch_batch(queries, retries=7):
    global backoff_time
    for i in range(retries):
        try:
            pending_responses = []
            for q in queries:
                pending_responses.append(true_false_query(q))
            
            print("Batch Sent!")
            result =  await asyncio.gather(*pending_responses)
            if backoff_time>=20:
                backoff_time/=2
            return result
        
        except Exception as e:
            if i < retries - 1:
                backoff_time = 2.5*backoff_time + 2
                print(f"Retrying in {backoff_time:.2f} seconds...")
                if i>3: print("My-ERROR: ", e)
                await asyncio.sleep(backoff_time)
            else:
                print(f"Request failed after {retries} retries")
                raise e
    


BATCH_SIZE = 10    # each batch shouldn't have more than 10,000 tockens
async def preprocess(df):
    expected_responses = []
    responses = []
    queries = []
    
    for i, row in df.iterrows():
        if random.choice([True, False]):
            query = create_query(row[sub_label], row[obj_label])
            queries.append(query)
            expected_responses.append("true")
            
        else:
            incorrect_object = random_incorrect_object(df[obj_label], row[obj_label])
            query = create_query(row[sub_label], incorrect_object)
            queries.append(query)
            expected_responses.append("false")
            
        
        if len(queries)==BATCH_SIZE or i==len(df)-1:
            batch_responses = await fetch_batch(queries)
            responses.extend(batch_responses)
            queries = []
            print("Fetched a batch!")
            

    
    correct_rows = []
    for i, row in df.iterrows():
        if expected_responses[i] in responses[i].lower():
            correct_rows.append(row)
        
        
            
    new_df = pd.DataFrame(correct_rows)
    return new_df
        
        

In [7]:
known_df = await preprocess(df)
known_df    #DEBUG

Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fe

Unnamed: 0,name,death_date
1,Lyle Williams,November 2008
2,Eko Maulana Ali,July 2013
3,Arsenio Chirinos,October 2015
5,John Cloake,July 2014
6,Cara McCollum,February 2016
...,...,...
1189,Henk Hofs,October 2011
1191,Karl Baumgartner,March 2014
1192,Robin Fletcher,January 2016
1196,Bob Babbitt,July 2012


In [9]:
BATCH_SIZE = 10
async def analogy_testing(df):
    expected_responses = []
    responses = []
    answers = []
    prompts = []

    prompt_batch  = []
    
    for i in range(1, len(df)):
        sub1 = df.iloc[i-1][sub_label]
        obj1 = df.iloc[i-1][obj_label]
        
        sub2 = df.iloc[i][sub_label]
        obj2 = df.iloc[i][obj_label]
        
        answer = obj2
        query_type = "true"
        
        if not random.choice([True, False]):
            obj2 = random_incorrect_object(df[obj_label], answer)
            query_type = "false"
            
            
        prompt = f"{sub1}:({obj1})::{sub2}:X; X is {obj2}"
        
        prompt_batch.append(prompt)
        expected_responses.append(query_type)
        answers.append(answer)
        prompts.append(prompt)
        
        if len(prompt_batch)==BATCH_SIZE or i == len(df)-1:
            response_batch = await fetch_batch(prompt_batch)
            prompt_batch = []
            responses.extend(response_batch)
            print("Fetched a batch!")
        
    result = []
    for i in range(len(responses)):
        prompt, answer, response, query_type = prompts[i], answers[i], responses[i], expected_responses[i]
        status = query_type in response.lower()
        result.append({'Prompt': prompt, 'Correct Object': answer, 'GPT-reply': response, 'Status': status})
            
        
        # print(f"Prompt: {prompt} \t|\t Correct Object: {answer} \t|\t GPT-reply: {response} \t|\t Status: {status}")    #DEBUG
    
    return result

In [10]:
result = await analogy_testing(known_df)

Batch Sent!
Fetched a batch!
An error occurred: 87, obj: December 2011
An error occurred: 405, obj: December 2011
An error occurred: 750, obj: December 2016
An error occurred: 694, obj: December 2016
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 317, obj: April 2010
An error occurred: 343, obj: December 2014
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 418, obj: April 2008
An error occurred: 414, obj: May 2010
Batch Sent!
Fetched a batch!
An error occurred: 795, obj: August 2016
Batch Sent!
Fetched a batch!
An error occurred: 212, obj: February 2006
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 111, obj: November 2012
An error occurred: 849, obj: November 2012
An error occurred: 709, obj: November 2012
An error occurred: 625, obj: May 2014
Batch Sent!
Fetched a batch!
An error occurred: 524, obj: January 2009
An error occurred: 69, obj: January 2009
An error occurr

In [11]:
result_df = pd.DataFrame(data=result)
result_df.to_csv('results/result: person-deathdate.csv')
result_df

Unnamed: 0,Prompt,Correct Object,GPT-reply,Status
0,Lyle Williams:(November 2008)::Eko Maulana Ali...,July 2013,False,False
1,Eko Maulana Ali:(July 2013)::Arsenio Chirinos:...,October 2015,False,False
2,Arsenio Chirinos:(October 2015)::John Cloake:X...,July 2014,False,True
3,John Cloake:(July 2014)::Cara McCollum:X; X is...,February 2016,True,True
4,Cara McCollum:(February 2016)::LÃ¡zaro Blanco:...,May 2011,False,True
...,...,...,...,...
844,Glenn Frey:(January 2016)::Henk Hofs:X; X is F...,October 2011,False,True
845,Henk Hofs:(October 2011)::Karl Baumgartner:X; ...,March 2014,True,True
846,Karl Baumgartner:(March 2014)::Robin Fletcher:...,January 2016,False,True
847,Robin Fletcher:(January 2016)::Bob Babbitt:X; ...,July 2012,False,True


In [12]:
accuracy = result_df['Status'].sum()/len(result_df)
print(f"Accuracy: {accuracy*100}%")

Accuracy: 65.01766784452296%
