In [14]:
import pandas as pd
import numpy as np
from time import sleep
from tqdm import tqdm
import random

import asyncio
from openai import AsyncOpenAI
from private_data import API_KEY

In [15]:
# Loading Dataset 
# Loading Dataset 
df = pd.read_csv('../data/pantheon.tsv', sep='\t')
df = df[['name', 'occupation']]

# # Shuffle and resize the data
df = df.sample(frac=1).reset_index(drop=True).dropna().head(1200)



# Subject and Object labels
sub_label = 'name'
obj_label = 'occupation'

df.head(10)

Unnamed: 0,name,occupation
0,Sergiu Celibidache,CONDUCTOR
1,Henry Ian Cusick,ACTOR
2,César Cui,COMPOSER
3,Solomon Burke,SINGER
4,Hans Blix,DIPLOMAT
5,Kazuo Ishiguro,WRITER
6,Eduardo da Silva,SOCCER PLAYER
7,Anne Frank,WRITER
8,Béla Kun,POLITICIAN
9,Raymond Barre,ECONOMIST


In [16]:
## Query function



client = AsyncOpenAI(
    # This is the default and can be omitted
    api_key=API_KEY,
)
async def true_false_query(query):
    my_query = [ {"role": "system", "content": "Answer directly only as True or False"},
        {"role": "user", "content": query} ] 
    completion = await client.chat.completions.create(
        model="gpt-4", messages=my_query
    )
    return str(completion.choices[0].message.content)

In [17]:
def random_incorrect_object(column, correct_obj):
    while 1:    # assuming all objects are not equal
        try:
            guess = random.choice(column)
            if guess != correct_obj:
                return guess
        except Exception as e:
            print(f"An error occurred: {e}, obj: {correct_obj}")
            continue

In [18]:
def create_query(subject, object):  # object is a property of the subject
    return f"The occupation of {subject} was {object}."


backoff_time = 10
async def fetch_batch(queries, retries=7):
    global backoff_time
    for i in range(retries):
        try:
            pending_responses = []
            for q in queries:
                pending_responses.append(true_false_query(q))
            
            print("Batch Sent!") #DEBUG
            result =  await asyncio.gather(*pending_responses)
            if backoff_time>=20:
                backoff_time/=2
            return result
        
        except Exception as e:
            if i < retries - 1:
                backoff_time = 2.5*backoff_time + 2
                print(f"Retrying in {backoff_time:.2f} seconds...")
                if i>3: print("My-ERROR: ", e)
                await asyncio.sleep(backoff_time)
            else:
                print(f"Request failed after {retries} retries")
                raise e
    


BATCH_SIZE = 10    # each batch shouldn't have more than 10,000 tockens
async def preprocess(df):
    expected_responses = []
    responses = []
    queries = []
    
    for i, row in df.iterrows():
        if random.choice([True, False]):
            query = create_query(row[sub_label], row[obj_label])
            queries.append(query)
            expected_responses.append("true")
            
        else:
            incorrect_object = random_incorrect_object(df[obj_label], row[obj_label])
            query = create_query(row[sub_label], incorrect_object)
            queries.append(query)
            expected_responses.append("false")
            
        
        if len(queries)==BATCH_SIZE or i==len(df)-1:
            batch_responses = await fetch_batch(queries)
            responses.extend(batch_responses)
            queries = []
            print("Fetched a batch!") #DEBUG
            

    
    correct_rows = []
    for i, row in df.iterrows():
        if expected_responses[i] in responses[i].lower():
            correct_rows.append(row)
        
        
            
    new_df = pd.DataFrame(correct_rows)
    return new_df
        
        

In [19]:
known_df = await preprocess(df)
known_df    #DEBUG

Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fe

Unnamed: 0,name,occupation
0,Sergiu Celibidache,CONDUCTOR
1,Henry Ian Cusick,ACTOR
2,César Cui,COMPOSER
3,Solomon Burke,SINGER
4,Hans Blix,DIPLOMAT
...,...,...
1195,Charles Lyell,GEOLOGIST
1196,Jet Li,ACTOR
1197,Sissy Spacek,ACTOR
1198,Andrea del Verrocchio,PAINTER


In [20]:
BATCH_SIZE = 10
async def analogy_testing(df):
    expected_responses = []
    responses = []
    answers = []
    prompts = []

    prompt_batch  = []
    
    for i in range(1, len(df)):
        sub1 = df.iloc[i-1][sub_label]
        obj1 = df.iloc[i-1][obj_label]
        
        sub2 = df.iloc[i][sub_label]
        obj2 = df.iloc[i][obj_label]
        
        answer = obj2
        query_type = "true"
        
        if not random.choice([True, False]):
            obj2 = random_incorrect_object(df[obj_label], answer)
            query_type = "false"
            
            
        prompt = f"{sub1}:({obj1})::{sub2}:X; X is {obj2}"
        
        prompt_batch.append(prompt)
        expected_responses.append(query_type)
        answers.append(answer)
        prompts.append(prompt)
        
        if len(prompt_batch)==BATCH_SIZE or i == len(df)-1:
            response_batch = await fetch_batch(prompt_batch)
            prompt_batch = []
            responses.extend(response_batch)
            print("Fetched a batch!")
        
    result = []
    for i in range(len(responses)):
        prompt, answer, response, query_type = prompts[i], answers[i], responses[i], expected_responses[i]
        status = query_type in response.lower()
        result.append({'Prompt': prompt, 'Correct Object': answer, 'GPT-reply': response, 'Status': status})
            
        
        # print(f"Prompt: {prompt} \t|\t Correct Object: {answer} \t|\t GPT-reply: {response} \t|\t Status: {status}")    #DEBUG
    
    return result

In [21]:
result = await analogy_testing(known_df)

Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 461, obj: POLITICIAN
Batch Sent!
Fetched a batch!
An error occurred: 1035, obj: POLITICIAN
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 302, obj: BOXER
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 848, obj: POLITICIAN
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 1076, obj: WRITER
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
An error occurred: 392, obj: POLITICIAN
An error occurred: 553, obj: MODEL
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch Sent!
Fetched a batch!
Batch S

In [24]:
result_df = pd.DataFrame(data=result)
result_df.to_csv('results/result: person-occupation.csv')
result_df

Unnamed: 0,Prompt,Correct Object,GPT-reply,Status
0,Sergiu Celibidache:(CONDUCTOR)::Henry Ian Cusi...,ACTOR,True,True
1,Henry Ian Cusick:(ACTOR)::César Cui:X; X is CO...,COMPOSER,True,True
2,César Cui:(COMPOSER)::Solomon Burke:X; X is SI...,SINGER,True,True
3,Solomon Burke:(SINGER)::Hans Blix:X; X is MILI...,DIPLOMAT,False,True
4,Hans Blix:(DIPLOMAT)::Kazuo Ishiguro:X; X is W...,WRITER,True,True
...,...,...,...,...
1125,Alphonse de Lamartine:(WRITER)::Charles Lyell:...,GEOLOGIST,True,True
1126,Charles Lyell:(GEOLOGIST)::Jet Li:X; X is WRITER,ACTOR,False,True
1127,Jet Li:(ACTOR)::Sissy Spacek:X; X is ACTOR,ACTOR,True,True
1128,Sissy Spacek:(ACTOR)::Andrea del Verrocchio:X;...,PAINTER,False,True


In [23]:
accuracy = result_df['Status'].sum()/len(result_df)
print(f"Accuracy: {accuracy*100}%")

Accuracy: 95.57522123893806%
