In [2]:
import pandas as pd
import numpy as np
from time import sleep
from openai import OpenAI
from private_data import API_KEY
import re

In [3]:
# Main query function
client = OpenAI(api_key=API_KEY)
def ask_query(query):
    my_query = [ {"role": "system", "content": "Give direct answer without any explanation."},
        {"role": "user", "content": query} ] 
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo", messages=my_query
    )
    return str(completion.choices[0].message.content)

In [25]:
def occupation_analogy_testing(df1, df2, equiv_func):
    result = []
    
    for i in range(len(df1)):
        entity_1 = df1.iloc[i]['Entity']
        target_1 = df1.iloc[i]['Target']

        for j in range(len(df2)):
        
            entity_2 = df2.iloc[j]['Entity']
            target_2 = df2.iloc[j]['Target']
            correct_ans = df2.iloc[j]['Correct']
            
            prompt = f"{entity_1}:({target_1})::{entity_2}:X; what is X?"
           
            response = ask_query(prompt)
            
            status = "Fully Wrong"
            if equiv_func(response, target_2)==1:
                status = "Analogically Correct"
            elif equiv_func(response, correct_ans)==1:
                status = "Technically Correct"
            
            result.append({'Prompt': prompt, 'Analogical Answer': target_2, 'Technical Answer': correct_ans , 'GPT-reply': response, 'Status': status})
            print(f"Prompt: {prompt} \t|\t Analogical Answer: {target_2}, Technical Answer: {correct_ans} \t|\t Response: {response} \t|\t Status: {status}")
    
    return result

In [5]:
# Loading Dataset 
person_occupation_df = pd.read_csv('../data/pantheon.tsv', sep='\t')
person_occupation_df = person_occupation_df[['name', 'countryName', 'occupation']]
# Shuffle and resize the data
person_occupation_df = person_occupation_df.sample(frac=1).reset_index(drop=True).head(100)

person_occupation_df

Unnamed: 0,name,countryName,occupation
0,Paul Berg,UNITED STATES,CHEMIST
1,Kirsten Dunst,UNITED STATES,ACTOR
2,Ali al-Masudi,Iraq,WRITER
3,Carlo Borromeo,ITALY,RELIGIOUS FIGURE
4,Edvard Grieg,NORWAY,COMPOSER
...,...,...,...
95,Deryck Whibley,CANADA,SINGER
96,Deco,Brazil,SOCCER PLAYER
97,Pope Pius V,ITALY,RELIGIOUS FIGURE
98,Chrétien de Troyes,FRANCE,WRITER


In [6]:
# Preprocessing 

def normalize_occupation(occupation):
    occupation = occupation.lower()
    if 'player' in occupation:
        occupation = occupation.replace('player', '')
    if 'actor' in occupation:
        return 'actor'
    if 'actress' in occupation:
        return 'actor'
    if 'artist' in occupation:
        return 'painter'
    
    return occupation
    

def are_equivalent(response, answer):
    response = normalize_occupation(response)
    answer = normalize_occupation(answer)

    if answer in response:
        return 1
    if response in answer:
        return 1
    
    if 'known' in response: # part of responses like: "unknown", "don't know"
        return 0
    
    return 2    # incase of an alternative answer

    

def does_gpt_know(person, occupation):
    query = f"what is the occupation of {person}?"
    response = ask_query(query)
    print(f"Person: {person} \t|\t Occupation: {occupation} \t|\t Response: {response}")
    return are_equivalent(response, occupation)




def preprocess(df):
    correct_answers = []
    wrong_answers = []
    for index, row in df.iterrows():
        x = does_gpt_know(row['name'], row['occupation'])
        if x==1:
            correct_answers.append(row)
        elif x==2:
            wrong_answers.append(row)
        print(len(correct_answers), len(wrong_answers))
        if len(correct_answers) >= 10 and len(wrong_answers)>=10:
            break
    correct_df = pd.DataFrame(correct_answers)
    wrong_df = pd.DataFrame(wrong_answers)
    return correct_df, wrong_df

In [7]:
known_df = preprocess(person_occupation_df)
known_df

Person: Paul Berg 	|	 Occupation: CHEMIST 	|	 Response: Biochemist.
1 0
Person: Kirsten Dunst 	|	 Occupation: ACTOR 	|	 Response: Actress.
2 0
Person: Ali al-Masudi 	|	 Occupation: WRITER 	|	 Response: Historian.
2 1
Person: Carlo Borromeo 	|	 Occupation: RELIGIOUS FIGURE 	|	 Response: Roman Catholic Cardinal
2 2
Person: Edvard Grieg 	|	 Occupation: COMPOSER 	|	 Response: Composer.
3 2
Person: Gerardus 't Hooft 	|	 Occupation: PHYSICIST 	|	 Response: Physicist.
4 2
Person: Antoninus Pius 	|	 Occupation: POLITICIAN 	|	 Response: Emperor of Rome.
4 3
Person: William IX, Duke of Aquitaine 	|	 Occupation: NOBLEMAN 	|	 Response: Poet and troubadour.
4 4
Person: Pope Martin I 	|	 Occupation: RELIGIOUS FIGURE 	|	 Response: Pope Martin I was a clergyman.
4 5
Person: Kateryna Bondarenko 	|	 Occupation: TENNIS PLAYER 	|	 Response: Professional tennis player.
5 5
Person: Yannis Ritsos 	|	 Occupation: SOCIAL ACTIVIST 	|	 Response: Poet.
5 6
Person: Johannes Andreas Grib Fibiger 	|	 Occupation: PHY

(                   name     countryName     occupation
 0             Paul Berg   UNITED STATES        CHEMIST
 1         Kirsten Dunst   UNITED STATES          ACTOR
 4          Edvard Grieg          NORWAY       COMPOSER
 5     Gerardus 't Hooft     NETHERLANDS      PHYSICIST
 9   Kateryna Bondarenko         UKRAINE  TENNIS PLAYER
 12            John Hurt  UNITED KINGDOM          ACTOR
 17    Fernando Verdasco           SPAIN  TENNIS PLAYER
 18          Danny Trejo   United States          ACTOR
 19          Emil Cioran         ROMANIA    PHILOSOPHER
 22     Georges Brassens          France         SINGER,
                                 name     countryName        occupation
 2                      Ali al-Masudi            Iraq            WRITER
 3                     Carlo Borromeo           ITALY  RELIGIOUS FIGURE
 6                     Antoninus Pius           ITALY        POLITICIAN
 7      William IX, Duke of Aquitaine          France          NOBLEMAN
 8                     

In [8]:
correct_df, b = known_df
correct_df = correct_df.sample(frac=1).reset_index(drop=True).head(20)
correct_df.rename(columns={'name': 'Entity', 'occupation': 'Target'}, inplace=True)
correct_df

Unnamed: 0,Entity,countryName,Target
0,Kirsten Dunst,UNITED STATES,ACTOR
1,Georges Brassens,France,SINGER
2,Emil Cioran,ROMANIA,PHILOSOPHER
3,Gerardus 't Hooft,NETHERLANDS,PHYSICIST
4,Fernando Verdasco,SPAIN,TENNIS PLAYER
5,Edvard Grieg,NORWAY,COMPOSER
6,Kateryna Bondarenko,UKRAINE,TENNIS PLAYER
7,Paul Berg,UNITED STATES,CHEMIST
8,Danny Trejo,United States,ACTOR
9,John Hurt,UNITED KINGDOM,ACTOR


In [11]:
# Creating a dataset with all the provided rows, excluding the numbers.

all_data = {
    'name': ['Paul Berg', 'Kirsten Dunst', 'Ali al-Masudi', 'Carlo Borromeo', 'Edvard Grieg', 'Gerardus \'t Hooft', 'Antoninus Pius', 'William IX, Duke of Aquitaine', 'Pope Martin I', 'Kateryna Bondarenko', 'Yannis Ritsos', 'Johannes Andreas Grib Fibiger', 'John Hurt', 'Valdis Zatlers', 'Gianfranco Zola', 'Željko Joksimović', 'Ish-bosheth', 'Fernando Verdasco', 'Danny Trejo', 'Emil Cioran', 'Edward VII of the United Kingdom', 'Daniel Jarque', 'Georges Brassens'],
    'occupation': ['CHEMIST', 'ACTOR', 'WRITER', 'RELIGIOUS FIGURE', 'COMPOSER', 'PHYSICIST', 'POLITICIAN', 'NOBLEMAN', 'RELIGIOUS FIGURE', 'TENNIS PLAYER', 'SOCIAL ACTIVIST', 'PHYSICIAN', 'ACTOR', 'POLITICIAN', 'SOCCER PLAYER', 'MUSICIAN', 'POLITICIAN', 'TENNIS PLAYER', 'ACTOR', 'PHILOSOPHER', 'NOBLEMAN', 'SOCCER PLAYER', 'SINGER'],
    'response': ['Biochemist', 'Actress', 'Historian', 'Roman Catholic Cardinal', 'Composer', 'Physicist', 'Emperor of Rome', 'Poet and troubadour', 'Pope Martin I was a clergyman', 'Professional tennis player', 'Poet', 'Physiologist', 'Actor', 'Valdis Zatlers is a physician', 'Football manager', 'Singer and songwriter', 'King of Israel', 'Tennis player', 'Actor', 'Philosopher', 'King of the United Kingdom', 'Professional footballer', 'Singer-songwriter']
}

# Convert the all_data dictionary into a pandas DataFrame
df_all = pd.DataFrame(all_data)
df_all


Unnamed: 0,name,occupation,response
0,Paul Berg,CHEMIST,Biochemist
1,Kirsten Dunst,ACTOR,Actress
2,Ali al-Masudi,WRITER,Historian
3,Carlo Borromeo,RELIGIOUS FIGURE,Roman Catholic Cardinal
4,Edvard Grieg,COMPOSER,Composer
5,Gerardus 't Hooft,PHYSICIST,Physicist
6,Antoninus Pius,POLITICIAN,Emperor of Rome
7,"William IX, Duke of Aquitaine",NOBLEMAN,Poet and troubadour
8,Pope Martin I,RELIGIOUS FIGURE,Pope Martin I was a clergyman
9,Kateryna Bondarenko,TENNIS PLAYER,Professional tennis player


In [15]:
li = []
for index, row in df_all.iterrows():
    if are_equivalent(row['response'], row['occupation']) == 2:
        li.append(row)

In [19]:

# Create the updated DataFrame
wrong_df = pd.DataFrame(li)
wrong_df.rename(columns={'name': 'Entity', 'response': 'Target', 'occupation': 'Correct'}, inplace=True)

wrong_df


Unnamed: 0,Entity,Correct,Target
2,Ali al-Masudi,WRITER,Historian
3,Carlo Borromeo,RELIGIOUS FIGURE,Roman Catholic Cardinal
6,Antoninus Pius,POLITICIAN,Emperor of Rome
7,"William IX, Duke of Aquitaine",NOBLEMAN,Poet and troubadour
8,Pope Martin I,RELIGIOUS FIGURE,Pope Martin I was a clergyman
10,Yannis Ritsos,SOCIAL ACTIVIST,Poet
11,Johannes Andreas Grib Fibiger,PHYSICIAN,Physiologist
13,Valdis Zatlers,POLITICIAN,Valdis Zatlers is a physician
14,Gianfranco Zola,SOCCER PLAYER,Football manager
15,Željko Joksimović,MUSICIAN,Singer and songwriter


In [None]:
result = occupation_analogy_testing(correct_df, wrong_df, are_equivalent)

In [22]:
# result_df = pd.DataFrame(columns=['Prompt', 'Analogical Answer', 'Technical Answer', 'Response', 'Status'], data=result)
result_df.to_csv('../results/result: person-occupation-adversial.csv')
result_df

NameError: name 'result_df' is not defined

In [27]:

# Creating a pandas DataFrame with the given prompts and their evaluations (Part 1)
data_part1 = {
    "Prompt": [
        "Kirsten Dunst:(ACTOR)::Ali al-Masudi:X",
        "Kirsten Dunst:(ACTOR)::Carlo Borromeo:X",
        "Kirsten Dunst:(ACTOR)::Antoninus Pius:X",
        "Kirsten Dunst:(ACTOR)::William IX, Duke of Aquitaine:X",
        "Kirsten Dunst:(ACTOR)::Pope Martin I:X",
        "Kirsten Dunst:(ACTOR)::Yannis Ritsos:X",
        "Kirsten Dunst:(ACTOR)::Johannes Andreas Grib Fibiger:X",
        "Kirsten Dunst:(ACTOR)::Valdis Zatlers:X",
        "Kirsten Dunst:(ACTOR)::Gianfranco Zola:X",
        "Kirsten Dunst:(ACTOR)::Željko Joksimović:X",
        "Kirsten Dunst:(ACTOR)::Ish-bosheth:X",
        "Kirsten Dunst:(ACTOR)::Edward VII of the United Kingdom:X",
        "Kirsten Dunst:(ACTOR)::Daniel Jarque:X",
        "Georges Brassens:(SINGER)::Ali al-Masudi:X",
        "Georges Brassens:(SINGER)::Carlo Borromeo:X",
        "Georges Brassens:(SINGER)::Antoninus Pius:X",
        "Georges Brassens:(SINGER)::William IX, Duke of Aquitaine:X"
    ],
    "Analogical Answer": [
        "Historian",
        "Roman Catholic Cardinal",
        "Emperor of Rome",
        "Poet and troubadour",
        "Pope Martin I was a clergyman",
        "Poet",
        "Physiologist",
        "Valdis Zatlers is a physician",
        "Football manager",
        "Singer and songwriter",
        "King of Israel",
        "King of the United Kingdom",
        "Professional footballer",
        "Historian",
        "Roman Catholic Cardinal",
        "Emperor of Rome",
        "Poet and troubadour"
    ],
    "Technical Answer": [
        "WRITER",
        "RELIGIOUS FIGURE",
        "POLITICIAN",
        "NOBLEMAN",
        "RELIGIOUS FIGURE",
        "SOCIAL ACTIVIST",
        "PHYSICIAN",
        "POLITICIAN",
        "SOCCER PLAYER",
        "MUSICIAN",
        "POLITICIAN",
        "NOBLEMAN",
        "SOCCER PLAYER",
        "WRITER",
        "RELIGIOUS FIGURE",
        "POLITICIAN",
        "NOBLEMAN"
    ],
    "Response": [
        "HISTORIAN",
        "Saint",
        "Roman Emperor",
        "Troubadour",
        "POPE",
        "POET",
        "NOBEL PRIZE LAUREATE",
        "POLITICIAN",
        "FOOTBALLER",
        "MUSICIAN",
        "King",
        "MONARCH",
        "FOOTBALLER",
        "HISTORIAN",
        "Saint",
        "Roman emperor",
        "POET"
    ],
    "Status": [
        "Analogically Correct",
        "Fully Wrong",
        "Fully Wrong",
        "Analogically Correct",
        "Analogically Correct",
        "Analogically Correct",
        "Fully Wrong",
        "Technically Correct",
        "Fully Wrong",
        "Technically Correct",
        "Analogically Correct",
        "Fully Wrong",
        "Fully Wrong",
        "Analogically Correct",
        "Fully Wrong",
        "Fully Wrong",
        "Fully Wrong"
    ]
}

# Converting to DataFrame
df_part1 = pd.DataFrame(data_part1)
df_part1


Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Kirsten Dunst:(ACTOR)::Ali al-Masudi:X,Historian,WRITER,HISTORIAN,Analogically Correct
1,Kirsten Dunst:(ACTOR)::Carlo Borromeo:X,Roman Catholic Cardinal,RELIGIOUS FIGURE,Saint,Fully Wrong
2,Kirsten Dunst:(ACTOR)::Antoninus Pius:X,Emperor of Rome,POLITICIAN,Roman Emperor,Fully Wrong
3,"Kirsten Dunst:(ACTOR)::William IX, Duke of Aqu...",Poet and troubadour,NOBLEMAN,Troubadour,Analogically Correct
4,Kirsten Dunst:(ACTOR)::Pope Martin I:X,Pope Martin I was a clergyman,RELIGIOUS FIGURE,POPE,Analogically Correct
5,Kirsten Dunst:(ACTOR)::Yannis Ritsos:X,Poet,SOCIAL ACTIVIST,POET,Analogically Correct
6,Kirsten Dunst:(ACTOR)::Johannes Andreas Grib F...,Physiologist,PHYSICIAN,NOBEL PRIZE LAUREATE,Fully Wrong
7,Kirsten Dunst:(ACTOR)::Valdis Zatlers:X,Valdis Zatlers is a physician,POLITICIAN,POLITICIAN,Technically Correct
8,Kirsten Dunst:(ACTOR)::Gianfranco Zola:X,Football manager,SOCCER PLAYER,FOOTBALLER,Fully Wrong
9,Kirsten Dunst:(ACTOR)::Željko Joksimović:X,Singer and songwriter,MUSICIAN,MUSICIAN,Technically Correct


In [28]:
# Creating a pandas DataFrame with the given prompts and their evaluations (Part 2)
data_part2 = {
    "Prompt": [
        "Georges Brassens:(SINGER)::Pope Martin I:X",
        "Georges Brassens:(SINGER)::Yannis Ritsos:X",
        "Georges Brassens:(SINGER)::Johannes Andreas Grib Fibiger:X",
        "Georges Brassens:(SINGER)::Valdis Zatlers:X",
        "Georges Brassens:(SINGER)::Gianfranco Zola:X",
        "Georges Brassens:(SINGER)::Željko Joksimović:X",
        "Georges Brassens:(SINGER)::Ish-bosheth:X",
        "Georges Brassens:(SINGER)::Edward VII of the United Kingdom:X",
        "Georges Brassens:(SINGER)::Daniel Jarque:X",
        "Emil Cioran:(PHILOSOPHER)::Ali al-Masudi:X",
        "Emil Cioran:(PHILOSOPHER)::Carlo Borromeo:X",
        "Emil Cioran:(PHILOSOPHER)::Antoninus Pius:X",
        "Emil Cioran:(PHILOSOPHER)::William IX, Duke of Aquitaine:X",
        "Emil Cioran:(PHILOSOPHER)::Pope Martin I:X",
        "Emil Cioran:(PHILOSOPHER)::Yannis Ritsos:X",
        "Emil Cioran:(PHILOSOPHER)::Johannes Andreas Grib Fibiger:X",
        "Emil Cioran:(PHILOSOPHER)::Valdis Zatlers:X"
    ],
    "Analogical Answer": [
        "Pope Martin I was a clergyman",
        "Poet",
        "Physiologist",
        "Valdis Zatlers is a physician",
        "Football manager",
        "Singer and songwriter",
        "King of Israel",
        "King of the United Kingdom",
        "Professional footballer",
        "Historian",
        "Roman Catholic Cardinal",
        "Emperor of Rome",
        "Poet and troubadour",
        "Pope Martin I was a clergyman",
        "Poet",
        "Physiologist",
        "Valdis Zatlers is a physician"
    ],
    "Technical Answer": [
        "RELIGIOUS FIGURE",
        "SOCIAL ACTIVIST",
        "PHYSICIAN",
        "POLITICIAN",
        "SOCCER PLAYER",
        "MUSICIAN",
        "POLITICIAN",
        "NOBLEMAN",
        "SOCCER PLAYER",
        "WRITER",
        "RELIGIOUS FIGURE",
        "POLITICIAN",
        "NOBLEMAN",
        "RELIGIOUS FIGURE",
        "SOCIAL ACTIVIST",
        "PHYSICIAN",
        "POLITICIAN"
    ],
    "Response": [
        "Pope Martin I is a religious figure.",
        "POET",
        "Nobel laureate",
        "President",
        "SOCCER PLAYER",
        "MUSICIAN",
        "Saul (SON OF KING SAUL)",
        "KING",
        "Daniel Jarque:(FOOTBALLER)",
        "Historian.",
        "Saint",
        "Roman emperor",
        "POET",
        "Pope Martin I:(SAINT)",
        "POET",
        "Physiologist",
        "President"
    ],
    "Status": [
        "Technically Correct",
        "Analogically Correct",
        "Fully Wrong",
        "Fully Wrong",
        "Technically Correct",
        "Technically Correct",
        "Fully Wrong",
        "Analogically Correct",
        "Fully Wrong",
        "Analogically Correct",
        "Fully Wrong",
        "Fully Wrong",
        "Analogically Correct",
        "Fully Wrong",
        "Analogically Correct",
        "Analogically Correct",
        "Fully Wrong"
    ]
}

# Converting to DataFrame
df_part2 = pd.DataFrame(data_part2)
df_part2


Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Georges Brassens:(SINGER)::Pope Martin I:X,Pope Martin I was a clergyman,RELIGIOUS FIGURE,Pope Martin I is a religious figure.,Technically Correct
1,Georges Brassens:(SINGER)::Yannis Ritsos:X,Poet,SOCIAL ACTIVIST,POET,Analogically Correct
2,Georges Brassens:(SINGER)::Johannes Andreas Gr...,Physiologist,PHYSICIAN,Nobel laureate,Fully Wrong
3,Georges Brassens:(SINGER)::Valdis Zatlers:X,Valdis Zatlers is a physician,POLITICIAN,President,Fully Wrong
4,Georges Brassens:(SINGER)::Gianfranco Zola:X,Football manager,SOCCER PLAYER,SOCCER PLAYER,Technically Correct
5,Georges Brassens:(SINGER)::Željko Joksimović:X,Singer and songwriter,MUSICIAN,MUSICIAN,Technically Correct
6,Georges Brassens:(SINGER)::Ish-bosheth:X,King of Israel,POLITICIAN,Saul (SON OF KING SAUL),Fully Wrong
7,Georges Brassens:(SINGER)::Edward VII of the U...,King of the United Kingdom,NOBLEMAN,KING,Analogically Correct
8,Georges Brassens:(SINGER)::Daniel Jarque:X,Professional footballer,SOCCER PLAYER,Daniel Jarque:(FOOTBALLER),Fully Wrong
9,Emil Cioran:(PHILOSOPHER)::Ali al-Masudi:X,Historian,WRITER,Historian.,Analogically Correct


In [30]:
df_merged = pd.concat([df_part1, df_part2], ignore_index=True)
# result_df = pd.DataFrame(columns=['Prompt', 'Analogical Answer', 'Technical Answer', 'Response', 'Status'], data=result)
df_merged.to_csv('../results/result: person-occupation-adversial.csv')
df_merged

Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Kirsten Dunst:(ACTOR)::Ali al-Masudi:X,Historian,WRITER,HISTORIAN,Analogically Correct
1,Kirsten Dunst:(ACTOR)::Carlo Borromeo:X,Roman Catholic Cardinal,RELIGIOUS FIGURE,Saint,Fully Wrong
2,Kirsten Dunst:(ACTOR)::Antoninus Pius:X,Emperor of Rome,POLITICIAN,Roman Emperor,Fully Wrong
3,"Kirsten Dunst:(ACTOR)::William IX, Duke of Aqu...",Poet and troubadour,NOBLEMAN,Troubadour,Analogically Correct
4,Kirsten Dunst:(ACTOR)::Pope Martin I:X,Pope Martin I was a clergyman,RELIGIOUS FIGURE,POPE,Analogically Correct
5,Kirsten Dunst:(ACTOR)::Yannis Ritsos:X,Poet,SOCIAL ACTIVIST,POET,Analogically Correct
6,Kirsten Dunst:(ACTOR)::Johannes Andreas Grib F...,Physiologist,PHYSICIAN,NOBEL PRIZE LAUREATE,Fully Wrong
7,Kirsten Dunst:(ACTOR)::Valdis Zatlers:X,Valdis Zatlers is a physician,POLITICIAN,POLITICIAN,Technically Correct
8,Kirsten Dunst:(ACTOR)::Gianfranco Zola:X,Football manager,SOCCER PLAYER,FOOTBALLER,Fully Wrong
9,Kirsten Dunst:(ACTOR)::Željko Joksimović:X,Singer and songwriter,MUSICIAN,MUSICIAN,Technically Correct
