In [1]:
import pandas as pd
import numpy as np
from time import sleep
from openai import OpenAI
from private_data import API_KEY
import re

In [2]:
# Main query function
client = OpenAI(api_key=API_KEY)
def ask_query(query):
    my_query = [ {"role": "system", "content": "Give direct answer without any explanation."},
        {"role": "user", "content": query} ] 
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo", messages=my_query
    )
    return str(completion.choices[0].message.content)

In [4]:
# Loading Dataset 
person_death_df = pd.read_csv('../data/celebrity_deaths_4.csv', encoding = 'MacRoman')
person_death_df = person_death_df[['name', 'death_month' , 'death_year']]
person_death_df['death_date'] = person_death_df['death_month'].astype(str) + ' ' + person_death_df['death_year'].astype(str)
# # Shuffle and resize the data
person_death_df = person_death_df.sample(frac=1).reset_index(drop=True).head(100)

person_death_df.head(10)

Unnamed: 0,name,death_month,death_year,death_date
0,Arnold Fine,September,2014,September 2014
1,Beverly Fawell,June,2013,June 2013
2,Raymond Daniels,January,2008,January 2008
3,Dickie Jeeps,October,2016,October 2016
4,Collin Wilcox,October,2009,October 2009
5,Larry Turner,November,2009,November 2009
6,Motoichi Kumagai,November,2010,November 2010
7,Bob Welch,June,2012,June 2012
8,JoaquÃ_n PiÃ±a Batllevell,July,2013,July 2013
9,Mimi Darwish,December,2011,December 2011


In [10]:
# Preprocessing 

def normalize(entity):
    entity = entity.lower()
    
    return entity
    

def are_equivalent(response, answer):
    response = normalize(response)
    answer = normalize(answer)

    if answer in response:
        return True
    if response in answer:
        return True
    return False

def contains_date(response):
    x = response.replace('-', ' ')
    x = x.replace('.', ' ')
    
    for w in x.split():
        if len(w)==4 and w.isnumeric():
            return True
    return False
    
def does_gpt_know(person, deathdate):
    # check if birthdate is nan
    if pd.isnull(deathdate) or pd.isnull(person):
        return 0
    
    query = f"what is the death date(Month YYYY) of {person}?"
    response = ask_query(query)
    print(f"Person: {person} \t|\t Death Date: {deathdate} \t|\t Response: {response}")
    
    if are_equivalent(response, deathdate):
        return 1
    elif contains_date(response):
        return 2
    
    return 0
    



def preprocess(df):
    correct_answers = []
    wrong_answers = []
    for index, row in df.iterrows():
        x = does_gpt_know(row['name'], row['death_date'])
        if x==1:
            correct_answers.append(row)
        elif x==2:
            ###############################################
            # add new column: reply
            wrong_answers.append(row)
        print(len(correct_answers), len(wrong_answers))
        if len(correct_answers) >= 10 and len(wrong_answers)>=10:
            break
    correct_df = pd.DataFrame(correct_answers)
    wrong_df = pd.DataFrame(wrong_answers)
    return correct_df, wrong_df

In [11]:
known_df = preprocess(person_death_df)
known_df

Person: Arnold Fine 	|	 Death Date: September 2014 	|	 Response: July 2012
0 1
Person: Beverly Fawell 	|	 Death Date: June 2013 	|	 Response: August 2012
0 2
Person: Raymond Daniels 	|	 Death Date: January 2008 	|	 Response: Raymond Daniels passed away in June 2022.
0 3
Person: Dickie Jeeps 	|	 Death Date: October 2016 	|	 Response: October 2016
1 3
Person: Collin Wilcox 	|	 Death Date: October 2009 	|	 Response: October 2009.
2 3
Person: Larry Turner 	|	 Death Date: November 2009 	|	 Response: December 2020
2 4
Person: Motoichi Kumagai 	|	 Death Date: November 2010 	|	 Response: February 2020
2 5
Person: Bob Welch 	|	 Death Date: June 2012 	|	 Response: June 2012.
3 5
Person: JoaquÃ_n PiÃ±a Batllevell 	|	 Death Date: July 2013 	|	 Response: October 2010
3 6
Person: Mimi Darwish 	|	 Death Date: December 2011 	|	 Response: June 2021.
3 7
Person: Vladimir Kesarev 	|	 Death Date: January 2015 	|	 Response: April 1966
3 8
Person: Patrick Francis Sheehan 	|	 Death Date: November 2012 	|	 Re

(                      name death_month  death_year      death_date
 3             Dickie Jeeps     October        2016    October 2016
 4            Collin Wilcox     October        2009    October 2009
 7                Bob Welch        June        2012       June 2012
 21             Molly Glynn   September        2014  September 2014
 22          Vilgot SjÃ¶man       April        2006      April 2006
 23             Dennis Byrd     October        2016    October 2016
 26  Guillermo Luksic Craig       March        2013      March 2013
 38        Reginald Turnill    February        2013   February 2013
 39             Clay Felker        July        2008       July 2008
 41              Bowie Kuhn       March        2007      March 2007,
                               name death_month  death_year      death_date
 0                      Arnold Fine   September        2014  September 2014
 1                   Beverly Fawell        June        2013       June 2013
 2                  Ray

In [13]:
correct_df, b = known_df
correct_df = correct_df.sample(frac=1).reset_index(drop=True).head(20)
correct_df.rename(columns={'name': 'Entity', 'death_date': 'Target'}, inplace=True)
correct_df

Unnamed: 0,Entity,death_month,death_year,Target
0,Bowie Kuhn,March,2007,March 2007
1,Reginald Turnill,February,2013,February 2013
2,Molly Glynn,September,2014,September 2014
3,Vilgot SjÃ¶man,April,2006,April 2006
4,Guillermo Luksic Craig,March,2013,March 2013
5,Bob Welch,June,2012,June 2012
6,Collin Wilcox,October,2009,October 2009
7,Dennis Byrd,October,2016,October 2016
8,Dickie Jeeps,October,2016,October 2016
9,Clay Felker,July,2008,July 2008


In [21]:


# Data
data = {
    "Person": ["Arnold Fine", "Beverly Fawell", "Raymond Daniels", "Collin Wilcox",
               "Larry Turner", "Motoichi Kumagai", "Joaquín Piña Batllevell", "Mimi Darwish",
               "Vladimir Kesarev", "Patrick Francis Sheehan", "George Ardisson",
               "Marian Przykucki", "Joe Rogers"],
    "Death Date": ["September 2014", "June 2013", "January 2008", "October 2009",
                   "November 2009", "November 2010", "July 2013", "December 2011",
                   "January 2015", "November 2012", "December 2014", "October 2009",
                   "October 2013"],
    "Response": ["July 2012", "August 2012", "June 2022",
                 "October 2009", "December 2020", "February 2020", "October 2010",
                 "June 2021", "April 1966", "March 2024", "August 2014", "December 2019",
                 "June 2021"]
}

# Create the updated DataFrame
wrong_df = pd.DataFrame(data)
wrong_df.rename(columns={'Person': 'Entity', 'Death Date': 'Correct', 'Response': 'Target'}, inplace=True)

wrong_df


Unnamed: 0,Entity,Correct,Target
0,Arnold Fine,September 2014,July 2012
1,Beverly Fawell,June 2013,August 2012
2,Raymond Daniels,January 2008,June 2022
3,Collin Wilcox,October 2009,October 2009
4,Larry Turner,November 2009,December 2020
5,Motoichi Kumagai,November 2010,February 2020
6,Joaquín Piña Batllevell,July 2013,October 2010
7,Mimi Darwish,December 2011,June 2021
8,Vladimir Kesarev,January 2015,April 1966
9,Patrick Francis Sheehan,November 2012,March 2024


In [22]:
def date_analogy_testing(df1, df2, equiv_func):
    result = []
    n, m = len(df1), len(df2)
    
    for i in range(20):
        entity_1 = df1.iloc[i%n]['Entity']
        target_1 = df1.iloc[i%n]['Target']
        
        j = i%m 
        entity_2 = df2.iloc[j]['Entity']
        target_2 = df2.iloc[j]['Target']
        correct_ans = df2.iloc[j]['Correct']
        
        prompt = f"{entity_1}:({target_1})::{entity_2}:X; what is X?"
        
        response = ask_query(prompt)
        
        status = "Fully Wrong"
        if equiv_func(response, target_2):
            status = "Analogically Correct"
        elif equiv_func(response, correct_ans):
            status = "Technically Correct"
        
        result.append({'Prompt': prompt, 'Analogical Answer': target_2, 'Technical Answer': correct_ans , 'GPT-reply': response, 'Status': status})
        print(f"Prompt: {prompt} \t|\t Analogical Answer: {target_2}, Technical Answer: {correct_ans} \t|\t Response: {response} \t|\t Status: {status}")
    
    return result

In [23]:
result = date_analogy_testing(correct_df, wrong_df, are_equivalent)

Prompt: Bowie Kuhn:(March 2007)::Arnold Fine:X; what is X? 	|	 Analogical Answer: July 2012, Technical Answer: September 2014 	|	 Response: April 2008 	|	 Status: Fully Wrong
Prompt: Reginald Turnill:(February 2013)::Beverly Fawell:X; what is X? 	|	 Analogical Answer: August 2012, Technical Answer: June 2013 	|	 Response: December 2012 	|	 Status: Fully Wrong
Prompt: Molly Glynn:(September 2014)::Raymond Daniels:X; what is X? 	|	 Analogical Answer: June 2022, Technical Answer: January 2008 	|	 Response: September 2014 	|	 Status: Fully Wrong
Prompt: Vilgot SjÃ¶man:(April 2006)::Collin Wilcox:X; what is X? 	|	 Analogical Answer: October 2009, Technical Answer: October 2009 	|	 Response: To Kill a Mockingbird 	|	 Status: Fully Wrong
Prompt: Guillermo Luksic Craig:(March 2013)::Larry Turner:X; what is X? 	|	 Analogical Answer: December 2020, Technical Answer: November 2009 	|	 Response: Larry Turner died. 	|	 Status: Fully Wrong
Prompt: Bob Welch:(June 2012)::Motoichi Kumagai:X; what is X

In [26]:
result_df = pd.DataFrame(columns=['Prompt', 'Analogical Answer', 'Technical Answer', 'GPT-reply', 'Status'], data=result)
result_df.to_csv('../results/result: person-deathdate-adversial.csv')
result_df

Unnamed: 0,Prompt,Analogical Answer,Technical Answer,GPT-reply,Status
0,Bowie Kuhn:(March 2007)::Arnold Fine:X; what i...,July 2012,September 2014,April 2008,Fully Wrong
1,Reginald Turnill:(February 2013)::Beverly Fawe...,August 2012,June 2013,December 2012,Fully Wrong
2,Molly Glynn:(September 2014)::Raymond Daniels:...,June 2022,January 2008,September 2014,Fully Wrong
3,Vilgot SjÃ¶man:(April 2006)::Collin Wilcox:X; ...,October 2009,October 2009,To Kill a Mockingbird,Fully Wrong
4,Guillermo Luksic Craig:(March 2013)::Larry Tur...,December 2020,November 2009,Larry Turner died.,Fully Wrong
5,Bob Welch:(June 2012)::Motoichi Kumagai:X; wha...,February 2020,November 2010,May 2021,Fully Wrong
6,Collin Wilcox:(October 2009)::Joaquín Piña Bat...,October 2010,July 2013,January 1968,Fully Wrong
7,Dennis Byrd:(October 2016)::Mimi Darwish:X; wh...,June 2021,December 2011,December 2017,Fully Wrong
8,Dickie Jeeps:(October 2016)::Vladimir Kesarev:...,April 1966,January 2015,June 2021,Fully Wrong
9,Clay Felker:(July 2008)::Patrick Francis Sheeh...,March 2024,November 2012,May 1988,Fully Wrong


In [65]:
# Define the data for the DataFrame
data = {
    "Prompt": [
        "Jorg Haider:(1950-01-26)::George Vecsey:X",
        "Jorg Haider:(1950-01-26)::Alicia Keys:X",
        "Jorg Haider:(1950-01-26)::William T Esrey:X",
        "Jorg Haider:(1950-01-26)::Arthur Laurents:X",
        "Jorg Haider:(1950-01-26)::Christoph von Dohnanyi:X",
        "Jorg Haider:(1950-01-26)::Donald L Evans:X",
        "Jorg Haider:(1950-01-26)::Kathleen Hall Jamieson:X",
        "Jorg Haider:(1950-01-26)::Elizabeth S Grubman:X",
        "Petr Korda:(1968-01-23)::George Vecsey:X",
        "Petr Korda:(1968-01-23)::Alicia Keys:X",
        "Petr Korda:(1968-01-23)::William T Esrey:X",
        "Petr Korda:(1968-01-23)::Arthur Laurents:X",
        "Petr Korda:(1968-01-23)::Christoph von Dohnanyi:X",
        "Petr Korda:(1968-01-23)::Donald L Evans:X",
        "Petr Korda:(1968-01-23)::Kathleen Hall Jamieson:X",
        "Petr Korda:(1968-01-23)::Elizabeth S Grubman:X",
        "Ariel Sharon:(1928-02-26)::George Vecsey:X",
        "Ariel Sharon:(1928-02-26)::Alicia Keys:X",
        "Ariel Sharon:(1928-02-26)::William T Esrey:X"
    ],
    "Analogical Answer": [
        "1939-07-04", "1980-01-25", "1940-01-01", "1918-07-14", "1929-09-08",
        "1946-07-26", "1946-01-01", "1971-01-30", "1939-07-04", "1980-01-25",
        "1940-01-01", "1918-07-14", "1929-09-08", "1946-07-26", "1946-01-01",
        "1971-01-30", "1939-07-04", "1980-01-25", "1940-01-01"
    ],
    "Technical Answer": [
        "1940-07-04", "1981-01-25", "1940-11-15", "1917-07-14", "1929-06-08",
        "1946-07-27", "1946-02-22", "1976-01-30", "1940-07-04", "1981-01-25",
        "1940-11-15", "1917-07-14", "1929-06-08", "1946-07-27", "1946-02-22",
        "1976-01-30", "1940-07-04", "1981-01-25", "1940-11-15"
    ],
    "Response": [
        "X is the birth date of George Vecsey, which is not provided in the question.",
        "1975-01-25", "Unknown", "Arthur Laurents:(1917-07-14)", "Unknown",
        "X is the date of birth of Donald L Evans.", "There is not enough information to determine what X represents.",
        "X is the birthdate of Elizabeth S Grubman, which is unknown based on the provided information.",
        "X is unknown or unspecified.", "1981-01-25", "Unknown.", "Unknown",
        "1949-09-08", "X is Donald L Evans's birth date.", "X is unknown or not given in the provided information.",
        "Unknown", "World War II or 1939-1945", "-\nnot applicable.", "Unknown or not given."
    ],
    "Status": [
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Technically Correct", "Fully Wrong",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong", "Technically Correct",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong"
    ]
}

# Create the DataFrame
result_df = pd.DataFrame(data)
result_df




Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Jorg Haider:(1950-01-26)::George Vecsey:X,1939-07-04,1940-07-04,"X is the birth date of George Vecsey, which is...",Fully Wrong
1,Jorg Haider:(1950-01-26)::Alicia Keys:X,1980-01-25,1981-01-25,1975-01-25,Fully Wrong
2,Jorg Haider:(1950-01-26)::William T Esrey:X,1940-01-01,1940-11-15,Unknown,Fully Wrong
3,Jorg Haider:(1950-01-26)::Arthur Laurents:X,1918-07-14,1917-07-14,Arthur Laurents:(1917-07-14),Technically Correct
4,Jorg Haider:(1950-01-26)::Christoph von Dohnan...,1929-09-08,1929-06-08,Unknown,Fully Wrong
5,Jorg Haider:(1950-01-26)::Donald L Evans:X,1946-07-26,1946-07-27,X is the date of birth of Donald L Evans.,Fully Wrong
6,Jorg Haider:(1950-01-26)::Kathleen Hall Jamies...,1946-01-01,1946-02-22,There is not enough information to determine w...,Fully Wrong
7,Jorg Haider:(1950-01-26)::Elizabeth S Grubman:X,1971-01-30,1976-01-30,"X is the birthdate of Elizabeth S Grubman, whi...",Fully Wrong
8,Petr Korda:(1968-01-23)::George Vecsey:X,1939-07-04,1940-07-04,X is unknown or unspecified.,Fully Wrong
9,Petr Korda:(1968-01-23)::Alicia Keys:X,1980-01-25,1981-01-25,1981-01-25,Technically Correct
