In [30]:
import pandas as pd
import numpy as np
from time import sleep
from openai import OpenAI
from private_data import API_KEY
import re

In [7]:
# Main query function
client = OpenAI(api_key=API_KEY)
def ask_query(query):
    my_query = [ {"role": "system", "content": "Give direct answer without any explanation."},
        {"role": "user", "content": query} ] 
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo", messages=my_query
    )
    return str(completion.choices[0].message.content)

In [62]:
def date_analogy_testing(df1, df2, equiv_func):
    result = []
    
    for i in range(len(df1)):
        entity_1 = df1.iloc[i]['Entity']
        target_1 = df1.iloc[i]['Target']

        for j in range(len(df2)):
        
            entity_2 = df2.iloc[j]['Entity']
            target_2 = df2.iloc[j]['Target']
            correct_ans = df2.iloc[j]['Correct']
            
            prompt = f"{entity_1}:({target_1})::{entity_2}:X; what is X?"
           
            response = ask_query(prompt)
            
            status = "Fully Wrong"
            if equiv_func(response, target_2):
                status = "Analogically Correct"
            elif equiv_func(response, correct_ans):
                status = "Technically Correct"
            
            result.append({'Prompt': prompt, 'Analogical Answer': target_2, 'Technical Answer': correct_ans , 'GPT-reply': response, 'Status': status})
            print(f"Prompt: {prompt} \t|\t Analogical Answer: {target_2}, Technical Answer: {correct_ans} \t|\t Response: {response} \t|\t Status: {status}")
    
    return result

In [9]:
# Loading Dataset 
person_birthdate_df = pd.read_csv('../data/famous-birthdates.csv', sep = ' ')
person_birthdate_df = person_birthdate_df[['firstname', 'lastname' , 'birthDate']]
person_birthdate_df['name'] = person_birthdate_df['firstname'] + ' ' + person_birthdate_df['lastname']
# Shuffle and resize the data
person_birthdate_df = person_birthdate_df.sample(frac=1).reset_index(drop=True).head(100)

person_birthdate_df.head(10)

Unnamed: 0,firstname,lastname,birthDate,name
0,,Rembrandt Harmenszoon van Rijn,1606-07-15,
1,Jack,Kevorkian,1928-05-26,Jack Kevorkian
2,Richard,Meier,1934-10-12,Richard Meier
3,Joe,Montana,1956-06-11,Joe Montana
4,George,Vecsey,1939-07-04,George Vecsey
5,Robert,Coover,1932-02-04,Robert Coover
6,Alicia,Keys,1980-01-25,Alicia Keys
7,Sam,Sheppard,,Sam Sheppard
8,Pete,Seeger,1919-05-03,Pete Seeger
9,Tim,Hardaway,1966-09-01,Tim Hardaway


In [46]:
# Preprocessing 

def normalize(entity):
    entity = entity.lower()
    
    return entity
    

def are_equivalent(response, answer):
    response = normalize(response)
    answer = normalize(answer)

    if answer in response:
        return True
    if response in answer:
        return True
    return False

def contains_date(response):
    x = response.replace('-', ' ')
    
    for w in x.split():
        if len(w)==4 and w.isnumeric():
            return True
    return False
    
def does_gpt_know(person, birthdate):
    # check if birthdate is nan
    if pd.isnull(birthdate) or pd.isnull(person):
        return 0
    
    query = f"what is the birthdate(YYYY-MM-DD) of {person}?"
    response = ask_query(query)
    print(f"Person: {person} \t|\t Birth Date: {birthdate} \t|\t Response: {response}")
    
    if are_equivalent(response, birthdate):
        return 1
    elif contains_date(response):
        return 2
    
    return 0
    



def preprocess(df):
    correct_answers = []
    wrong_answers = []
    for index, row in df.iterrows():
        x = does_gpt_know(row['name'], row['birthDate'])
        if x==1:
            correct_answers.append(row)
        elif x==2:
            wrong_answers.append(row)
        print(len(correct_answers), len(wrong_answers))
        if len(correct_answers) >= 10 and len(wrong_answers)>=10:
            break
    correct_df = pd.DataFrame(correct_answers)
    wrong_df = pd.DataFrame(wrong_answers)
    return correct_df, wrong_df

In [47]:
known_df = preprocess(person_birthdate_df)
known_df

0 0
Person: Jack Kevorkian 	|	 Birth Date: 1928-05-26 	|	 Response: 1928-05-26
1 0
Person: Richard Meier 	|	 Birth Date: 1934-10-12 	|	 Response: 1934-10-12
2 0
Person: Joe Montana 	|	 Birth Date: 1956-06-11 	|	 Response: 1956-06-11
3 0
Person: George Vecsey 	|	 Birth Date: 1939-07-04 	|	 Response: 1940-07-04
3 1
Person: Robert Coover 	|	 Birth Date: 1932-02-04 	|	 Response: 1932-02-04
4 1
Person: Alicia Keys 	|	 Birth Date: 1980-01-25 	|	 Response: 1981-01-25
4 2
4 2
Person: Pete Seeger 	|	 Birth Date: 1919-05-03 	|	 Response: 1919-05-03
5 2
Person: Tim Hardaway 	|	 Birth Date: 1966-09-01 	|	 Response: 1966-09-01
6 2
Person: Clarence Thomas 	|	 Birth Date: 1948-06-23 	|	 Response: 1948-06-23
7 2
Person: Richard J Codey 	|	 Birth Date: 1946-11-27 	|	 Response: 1946-11-27
8 2
8 2
Person: William T Esrey 	|	 Birth Date: 1940-01-01 	|	 Response: 1940-11-15
8 3
Person: David E Bonior 	|	 Birth Date: 1945-06-06 	|	 Response: 1945-06-06
9 3
9 3
Person: Mark Wahlberg 	|	 Birth Date: 1971-06-0

(       firstname   lastname   birthDate                  name
 1           Jack  Kevorkian  1928-05-26        Jack Kevorkian
 2        Richard      Meier  1934-10-12         Richard Meier
 3            Joe    Montana  1956-06-11           Joe Montana
 5         Robert     Coover  1932-02-04         Robert Coover
 8           Pete     Seeger  1919-05-03           Pete Seeger
 ..           ...        ...         ...                   ...
 94          Jane     Harman  1945-06-28           Jane Harman
 95         David    Cameron  1966-10-09         David Cameron
 96        Robert      Frost  1874-03-26          Robert Frost
 97        Virgil    Thomson  1896-11-25        Virgil Thomson
 99  David Foster    Wallace  1962-02-21  David Foster Wallace
 
 [78 rows x 4 columns],
         firstname  lastname   birthDate                    name
 4          George    Vecsey  1939-07-04           George Vecsey
 6          Alicia      Keys  1980-01-25             Alicia Keys
 13      William T     

In [60]:
correct_df, b = known_df
correct_df = correct_df.sample(frac=1).reset_index(drop=True).head(20)
correct_df.rename(columns={'name': 'Entity', 'birthDate': 'Target'}, inplace=True)
correct_df

Unnamed: 0,firstname,lastname,Target,Entity
0,Jorg,Haider,1950-01-26,Jorg Haider
1,Petr,Korda,1968-01-23,Petr Korda
2,Ariel,Sharon,1928-02-26,Ariel Sharon
3,Brett,Favre,1969-10-10,Brett Favre
4,Henry G,Cisneros,1947-06-11,Henry G Cisneros
5,Janice Rogers,Brown,1949-05-11,Janice Rogers Brown
6,Christopher S,Bond,1939-03-06,Christopher S Bond
7,Bill,Clinton,1946-08-19,Bill Clinton
8,Richard,Meier,1934-10-12,Richard Meier
9,Robert,Coover,1932-02-04,Robert Coover


In [61]:
# Update the data with additional entries
data = {
    'Person': [
        'George Vecsey', 'Alicia Keys', 'William T Esrey', 'Arthur Laurents',
        'Christoph von Dohnanyi', 'Donald L Evans', 'Kathleen Hall Jamieson', 'Elizabeth S Grubman'
    ],
    'Birth Date': [
        '1939-07-04', '1980-01-25', '1940-01-01', '1918-07-14',
        '1929-09-08', '1946-07-26', '1946-01-01', '1971-01-30'
    ],
    'Response': [
        '1940-07-04', '1981-01-25', '1940-11-15', '1917-07-14',
        '1929-06-08', '1946-07-27', '1946-02-22', '1976-01-30'
    ]
}

# Create the updated DataFrame
wrong_df = pd.DataFrame(data)
wrong_df.rename(columns={'Person': 'Entity', 'Birth Date': 'Target', 'Response': 'Correct'}, inplace=True)

wrong_df


Unnamed: 0,Entity,Target,Correct
0,George Vecsey,1939-07-04,1940-07-04
1,Alicia Keys,1980-01-25,1981-01-25
2,William T Esrey,1940-01-01,1940-11-15
3,Arthur Laurents,1918-07-14,1917-07-14
4,Christoph von Dohnanyi,1929-09-08,1929-06-08
5,Donald L Evans,1946-07-26,1946-07-27
6,Kathleen Hall Jamieson,1946-01-01,1946-02-22
7,Elizabeth S Grubman,1971-01-30,1976-01-30


In [63]:
result = date_analogy_testing(correct_df, wrong_df, are_equivalent)

Prompt: Jorg Haider:(1950-01-26)::George Vecsey:X; what is X? 	|	 Analogical Answer: 1939-07-04, Technical Answer: 1940-07-04 	|	 Response: X is the birth date of George Vecsey, which is not provided in the question. 	|	 Status: Fully Wrong
Prompt: Jorg Haider:(1950-01-26)::Alicia Keys:X; what is X? 	|	 Analogical Answer: 1980-01-25, Technical Answer: 1981-01-25 	|	 Response: 1975-01-25 	|	 Status: Fully Wrong
Prompt: Jorg Haider:(1950-01-26)::William T Esrey:X; what is X? 	|	 Analogical Answer: 1940-01-01, Technical Answer: 1940-11-15 	|	 Response: Unknown 	|	 Status: Fully Wrong
Prompt: Jorg Haider:(1950-01-26)::Arthur Laurents:X; what is X? 	|	 Analogical Answer: 1918-07-14, Technical Answer: 1917-07-14 	|	 Response: Arthur Laurents:(1917-07-14) 	|	 Status: Technically Correct
Prompt: Jorg Haider:(1950-01-26)::Christoph von Dohnanyi:X; what is X? 	|	 Analogical Answer: 1929-09-08, Technical Answer: 1929-06-08 	|	 Response: Unknown 	|	 Status: Fully Wrong
Prompt: Jorg Haider:(1950-01

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo in organization org-0bYVbCQ1FzkiQda54FsQay7Y on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}

In [67]:
# result_df = pd.DataFrame(columns=['Prompt', 'Analogical Answer', 'Technical Answer', 'Response', 'Status'], data=result)
result_df.to_csv('../results/result: person-birthdate-adversial.csv')
result_df

Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Jorg Haider:(1950-01-26)::George Vecsey:X,1939-07-04,1940-07-04,"X is the birth date of George Vecsey, which is...",Fully Wrong
1,Jorg Haider:(1950-01-26)::Alicia Keys:X,1980-01-25,1981-01-25,1975-01-25,Fully Wrong
2,Jorg Haider:(1950-01-26)::William T Esrey:X,1940-01-01,1940-11-15,Unknown,Fully Wrong
3,Jorg Haider:(1950-01-26)::Arthur Laurents:X,1918-07-14,1917-07-14,Arthur Laurents:(1917-07-14),Technically Correct
4,Jorg Haider:(1950-01-26)::Christoph von Dohnan...,1929-09-08,1929-06-08,Unknown,Fully Wrong
5,Jorg Haider:(1950-01-26)::Donald L Evans:X,1946-07-26,1946-07-27,X is the date of birth of Donald L Evans.,Fully Wrong
6,Jorg Haider:(1950-01-26)::Kathleen Hall Jamies...,1946-01-01,1946-02-22,There is not enough information to determine w...,Fully Wrong
7,Jorg Haider:(1950-01-26)::Elizabeth S Grubman:X,1971-01-30,1976-01-30,"X is the birthdate of Elizabeth S Grubman, whi...",Fully Wrong
8,Petr Korda:(1968-01-23)::George Vecsey:X,1939-07-04,1940-07-04,X is unknown or unspecified.,Fully Wrong
9,Petr Korda:(1968-01-23)::Alicia Keys:X,1980-01-25,1981-01-25,1981-01-25,Technically Correct


In [65]:
# Define the data for the DataFrame
data = {
    "Prompt": [
        "Jorg Haider:(1950-01-26)::George Vecsey:X",
        "Jorg Haider:(1950-01-26)::Alicia Keys:X",
        "Jorg Haider:(1950-01-26)::William T Esrey:X",
        "Jorg Haider:(1950-01-26)::Arthur Laurents:X",
        "Jorg Haider:(1950-01-26)::Christoph von Dohnanyi:X",
        "Jorg Haider:(1950-01-26)::Donald L Evans:X",
        "Jorg Haider:(1950-01-26)::Kathleen Hall Jamieson:X",
        "Jorg Haider:(1950-01-26)::Elizabeth S Grubman:X",
        "Petr Korda:(1968-01-23)::George Vecsey:X",
        "Petr Korda:(1968-01-23)::Alicia Keys:X",
        "Petr Korda:(1968-01-23)::William T Esrey:X",
        "Petr Korda:(1968-01-23)::Arthur Laurents:X",
        "Petr Korda:(1968-01-23)::Christoph von Dohnanyi:X",
        "Petr Korda:(1968-01-23)::Donald L Evans:X",
        "Petr Korda:(1968-01-23)::Kathleen Hall Jamieson:X",
        "Petr Korda:(1968-01-23)::Elizabeth S Grubman:X",
        "Ariel Sharon:(1928-02-26)::George Vecsey:X",
        "Ariel Sharon:(1928-02-26)::Alicia Keys:X",
        "Ariel Sharon:(1928-02-26)::William T Esrey:X"
    ],
    "Analogical Answer": [
        "1939-07-04", "1980-01-25", "1940-01-01", "1918-07-14", "1929-09-08",
        "1946-07-26", "1946-01-01", "1971-01-30", "1939-07-04", "1980-01-25",
        "1940-01-01", "1918-07-14", "1929-09-08", "1946-07-26", "1946-01-01",
        "1971-01-30", "1939-07-04", "1980-01-25", "1940-01-01"
    ],
    "Technical Answer": [
        "1940-07-04", "1981-01-25", "1940-11-15", "1917-07-14", "1929-06-08",
        "1946-07-27", "1946-02-22", "1976-01-30", "1940-07-04", "1981-01-25",
        "1940-11-15", "1917-07-14", "1929-06-08", "1946-07-27", "1946-02-22",
        "1976-01-30", "1940-07-04", "1981-01-25", "1940-11-15"
    ],
    "Response": [
        "X is the birth date of George Vecsey, which is not provided in the question.",
        "1975-01-25", "Unknown", "Arthur Laurents:(1917-07-14)", "Unknown",
        "X is the date of birth of Donald L Evans.", "There is not enough information to determine what X represents.",
        "X is the birthdate of Elizabeth S Grubman, which is unknown based on the provided information.",
        "X is unknown or unspecified.", "1981-01-25", "Unknown.", "Unknown",
        "1949-09-08", "X is Donald L Evans's birth date.", "X is unknown or not given in the provided information.",
        "Unknown", "World War II or 1939-1945", "-\nnot applicable.", "Unknown or not given."
    ],
    "Status": [
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Technically Correct", "Fully Wrong",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong", "Technically Correct",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong",
        "Fully Wrong", "Fully Wrong", "Fully Wrong", "Fully Wrong"
    ]
}

# Create the DataFrame
result_df = pd.DataFrame(data)
result_df




Unnamed: 0,Prompt,Analogical Answer,Technical Answer,Response,Status
0,Jorg Haider:(1950-01-26)::George Vecsey:X,1939-07-04,1940-07-04,"X is the birth date of George Vecsey, which is...",Fully Wrong
1,Jorg Haider:(1950-01-26)::Alicia Keys:X,1980-01-25,1981-01-25,1975-01-25,Fully Wrong
2,Jorg Haider:(1950-01-26)::William T Esrey:X,1940-01-01,1940-11-15,Unknown,Fully Wrong
3,Jorg Haider:(1950-01-26)::Arthur Laurents:X,1918-07-14,1917-07-14,Arthur Laurents:(1917-07-14),Technically Correct
4,Jorg Haider:(1950-01-26)::Christoph von Dohnan...,1929-09-08,1929-06-08,Unknown,Fully Wrong
5,Jorg Haider:(1950-01-26)::Donald L Evans:X,1946-07-26,1946-07-27,X is the date of birth of Donald L Evans.,Fully Wrong
6,Jorg Haider:(1950-01-26)::Kathleen Hall Jamies...,1946-01-01,1946-02-22,There is not enough information to determine w...,Fully Wrong
7,Jorg Haider:(1950-01-26)::Elizabeth S Grubman:X,1971-01-30,1976-01-30,"X is the birthdate of Elizabeth S Grubman, whi...",Fully Wrong
8,Petr Korda:(1968-01-23)::George Vecsey:X,1939-07-04,1940-07-04,X is unknown or unspecified.,Fully Wrong
9,Petr Korda:(1968-01-23)::Alicia Keys:X,1980-01-25,1981-01-25,1981-01-25,Technically Correct
