In [9]:
import pandas as pd
import json
from ast import literal_eval

# Clean incorrect responses from GPT-4

In [39]:
# Load the parsed GPT-4 responses for the ARC-Challenge training split.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default (which differs across operating systems).
with open('../datasets/gpt4/arc_challenge_train_gpt4_response_parsed.json', 'r', encoding='utf-8') as f:
    arc_parsed_data = json.load(f)
len(arc_parsed_data)

1000

In [40]:
# Load the parsed GPT-4 responses for the Winogrande training split.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default (which differs across operating systems).
with open('../datasets/gpt4/winogrande_train_gpt4_response_parsed.json', 'r', encoding='utf-8') as f:
    wino_parsed_data = json.load(f)
len(wino_parsed_data)

1000

In [7]:
# ARC-Challenge training questions together with their prompts.
arc_df = pd.read_csv(filepath_or_buffer='../datasets/arc_challenge_train_with_prompt.csv')
arc_df.head(n=5)

Unnamed: 0,id,question,choices,answerKey,options,prompt
0,Mercury_SC_415702,George wants to warm his hands quickly by rubb...,"{'text': array(['dry palms', 'wet palms', 'pal...",A,"['dry palms', 'wet palms', 'palms covered with...",Question: - George wants to warm his hands qui...
1,MCAS_2009_5_6516,Which of the following statements best explain...,{'text': array(['The refrigerator door is smoo...,B,"['The refrigerator door is smooth.', 'The refr...",Question: - Which of the following statements ...
2,Mercury_7233695,A fold observed in layers of sedimentary rock ...,"{'text': array(['cooling of flowing magma.', '...",B,"['cooling of flowing magma.', 'converging of c...",Question: - A fold observed in layers of sedim...
3,Mercury_7041615,Which of these do scientists offer as the most...,"{'text': array(['worldwide disease', 'global m...",D,"['worldwide disease', 'global mountain buildin...",Question: - Which of these do scientists offer...
4,Mercury_7041860,A boat is acted on by a river current flowing ...,"{'text': array(['west', 'east', 'north', 'sout...",B,"['west', 'east', 'north', 'south']",Question: - A boat is acted on by a river curr...


In [8]:
# Winogrande training questions together with their prompts.
wino_df = pd.read_csv(filepath_or_buffer='../datasets/winogrande_train_with_prompt.csv')
wino_df.head(n=5)

Unnamed: 0,question,option1,option2,answer,prompt,id
0,John moved the couch from the garage to the ba...,garage,backyard,1,Question: - John moved the couch from the gara...,GO3wkLp9TC
1,The doctor diagnosed Justin with bipolar and R...,Justin,Robert,2,Question: - The doctor diagnosed Justin with b...,IPQZOkoLLz
2,Dennis drew up a business proposal to present ...,Dennis,Logan,1,Question: - Dennis drew up a business proposal...,W85KDUn8gA
3,Felicia unexpectedly made fried eggs for break...,Felicia,Katrina,2,Question: - Felicia unexpectedly made fried eg...,UIUCGI68OY
4,My shampoo did not lather easily on my Afro ha...,shampoo,hair,2,Question: - My shampoo did not lather easily o...,WFAUCgvz91


## Extracting answer

In [10]:
# Zero-based option index for each letter answer key ('A' -> 0, ..., 'E' -> 4).
key_to_answer = {letter: position for position, letter in enumerate('ABCDE')}

In [19]:
def get_answer_arc(row):
    """Return the normalised text of the correct option for one ARC row.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series``)
        Must provide ``'options'`` (the string repr of a list of option
        texts, parsed with ``literal_eval``) and ``'answerKey'`` (either a
        letter present in ``key_to_answer`` or a 1-based number such as
        ``'1'``).

    Returns
    -------
    str
        The selected option text, stripped and lower-cased.

    Raises
    ------
    ValueError
        If ``answerKey`` is neither a known letter nor an integer.
    IndexError
        If ``answerKey`` points outside the list of options.
    """
    list_options = literal_eval(row['options'])
    answer_key = row['answerKey']
    try:
        index = key_to_answer[answer_key]
    except KeyError:
        # Only an unknown letter falls back to the numeric scheme; any other
        # failure should surface instead of being silently reinterpreted.
        index = int(answer_key) - 1
    if not 0 <= index < len(list_options):
        # Guard explicitly: a negative index (e.g. from key '0') would
        # otherwise wrap around and silently select the last option.
        raise IndexError(
            f"answerKey {answer_key!r} is out of range for {len(list_options)} options"
        )
    return list_options[index].strip().lower()

In [20]:
# Attach the normalised gold answer text to every ARC row.
arc_df['answer_text'] = arc_df.apply(get_answer_arc, axis=1)
arc_df.head(n=5)

Unnamed: 0,id,question,choices,answerKey,options,prompt,answer_text
0,Mercury_SC_415702,George wants to warm his hands quickly by rubb...,"{'text': array(['dry palms', 'wet palms', 'pal...",A,"['dry palms', 'wet palms', 'palms covered with...",Question: - George wants to warm his hands qui...,dry palms
1,MCAS_2009_5_6516,Which of the following statements best explain...,{'text': array(['The refrigerator door is smoo...,B,"['The refrigerator door is smooth.', 'The refr...",Question: - Which of the following statements ...,the refrigerator door contains iron.
2,Mercury_7233695,A fold observed in layers of sedimentary rock ...,"{'text': array(['cooling of flowing magma.', '...",B,"['cooling of flowing magma.', 'converging of c...",Question: - A fold observed in layers of sedim...,converging of crustal plates.
3,Mercury_7041615,Which of these do scientists offer as the most...,"{'text': array(['worldwide disease', 'global m...",D,"['worldwide disease', 'global mountain buildin...",Question: - Which of these do scientists offer...,impact of an asteroid created dust that blocke...
4,Mercury_7041860,A boat is acted on by a river current flowing ...,"{'text': array(['west', 'east', 'north', 'sout...",B,"['west', 'east', 'north', 'south']",Question: - A boat is acted on by a river curr...,east


In [22]:
def get_answer_wino(row):
    """Return the normalised text of the correct option for one Winogrande row.

    ``row['option2']`` is chosen when ``row['answer']`` equals ``2``; in every
    other case ``row['option1']`` is used. The result is stripped and
    lower-cased.
    """
    first_option = row['option1']
    if row['answer'] != 2:
        return first_option.strip().lower()
    return row['option2'].strip().lower()

In [23]:
# Attach the normalised gold answer text to every Winogrande row.
wino_df['answer_text'] = wino_df.apply(get_answer_wino, axis=1)
wino_df.head(n=5)

Unnamed: 0,question,option1,option2,answer,prompt,id,answer_text
0,John moved the couch from the garage to the ba...,garage,backyard,1,Question: - John moved the couch from the gara...,GO3wkLp9TC,garage
1,The doctor diagnosed Justin with bipolar and R...,Justin,Robert,2,Question: - The doctor diagnosed Justin with b...,IPQZOkoLLz,robert
2,Dennis drew up a business proposal to present ...,Dennis,Logan,1,Question: - Dennis drew up a business proposal...,W85KDUn8gA,dennis
3,Felicia unexpectedly made fried eggs for break...,Felicia,Katrina,2,Question: - Felicia unexpectedly made fried eg...,UIUCGI68OY,katrina
4,My shampoo did not lather easily on my Afro ha...,shampoo,hair,2,Question: - My shampoo did not lather easily o...,WFAUCgvz91,hair


## Removing rows where GPT-4's answer is incorrect

In [45]:
def check_answer(id_, answer_text, dataset='arc'):
    """Tell whether the parsed model answer for ``id_`` equals ``answer_text``.

    Parameters
    ----------
    id_ : hashable
        Key of the example in the parsed-response mapping.
    answer_text : str
        Expected answer; compared as-is against the stripped, lower-cased
        model answer, so it should already be normalised the same way.
    dataset : str, default ``'arc'``
        ``'arc'`` looks the id up in ``arc_parsed_data``; any other value
        uses ``wino_parsed_data``.

    Returns
    -------
    bool
        ``True`` only when a string answer is found at
        ``parsed_response -> final_answer -> answer`` and it matches.
        A missing id, a missing or null nested field, or a non-string answer
        all yield ``False`` rather than raising.
    """
    parsed_data = arc_parsed_data if dataset == 'arc' else wino_parsed_data
    if id_ not in parsed_data:
        return False

    # Walk the nested structure defensively: an incomplete or null field is
    # handled like a missing id, i.e. the row cannot be confirmed as correct.
    node = parsed_data[id_]
    for key in ('parsed_response', 'final_answer', 'answer'):
        if not isinstance(node, dict) or key not in node:
            return False
        node = node[key]
    if not isinstance(node, str):
        return False

    return node.strip().lower() == answer_text

In [46]:
# Row count of the ARC frame before filtering.
arc_df.shape[0]

1119

In [52]:
# Boolean mask: True where check_answer confirms the row's answer_text,
# so indexing with it keeps only the confirmed rows.
wrong_answer_filter = arc_df.apply(
    lambda row: check_answer(row['id'], row['answer_text'], dataset='arc'),
    axis=1,
)
arc_df_cleaned = arc_df.loc[wrong_answer_filter]

In [53]:
# Boolean mask: True where check_answer confirms the row's answer_text,
# so indexing with it keeps only the confirmed rows.
wrong_answer_filter = wino_df.apply(
    lambda row: check_answer(row['id'], row['answer_text'], dataset='wino'),
    axis=1,
)
wino_df_cleaned = wino_df.loc[wrong_answer_filter]

In [54]:
# Row counts remaining after filtering, as (ARC, Winogrande).
tuple(len(frame) for frame in (arc_df_cleaned, wino_df_cleaned))

(940, 807)