## Race Dataset - MCQ Questions

In [1]:
import pandas as pd
import openai
from datasets import load_dataset
from dotenv import load_dotenv
import os

In [2]:
# Load environment variable
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

openai.api_key = openai_api_key

In [3]:
RaceDataset = load_dataset("race", "all")

In [4]:
RaceDataset

DatasetDict({
    test: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 4934
    })
    train: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 87866
    })
    validation: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 4887
    })
})

In [5]:
RaceDataset.shape

{'test': (4934, 5), 'train': (87866, 5), 'validation': (4887, 5)}

In [6]:
df = pd.DataFrame(RaceDataset["train"])

In [7]:
randomQuestions = df.sample(n=50)

In [8]:
randomQuestions

Unnamed: 0,example_id,article,answer,question,options
58432,high8007.txt,"On March 19, 2013, Malala Yousafzai, a 15-year...",D,We can learn from the passage that Malala _ .,"[is now attending school in Pakistan, is still..."
29788,high13164.txt,Kendall Ciesemier is the founder of charity Ki...,D,Ciesemier's own illness .,"[caused her to lose some close friends, made h..."
3650,high10725.txt,The earth is dying before us yet we sit and wa...,D,What is the main purpose of the passage?,[To criticize those big companies that ruin th...
65085,middle8130.txt,Last Friday a storm swept through two villages...,D,How many homes altogether were destroyed in th...,"[Fourteen., Twenty-one, Twenty-nine., Thirty-s..."
37910,high22198.txt,I had in my hand a sheet of paper with handwri...,A,Why was the author surprised at not recognizin...,[He had worked with his colleague long enough....
70035,middle7394.txt,"Dear students,\nVolunteering has never been an...",B,If you want to become a volunteer in the schoo...,"[get a work visa, complete a volunteer form, c..."
5842,high20603.txt,Parents often avoid talking about the issue of...,A,Whom is the passage written for?,"[Parents., Children., Teachers., Scientists.]"
35406,high12597.txt,I woke up late and had breakfast in a hurry. I...,D,Which of the following is NOT the reason of th...,"[She got up late in the morning., She changed ..."
79326,middle4157.txt,John liked to wear his hair very long. Some of...,C,A barber is a _ .,"[man who looks after the babies, man who makes..."
43557,high21515.txt,"Once home to the world's largest trading port,...",A,We can learn from the passage that _,"[on Christmas Day,you cannot visit Calgary Tra..."


In [13]:
def getGPTAnswers(row):
    article = row['article']
    question = row['question']
    options = row['options']
    
    prompt = f"I want to perform extractive question answering from the article. Only select the correct option exactly in the form of A, B, C, or D. \nArticle: {article}\nQuestion: {question}\nOptions: {', '.join(options)}\nAnswer:"
    
    response = openai.Completion.create(model="gpt-3.5-turbo-instruct", prompt=prompt, max_tokens=100)
    
    answer = response.choices[0].text.strip()
    
    return answer

In [14]:
testAnswer = getGPTAnswers(df.loc[0])
testAnswer

'C. teacher'

In [15]:
randomQuestions['gpt_3.5_answers'] = randomQuestions.apply(getGPTAnswers, axis=1)

In [16]:
randomQuestions

Unnamed: 0,example_id,article,answer,question,options,gpt_3.5_answers
58432,high8007.txt,"On March 19, 2013, Malala Yousafzai, a 15-year...",D,We can learn from the passage that Malala _ .,"[is now attending school in Pakistan, is still...",D. cares about the situation in her home country
29788,high13164.txt,Kendall Ciesemier is the founder of charity Ki...,D,Ciesemier's own illness .,"[caused her to lose some close friends, made h...",helped her easily understand other patients' f...
3650,high10725.txt,The earth is dying before us yet we sit and wa...,D,What is the main purpose of the passage?,[To criticize those big companies that ruin th...,D. To call on people to stop ruining the earth
65085,middle8130.txt,Last Friday a storm swept through two villages...,D,How many homes altogether were destroyed in th...,"[Fourteen., Twenty-one, Twenty-nine., Thirty-s...",A. Fourteen.
37910,high22198.txt,I had in my hand a sheet of paper with handwri...,A,Why was the author surprised at not recognizin...,[He had worked with his colleague long enough....,A
70035,middle7394.txt,"Dear students,\nVolunteering has never been an...",B,If you want to become a volunteer in the schoo...,"[get a work visa, complete a volunteer form, c...",B. complete a volunteer form
5842,high20603.txt,Parents often avoid talking about the issue of...,A,Whom is the passage written for?,"[Parents., Children., Teachers., Scientists.]",A. Parents.
35406,high12597.txt,I woke up late and had breakfast in a hurry. I...,D,Which of the following is NOT the reason of th...,"[She got up late in the morning., She changed ...",D
79326,middle4157.txt,John liked to wear his hair very long. Some of...,C,A barber is a _ .,"[man who looks after the babies, man who makes...","C, person who cuts men's hair"
43557,high21515.txt,"Once home to the world's largest trading port,...",A,We can learn from the passage that _,"[on Christmas Day,you cannot visit Calgary Tra...",None of the above.


In [17]:
randomQuestions.to_csv("RaceAnalysis.csv", index=False)