## RACE Dataset - Multiple-Choice Questions

In [3]:
import pandas as pd
import openai
from datasets import load_dataset
from dotenv import load_dotenv
import os

In [4]:
# Read variables from a local .env file (if one exists) into the process environment.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail here with a clear message instead of at the first API call much later in the notebook.
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to a .env file or export it before running this notebook."
    )

openai.api_key = openai_api_key

In [2]:
# Load the "race" dataset using its "all" configuration.
RaceDataset = load_dataset(path="race", name="all")

In [3]:
# Display the loaded object: its splits, their columns and row counts.
RaceDataset

DatasetDict({
    test: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 4934
    })
    train: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 87866
    })
    validation: Dataset({
        features: ['example_id', 'article', 'answer', 'question', 'options'],
        num_rows: 4887
    })
})

In [4]:
# Show the shape of each split.
RaceDataset.shape

{'test': (4934, 5), 'train': (87866, 5), 'validation': (4887, 5)}

In [5]:
# Work with the training split as a pandas DataFrame.
train_split = RaceDataset["train"]
df = pd.DataFrame(train_split)

In [6]:
# Fixed seed so the same 50 questions are drawn on every run; without it each
# Restart & Run All evaluates a different sample and results cannot be compared.
randomQuestions = df.sample(n=50, random_state=42)

In [7]:
# Inspect the sampled questions.
randomQuestions

Unnamed: 0,example_id,article,answer,question,options
46394,high16113.txt,"Cyberspace, data superhighway, multimedia, for...",B,The author's attitude towards the communicatio...,"[positive, critical, indifferent, tolerant]"
4253,high14123.txt,"Pocket Tape-Recorders ""Family and Home Magazin...",D,Which machine is unsuitable for general use?,"[Pearlcorder S702, Imperial OEM MC7, Sony M400..."
1902,high3330.txt,I usually doubt about any research that conclu...,B,"What does the author mean by saying "" we can't...",[It's impossible to slow down the pace of the ...
12257,high10056.txt,When someone told 85-year-old Johnny Long that...,A,What Long got most from bands was _ .,"[pleasure, health, wealth, honor]"
57181,high18400.txt,There is a definite possibility that the clima...,A,"According to the text, the climate of the worl...","[be getting colder, be getting warmer, remain ..."
65110,middle7009.txt,"When an earthquake hit a small town,many house...",A,What's the best title of this passage?,"[The Spirit of Giving., A terrible earthquake...."
17318,high4197.txt,Market analysts in the United States have rece...,C,What does this passage mainly talk about?,"[Nobody likes luxury goods any more., Luxury g..."
59797,high2534.txt,"Paula Radcliffe, chasing a third London mara...",D,Radcliffe's failure in Athens made her _ .,"[develop respect for Liz., love people around ..."
82333,middle2043.txt,Wang Tao is a little boy. He often goes for a ...,D,Who can swim like a fish?,"[Wang Tao., Wang Tao's father., Li Qiang., Li ..."
55289,high22729.txt,Not drying your hands thoroughly after washing...,D,"According to the research, rubbing our hands w...",[reduce as many as bacteria as washing our han...


In [9]:
def getGPTAnswers(row, model="gpt-3.5-turbo-instruct", max_tokens=100, temperature=0):
    """Ask an OpenAI completion model to answer one multiple-choice question.

    Parameters
    ----------
    row : mapping
        One question record (e.g. a row passed by ``DataFrame.apply(axis=1)``).
        Must provide ``'article'``, ``'question'`` and ``'options'``, where
        ``'options'`` is an iterable of strings.
    model : str, optional
        Name of the completion model sent to the API.
    max_tokens : int, optional
        Upper bound on the length of the generated reply.
    temperature : float, optional
        Sampling temperature. Defaults to 0 so that re-running the evaluation
        on the same questions is as repeatable as the API allows.

    Returns
    -------
    str
        Text of the first completion choice, stripped of surrounding
        whitespace. The prompt lists the option texts without A-D labels, so
        the reply is free text rather than an option letter.
    """
    article = row['article']
    question = row['question']
    options_text = ', '.join(row['options'])

    prompt = f"Article: {article}\nQuestion: {question}\nOptions: {options_text}\nAnswer:"

    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return response.choices[0].text.strip()

In [12]:
# Query the model once per sampled question (row-wise) and keep the replies in a new column.
gpt_answers = randomQuestions.apply(getGPTAnswers, axis=1)
randomQuestions['gpt_3.5_answers'] = gpt_answers

In [13]:
# Inspect the sample again, now including the 'gpt_3.5_answers' column.
randomQuestions

Unnamed: 0,example_id,article,answer,question,options,gpt_3.5_answers
46394,high16113.txt,"Cyberspace, data superhighway, multimedia, for...",B,The author's attitude towards the communicatio...,"[positive, critical, indifferent, tolerant]",critical
4253,high14123.txt,"Pocket Tape-Recorders ""Family and Home Magazin...",D,Which machine is unsuitable for general use?,"[Pearlcorder S702, Imperial OEM MC7, Sony M400...",Philips 585
1902,high3330.txt,I usually doubt about any research that conclu...,B,"What does the author mean by saying "" we can't...",[It's impossible to slow down the pace of the ...,The social reality children are facing cannot ...
12257,high10056.txt,When someone told 85-year-old Johnny Long that...,A,What Long got most from bands was _ .,"[pleasure, health, wealth, honor]",pleasure
57181,high18400.txt,There is a definite possibility that the clima...,A,"According to the text, the climate of the worl...","[be getting colder, be getting warmer, remain ...",be getting colder
65110,middle7009.txt,"When an earthquake hit a small town,many house...",A,What's the best title of this passage?,"[The Spirit of Giving., A terrible earthquake....",The Spirit of Giving.
17318,high4197.txt,Market analysts in the United States have rece...,C,What does this passage mainly talk about?,"[Nobody likes luxury goods any more., Luxury g...","Tech products become the new ""luxury goods""."
59797,high2534.txt,"Paula Radcliffe, chasing a third London mara...",D,Radcliffe's failure in Athens made her _ .,"[develop respect for Liz., love people around ...",face criticism calmly.
82333,middle2043.txt,Wang Tao is a little boy. He often goes for a ...,D,Who can swim like a fish?,"[Wang Tao., Wang Tao's father., Li Qiang., Li ...",Li Qiang's father.
55289,high22729.txt,Not drying your hands thoroughly after washing...,D,"According to the research, rubbing our hands w...",[reduce as many as bacteria as washing our han...,cause bacteria to spread to other surfaces


In [14]:
# Save the sampled questions together with the model answers; the DataFrame index is not written.
randomQuestions.to_csv(path_or_buf="RaceAnalysis.csv", index=False)