## SQuAD dataset - Crowdworker-posed questions

In [50]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from dotenv import load_dotenv
import os

In [52]:
# Load environment variables from a local .env file so the API key is never
# hard-coded in the notebook.
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # os.getenv returns None when the variable is absent; stop here with a
    # clear message rather than configuring the client with an empty key.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in .env or in the environment."
    )

openai.api_key = openai_api_key

In [2]:
# Load the SQuAD dataset; the returned object holds a 'train' and a 'validation' split.
dataset = load_dataset("squad")

In [3]:
# Display the splits with their feature names and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

In [4]:
# (rows, columns) of each split.
dataset.shape

{'train': (87599, 5), 'validation': (10570, 5)}

In [5]:
# Build a pandas DataFrame from the training split.
df = pd.DataFrame(dataset['train'])

In [43]:
# Select 50 random rows from the training DataFrame.
# A fixed random_state makes the sample identical on every run, so the
# questions sent to the model (and the saved results) can be reproduced.
randomQuestions = df.sample(n=50, random_state=42)

In [33]:
# Preview the first rows of the random sample.
randomQuestions.head()

Unnamed: 0,id,title,context,question,answers
82499,57312c4e497a881900248c0a,Qing_dynasty,Li Zicheng then led a coalition of rebel force...,Who beat Li Zicheng's army?,"{'text': ['Wu and Dorgon'], 'answer_start': [6..."
41364,57290ee56aef051400154a04,States_of_Germany,"In 1952, following a referendum, Baden, Württe...","Which city did Baden, Württemberg-Baden, and W...","{'text': ['Baden-Württemberg'], 'answer_start'..."
83550,5731882fe6313a140071d049,Mosaic,The ban on figurative depiction was not taken ...,What is the central figure doing in the mosaic...,"{'text': ['playing a lyre'], 'answer_start': [..."
52612,5726e7c5dd62a815002e94a0,Chinese_characters,The Shang dynasty oracle bone script and the Z...,Who work in the style of carving a traditional...,"{'text': ['calligraphers'], 'answer_start': [5..."
41068,5726144d271a42140099d444,Clothing,According to archaeologists and anthropologist...,Where have flax fibers which may have been use...,"{'text': ['a prehistoric cave'], 'answer_start..."


In [15]:
# One-shot smoke test of the completions endpoint. The prompt holds a single,
# fully worked example: a context, a question and its answer.
# NOTE(review): the prompt ends right after the example's answer and poses no
# new question, which may be why the completion comes back empty - confirm.
examplePrompt = """I want you to perform extractive question answering. I will send you an example that contains the context, question, and answer. The answer contains the answer and the start character index of the answer. 
Context: Conversely, most heat-treatable alloys are precipitation hardening alloys, which produce the opposite effects that steel does. When heated to form a solution and then cooled quickly, these alloys become much softer than normal, during the diffusionless transformation, and then harden as they age. The solutes in these alloys will precipitate over time, forming intermetallic phases, which are difficult to discern from the base metal. Unlike steel, in which the solid solution separates to form different crystal phases, precipitation hardening alloys separate to form different phases within the same crystal. These intermetallic alloys appear homogeneous in crystal structure, but tend to behave heterogeneous, becoming hard and somewhat brittle.
Question: What makes alloys more softer than normal?
Answer: {'text': ['When heated to form a solution and then cooled quickly'], 'answer_start': [127]}"""

# Send the prompt to the completion model, capped at 100 generated tokens.
result = openai.Completion.create(
    model="gpt-3.5-turbo-instruct",
    prompt=examplePrompt,
    max_tokens=100,
)

# Text of the first (and only requested) completion choice.
print(result["choices"][0]["text"])




In [16]:
# Show the completion text as a bare expression so that an empty string is visible.
result.choices[0].text

''

In [42]:
# Using OpenAI Api to feed the questions into gpt 3.5 turbo model and get the answer from gpt.

def getGPTAnswers(row, *, model="gpt-3.5-turbo-instruct", max_tokens=100, temperature=0):
    """Ask an OpenAI completion model to answer one SQuAD question extractively.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'question' and 'context'.
    model : str, optional
        Name of the completion model passed to the API.
    max_tokens : int, optional
        Upper bound on the number of tokens generated for the answer.
    temperature : float, optional
        Sampling temperature. Defaults to 0 so that repeated runs over the
        same rows are as repeatable as the API allows.

    Returns
    -------
    str
        The raw completion text with surrounding whitespace removed. The prompt
        requests the form {'text': [...], 'answer_start': [...]}, but the text
        is returned unparsed and the model does not always follow that format.
    """
    question = row['question']
    context = row['context']

    # Doubled braces are literal braces in the f-string, so the requested
    # output format is shown to the model verbatim.
    prompt = f"Perform extractive question answering from the context. The answer should be in an array containing the answer and the start character index of the answer in format of {{'text': [], 'answer_start': []}}.\nContext: {context}\n\nQuestion: {question}\nAnswer:?"

    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    return response.choices[0].text.strip()

In [55]:
# Try the helper on a single row (label 0 of the full training frame) before
# applying it to the randomQuestions DataFrame.
testAnswer = getGPTAnswers(df.loc[0])

In [56]:
# Raw string returned by the model for the test row.
testAnswer

"{\n  'text': ['Saint Bernadette Soubirous'],\n  'answer_start': [255]\n}"

In [44]:
# Call the model once per sampled row (axis=1 passes each row to the helper)
# and store each raw reply in a new 'gpt_3.5_answers' column of randomQuestions.

randomQuestions['gpt_3.5_answers'] = randomQuestions.apply(getGPTAnswers, axis=1)

In [45]:
# Preview the first 10 sampled rows, with the model's answers next to the
# dataset's own answers, instead of rendering the whole frame.
randomQuestions.head(10)

Unnamed: 0,id,title,context,question,answers,gpt_3.5_answers
85068,57321849b9d445190005e7dd,Economy_of_Greece,Greece has tended to lag behind its European U...,What has the been a massive increase in the nu...,"{'text': ['a broadband connection'], 'answer_s...","{'text': ['a broadband connection'], 'answer_s..."
52407,5726e2bf708984140094d4c3,Avicenna,Avicenna was a devout Muslim and sought to rec...,What religion was Avicenna?,"{'text': ['Muslim'], 'answer_start': [22]}","{'text': ['Muslim'], 'answer_start': [143]}"
2834,56cd8dbf62d2951400fa66f4,The_Legend_of_Zelda:_Twilight_Princess,The GameCube and Wii versions feature several ...,What part of the Wii is employed to use weapons?,"{'text': ['Wii Remote'], 'answer_start': [171]}",{'text': ['motion sensors and built-in speaker...
51172,5727b0983acd2414000de9d7,Switzerland,"In many places in Switzerland, household rubbi...",What is the charge for recycling in Switzerland?,"{'text': ['free'], 'answer_start': [357]}","{'text': ['free'], 'answer_start': [186]}"
71514,573195b3e6313a140071d0d7,Steven_Spielberg,"In early 2009, Spielberg shot the first film i...",Where did Adventures of Tintin debut?,"{'text': ['Brussels, Belgium'], 'answer_start'...","{'text': ['Brussels, Belgium'], 'answer_start'..."
47605,57288c223acd2414000dfad2,Myanmar,The government has responded by imposing curfe...,What happened in Rakhine in the summer of 2012...,"{'text': ['On 10 June 2012, a state of emergen...","On 10 June 2012, a state of emergency was decl..."
60406,5727c1593acd2414000debbb,Exhibition_game,National Basketball Association teams play eig...,How many preseason games do NBA teams play?,"{'text': ['eight'], 'answer_start': [43]}","{'text': ['eight'], 'answer_start': [41]}"
8061,56d632371c85041400946fe4,Dog,A Colorado study found bites in children were ...,A dog scratch can lead to what medical condition?,"{'text': ['infections.'], 'answer_start': [396]}","{'text': ['serious infections'], 'answer_start..."
87020,5735ec17012e2f140011a0dd,Hunting,Even as animal domestication became relatively...,Where is hunting still vital?,"{'text': ['marginal climates'], 'answer_start'...",Hunting is still vital in marginal climates
24100,5706a58a75f01819005e7cba,Black_people,"In 2008, the High Court in South Africa ruled ...",Why did some Chinese citizens qualify for bene...,"{'text': ['they were also ""disadvantaged"" by r...","{'text': ['because they were also ""disadvantag..."


In [46]:
# Save the sample together with the model answers; index=False leaves the row index out of the CSV.
randomQuestions.to_csv("SquadAnalysis2.csv", index=False)