In [7]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")

import json
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

In [2]:
# Load the SQuAD JSON file. The encoding is passed explicitly because the
# contexts contain non-ASCII text (e.g. "Beyoncé", IPA symbols) and the
# platform default encoding used by open() is not UTF-8 everywhere.
with open('train-v2.0.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Flatten the nested article -> paragraph -> question -> answer structure
# into one record per answer. A question whose 'answers' list is empty
# contributes no record.
flattened_data = [
    {
        'context': paragraph['context'],
        'question': qa['question'],
        'answer': answer['text'],
        'answer_start': answer['answer_start'],
    }
    for entry in data['data']
    for paragraph in entry['paragraphs']
    for qa in paragraph['qas']
    for answer in qa['answers']
]

# Convert the flattened records into a pandas DataFrame
df = pd.DataFrame(flattened_data)

# Preview the first few rows of the DataFrame
print(df.head())

# Convert the DataFrame to a HuggingFace Dataset
dataset = Dataset.from_pandas(df)

# Preview a sample from the Dataset
print(dataset[0])


                                             context  \
0  Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...   
1  Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...   
2  Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...   
3  Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...   
4  Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...   

                                            question               answer  \
0           When did Beyonce start becoming popular?    in the late 1990s   
1  What areas did Beyonce compete in when she was...  singing and dancing   
2  When did Beyonce leave Destiny's Child and bec...                 2003   
3      In what city and state did Beyonce  grow up?        Houston, Texas   
4         In which decade did Beyonce become famous?           late 1990s   

   answer_start  
0           269  
1           207  
2           526  
3           166  
4           276  
{'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 19

In [8]:
# Load the pre-trained DistilBERT question-answering checkpoint and its tokenizer.
# force_download is intentionally not set: the files are then fetched once and
# reused from the local Hugging Face cache on later runs instead of being
# re-downloaded every time this cell executes.
# If the cached files are ever corrupted, pass force_download=True for a single run.
model_name = "distilbert-base-uncased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

In [9]:
# Wrap the loaded model and tokenizer in a question-answering pipeline
qa_pipeline = pipeline(
    task="question-answering",
    model=model,
    tokenizer=tokenizer,
)

In [10]:
# Take the context and question from the first row of the dataset
first_example = dataset[0]
context = first_example['context']
question = first_example['question']

# Ask the QA pipeline and show the question together with the predicted answer
answer = qa_pipeline(question=question, context=context)
print(f"Question: {question}", f"Answer: {answer['answer']}", sep="\n")

Question: When did Beyonce start becoming popular?
Answer: late 1990s


In [11]:
# Run the pipeline on the first 5 examples in the dataset
for row_index in range(5):
    example = dataset[row_index]
    context, question = example['context'], example['question']
    answer = qa_pipeline(question=question, context=context)
    # The trailing "\n" leaves a blank line between consecutive examples
    print(f"Question: {question}", f"Answer: {answer['answer']}\n", sep="\n")

Question: When did Beyonce start becoming popular?
Answer: late 1990s

Question: What areas did Beyonce compete in when she was growing up?
Answer: singing and dancing

Question: When did Beyonce leave Destiny's Child and become a solo singer?
Answer: 2003

Question: In what city and state did Beyonce  grow up? 
Answer: Houston

Question: In which decade did Beyonce become famous?
Answer: 1990s

