In [1]:
%%capture

!pip install transformers

In [2]:
%%time

# Importing some necessary libraries and loading a pre-existing fine-tuned model

import torch
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering


tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased-distilled-squad')
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad')
model.eval()

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

CPU times: user 4.59 s, sys: 1.59 s, total: 6.18 s
Wall time: 10.9 s


DistilBertForQuestionAnswering(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            

In [3]:
%%time

import string, re

# Some useful functions for the evaluation of SQuAD 1.0


def predict(context, query):
    inputs = tokenizer.encode_plus(query, context, return_tensors='pt')
    outputs = model(**inputs)
    answer_start = torch.argmax(outputs[0])
    answer_end = torch.argmax(outputs[1]) + 1
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(\
                                                                inputs['input_ids'][0][answer_start : answer_end]))
    return answer
    

'''
Removing articles and punctuation and standardizing whitespace
'''
def normalize_text(s):
    
    def remove_articles(text):
        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
        return re.sub(regex, ' ', text)
        
    def white_space_fix(text):
        return ' '.join(text.split())
        
    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)
        
    def lower(text):
        return text.lower()
        
    return white_space_fix(remove_articles(remove_punc(lower(s))))
    
    
def compute_exact_match(prediction, truth):
    return int(normalize_text(prediction) == normalize_text(truth))
    

def compute_f1(prediction,truth):
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()
    # if either the prediction or the truth is no-answer, then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)
    common_tokens = set(pred_tokens) & set(truth_tokens)
    if len(common_tokens) == 0:
        return 0
    precision = len(common_tokens) / len(pred_tokens)
    recall = len(common_tokens) / len(truth_tokens)
    return 2 * precision * recall / (precision + recall)
    
    
def give_an_answer(context, query, answer):
    prediction = predict(context, query)
    em_score = compute_exact_match(prediction, answer)
    f1_score = compute_f1(prediction, answer)
    print (f'Question : {query}')
    print (f'Prediction : {prediction}')
    print (f'True Answer : {answer}')
    print (f'EM : {em_score}')
    print (f'F1 : {f1_score}')
    print ('\n')
    return f1_score

CPU times: user 13 µs, sys: 0 ns, total: 13 µs
Wall time: 18.6 µs


In [4]:
%%time

context = "Hi! My name is Alexa and I am 21 years old. I used to live in Peristeri of Athens, but now I moved on in Kaisariani of Athens."

queries = ["How old is Alexa?",
           "Where does Alexa live now?",
           "Where did Alexa use to live?"
          ]
answers = ["21",
           "Kaisariani of Athens",
           "Peristeri of Athens"
          ]

for q,a in zip(queries,answers):
    give_an_answer(context,q,a)

Question : How old is Alexa?
Prediction : 21
True Answer : 21
EM : 1
F1 : 1.0


Question : Where does Alexa live now?
Prediction : kaisariani of athens
True Answer : Kaisariani of Athens
EM : 1
F1 : 1.0


Question : Where did Alexa use to live?
Prediction : peristeri of athens
True Answer : Peristeri of Athens
EM : 1
F1 : 1.0


CPU times: user 461 ms, sys: 10.1 ms, total: 471 ms
Wall time: 294 ms


In [5]:
%%time

context = """ Queen are a British rock band formed in London in 1970. Their classic line-up was 
              Freddie Mercury (lead vocals, piano), Brian May (guitar, vocals), Roger Taylor 
              (drums, vocals) and John Deacon (bass). Their earliest works were influenced by 
              progressive rock, hard rock and heavy metal, but the band gradually ventured into 
              more conventional and radio-friendly works by incorporating further styles, such as 
              arena rock and pop rock. """

queries = ["When was Queen found?", 
           "Who were the classic members of Queen band?", 
           "What kind of band they are?"]

answers = ["1970", 
           "Freddie Mercury, Brian May, Roger Taylor and John Deacon", 
           "rock"]


for q,a in zip(queries, answers):
    give_an_answer(context, q, a)

Question : When was Queen found?
Prediction : 1970
True Answer : 1970
EM : 1
F1 : 1.0


Question : Who were the classic members of Queen band?
Prediction : freddie mercury ( lead vocals , piano ) , brian may ( guitar , vocals ) , roger taylor ( drums , vocals ) and john deacon ( bass )
True Answer : Freddie Mercury, Brian May, Roger Taylor and John Deacon
EM : 0
F1 : 0.6923076923076924


Question : What kind of band they are?
Prediction : queen are a british rock
True Answer : rock
EM : 0
F1 : 0.4


CPU times: user 671 ms, sys: 654 µs, total: 671 ms
Wall time: 337 ms


In [6]:
%%time

context = """ Mount Olympus is the highest mountain in Greece. It is part of the Olympus massif near 
              the Gulf of Thérmai of the Aegean Sea, located in the Olympus Range on the border 
              between Thessaly and Macedonia, between the regional units of Pieria and Larissa, 
              about 80 km (50 mi) southwest from Thessaloniki. Mount Olympus has 52 peaks and deep 
              gorges. The highest peak, Mytikas, meaning "nose", rises to 2917 metres (9,570 ft). 
              It is one of the highest peaks in Europe in terms of topographic prominence. """

queries = ["How many metres high is Mount Olympus?", 
           "What famous landmarks are near Mount Olympus?",
           "How far away is Olympus from Thessaloniki?"]

answers = ["2917", 
           "Gulf of Thérmai of the Aegean Sea", 
           "80 km (50 mi)"]


for q, a in zip(queries, answers):
    give_an_answer(context, q, a)

Question : How many metres high is Mount Olympus?
Prediction : 2917
True Answer : 2917
EM : 1
F1 : 1.0


Question : What famous landmarks are near Mount Olympus?
Prediction : 52 peaks and deep gorges
True Answer : Gulf of Thérmai of the Aegean Sea
EM : 0
F1 : 0


Question : How far away is Olympus from Thessaloniki?
Prediction : 80 km ( 50 mi ) southwest
True Answer : 80 km (50 mi)
EM : 0
F1 : 0.888888888888889


CPU times: user 786 ms, sys: 1.57 ms, total: 787 ms
Wall time: 395 ms


In [7]:
%%time

context = """ The COVID-19 pandemic, also known as the coronavirus pandemic, is an ongoing pandemic 
              of coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome 
              coronavirus 2 (SARS-CoV-2). It was first identified in December 2019 in Wuhan, China. 
              The World Health Organization declared the outbreak a Public Health Emergency of 
              International Concern in January 2020 and a pandemic in March 2020. As of 6 February 
              2021, more than 105 million cases have been confirmed, with more than 2.3 million deaths 
              attributed to COVID-19. Symptoms of COVID-19 are highly variable, ranging from none to 
              severe illness. The virus spreads mainly through the air when people are near each 
              other.[b] It leaves an infected person as they breathe, cough, sneeze, or speak and 
              enters another person via their mouth, nose, or eyes. It may also spread via 
              contaminated surfaces. People remain infectious for up to two weeks, and can spread 
              the virus even if they do not show symptoms.[9]"""

queries = ["What is COVID-19?",
           "What is caused by COVID-19?",
           "How many cases have been confirmed from COVID-19?",
           "How many deaths have been confirmed from COVID-19?",
           "How is COVID-19 spread?",
           "How long can an infected person remain infected?",
           "Can a infected person spread the virus even if they don't have symptoms?",
           "What do elephants eat?"]

answers = ["an ongoing pandemic of coronavirus disease 2019",
           "severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
           "more than 105 million cases", 
           "more than 2.3 million deaths",
           "mainly through the air when people are near each other. It leaves an infected person as they breathe, cough, sneeze, or speak and enters another person via their mouth, nose, or eyes. It may also spread via contaminated surfaces.", 
           "up to two weeks", 
           "yes", 
           ""]


for q,a in zip(queries,answers):
    give_an_answer(context,q,a)

Question : What is COVID-19?
Prediction : coronavirus disease 2019
True Answer : an ongoing pandemic of coronavirus disease 2019
EM : 0
F1 : 0.6666666666666666


Question : What is caused by COVID-19?
Prediction : severe acute respiratory syndrome coronavirus 2
True Answer : severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)
EM : 0
F1 : 0.923076923076923


Question : How many cases have been confirmed from COVID-19?
Prediction : more than 105 million
True Answer : more than 105 million cases
EM : 0
F1 : 0.888888888888889


Question : How many deaths have been confirmed from COVID-19?
Prediction : 2 . 3 million
True Answer : more than 2.3 million deaths
EM : 0
F1 : 0.25


Question : How is COVID-19 spread?
Prediction : mainly through the air when people are near each other . [ b ] it leaves an infected person as they breathe , cough , sneeze , or speak and enters another person via their mouth , nose , or eyes . it may also spread via contaminated surfaces
True Answer : mainly

In [8]:
%%time

context = """ Harry Potter is a series of seven fantasy novels written by British author, J. K. Rowling. 
              The novels chronicle the lives of a young wizard, Harry Potter, and his friends Hermione Granger and 
              Ron Weasley, all of whom are students at Hogwarts School of Witchcraft and Wizardry. 
              The main story arc concerns Harry's struggle against Lord Voldemort, a dark wizard who 
              intends to become immortal, overthrow the wizard governing body known as the Ministry of 
              Magic and subjugate all wizards and Muggles (non-magical people). Since the release of 
              the first novel, Harry Potter and the Philosopher's Stone, on 26 June 1997, the books 
              have found immense popularity, positive reviews, and commercial success worldwide. They 
              have attracted a wide adult audience as well as younger readers and are often considered 
              cornerstones of modern young adult literature.[2] As of February 2018, the books have 
              sold more than 500 million copies worldwide, making them the best-selling book series in 
              history, and have been translated into eighty languages.[3] The last four books 
              consecutively set records as the fastest-selling books in history, with the final 
              installment selling roughly eleven million copies in the United States within twenty-four 
              hours of its release.  """

queries = ["Who wrote Harry Potter's novels?",
           "Who are Harry Potter's friends?",
           "Who is the enemy of Harry Potter?",
           "What are Muggles?",
           "Which is the name of Harry Poter's first novel?",
           "When did the first novel release?",
           "Who was attracted by Harry Potter novels?",
           "How many languages Harry Potter has been translated into? "]

answers = ["J. K. Rowling",
           "Hermione Granger and Ron Weasley",
           "Lord Voldemort",
           "non-magical people",
           "Harry Potter and the Philosopher's Stone",
           "26 June 1997",
           "a wide adult audience as well as younger readers",
           "eighty"]


for q,a in zip(queries,answers):
    give_an_answer(context,q,a)     

Question : Who wrote Harry Potter's novels?
Prediction : j . k . rowling
True Answer : J. K. Rowling
EM : 1
F1 : 1.0


Question : Who are Harry Potter's friends?
Prediction : hermione granger and ron weasley
True Answer : Hermione Granger and Ron Weasley
EM : 1
F1 : 1.0


Question : Who is the enemy of Harry Potter?
Prediction : lord voldemort
True Answer : Lord Voldemort
EM : 1
F1 : 1.0


Question : What are Muggles?
Prediction : non - magical people
True Answer : non-magical people
EM : 0
F1 : 0.4


Question : Which is the name of Harry Poter's first novel?
Prediction : harry potter and the philosopher ' s stone
True Answer : Harry Potter and the Philosopher's Stone
EM : 0
F1 : 0.7272727272727272


Question : When did the first novel release?
Prediction : 26 june 1997
True Answer : 26 June 1997
EM : 1
F1 : 1.0


Question : Who was attracted by Harry Potter novels?
Prediction : wide adult audience as well as younger readers
True Answer : a wide adult audience as well as younger reader

In [9]:
%%time

# Importing some of the necessary libraries

import json
from pathlib import Path
import time

CPU times: user 14 µs, sys: 1 µs, total: 15 µs
Wall time: 20.3 µs


In [10]:
%%time

# Retrieval and Storage of the Data

def generate_texts_queries_answers(path):
    with open(path, 'rb') as f:
        squad_dict = json.load(f)
    texts, queries, answers = [], [], []
    for group in squad_dict['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    texts.append(context)
                    queries.append(question)
                    answers.append(answer)
    return texts, queries, answers
    

validation_path = Path('../input/stanford-question-answering-dataset/dev-v1.1.json')
validation_texts, validation_queries, validation_answers = generate_texts_queries_answers(validation_path)

print (len(validation_texts))
print (len(validation_queries))
print (len(validation_answers))

34726
34726
34726
CPU times: user 66.9 ms, sys: 17 ms, total: 83.8 ms
Wall time: 141 ms


In [11]:
%%time

# Selecting only unique queries to get a fair value of the average F1 Score

unique_queries = set()
unique_validation_contexts, unique_validation_queries, unique_validation_answers = [], [], []

for i in range(len(validation_queries)):
    if validation_queries[i] in unique_queries:
        continue
    unique_queries.add(validation_queries[i])
    unique_validation_contexts.append(validation_texts[i])
    unique_validation_queries.append(validation_queries[i])
    unique_validation_answers.append(validation_answers[i]['text'])


print (len(unique_validation_contexts))
print (len(unique_validation_queries))
print (len(unique_validation_answers))

10539
10539
10539
CPU times: user 17.8 ms, sys: 0 ns, total: 17.8 ms
Wall time: 17.8 ms


In [12]:
%%time

# Application of Evaluation Metrics on the Validation Dataset of SQuAD 1.0

from transformers import pipeline

qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)
avg_f1_score, avg_score, i = 0.0, 0.0, 0

for context, question, answer in zip(unique_validation_contexts, unique_validation_queries, unique_validation_answers):
    print ('Query Number', i)
    try:
        f1 = give_an_answer(context, question, answer)
        avg_f1_score += f1
        i += 1
        result = qa_pipeline(question=question, context=context)
        score = result['score']
        print ('Pipeline Score :', score)
        avg_score += score
    except Exception as e:
        continue
    
avg_f1_score /= i
avg_score /= i
print ('Mean of all the F1-Scores :', avg_f1_score)
print ('Mean of all the Scores :', avg_score)

Query Number 0
Question : Which NFL team represented the AFC at Super Bowl 50?
Prediction : denver broncos
True Answer : Denver Broncos
EM : 1
F1 : 1.0


Pipeline Score : 0.9883761405944824
Query Number 1
Question : Which NFL team represented the NFC at Super Bowl 50?
Prediction : carolina panthers
True Answer : Carolina Panthers
EM : 1
F1 : 1.0


Pipeline Score : 0.9976987838745117
Query Number 2
Question : Where did Super Bowl 50 take place?
Prediction : levi ' s stadium
True Answer : Santa Clara, California
EM : 0
F1 : 0


Pipeline Score : 0.4958169460296631
Query Number 3
Question : Which NFL team won Super Bowl 50?
Prediction : 
True Answer : Denver Broncos
EM : 0
F1 : 0


Pipeline Score : 0.25972631573677063
Query Number 4
Question : What color was used to emphasize the 50th anniversary of the Super Bowl?
Prediction : gold
True Answer : gold
EM : 1
F1 : 1.0


Pipeline Score : 0.9760332703590393
Query Number 5
Question : What was the theme of Super Bowl 50?
Prediction : arabic num

Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors


Pipeline Score : 0.9849967360496521
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Query Number 4128
Question : The legislative body, the Council, are made up of what type of individuals?
Prediction : different ministers of the member states
True Answer : different ministers of the member states
EM : 1
F1 : 1.0


Pipeline Score : 0.8483889102935791
Query Number 4129
Question : Who is currently the President of the Council?
Prediction : donald tusk
True Answer : Donald Tusk
EM : 1
F1 : 1.0


Pipeline Score : 0.9255362749099731
Query Number 4130
Question : How are the votes weighted to ensure that smaller states aren't dominated by larger ones?
Prediction : inversely to member state size
True Answer : inversely
EM : 0
F1 : 0.33333333333333337


Pipeline Score : 0.26799672842025757
Query Number 4131
Question : What are the total number of votes to be counted during the voting