## The Stanford Question Answering Dataset

In [1]:
import json

In [2]:
with open('../data/dev-v2.0.json', 'rb') as f:
    squad = json.load(f) 

In [4]:
squad.keys()

dict_keys(['version', 'data'])

In [9]:
squad['data'][0]

{'title': 'Normans',
 'paragraphs': [{'qas': [{'question': 'In what country is Normandy located?',
     'id': '56ddde6b9a695914005b9628',
     'answers': [{'text': 'France', 'answer_start': 159},
      {'text': 'France', 'answer_start': 159},
      {'text': 'France', 'answer_start': 159},
      {'text': 'France', 'answer_start': 159}],
     'is_impossible': False},
    {'question': 'When were the Normans in Normandy?',
     'id': '56ddde6b9a695914005b9629',
     'answers': [{'text': '10th and 11th centuries', 'answer_start': 94},
      {'text': 'in the 10th and 11th centuries', 'answer_start': 87},
      {'text': '10th and 11th centuries', 'answer_start': 94},
      {'text': '10th and 11th centuries', 'answer_start': 94}],
     'is_impossible': False},
    {'question': 'From which countries did the Norse originate?',
     'id': '56ddde6b9a695914005b962a',
     'answers': [{'text': 'Denmark, Iceland and Norway', 'answer_start': 256},
      {'text': 'Denmark, Iceland and Norway', 'answer

## processing SQuAD data

In [25]:
new_squad = []

In [26]:
for group in squad['data']:
    for paragraph in group['paragraphs']:
        context = paragraph['context']
        for qa_pair in paragraph['qas']:
            question = qa_pair['question']
            if 'answers' in qa_pair.keys() and len(qa_pair['answers']) > 0:
                answer = qa_pair['answers'][0]['text']
            elif 'plausible_answers' in qa_pair.keys() and len(qa_pair['plausible_answers']) > 0:
                answer = qa_pair['plausible_answers'][0]['text']  
            else:
                answer = None
            new_squad.append({'context': context, 'question': question, 'answer': answer})          
              


In [27]:
new_squad[:4]

[{'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.',
  'question': 'In what country is Normandy located?',
  'answer': 'France'},
 {'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a r

In [15]:
new_squad[-4:]

[{'context': 'The pound-force has a metric counterpart, less commonly used than the newton: the kilogram-force (kgf) (sometimes kilopond), is the force exerted by standard gravity on one kilogram of mass. The kilogram-force leads to an alternate, but rarely used unit of mass: the metric slug (sometimes mug or hyl) is that mass that accelerates at 1 m·s−2 when subjected to a force of 1 kgf. The kilogram-force is not a part of the modern SI system, and is generally deprecated; however it still sees use for some purposes as expressing aircraft weight, jet thrust, bicycle spoke tension, torque wrench settings and engine output torque. Other arcane units of force include the sthène, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.',
  'question': 'What does not have a metric counterpart?',
  'answer': 'pound-force'},
 {'context': 'The pound-force has a metric counterpart, less commonly used than the newton: the kilogram-force (kgf) (sometimes kilopond), is the fo

In [28]:
with open ('../data/squad.json', 'w') as f:
    json.dump(new_squad, f)

## Training data with Match case

In [21]:
match "test":
    case "test":
        print("True")

True


In [22]:
# initialize list where we will place all of our data
new_squad = []

# we need to loop through groups -> paragraphs -> qa_pairs
for group in squad['data']:
    for paragraph in group['paragraphs']:
        # we pull out the context from here
        context = paragraph['context']
        for qa_pair in paragraph['qas']:
            # we pull out the question
            question = qa_pair['question']
            # now the NEW match-case logic to check if we have 'answers' or 'plausible_answers'
            match qa_pair:
                case {'answers': [{'text': answer}]}:
                    # this will be truthy IF the qa_pair dictionary contains a 'answers' key
                    # which in turn contains a list containing a dictionary with a 'text' key
                    # and any value mapping to this 'text' key is assigned to the answer variable
                    pass  # because the case pattern assigns 'answer' for us, we pass
                case {'plausible_answers': [{'text': answer}]}:
                    # we perform same check but for 'plausible_answers'
                    pass
                case _:
                    # this is our catchall, we will set answer to None
                    answer = None
            # append dictionary sample to parsed squad
            new_squad.append({
                'question': question,
                'answer': answer,
                'context': context
            })

In [24]:
new_squad[:2]

[{'question': 'In what country is Normandy located?',
  'answer': None,
  'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.'},
 {'question': 'When were the Normans in Normandy?',
  'answer': None,
  'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people w

## Q&A Model (Reader model)

In [29]:
with open('../data/squad.json', 'r') as f:
    squad=json.load(f)

In [30]:
squad

[{'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.',
  'question': 'In what country is Normandy located?',
  'answer': 'France'},
 {'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a r

In [31]:
from transformers import BertTokenizer, BertForQuestionAnswering

In [32]:
modelname ="deepset/bert-base-cased-squad2"

In [33]:
tokenizer = BertTokenizer.from_pretrained(modelname)
model = BertForQuestionAnswering.from_pretrained(modelname)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/152 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/508 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

In [34]:
from transformers import pipeline

In [35]:
qa = pipeline('question-answering', model=model, tokenizer=tokenizer)

In [36]:
squad[:2]

[{'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.',
  'question': 'In what country is Normandy located?',
  'answer': 'France'},
 {'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a r

In [37]:
qa({'question': squad[0]['question'], 'context': squad[0]['context']})

{'score': 0.9995271563529968, 'start': 159, 'end': 166, 'answer': 'France.'}

In [38]:
squad[0]['answer']

'France'

In [41]:
# we will intialize a list for answers
answers = []

for pair in squad[:20]:
    # pass in our question and context to return an answer
    ans = qa({
        'question': pair['question'],
        'context': pair['context']
    })
    # append predicted answer and real to answers list
    answers.append({
        'predicted': ans['answer'],
        'true': pair['answer']
    })

In [42]:
answers

[{'predicted': 'France.', 'true': 'France'},
 {'predicted': '10th and 11th centuries', 'true': '10th and 11th centuries'},
 {'predicted': 'Denmark, Iceland and Norway',
  'true': 'Denmark, Iceland and Norway'},
 {'predicted': 'Rollo,', 'true': 'Rollo'},
 {'predicted': '10th', 'true': '10th century'},
 {'predicted': '10th and 11th centuries', 'true': 'Normans'},
 {'predicted': 'France.', 'true': 'Normandy'},
 {'predicted': 'King Charles III of West Francia.', 'true': 'Rollo'},
 {'predicted': 'in the first half of the 10th century,',
  'true': '10th century'},
 {'predicted': 'William the Conqueror,', 'true': 'William the Conqueror'},
 {'predicted': 'Richard I', 'true': 'Richard I'},
 {'predicted': 'Christian piety,', 'true': 'Catholic'},
 {'predicted': 'political, cultural and military',
  'true': 'political, cultural and military'},
 {'predicted': 'The Normans', 'true': 'Normans'},
 {'predicted': 'Richard I', 'true': 'Normans'},
 {'predicted': 'Richard I', 'true': 'Richard I'},
 {'predi

## metrics for Language Models

### Exact match

In [46]:
import re
em  = []

for ans in answers:
    pred = re.sub(r'[^0-9a-z]','',ans['predicted']).lower()
    true = re.sub(r'[^0-9a-z]','',ans['true']).lower()

    # we will use a ternary operator to assign 1 if predicted == true, else 0
    em.append(1) if pred == true else em.append(0)

In [47]:
sum(em)/len(em)

0.5

## ROUGE (RECALL ORIENTED UNDERSTUDY FOR GIST EVALUATION)

In [48]:
!pip install rouge
!pip install rouge-score

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py): started
  Building wheel for rouge-score (setup.py): finished with status 'done'
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24972 sha256=50f6cc66ef9d1e95a759ee2cf96b1692301231b5b955952bca10f5034d42f8f5
  Stored in directory: c:\users\abhilash\appdata\local\pip\cache\wheels\5f\dd\89\461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [49]:
from rouge_score import rouge_scorer
from rouge import Rouge

In [50]:
rouge = Rouge()

In [54]:
answers[0]

{'predicted': 'France.', 'true': 'France'}

In [52]:
rouge.get_scores(answers[0]['predicted'], answers[0]['true'])

[{'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}}]

In [58]:
answers[4]

{'predicted': '10th', 'true': '10th century'}

In [59]:
rouge.get_scores(answers[4]['predicted'], answers[4]['true'])

[{'rouge-1': {'r': 0.5, 'p': 1.0, 'f': 0.6666666622222223},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 0.5, 'p': 1.0, 'f': 0.6666666622222223}}]

In [64]:
model_out = [answer['predicted'] for answer in answers]

In [65]:
model_out

['France.',
 '10th and 11th centuries',
 'Denmark, Iceland and Norway',
 'Rollo,',
 '10th',
 '10th and 11th centuries',
 'France.',
 'King Charles III of West Francia.',
 'in the first half of the 10th century,',
 'William the Conqueror,',
 'Richard I',
 'Christian piety,',
 'political, cultural and military',
 'The Normans',
 'Richard I',
 'Richard I',
 'Duchy of Normandy,',
 '"Norseman, Viking".',
 '9th century)',
 '"Normans"']

In [66]:
reference = [answer['true'] for answer in answers]

In [67]:
reference

['France',
 '10th and 11th centuries',
 'Denmark, Iceland and Norway',
 'Rollo',
 '10th century',
 'Normans',
 'Normandy',
 'Rollo',
 '10th century',
 'William the Conqueror',
 'Richard I',
 'Catholic',
 'political, cultural and military',
 'Normans',
 'Normans',
 'Richard I',
 'Antioch',
 'Viking',
 '9th century',
 '"Normans"']

In [68]:
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.5083333333333334,
  'p': 0.4904761904761905,
  'f': 0.48611110824382714},
 'rouge-2': {'r': 0.275, 'p': 0.275, 'f': 0.27499999849999995},
 'rouge-l': {'r': 0.5083333333333334,
  'p': 0.4904761904761905,
  'f': 0.48611110824382714}}

In [69]:
from tqdm import tqdm

model_out = []
reference = []

for pair in tqdm(squad[:50], leave=True):
    ans = qa({
        'question': pair['question'],
        'context': pair['context']
    })
    model_out.append(ans['answer'])
    reference.append(pair['answer'])

100%|██████████| 50/50 [05:25<00:00,  6.50s/it]


In [70]:
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.47857142857142854,
  'p': 0.4604761904761905,
  'f': 0.44963858099752485},
 'rouge-2': {'r': 0.24671428571428575,
  'p': 0.26030303030303026,
  'f': 0.23692152155225366},
 'rouge-l': {'r': 0.47857142857142854,
  'p': 0.4604761904761905,
  'f': 0.44963858099752485}}

In [72]:
scores = rouge.get_scores(model_out, reference)

In [79]:
print(model_out[4], '|', reference[4], '|', scores[4]['rouge-l']['f'] )

10th | 10th century | 0.6666666622222223


In [80]:
clean = re.compile('(?i)[^0-9a-z ]')

model_out = [clean.sub('', ans).lower() for ans in model_out]
reference = [clean.sub('', ans).lower() for ans in reference]

In [81]:
scores = rouge.get_scores(model_out, reference)

In [82]:
print(model_out[4], '|', reference[4], '|', scores[4]['rouge-l']['f'] )

10th | 10th century | 0.6666666622222223


In [83]:
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.6552380952380952,
  'p': 0.5684848484848485,
  'f': 0.574083024869747},
 'rouge-2': {'r': 0.3167142857142857,
  'p': 0.31316017316017314,
  'f': 0.29192152130850363},
 'rouge-l': {'r': 0.6552380952380952,
  'p': 0.5684848484848485,
  'f': 0.574083024869747}}

In [86]:
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)


In [87]:
scores = [scorer.score(model, ref) for model, ref in zip(model_out, reference)]

In [92]:
scores

[{'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0),
  'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)},
 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0),
  'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)},
 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0),
  'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)},
 {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0),
  'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)},
 {'rouge1': Score(precision=0.5, recall=1.0, fmeasure=0.6666666666666666),
  'rougeL': Score(precision=0.5, recall=1.0, fmeasure=0.6666666666666666)},
 {'rouge1': Score(precision=0.0, recall=0.0, fmeasure=0.0),
  'rougeL': Score(precision=0.0, recall=0.0, fmeasure=0.0)},
 {'rouge1': Score(precision=0.0, recall=0.0, fmeasure=0.0),
  'rougeL': Score(precision=0.0, recall=0.0, fmeasure=0.0)},
 {'rouge1': Score(precision=0.0, recall=0.0, fmeasure=0.0),
  'rougeL': Score(precision=0.0, recall=0.0, fmeasure=0.0)},
 {