<a href="https://colab.research.google.com/github/erikapaceep/NLP/blob/main/qa_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

# Directory where the raw and the flattened SQuAD files are stored.
squad_dir = '/content/data/squad'

# makedirs (rather than mkdir) also creates the missing parent directory
# '/content/data', and exist_ok=True makes the cell safe to re-run.
os.makedirs(squad_dir, exist_ok=True)

In [2]:
# Base URL of the SQuAD dataset files and the two file names to fetch
# (training split first, dev split second).
url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/'
files = ['train-v2.0.json', 'dev-v2.0.json']

In [3]:
# write the file in the squad directory
import requests

for file in files:
  # Stream the response so the whole file is never held in memory at once;
  # the timeout stops the cell from hanging forever on a stalled connection.
  with requests.get(url + file, stream=True, timeout=60) as res:
    # Fail loudly on 4xx/5xx instead of saving an error page as "JSON".
    res.raise_for_status()
    # 'wb' because iter_content yields raw bytes
    with open(os.path.join(squad_dir, file), 'wb') as f:
      # 1 MiB chunks: the previous 40-byte chunks caused an excessive
      # number of tiny writes.
      for chunk in res.iter_content(chunk_size=1 << 20):
        f.write(chunk)


In [4]:
import json

# Load the raw training file (files[0]) from squad_dir into `squad`.
with open(os.path.join(squad_dir, files[0]),'rb') as f:
  squad = json.load(f)

In [5]:
def get_QA(squad):
  """Flatten a SQuAD-style dict into a list of question/answer/context records.

  Walks ``squad['data']`` -> ``'paragraphs'`` -> ``'qas'`` and emits one dict
  per question with the keys ``'question'``, ``'answer'`` and ``'context'``.

  The answer is the ``'text'`` of the first entry in ``'answers'``. When that
  list is missing or empty, the first entry of ``'plausible_answers'`` is
  used instead. When neither is available the answer is ``None``.
  """
  records = []
  for article in squad['data']:
    for para in article['paragraphs']:
      passage = para['context']
      for qa in para['qas']:
        query = qa['question']
        # Try the real answers first, then fall back to the plausible ones.
        text = None
        for key in ('answers', 'plausible_answers'):
          if key in qa and len(qa[key]) > 0:
            # Only the first listed answer is kept.
            text = qa[key][0]['text']
            break
        records.append({
            'question': query,
            'answer': text,
            'context': passage})
  return records

In [6]:
# Flatten the training data into question/answer/context records.
new_squad = get_QA(squad)

In [7]:
# Save the flattened training records as train.json inside squad_dir.
with open(os.path.join(squad_dir, 'train.json'),'w') as f:
  json.dump(new_squad, f)

In [8]:
import json

# Load the raw dev file (files[1]). Note: this rebinds `squad`, replacing
# the training data that was loaded into the same name earlier.
with open(os.path.join(squad_dir, files[1]),'rb') as f:
  squad = json.load(f)

In [9]:
# Flatten the dev data into question/answer/context records.
dev_squad = get_QA(squad)

In [10]:
# Save the flattened dev records as dev.json inside squad_dir.
with open(os.path.join(squad_dir,'dev.json'),'w') as f:
  json.dump(dev_squad,f)

In [11]:
# Reload the flattened dev records; from here on `squad` is a list of
# question/answer/context dicts. The path is built from squad_dir (as in the
# cells that wrote the file) so it does not depend on the working directory.
with open(os.path.join(squad_dir, 'dev.json'), 'r') as f:
  squad = json.load(f)

In [14]:
# NOTE: the outputs in this notebook were produced with transformers 4.25.1
# (see the install log below); the install itself is not version-pinned.
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 27.1 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 48.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 48.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [18]:
from transformers import BertTokenizer, BertForQuestionAnswering

# Identifier of the pretrained checkpoint passed to from_pretrained below.
modelname = 'deepset/bert-base-cased-squad2'

# Load the tokenizer and the question-answering model from the same checkpoint
tokenizer = BertTokenizer.from_pretrained(modelname)
model = BertForQuestionAnswering.from_pretrained(modelname)

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/152 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/508 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

In [19]:
from transformers import pipeline

In [20]:
# Initialize a question-answering pipeline from the model and tokenizer loaded above
qa = pipeline('question-answering', model=model, tokenizer=tokenizer)

In [27]:
# The Q&A records are stored as a list of dictionaries; show the first three
squad[:3]

[{'question': 'In what country is Normandy located?',
  'answer': 'France',
  'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.'},
 {'question': 'When were the Normans in Normandy?',
  'answer': '10th and 11th centuries',
  'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: No

In [23]:
# Sanity check: run the pipeline on one hard-coded question/context pair
# (the same text as the first dev record displayed above).
qa ({
    'question':'In what country is Normandy located?',
    'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.'
})

{'score': 0.9995271563529968, 'start': 159, 'end': 166, 'answer': 'France.'}

In [30]:
# Run the QA pipeline on the first five records and pair each predicted
# answer with the reference answer stored in the record.
answers = [
  {
      'predicted': qa({
          'question': sample['question'],
          'context': sample['context']
      })['answer'],
      'actual': sample['answer']
  }
  for sample in squad[:5]
]

In [34]:
# Compare the predicted answers with the reference answers side by side
answers

[{'predicted': 'France.', 'actual': 'France'},
 {'predicted': '10th and 11th centuries', 'actual': '10th and 11th centuries'},
 {'predicted': 'Denmark, Iceland and Norway',
  'actual': 'Denmark, Iceland and Norway'},
 {'predicted': 'Rollo,', 'actual': 'Rollo'},
 {'predicted': '10th', 'actual': '10th century'}]

## Exact Match (EM)

Exact match checks whether the predicted answer is identical to the true answer.

In [36]:
# One flag per answer: 1 when the prediction equals the reference exactly,
# 0 otherwise.
em = [
  1 if item['predicted'] == item['actual'] else 0
  for item in answers
]

In [37]:
# accuracy: fraction of answers that are an exact match
sum(em)/(len(em))

0.4

In [39]:
# Repeat the exact-match check after lower-casing both strings and removing
# every character that is not a digit, a letter or a space

import re

# (prediction, reference) pairs after normalization
normalized = [
  (
      re.sub('[^0-9a-z ]','', item['predicted'].lower()),
      re.sub('[^0-9a-z ]','', item['actual'].lower())
  )
  for item in answers
]

em = [1 if guess == truth else 0 for guess, truth in normalized]

In [40]:
# accuracy: fraction of exact matches after normalization
sum(em)/(len(em))

0.8

## ROUGE

**R**ecall-**O**riented **U**nderstudy for **G**isting **E**valuation

ROUGE lets us score answers with fuzzier matching than an exact match.

ROUGE is a set of metrics (ROUGE-N, ROUGE-L and ROUGE-S),
all of which measure the match between our reference text and the predicted text.

**ROUGE-N**: measures the overlap of n-grams between the model's predicted answers and the reference answers. An n-gram is a group of n consecutive tokens or words: a unigram is a single word, a bigram is a pair of consecutive words, and so on.
ROUGE-1 matches single words, ROUGE-2 matches pairs of words, ROUGE-3 matches triples of words, and so on.



In [42]:
# NOTE: the outputs in this notebook were produced with rouge 1.0.1
# (see the install log below); the install itself is not version-pinned.
!pip install rouge

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [43]:
from rouge import Rouge

In [44]:
# Toy example: a model output string and the reference string to score it against
model_out = 'hello to the word'
reference = 'hello word'

In [45]:
# Scorer object reused for all ROUGE computations below
rouge = Rouge()

In [46]:
# Score the model output (first argument) against the reference (second argument)
rouge.get_scores(model_out, reference)

[{'rouge-1': {'r': 1.0, 'p': 0.5, 'f': 0.6666666622222223},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 1.0, 'p': 0.5, 'f': 0.6666666622222223}}]

ROUGE-2 scores zero here because there are no matching bigrams between the two sentences.

In [47]:
# Recall the five predicted/actual pairs before scoring them with ROUGE
answers

[{'predicted': 'France.', 'actual': 'France'},
 {'predicted': '10th and 11th centuries', 'actual': '10th and 11th centuries'},
 {'predicted': 'Denmark, Iceland and Norway',
  'actual': 'Denmark, Iceland and Norway'},
 {'predicted': 'Rollo,', 'actual': 'Rollo'},
 {'predicted': '10th', 'actual': '10th century'}]

In [51]:
# Apply the ROUGE score to our answers: split them into two parallel lists,
# the predictions and the reference answers
model_out = [ans['predicted'] for ans in answers]
reference = [ans['actual'] for ans in answers]

In [54]:
# With lists as input, one score dict is returned per prediction/reference pair
rouge.get_scores(model_out, reference)

[{'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}},
 {'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-2': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}},
 {'rouge-1': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-2': {'r': 1.0, 'p': 1.0, 'f': 0.999999995},
  'rouge-l': {'r': 1.0, 'p': 1.0, 'f': 0.999999995}},
 {'rouge-1': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 0.0, 'p': 0.0, 'f': 0.0}},
 {'rouge-1': {'r': 0.5, 'p': 1.0, 'f': 0.6666666622222223},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 0.5, 'p': 1.0, 'f': 0.6666666622222223}}]

In [55]:
# avg=True returns a single score dict averaged over all prediction/reference pairs
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.7, 'p': 0.8, 'f': 0.7333333294444444},
 'rouge-2': {'r': 0.4, 'p': 0.4, 'f': 0.399999998},
 'rouge-l': {'r': 0.7, 'p': 0.8, 'f': 0.7333333294444444}}

## ROUGE applied to Q&A

In [57]:
from tqdm import tqdm

# Accumulators for the predicted answers and their reference answers
model_out = []
reference = []

In [59]:
# Start from empty lists inside this cell so that re-running it does not
# append a second copy of the predictions to the results of a previous run.
model_out = []
reference = []

# Predict an answer for each of the first 50 records and collect the
# prediction together with the reference answer stored in the record.
for pair in tqdm(squad[:50], leave=True):
  ans = qa({
      'question':pair['question'],
      'context':pair['context']
  })

  model_out.append(ans['answer'])
  reference.append(pair['answer'])



100%|██████████| 50/50 [01:36<00:00,  1.94s/it]


In [60]:
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.47336561743341393,
  'p': 0.4604519774011299,
  'f': 0.44696301552311685},
 'rouge-2': {'r': 0.24297820823244554,
  'p': 0.2544940934771443,
  'f': 0.2346792553832658},
 'rouge-l': {'r': 0.47336561743341393,
  'p': 0.4604519774011299,
  'f': 0.44696301552311685}}

In [61]:
# Regex matching every character that is not a digit, a letter
# (case-insensitive because of the (?i) flag) or a space
clean = re.compile('(?i)[^0-9a-z ]')

# Remove those characters from the predictions and the references; this
# overwrites both lists with their cleaned versions.
model_out = [clean.sub('',text) for text in model_out]
reference = [clean.sub('',text) for text in reference]

In [63]:
# Average ROUGE scores again, now on the cleaned predictions and references
rouge.get_scores(model_out, reference, avg=True)

{'rouge-1': {'r': 0.6485068603712673,
  'p': 0.5713551984738425,
  'f': 0.5731400394355463},
 'rouge-2': {'r': 0.31924939467312347,
  'p': 0.30170958984518303,
  'f': 0.28552671276674035},
 'rouge-l': {'r': 0.6485068603712673,
  'p': 0.5713551984738425,
  'f': 0.5731400394355463}}