In [1]:
!pip install transformers datasets -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m94.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m74.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
import json
import re
import string
import time
from collections import Counter
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AdamW, BertForQuestionAnswering, AutoModel

## Define Tokenizer & Load Model

In [4]:
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer and the model.
MODEL_CHECKPOINT = "fahmiaziz/bert-squad-v2"

In [6]:
# Load the tokenizer and the question-answering model from the same checkpoint so
# that the vocabulary and the weights match.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = BertForQuestionAnswering.from_pretrained(MODEL_CHECKPOINT)

# Switch the module to evaluation mode. Being the last expression of the cell, the
# returned module is also rendered as the cell output.
model.eval()

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elem

### Utility Functions

In [8]:
def predict(context: str, query: str) -> str:
    """Return the answer span the global ``model`` extracts from ``context``.

    The question and the context are encoded as one sequence pair. The span runs
    from the highest-scoring start position to the highest-scoring end position.

    Args:
        context: Passage that may contain the answer.
        query: Question to answer.

    Returns:
        The decoded answer text, or an empty string when no span is selected
        (the end precedes the start, or the span holds only special tokens such
        as ``[CLS]``).
    """
    inputs = tokenizer(
        query,
        context,
        return_tensors="pt",
        # Only the context may be shortened; the question is always kept whole.
        truncation="only_second",
        # The model cannot embed positions beyond this length, so longer pairs
        # would otherwise fail inside the forward pass.
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: do not build the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Batch size is 1, so the flattened argmax is the token position.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1  # slice end is exclusive

    if answer_end <= answer_start:
        return ""

    span_ids = inputs["input_ids"][0][answer_start:answer_end].tolist()
    # Drop [CLS]/[SEP] so a span on a special token does not become answer text.
    span_tokens = tokenizer.convert_ids_to_tokens(span_ids, skip_special_tokens=True)

    return tokenizer.convert_tokens_to_string(span_tokens)

def normalize_text(s):
    """Normalize an answer string for SQuAD-style comparison.

    Steps, in order: lower-case, strip ASCII punctuation, drop the English
    articles "a", "an" and "the", then collapse whitespace runs.

    Lower-casing must come first: the article pattern is lower-case only, so
    capitalised articles ("The", "An") would otherwise be kept.

    Args:
        s: Raw answer text.

    Returns:
        The normalized text; may be an empty string.
    """
    s = s.lower()

    # remove punctuation
    exclude = set(string.punctuation)
    s = ''.join(ch for ch in s if ch not in exclude)

    # remove articles (a, an, the)
    s = re.sub(r'\b(a|an|the)\b', ' ', s, flags=re.UNICODE)

    # collapse repeated whitespace and trim both ends
    return " ".join(s.split())


In [70]:
def compute_exact_match(prediction, truth):
    """Return 1 if prediction and truth are equal after normalization, else 0."""
    normalized_prediction = normalize_text(prediction)
    normalized_truth = normalize_text(truth)
    return 1 if normalized_prediction == normalized_truth else 0


def compute_f1(prediction, truth):
    """Token-level F1 between a predicted and a reference answer.

    Both strings are normalized and split on whitespace. Overlap is counted as
    a multiset intersection, so a token that occurs twice in both answers
    counts twice. With a plain set intersection, identical answers containing
    a repeated token would score below 1.

    Args:
        prediction: Predicted answer text.
        truth: Reference answer text.

    Returns:
        The F1 score in [0, 1]. If either side has no tokens after
        normalization, returns 1 when both are empty and 0 otherwise.
    """
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    # number of shared tokens, respecting multiplicity
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

    # if there are no common tokens then f1 = 0
    if num_common == 0:
        return 0

    prec = num_common / len(pred_tokens)
    rec = num_common / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

def compute_precision(prediction, truth):
    """Token-level precision of a predicted answer against a reference answer.

    Precision is the share of predicted tokens that also occur in the
    reference. Both strings are normalized first, and overlap is counted as a
    multiset intersection so repeated tokens are not under-counted.

    Args:
        prediction: Predicted answer text.
        truth: Reference answer text.

    Returns:
        The precision in [0, 1]. If either side has no tokens after
        normalization, returns 1 when both are empty and 0 otherwise.
    """
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either side is no-answer then precision = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    # number of shared tokens, respecting multiplicity
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

    # if there are no common tokens then precision = 0
    if num_common == 0:
        return 0

    return num_common / len(pred_tokens)

def compute_recall(prediction, truth):
    """Token-level recall of a predicted answer against a reference answer.

    Recall is the share of reference tokens that also occur in the prediction.
    Both strings are normalized first, and overlap is counted as a multiset
    intersection so repeated tokens are not under-counted.

    Args:
        prediction: Predicted answer text.
        truth: Reference answer text.

    Returns:
        The recall in [0, 1]. If either side has no tokens after
        normalization, returns 1 when both are empty and 0 otherwise.
    """
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either side is no-answer then recall = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    # number of shared tokens, respecting multiplicity
    num_common = sum((Counter(pred_tokens) & Counter(truth_tokens)).values())

    # if there are no common tokens then recall = 0
    if num_common == 0:
        return 0

    return num_common / len(truth_tokens)

In [93]:
def give_an_answer(context,query,answer):
    """Answer ``query`` from ``context``, print a metric report, return the F1.

    The prediction is scored against ``answer`` with exact match, F1,
    precision and recall. The question, the prediction, the reference answer
    and every score are printed one per line.

    Args:
        context: Passage to search for the answer.
        query: Question to answer.
        answer: Reference answer text.

    Returns:
        The F1 score of the prediction.
    """
    predicted = predict(context, query)

    # Score everything first, then print, so the report is emitted in one go.
    exact = compute_exact_match(predicted, answer)
    f1 = compute_f1(predicted, answer)
    precision = compute_precision(predicted, answer)
    recall = compute_recall(predicted, answer)

    report_lines = (
        f"Question: {query}",
        f"Prediction: {predicted}",
        f"True Answer: {answer}",
        f"EM: {exact}",
        f"F1: {f1}",
        f"Precision: {precision}",
        f"Recall: {recall}",
    )
    for report_line in report_lines:
        print(report_line)

    return f1

In [23]:
from datasets import load_dataset

# Load the "squad_v2" dataset and shuffle it. The value 100 is passed positionally
# to `shuffle` (presumably as the random seed — confirm against the `datasets` API).
dataset = load_dataset("squad_v2").shuffle(100)

In [82]:
# Display one record of the shuffled validation split to inspect its fields.
dataset["validation"][100]

{'id': '5ad14d8e645df0001a2d16b9',
 'title': 'European_Union_law',
 'context': 'The Social Charter was subsequently adopted in 1989 by 11 of the then 12 member states. The UK refused to sign the Social Charter and was exempt from the legislation covering Social Charter issues unless it agreed to be bound by the legislation. The UK subsequently was the only member state to veto the Social Charter being included as the "Social Chapter" of the 1992 Maastricht Treaty - instead, an Agreement on Social Policy was added as a protocol. Again, the UK was exempt from legislation arising from the protocol, unless it agreed to be bound by it. The protocol was to become known as "Social Chapter", despite not actually being a chapter of the Maastricht Treaty. To achieve aims of the Agreement on Social Policy the European Union was to "support and complement" the policies of member states. The aims of the Agreement on Social Policy are:',
 'question': 'Who was the only member state not to veto the So

## Question Answering

In [84]:
# Hand-written sanity check: one passage, three questions, their reference answers.
context = "Batman is a superhero who appears in American comic books published by DC Comics. The character was created by artist Bob Kane and writer Bill Finger, and debuted in the 27th issue of the comic book Detective Comics on March 30, 1939. In the DC Universe continuity, Batman is the alias of Bruce Wayne, a wealthy American playboy, philanthropist, and industrialist who resides in Gotham City."

queries = [
    "In which comics does Batman appear?",
    "Who created the character?",
    "When did Batman debut?",
]

answers = [
    "DC Comics",
    "Bob Kane",
    "March 30, 1939",
]

# Questions and answers are aligned by position; print the report for each pair.
for q, a in zip(queries, answers):
    give_an_answer(context, q, a)

Question: In which comics does Batman appear?
Prediction: detective comics
True Answer: DC Comics
EM: 0
F1: 0.5
Precision: 0.5
Recall: 0.5
Question: Who created the character?
Prediction: bob kane
True Answer: Bob Kane
EM: 1
F1: 1.0
Precision: 1.0
Recall: 1.0
Question: When did Batman debut?
Prediction: march 30, 1939.
True Answer: March 30, 1939
EM: 1
F1: 1.0
Precision: 1.0
Recall: 1.0


## Random sample of questions from the SQuAD v2 validation set (sample size set by `TOTAL_QA`)

In [86]:
# download data
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

--2023-10-13 12:22:35--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4370528 (4.2M) [application/json]
Saving to: ‘dev-v2.0.json’


2023-10-13 12:22:35 (229 MB/s) - ‘dev-v2.0.json’ saved [4370528/4370528]



In [97]:
# The dev set is downloaded into the current working directory, so read it from
# there instead of an absolute, Colab-specific location.
path = Path('dev-v2.0.json')

# Parse the whole dev set into memory; json.load accepts a binary file object.
with path.open('rb') as f:
    dev_dict = json.load(f)


In [98]:
import random


TOTAL_QA = 100     # number of (context, question, answer) triplets to evaluate
SAMPLE_SEED = 42   # fixed seed so the evaluated subset is the same on every run

# Flatten the dev set into one triplet per reference answer. A question with
# several reference answers yields several triplets; a question whose 'answers'
# list is empty yields none.
triplets = [
    {'context': passage['context'], 'question': qa['question'], 'answer': answer}
    for group in dev_dict['data']
    for passage in group['paragraphs']
    for qa in passage['qas']
    for answer in qa['answers']
]

# Parallel views of the full pool.
test_contexts  = [t['context'] for t in triplets]
test_questions = [t['question'] for t in triplets]
test_answers   = [t['answer'] for t in triplets]

# Sample WITHOUT replacement so no triplet is scored twice (random.choices draws
# with replacement). A private Random instance leaves the global RNG untouched.
# The size is capped at the pool size because sample() rejects a larger k.
random_set = random.Random(SAMPLE_SEED).sample(triplets, k=min(TOTAL_QA, len(triplets)))

# Separate back to lists to use in give_an_answer function
random_contexts  = [t['context'] for t in random_set]
random_questions = [t['question'] for t in random_set]
random_answers   = [t['answer'] for t in random_set]

In [100]:
# Accumulate the per-example F1 and count how many examples were actually scored.
totalF1 = 0
evaluated = 0

for c, q, a in zip(random_contexts, random_questions, random_answers):
    # Each sampled answer is a dict; its 'text' entry is the reference answer string.
    f = give_an_answer(c, q, a['text'])
    totalF1 += f
    evaluated += 1
    print("\n=========================\n")

# Divide by the number of scored examples, not by a separate constant, so the
# mean stays correct if the sample size differs; an empty sample reports 0.0.
print("\nF1 Score:", (totalF1 / evaluated) if evaluated else 0.0)

Question: What does the Sieve of Eratosthenes do?
Prediction: prime numbers :
True Answer: compute primes
EM: 0
F1: 0
Precision: 0
Recall: 0


Question: How many soldiers were in each Tumen?
Prediction: 4
True Answer: 10,000
EM: 0
F1: 0
Precision: 0
Recall: 0


Question: What future Revolutionary key figures participated in this attack?
Prediction: thomas gage,
True Answer: Washington and Thomas Gage
EM: 0
F1: 0.6666666666666666
Precision: 1.0
Recall: 0.5


Question: What alumni wrote "The Good War"?
Prediction: 
True Answer: Studs Terkel
EM: 0
F1: 0
Precision: 0
Recall: 0


Question: What is the name of an algebraic structure in which addition, subtraction and multiplication are defined?
Prediction: prime elements and irreducible elements. an element p of r is called prime element if it is neither zero nor a unit ( i. e., does not have a multiplicative inverse ) and satisfies the following requirement : given x and y in r such that p divides the product xy,
True Answer: commutative ri