# Installation and setup

In [None]:
#!pip install cdqa

In [None]:
# Run this cell only if in colab
#!git clone https://github.com/poornimajoshi/natural-language-questions-capstone.git
#%cd natural-language-questions-capstone/

# Baseline with BERT

In [1]:
from cdqa.utils.download import download_squad, download_model

# Target folder for downloads; later cells load the reader from a
# 'models/...joblib' path inside it.
directory = 'models'

# Downloading data (disabled: the SQuAD download call is left commented out)
# download_squad(dir=directory)

# Downloading pre-trained BERT fine-tuned on SQuAD 1.1
download_model('bert-squad_1.1', dir=directory)


Downloading trained model...


In [2]:
import pandas as pd
from ast import literal_eval
from cdqa.pipeline import QAPipeline

# The 'paragraphs' column is stored as text in the CSV; the converter runs
# literal_eval on every cell so each value becomes a Python literal again
# (the preview below shows list-like values).
df = pd.read_csv('dataset.csv', converters={'paragraphs': literal_eval})
# Preview the first rows.
df.head()



Unnamed: 0,question_text,short_answers,title,paragraphs
0,which is the most common use of opt-in e-mail ...,a newsletter sent to an advertising firm 's cu...,Email marketing,[Email marketing - Wikipedia Email marketing ...
1,how i.met your mother who is the mother,Tracy McConnell,The Mother ( How I Met Your Mother ),[The Mother ( How I Met Your Mother ) - wikipe...
2,who had the most wins in the nfl,Tom Brady,List of National Football League career quarte...,[List of National Football League career quart...
3,who played mantis guardians of the galaxy 2,Pom Klementieff,Pom Klementieff,[Pom Klementieff - wikipedia Pom Klementieff ...
4,the nashville sound brought a polished and cos...,the use of lush string arrangements with a rea...,Nashville sound,[Nashville sound - wikipedia Nashville sound ...


In [None]:
# Build the QA pipeline from the serialized BERT reader on disk, then fit its
# retriever on the dataset loaded above.
cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib') # use 'distilbert_qa.joblib' for DistilBERT instead of BERT
cdqa_pipeline.fit_retriever(df=df)

In [4]:
# Quick check: ask a single question (the first question of the dataset) and
# show what the pipeline returns.
query = 'which is the most common use of opt-in e-mail marketing'
prediction = cdqa_pipeline.predict(query=query)
print('query: {}'.format(query))
# The prediction is read positionally: 0 = answer, 1 = title, 2 = paragraph.
print('answer: {}'.format(prediction[0]))
print('title: {}'.format(prediction[1]))
print('paragraph: {}'.format(prediction[2]))

query: which is the most common use of opt-in e-mail marketing
title: Train horn


## Make predictions for the first 10 rows

In [6]:
import time
# Wall-clock start; the elapsed time for the whole loop is printed at the end.
start_time = time.time()

# One [predicted answer, annotated answer] pair per question; this list is
# later passed to evaluate().
eval_bert = []
# Only the first 10 rows of the dataset are scored.
for i in range(0,10):
    print('------------Example '+str(i)+'----------------')
    print(' ')
    # Looks up the row labelled i in the 'question_text' column.
    query=df['question_text'][i]
    prediction = cdqa_pipeline.predict(query=query)
    print('query: {}'.format(query))
    # prediction is read positionally: 0 = answer, 1 = title, 2 = paragraph.
    print('predicted answer: {}'.format(prediction[0]))
    print('annotated answer: {}'.format(df['short_answers'][i]))
    eval_bert.append([prediction[0],df['short_answers'][i]])
    print('title: {}'.format(prediction[1]))
    print('paragraph: {}'.format(prediction[2]))
    print(' ')
print("--- %s seconds ---" % (time.time() - start_time))

------------Example 0----------------
 
query: which is the most common use of opt-in e-mail marketing
annotated answer: a newsletter sent to an advertising firm 's customers
title: Train horn
 
------------Example 1----------------
 
query: how i.met your mother who is the mother
predicted answer: Tracy McConnell
annotated answer: Tracy McConnell
title: The Mother ( How I Met Your Mother )
paragraph: Tracy McConnell , better known as `` The Mother '' , is the title character from the CBS television sitcom How I Met Your Mother . The show , narrated by Future Ted , tells the story of how Ted Mosby met The Mother . Tracy McConnell appears in 8 episodes from `` Lucky Penny '' to `` The Time Travelers '' as an unseen character ; she was first seen fully in `` Something New '' and was promoted to a main character in season 9 . The Mother is played by Cristin Milioti .  
 
------------Example 2----------------
 
query: who had the most wins in the nfl
predicted answer: the quarterback is th

# Evaluation from cdQA

In [7]:
#from cdqa.utils.evaluation import evaluate_pipeline

#evaluate_pipeline(cdqa_pipeline, '/content/natural-language-questions-capstone/test_data.jsonl')

# Baseline with DistilBERT

In [8]:
from cdqa.utils.download import download_squad, download_model

# Target folder for downloads; the pipeline cell below loads the reader from
# 'models/distilbert_qa.joblib'.
directory = 'models'

# Downloading data (disabled: the SQuAD download call is left commented out)
# download_squad(dir=directory)

# Downloading pre-trained DistilBERT fine-tuned on SQuAD 1.1
download_model('distilbert-squad_1.1', dir=directory)


Downloading trained model...
distilbert_qa.joblib already downloaded


In [9]:
import pandas as pd
from ast import literal_eval
from cdqa.pipeline import QAPipeline

# Reloads the same 'dataset.csv' used for the BERT baseline. The converter
# runs literal_eval on every 'paragraphs' cell so the stored text becomes a
# Python literal again (the preview below shows list-like values).
df = pd.read_csv('dataset.csv', converters={'paragraphs': literal_eval})
# Preview the first rows.
df.head()

Unnamed: 0,question_text,short_answers,title,paragraphs
0,which is the most common use of opt-in e-mail ...,a newsletter sent to an advertising firm 's cu...,Email marketing,[Email marketing - Wikipedia Email marketing ...
1,how i.met your mother who is the mother,Tracy McConnell,The Mother ( How I Met Your Mother ),[The Mother ( How I Met Your Mother ) - wikipe...
2,who had the most wins in the nfl,Tom Brady,List of National Football League career quarte...,[List of National Football League career quart...
3,who played mantis guardians of the galaxy 2,Pom Klementieff,Pom Klementieff,[Pom Klementieff - wikipedia Pom Klementieff ...
4,the nashville sound brought a polished and cos...,the use of lush string arrangements with a rea...,Nashville sound,[Nashville sound - wikipedia Nashville sound ...


In [10]:
# Rebinds cdqa_pipeline to a pipeline built from the serialized DistilBERT
# reader, then fits its retriever on the dataset loaded above.
cdqa_pipeline = QAPipeline(reader='models/distilbert_qa.joblib') # DistilBERT reader; use 'models/bert_qa.joblib' for BERT instead
cdqa_pipeline.fit_retriever(df=df)

QAPipeline(reader=BertQA(adam_epsilon=1e-08,
                         bert_model='distilbert-base-uncased',
                         do_lower_case=True, fp16=False,
                         gradient_accumulation_steps=1, learning_rate=5e-05,
                         local_rank=-1, loss_scale=0, max_answer_length=30,
                         n_best_size=20, no_cuda=False,
                         null_score_diff_threshold=0.0, num_train_epochs=3.0,
                         output_dir=None, predict_batch_size=8, seed=42,
                         server_ip='', ser...size=8,
                         verbose_logging=False, version_2_with_negative=False,
                         warmup_proportion=0.1, warmup_steps=0),
           retrieve_by_doc=False,
           retriever=BM25Retriever(b=0.75, floor=None, k1=2.0, lowercase=True,
                                   max_df=0.85, min_df=2, ngram_range=(1, 2),
                                   preprocessor=None, stop_words='english',
           

In [11]:
# Quick check with the DistilBERT pipeline: ask a single question (the first
# question of the dataset) and show what the pipeline returns.
query = 'which is the most common use of opt-in e-mail marketing'
prediction = cdqa_pipeline.predict(query=query)
print('query: {}'.format(query))
# The prediction is read positionally: 0 = answer, 1 = title, 2 = paragraph.
print('answer: {}'.format(prediction[0]))
print('title: {}'.format(prediction[1]))
print('paragraph: {}'.format(prediction[2]))

query: which is the most common use of opt-in e-mail marketing
answer: when approaching a level crossing
title: Train horn


## Make predictions for the first 10 rows

In [12]:
import time
# Wall-clock start; the elapsed time for the whole loop is printed at the end.
start_time = time.time()

# One [predicted answer, annotated answer] pair per question; this list is
# later passed to evaluate().
eval_distilbert = []
# Only the first 10 rows of the dataset are scored.
for i in range(0,10):
    print('------------Example '+str(i)+'----------------')
    print(' ')
    # Looks up the row labelled i in the 'question_text' column.
    query=df['question_text'][i]
    prediction = cdqa_pipeline.predict(query=query)
    print('query: {}'.format(query))
    # prediction is read positionally: 0 = answer, 1 = title, 2 = paragraph.
    print('predicted answer: {}'.format(prediction[0]))
    print('annotated answer: {}'.format(df['short_answers'][i]))
    eval_distilbert.append([prediction[0],df['short_answers'][i]])
    print('title: {}'.format(prediction[1]))
    print('paragraph: {}'.format(prediction[2]))
    print(' ')
print("--- %s seconds ---" % (time.time() - start_time))

------------Example 0----------------
 
query: which is the most common use of opt-in e-mail marketing
predicted answer: when approaching a level crossing
annotated answer: a newsletter sent to an advertising firm 's customers
title: Train horn
 
------------Example 1----------------
 
query: how i.met your mother who is the mother
predicted answer: The Mother
annotated answer: Tracy McConnell
title: The Mother ( How I Met Your Mother )
paragraph: The Mother ( How I Met Your Mother ) - wikipedia  The Mother ( How I Met Your Mother )  Jump to : navigation , search    Tracy McConnell     How I Met Your Mother character     The Mother appearing in `` The Locket ''     First appearance   `` Lucky Penny ( unseen ) '' `` Something New '' ( seen )     Last appearance   `` Last Forever ''     Created by   Carter Bays Craig Thomas     Portrayed by   Cristin Milioti     Information     Aliases   The Mother     Gender   Female     Spouse ( s )   Ted Mosby     Significant other ( s )   Max ( decea

query: who needs to be in the car with a permit driver
predicted answer: an adult licensed driver
annotated answer: an adult licensed driver who is at least 21 years of age or older and in the passenger seat of the vehicle at all times
title: Learner 's permit
paragraph: Typically , a driver operating with a learner 's permit must be accompanied by an adult licensed driver who is at least 21 years of age or older and in the passenger seat of the vehicle at all times .  
 
------------Example 7----------------
 
query: god's not dead a light in the darkness release date
predicted answer: March 30 , 2018
annotated answer: March 30 , 2018
title: God 's Not Dead : a Light in Darkness
paragraph: God 's Not Dead : a Light in Darkness - Wikipedia  God 's Not Dead : a Light in Darkness     God 's Not Dead : A Light in Darkness     Theatrical release poster     Directed by   Michael Mason     Produced by     David A.R. White   Michael Scott   Brittany Yost   Elizabeth Travis   Alysoun Wolfe    

# Evaluation function

In [13]:
from __future__ import print_function
from collections import Counter
import torch
import string
import re
import argparse
import tqdm
import json
import sys
import os

import joblib
from tqdm.autonotebook import tqdm
def normalize_answer(s):
    """Canonicalise an answer string before it is compared.

    The text is lower-cased, stripped of ASCII punctuation, has the English
    articles "a", "an" and "the" blanked out, and finally has every run of
    whitespace collapsed to a single space (leading/trailing space removed).
    """
    # A deletion-only translation table drops every punctuation character.
    without_punct = s.lower().translate(str.maketrans("", "", string.punctuation))
    # Articles become a space rather than vanishing, so neighbours stay apart.
    without_articles = re.sub(r"\b(a|an|the)\b", " ", without_punct)
    # split() + join trims both ends and squeezes internal whitespace.
    return " ".join(without_articles.split())


def f1_score(prediction, ground_truth):
    """Token-level F1 between a predicted answer and a reference answer.

    Both strings go through ``normalize_answer`` and are split on whitespace.
    Overlap is counted as a multiset intersection, so a repeated token only
    matches as many times as it occurs on both sides.

    Returns 0 when the two answers share no token, otherwise the harmonic
    mean of token precision and token recall.
    """
    pred_tokens = normalize_answer(prediction).split()
    gold_tokens = normalize_answer(ground_truth).split()
    shared = Counter(pred_tokens) & Counter(gold_tokens)
    overlap = sum(shared.values())
    if not overlap:
        return 0
    precision = 1.0 * overlap / len(pred_tokens)
    recall = 1.0 * overlap / len(gold_tokens)
    return (2 * precision * recall) / (precision + recall)


def exact_match_score(prediction, ground_truth):
    """Return 1 if both answers are identical after normalisation, else 0."""
    is_match = normalize_answer(prediction) == normalize_answer(ground_truth)
    return 1 if is_match else 0


def metric_max_over_ground_truths(metric_fn, prediction, ground_truth):
    """Best score of ``prediction`` against one or several reference answers.

    Args:
        metric_fn: Callable ``metric_fn(prediction, ground_truth)`` returning
            a numeric score.
        prediction: The predicted answer; passed unchanged to ``metric_fn``.
        ground_truth: Either a single reference answer, or a list/tuple of
            acceptable reference answers.

    Returns:
        The highest score ``metric_fn`` gives across the references, or 0 when
        an empty list/tuple of references is supplied.
    """
    # A bare value is wrapped so callers passing a single reference keep
    # getting exactly metric_fn(prediction, ground_truth).
    if isinstance(ground_truth, (list, tuple)):
        references = ground_truth
    else:
        references = [ground_truth]
    return max((metric_fn(prediction, ref) for ref in references), default=0)

def evaluate(eval_list):
    """Score a collection of ``[prediction, ground_truth]`` pairs.

    Prints the summed exact-match and F1 scores over all pairs, and returns
    the same two metrics as percentages of the number of pairs.

    Args:
        eval_list: Iterable of two-item sequences; item 0 is the predicted
            answer and item 1 is the annotated answer.

    Returns:
        ``{"exact_match": float, "f1": float}`` with both values in the
        0-100 range; both are 0.0 when ``eval_list`` is empty.
    """
    exact_match = increm_f1 = total = 0
    for item in eval_list:
        # Counted here rather than with len() so any iterable is accepted.
        total += 1
        exact_match += metric_max_over_ground_truths(exact_match_score, item[0], item[1])
        increm_f1 += metric_max_over_ground_truths(f1_score, item[0], item[1])
    # The printed values stay raw sums, exactly as before.
    print("Exact_match: ", exact_match)
    print("F1_score: ", increm_f1)
    if total == 0:
        # Nothing to average; avoid dividing by zero.
        return {"exact_match": 0.0, "f1": 0.0}
    return {
        "exact_match": 100.0 * exact_match / total,
        "f1": 100.0 * increm_f1 / total,
    }

## Evaluation of the models

In [14]:
# Score the [predicted, annotated] pairs collected with the BERT reader.
evaluate(eval_bert)

Exact_match:  3
F1_score:  3.7466666666666666


In [15]:
# Score the [predicted, annotated] pairs collected with the DistilBERT reader.
evaluate(eval_distilbert)

Exact_match:  2
F1_score:  2.9066666666666667
