### Default Pipeline

In [1]:
#test
from transformers import pipeline

# Build the question-answering pipeline without naming a model, so the
# library falls back to its default checkpoint for this task.
nlp = pipeline(task="question-answering")

# Short passage used as a smoke test for the pipeline.
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune
a model on a SQuAD task, you may leverage the `run_squad.py`.
"""

sample_question = "What is extractive question answering?"
print(nlp(question=sample_question, context=context))

{'score': 0.6222445964813232, 'start': 34, 'end': 95, 'answer': 'the task of extracting an answer from a text given a question'}


In [39]:
import pandas as pd
# Load the sample conversations, keep only the two columns that test_loc reads,
# drop rows with a missing value in either one, and cast every value to str.
res = (
    pd.read_csv('weather_app_sample_convos.csv')
    .loc[:, ["Context", "Location"]]
    .dropna()
    .astype(str)
)

In [40]:
# Convert every value in the frame to lower case.
# `DataFrame.applymap` is deprecated since pandas 2.1 (FutureWarning), so the
# vectorised string accessor is applied column by column instead. This relies
# on every column holding strings, which the preceding `astype(str)` ensures.
res = res.apply(lambda col: col.str.lower())

In [42]:
def test_loc(questions=["where?", "what is the location?", "which area?"]):
    """
    Function to test qa model against a list of location related questions. 
    Calculates accuracy (disregards nan values)
    """
    num_correct = []
    for ques in questions:
        curr_correct = 0
        for index, row in res.iterrows():
            context = row["Context"]
            nlp_res = nlp(question=ques, context=context)
            
            if nlp_res['answer'].lower() in str(row['Location']).lower(): 
                curr_correct += 1
        num_correct.append(curr_correct/len(res))
            
    return num_correct
        

In [43]:
# Score the pipeline currently bound to `nlp` on test_loc's default questions;
# the result holds one accuracy per question.
accuracy_1 = test_loc()
accuracy_1

[0.5945945945945946, 0.5135135135135135, 0.6216216216216216]

### RoBERTa-base-squad2

In [44]:
# Rebind `nlp` to a QA pipeline backed by the deepset RoBERTa checkpoint,
# requested at revision "v1.0".
model_name = "deepset/roberta-base-squad2"
nlp = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
    revision="v1.0",
)




Some weights of RobertaModel were not initialized from the model checkpoint at deepset/roberta-base-squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [45]:
# Score the pipeline currently bound to `nlp` on test_loc's default questions.
# Assuming the cells ran in order, that is the deepset/roberta-base-squad2 pipeline.
accuracy_2 = test_loc()
accuracy_2

[0.5405405405405406, 0.4594594594594595, 0.35135135135135137]

### BERT-small fine-tuned

In [46]:
# Rebind `nlp` to a QA pipeline backed by the BERT-small SQuAD v2 checkpoint.
model_name = "mrm8488/bert-small-finetuned-squadv2"
nlp = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
)

In [47]:
# Score the pipeline currently bound to `nlp` on test_loc's default questions.
# Assuming the cells ran in order, that is the mrm8488/bert-small-finetuned-squadv2 pipeline.
accuracy_3 = test_loc()
accuracy_3

[0.5675675675675675, 0.5405405405405406, 0.5135135135135135]

### BERT-tiny fine-tuned

In [48]:
# Rebind `nlp` to a QA pipeline backed by the BERT-tiny SQuAD v2 checkpoint.
model_name = "mrm8488/bert-tiny-5-finetuned-squadv2"
nlp = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
)

In [49]:
# Score the pipeline currently bound to `nlp` on test_loc's default questions.
# NOTE(review): the recorded output of this cell is identical, digit for digit,
# to the default pipeline's result (accuracy_1). Confirm `nlp` was actually
# rebound to the bert-tiny pipeline when this ran (Restart Kernel -> Run All).
accuracy_4 = test_loc()
accuracy_4

[0.5945945945945946, 0.5135135135135135, 0.6216216216216216]

In [50]:
# Gather the per-question accuracies into one table: one column per model,
# one row per question, in the order test_loc evaluated them.
accuracy_df = pd.DataFrame(
    {
        'default_pipeline': accuracy_1,
        'roberta_base_finetuned': accuracy_2,
        'bert_small_finetuned': accuracy_3,
        'bert_tiny_finetuned': accuracy_4,
    }
)

accuracy_df


Unnamed: 0,default_pipeline,roberta_base_finetuned,bert_small_finetuned,bert_tiny_finetuned
0,0.594595,0.540541,0.567568,0.594595
1,0.513514,0.459459,0.540541,0.513514
2,0.621622,0.351351,0.513514,0.621622
