In [1]:
import evaluate
from pprint import pprint
import pandas as pd
from datasets import load_dataset
from tqdm import tqdm
from sentence_transformers import SentenceTransformer, util
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, BartForQuestionAnswering
import project_utility as utils

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the train / validation / test splits through the project helper.
# The helper itself prints each split's shape (see this cell's output).
train_df, val_df, test_df = utils.load_data()

train_df.shape=(14756, 5)
valid_df.shape=(4229, 5)
test_df.shape=(2096, 5)


In [3]:
# Base BART-large checkpoint (no QA fine-tuning).
# NOTE(review): the author left an open question about whether bart-large is the
# size that matches ClimateBART — confirm before relying on this choice.
pretrained_checkpoint_name = 'facebook/bart-large'
tokenizer = AutoTokenizer.from_pretrained(pretrained_checkpoint_name)
model = AutoModelForQuestionAnswering.from_pretrained(pretrained_checkpoint_name)
# The QA head (qa_outputs.weight / qa_outputs.bias) is newly initialised for this
# checkpoint, so the model has to be fine-tuned before it can be used for inference.

Some weights of BartForQuestionAnswering were not initialized from the model checkpoint at facebook/bart-large and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Sanity check taken from the Hugging Face docs, using a checkpoint that already
# works for extractive QA out of the box:
# https://huggingface.co/docs/transformers/en/model_doc/bart#transformers.BartForQuestionAnswering.forward.example

tokenizer = AutoTokenizer.from_pretrained("valhalla/bart-large-finetuned-squadv1")
model = BartForQuestionAnswering.from_pretrained("valhalla/bart-large-finetuned-squadv1")

question = "Who was Jim Henson?"
text = "Jim Henson was a nice puppet"

# Encode the (question, context) pair and run a gradient-free forward pass.
inputs = tokenizer(question, text, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Highest-scoring start and end token positions of the answer span.
answer_start_index = torch.argmax(outputs.start_logits)
answer_end_index = torch.argmax(outputs.end_logits)

# Cut the span out of the input ids (end position inclusive) and decode it.
predict_answer_tokens = inputs["input_ids"][0, answer_start_index : answer_end_index + 1]
print(tokenizer.decode(predict_answer_tokens, skip_special_tokens=True))

# Reference span is "nice puppet"; passing its token positions makes the model return a loss.
target_start_index = torch.tensor([14])
target_end_index = torch.tensor([15])

outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
loss = outputs.loss
round(loss.item(), 2)

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.


 nice puppet


0.59

In [6]:
# Load the BART-large checkpoint fine-tuned on SQuAD v1; this tokenizer/model pair
# is what the test-set inference loop below runs with.
tokenizer = AutoTokenizer.from_pretrained("valhalla/bart-large-finetuned-squadv1")
model = BartForQuestionAnswering.from_pretrained("valhalla/bart-large-finetuned-squadv1")

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.


In [7]:
# Run extractive QA over every test example and store the predicted answer span
# in a new `bart_answer` column of `test_df`.
#
# Inputs are capped at the model's position-embedding limit by truncating only the
# context (never the question), so an over-long passage cannot crash a long run.
max_input_tokens = model.config.max_position_embeddings

bart_answers = []
for question, text in tqdm(zip(test_df['question'], test_df['context']), total=len(test_df)):
    inputs = tokenizer(
        question,
        text,
        return_tensors="pt",
        truncation="only_second",
        max_length=max_input_tokens,
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # Start and end are chosen independently; if end < start the slice is empty
    # and the decoded answer is the empty string.
    answer_start_index = outputs.start_logits.argmax()
    answer_end_index = outputs.end_logits.argmax()

    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    answer_decoded = tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    # Decoded spans come back with a leading space (e.g. " nice puppet"); strip it
    # so predictions are directly comparable with the reference answers.
    bart_answers.append(answer_decoded.strip())

# One column assignment instead of growing the frame cell-by-cell inside the loop.
test_df['bart_answer'] = bart_answers
test_df.head()

100%|██████████| 2096/2096 [17:02<00:00,  2.05it/s]


Unnamed: 0,context,question,id,answer,answer_start,bart_answer
0,some more detailed work has been done at natio...,The 9 percent reduction of rice in Bangladesh ...,14095,flooding damage and climate variability,514,flooding damage and climate variability
1,some more detailed work has been done at natio...,What kind of model of Bangladesh was had been ...,14096,a dynamic economywide model,70,dynamic economywide
2,some more detailed work has been done at natio...,What approach did Ahmed use to estimate how ch...,14097,a modelling approach,639,modelling approach
3,extreme sea level height fluctuations are also...,Where height fluctuations are large?,2843,extreme sea level height fluctuations are also...,0,to the north
4,extreme sea level height fluctuations are also...,How non-tide sea levels are obtained?,2844,the non-tide sea levels are obtained by spectr...,167,by spectrally removing the tidal energy from ...


In [14]:
# Side-by-side look at the first ten examples: question, reference answer, BART answer.
# Each entry pairs the printed prefix with the column it is read from.
fields_to_show = (
    ("question", "question"),
    ("label answer", "answer"),
    ("BART answer", "bart_answer"),
)
for row_label in test_df.index[:10]:
    for prefix, column in fields_to_show:
        pprint(f"{prefix}: {test_df.loc[row_label, column]}")
    print()

('question: The 9 percent reduction of rice in Bangladesh is attributed to '
 'what two variables?')
'label answer: flooding damage and climate variability'
'BART answer:  flooding damage and climate variability'

('question: What kind of model of Bangladesh was had been used to estimate '
 'economic damages from historical climate variability and future '
 'anthropogenic climate change?')
'label answer: a dynamic economywide model'
'BART answer:  dynamic economywide'

('question: What approach did Ahmed use to estimate how changes in climate '
 'variability might affect crop yields and poverty rates in Tanzania to the '
 'early 2030s')
'label answer: a modelling approach'
'BART answer:  modelling approach'

'question: Where  height fluctuations are large?'
('label answer: extreme sea level height fluctuations are also larger to the '
 'north, as a result of increasing storm intensities at the more northerly '
 'coastal locations')
'BART answer:  to the north'

'question: How  non-tide

In [8]:
# Persist the test frame, including the `bart_answer` column, so the ~17-minute
# inference pass does not have to be repeated to reuse the predictions.
test_df.to_parquet('test_bart_qa_scored.parquet')

In [9]:
# Score the `bart_answer` column with the project helper; per this cell's output it
# reports ROUGE-1/2/L/Lsum and an average semantic-similarity score.
# NOTE(review): presumably compared against the `answer` column — confirm in project_utility.
utils.evaluate_abstractive(test_df, 'bart_answer',)

rouge scores:
{'rouge1': np.float64(0.4341106284336489),
 'rouge2': np.float64(0.3796218872329144),
 'rougeL': np.float64(0.4321849585581291),
 'rougeLsum': np.float64(0.4325789489633455)}

average semantic similarity:
tensor(0.5995)
