In [None]:
import torch
from transformers import (
    BertTokenizerFast,
    BertForQuestionAnswering,
)
from datasets import load_dataset
from utils import preprocess_and_tokenize, make_predictions, make_predictions_with_swa, post_processing_function
from swag_transformers.swag_bert import SwagBertForQuestionAnswering

In [2]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pretrained BERT backbone; the *fast* tokenizer is used because it supports offsets mapping.
model_name = "bert-base-uncased"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

# Wrap the base model for SWAG (no_cov_mat=False), then move both models to `device`.
swag_model = SwagBertForQuestionAnswering.from_base(model, no_cov_mat=False).to(device)
model = model.to(device)

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fetch SQuAD 2.0 from the Hugging Face hub.
squad_dataset = load_dataset("squad_v2")

# The official test set is hidden, so carve a dev set out of the training data
# (10%, no shuffling) and use the public validation split as the test set.
split = squad_dataset["train"].train_test_split(test_size=0.1, shuffle=False)
train_set, dev_set = split["train"], split["test"]
test_set = squad_dataset["validation"]

# Tiny subsets for quick smoke tests (uncomment to enable):
# train_set = train_set.select(range(1))
# dev_set = dev_set.select(range(10))
# test_set = test_set.select(range(1))

# Tokenize the three custom splits with the shared tokenizer.
tokenized_train, tokenized_dev, tokenized_test = preprocess_and_tokenize(
    train_set,
    dev_set,
    test_set,
    tokenizer,
)

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})

Example from Train Set:
id: 56be85543aeaaa14008c9063
title: Beyoncé
context: Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one si

In [None]:
def evaluate_on_dev(predict_fn, qa_model, examples, features, references):
    """Run one model through the predict -> post-process -> score pipeline.

    Args:
        predict_fn: Callable invoked as ``predict_fn(qa_model, features)``;
            here either ``make_predictions`` or ``make_predictions_with_swa``.
        qa_model: Model handed to ``predict_fn``.
        examples: Untokenized split passed to ``post_processing_function``.
        features: Tokenized counterpart of ``examples``.
        references: List of ``{"id": ..., "answers": ...}`` dicts given to the metric.

    Returns:
        Tuple ``(raw_predictions, final_predictions, results)``: the output of
        ``predict_fn``, the output of ``post_processing_function``, and the
        value returned by ``metric.compute``.
    """
    # NOTE(review): `metric` is not defined in any visible cell; presumably it is
    # the SQuAD v2 metric object. Confirm it is created before this cell runs,
    # otherwise a fresh-kernel "Run All" raises NameError here.
    raw_predictions = predict_fn(qa_model, features)
    processed = post_processing_function(examples, features, raw_predictions)
    scores = metric.compute(predictions=processed.predictions, references=references)
    return raw_predictions, processed, scores


output_dir = "./model"

# The gold answers do not depend on the model, so build them once for both runs.
references = [{"id": ex["id"], "answers": ex["answers"]} for ex in dev_set]

# Evaluate baseline from checkpoint
checkpoint_dir = f"{output_dir}/checkpoint-14825"  # epoch 1
model = BertForQuestionAnswering.from_pretrained(checkpoint_dir)
model = model.to(device)
tokenizer = BertTokenizerFast.from_pretrained(output_dir)

predictions, final_predictions, results = evaluate_on_dev(
    make_predictions, model, dev_set, tokenized_dev, references
)
print("Baseline: ", results)

# Evaluate SWA from checkpoint
checkpoint_dir = f"{output_dir}/checkpoint-swag-epoch-1"
swag_model = SwagBertForQuestionAnswering.from_pretrained(checkpoint_dir)
swag_model = swag_model.to(device)

predictions, final_predictions, results = evaluate_on_dev(
    make_predictions_with_swa, swag_model, dev_set, tokenized_dev, references
)
print("SWA: ", results)

Some weights of the model checkpoint at .\swag_model_mini were not used when initializing BertForQuestionAnswering: ['bert.swag.base._dummy_param', 'bert.swag.base.bert.embeddings.LayerNorm.bias', 'bert.swag.base.bert.embeddings.LayerNorm.bias_cov_mat_sqrt', 'bert.swag.base.bert.embeddings.LayerNorm.bias_mean', 'bert.swag.base.bert.embeddings.LayerNorm.bias_sq_mean', 'bert.swag.base.bert.embeddings.LayerNorm.weight', 'bert.swag.base.bert.embeddings.LayerNorm.weight_cov_mat_sqrt', 'bert.swag.base.bert.embeddings.LayerNorm.weight_mean', 'bert.swag.base.bert.embeddings.LayerNorm.weight_sq_mean', 'bert.swag.base.bert.embeddings.position_embeddings.weight', 'bert.swag.base.bert.embeddings.position_embeddings.weight_cov_mat_sqrt', 'bert.swag.base.bert.embeddings.position_embeddings.weight_mean', 'bert.swag.base.bert.embeddings.position_embeddings.weight_sq_mean', 'bert.swag.base.bert.embeddings.token_type_embeddings.weight', 'bert.swag.base.bert.embeddings.token_type_embeddings.weight_cov_ma

  0%|          | 0/1 [00:00<?, ?it/s]

No parameters collected yet, you should first run collect_model!


Baseline:  {'exact': 0.0, 'f1': 0.0, 'total': 1, 'HasAns_exact': 0.0, 'HasAns_f1': 0.0, 'HasAns_total': 1, 'best_exact': 0.0, 'best_exact_thresh': 0.0, 'best_f1': 0.0, 'best_f1_thresh': 0.0}


  0%|          | 0/1 [00:00<?, ?it/s]

SWAG:  {'exact': 0.0, 'f1': 0.0, 'total': 1, 'HasAns_exact': 0.0, 'HasAns_f1': 0.0, 'HasAns_total': 1, 'best_exact': 0.0, 'best_exact_thresh': 0.0, 'best_f1': 0.0, 'best_f1_thresh': 0.0}
