In [1]:
import torch
import transformers
from transformers import (
    BertTokenizerFast,
    BertForQuestionAnswering,
    default_data_collator,
    EvalPrediction,
    TrainerCallback
)
from datasets import load_dataset
from trainer_qa import QuestionAnsweringTrainer
import evaluate
import os
from utils_qa import postprocess_qa_predictions
from utils import preprocess_and_tokenize
from swag_transformers.swag_bert import SwagBertForQuestionAnswering
from swag_transformers.trainer_utils import SwagUpdateCallback

In [2]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_name = "bert-base-uncased"

# The "fast" tokenizer is required because it supports offsets mapping
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForQuestionAnswering.from_pretrained(model_name)

# Build the SWAG wrapper from the base model; no_cov_mat=False selects SWAG
swag_model = SwagBertForQuestionAnswering.from_base(model, no_cov_mat=False)

# Move both the base model and its SWAG counterpart to the chosen device
model = model.to(device)
swag_model = swag_model.to(device)

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Fetch SQuAD 2.0 from the Hugging Face hub and show its overall structure
squad_dataset = load_dataset("squad_v2")
print(squad_dataset)

# First record of each official split, kept under their own names for inspection
train_example = squad_dataset["train"][0]
validation_example = squad_dataset["validation"][0]

# Print every field of both sample records under a heading
for heading, example in (
    ("\nExample from Train Set:", train_example),
    ("\nExample from Validation Set:", validation_example),
):
    print(heading)
    for key, value in example.items():
        print(f"{key}: {value}")

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})

Example from Train Set:
id: 56be85543aeaaa14008c9063
title: Beyoncé
context: Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one si

In [6]:
# The test set is hidden, so a dev set is carved out of the training data
# (10%, without shuffling) and the public validation set serves as the test set.

split = squad_dataset["train"].train_test_split(test_size=0.1, shuffle=False)
train_set, dev_set = split["train"], split["test"]
test_set = squad_dataset["validation"]

total_train_size = len(squad_dataset["train"])

# All three percentages are relative to the size of the ORIGINAL training split,
# which is why they do not add up to 100.
new_train_percentage = len(train_set) / total_train_size * 100
new_dev_percentage = len(dev_set) / total_train_size * 100
test_percentage = len(test_set) / total_train_size * 100

for label, subset, percentage in (
    ("New Train Set", train_set, new_train_percentage),
    ("New Dev Set", dev_set, new_dev_percentage),
    ("Test Set", test_set, test_percentage),
):
    print(f"{label}: {len(subset)} examples ({percentage:.2f}%)")

New Train Set: 117287 examples (90.00%)
New Dev Set: 13032 examples (10.00%)
Test Set: 11873 examples (9.11%)


In [None]:
# Tokenize the three custom splits with the project helper, then unpack them
# in the same order they were passed in (train, dev, test).
tokenized_splits = preprocess_and_tokenize(train_set, dev_set, test_set, tokenizer)
tokenized_train, tokenized_dev, tokenized_test = tokenized_splits

Map: 100%|██████████| 10/10 [00:00<00:00, 86.06 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 267.63 examples/s]


In [None]:
# Post-processing function adapted from https://github.com/huggingface/transformers/blob/main/examples/pytorch/question-answering/run_qa.py

def post_processing_function(examples, features, predictions, stage="eval", output_dir="./results"):
    """Turn raw model predictions into the structure expected by the SQuAD v2 metric.

    Args:
        examples: The un-tokenized examples; each one is read via ``ex["id"]``
            and ``ex["answers"]``.
        features: The tokenized features, forwarded to ``postprocess_qa_predictions``.
        predictions: The raw model predictions, forwarded to ``postprocess_qa_predictions``.
        stage: Forwarded as ``prefix`` to ``postprocess_qa_predictions``.
        output_dir: Directory handed to ``postprocess_qa_predictions``. It is
            created if missing. Defaults to ``"./results"``.

    Returns:
        An ``EvalPrediction`` whose ``predictions`` is a list of
        ``{"id", "prediction_text", "no_answer_probability"}`` dicts and whose
        ``label_ids`` is a list of ``{"id", "answers"}`` reference dicts.
    """
    # The upstream HF implementation of postprocess_qa_predictions raises when
    # output_dir is not an existing directory, so make sure it exists; otherwise
    # the first evaluation of a fresh run fails.
    os.makedirs(output_dir, exist_ok=True)

    # Post-process to match the logits to answers
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        version_2_with_negative=True,
        n_best_size=20,
        max_answer_length=30,
        null_score_diff_threshold=0.0,
        output_dir=output_dir,
        prefix=stage,
    )

    # The metric requires a "no_answer_probability" field; a constant 0.0 is
    # supplied, as in the run_qa.py script this function was adapted from.
    formatted_predictions = [
        {"id": str(k), "prediction_text": v, "no_answer_probability": 0.0}
        for k, v in predictions.items()
    ]

    references = [{"id": str(ex["id"]), "answers": ex["answers"]} for ex in examples]

    return EvalPrediction(predictions=formatted_predictions, label_ids=references)

In [None]:
# SQuAD v2 metric, loaded once and shared by compute_metrics below
metric = evaluate.load("squad_v2")


def compute_metrics(p):
    """Score post-processed predictions with the loaded ``squad_v2`` metric.

    Args:
        p: Object exposing ``predictions`` and ``label_ids``; both are passed
            to ``metric.compute`` as predictions and references respectively.

    Returns:
        Whatever ``metric.compute`` returns for the given predictions/references.
    """
    predicted, expected = p.predictions, p.label_ids
    return metric.compute(predictions=predicted, references=expected)

In [None]:
# Custom callback to save the SWAG model at the end of every epoch

class SWAepochCallback(TrainerCallback):
    """Save the SWAG model into a per-epoch checkpoint directory.

    Args:
        output_dir: Directory under which ``checkpoint-swag-epoch-<N>`` folders
            are written.
        swag_model: Object whose ``save_pretrained`` is called at each epoch end.
            Optional for backward compatibility: when omitted, the notebook-level
            ``swag_model`` global is looked up at save time.
    """

    def __init__(self, output_dir, swag_model=None):
        self.output_dir = output_dir
        # Hold an explicit reference so the callback does not depend on whatever
        # happens to be bound to the global name when training runs.
        self.swag_model = swag_model

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Write the SWAG model to ``<output_dir>/checkpoint-swag-epoch-<N>``.

        ``model`` is accepted but unused; the SWAG model is saved instead.
        Returns ``control`` unchanged.
        """
        # Get the current epoch from state.epoch; cast to int for the folder name
        current_epoch = int(state.epoch)

        # Prefer the injected model; fall back to the notebook global otherwise
        target = self.swag_model if self.swag_model is not None else swag_model

        # Save the SWAG model after each epoch
        target.save_pretrained(f"{self.output_dir}/checkpoint-swag-epoch-{current_epoch}")

        return control

In [None]:
# Hyper-parameters; checkpointing, evaluation and logging all happen once per epoch
training_args = transformers.TrainingArguments(
    output_dir="./model",
    seed=42,
    num_train_epochs=5,
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
)

# Callbacks: one updates the SWAG model during training, the other saves it per epoch.
# NOTE(review): collect_steps / skip_first presumably control how often and from
# which step SWAG statistics are collected — confirm against swag_transformers.
swag_callbacks = [
    SwagUpdateCallback(swag_model, collect_steps=100, skip_first=150),
    SWAepochCallback(output_dir="./model"),
]

# Trainer initialization
# NOTE(review): per-epoch evaluation runs on the test split (tokenized_test / test_set);
# the dev split created earlier (tokenized_dev / dev_set) is not used here — confirm
# this is intended, since it exposes the test set during training.
trainer = QuestionAnsweringTrainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    data_collator=default_data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    eval_examples=test_set,
    post_process_function=post_processing_function,
    compute_metrics=compute_metrics,
    callbacks=swag_callbacks,
)

In [None]:
# Run the full training loop (evaluation and callbacks fire as configured on the trainer)
trainer.train()

output_dir = "./model"

# The trainer's own model is stored in a dedicated sub-folder
trainer.save_model(f"{output_dir}/trainer")

# Saving the SWAG model, tokenizer, and training arguments side by side
for artifact in (swag_model, tokenizer):
    artifact.save_pretrained(output_dir)
torch.save(training_args, os.path.join(output_dir, "training_args.bin"))

Epoch,Training Loss,Validation Loss,Exact,F1,Total,Hasans Exact,Hasans F1,Hasans Total,Noans Exact,Noans F1,Noans Total,Best Exact,Best Exact Thresh,Best F1,Best F1 Thresh
1,5.9967,No log,0.0,4.809524,10,0.0,8.015873,6,0.0,0.0,4,40.0,0.0,44.809524,0.0
2,5.6274,No log,0.0,4.809524,10,0.0,8.015873,6,0.0,0.0,4,40.0,0.0,44.809524,0.0


100%|██████████| 10/10 [00:00<00:00, 212.25it/s]
100%|██████████| 10/10 [00:00<00:00, 165.10it/s]
