<a href="https://colab.research.google.com/github/alexpod1000/SQuAD-QA/blob/main/ModelTrainExperimentalCode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#%%bash
#[[ ! -e /colabtools ]] && exit  # Continue only if running on Google Colab

# Clone repository
# https://sysadmins.co.za/clone-a-private-github-repo-with-personal-access-token/
# SECURITY: a personal access token used to be hardcoded in the URLs below.
# It has been replaced by the $GITHUB_TOKEN placeholder; the leaked token must be
# revoked, and the real value should be supplied through the environment only.
# For cloning the main branch:
#!git clone https://$GITHUB_TOKEN:x-oauth-basic@github.com/alexpod1000/SQuAD-QA.git
# For cloning the "evaluation-features" branch
#!git clone --branch evaluation-features https://$GITHUB_TOKEN:x-oauth-basic@github.com/alexpod1000/SQuAD-QA.git
# Change current working directory to match project
#%cd SQuAD-QA/
#!pwd

In [2]:
# External imports
import copy
import numpy as np
import pandas as pd
import string
import torch

from nltk.tokenize import TreebankWordTokenizer, SpaceTokenizer
from typing import Tuple, List, Dict, Any, Union

# Project imports
from squad_data.parser import SquadFileParser
from squad_data.utils import build_mappers_and_dataframe, add_paragraphs_spans
from evaluation.evaluation_metrics import Evaluator
from evaluation.utils import extract_answer, build_evaluation_dict

### Download Embedding

In [3]:
from utils.embedding_utils import EmbeddingDownloader

# Load the word-embedding model through the project's EmbeddingDownloader helper.
# The positional arguments look like a folder name and a file name, and the cell
# output reports "pre-downloaded embeddings" being read from that location —
# presumably the model is fetched once and cached there; verify against the helper.
embedding_downloader = EmbeddingDownloader(
    "embedding_models", 
    "embedding_model.kv", 
    model_name="fasttext-wiki-news-subwords-300"
)

# `embedding_model` is later handed to DataConverter and read for `vector_size`.
embedding_model = embedding_downloader.load()

Loading pre-downloaded embeddings from /home/alexpod/uni/magistrale_ai/secondo_anno/nlp/project/SQuAD-QA/embedding_models/embedding_model.kv
End!
Embedding dimension: 300


### Parse the json and get the data

In [4]:
# Parse the SQuAD training file; `data` is reused below both to build the
# mappers/dataframe and to construct the Evaluator.
parser = SquadFileParser("squad_data/data/training_set.json")
data = parser.parse_documents()

########################### DEBUG
# reduce size for faster testing
# (uncomment the lines below to keep only the first document)
#full_data = data
#data = []
#for i in range(1): # use only the first 1 documents
#  data.append(full_data[i])

### Prepare the mappers and dataframe

In [5]:
# Build the two id -> text lookups (paragraphs, questions) and the answers dataframe.
paragraphs_mapper, questions_mapper, df = build_mappers_and_dataframe(data)
# Sanity check: show the first question and the first paragraph of the mappers.
print(questions_mapper[next(iter(questions_mapper))])
print(paragraphs_mapper[next(iter(paragraphs_mapper))])
# Last expression: rich display of the first answer rows.
df.head()

To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.


Unnamed: 0,paragraph_id,question_id,answer_id,answer_start,answer_text
0,0_0,5733be284776f41900661182,0,515,Saint Bernadette Soubirous
1,0_0,5733be284776f4190066117f,0,188,a copper statue of Christ
2,0_0,5733be284776f41900661180,0,279,the Main Building
3,0_0,5733be284776f41900661181,0,381,a Marian place of prayer and reflection
4,0_0,5733be284776f4190066117e,0,92,a golden statue of the Virgin Mary


In [6]:
# Scratch samples for the answer-marker experiments below:
# first paragraph text, first question text and first row of the answers dataframe.
pp = paragraphs_mapper[next(iter(paragraphs_mapper))]
qq = questions_mapper[next(iter(questions_mapper))]
aa = df.iloc[0]

In [7]:
from nltk.tokenize import MWETokenizer, TreebankWordTokenizer

# Multi-word-expression tokenizer that re-joins the pieces '<', 'ANS_START', '>'
# (and the ANS_END equivalent) into one token; separator='' glues them with no
# character in between, so the merged token equals the sentinel strings below.
mwe = MWETokenizer(separator='')
mwe.add_mwe(('<', 'ANS_START', '>'))
mwe.add_mwe(('<', 'ANS_END', '>'))

# Sentinel strings inserted around the answer before tokenization.
start_indicator = "<ANS_START>"
end_indicator = "<ANS_END>"

def add_in_middle(string, pos, to_add):
    """
    Insert ``to_add`` into ``string`` so that it begins at index ``pos``.

    The characters before ``pos`` stay on the left of the inserted text and
    the characters from ``pos`` onwards follow it.
    """
    head, tail = string[:pos], string[pos:]
    return head + to_add + tail

def augment_string(string, answer_start_idx, answer_end_idx):
    """
    Wrap the answer of ``string`` with the start and end indicators.

    ``answer_start_idx`` and ``answer_end_idx`` are character offsets in the
    original (un-augmented) string; the start indicator is placed at the
    first one and the end indicator at the second one.
    """
    # Step 1: splice the start indicator in at the answer start.
    with_start = string[:answer_start_idx] + start_indicator + string[answer_start_idx:]
    # Step 2: the end offset is shifted right by the text inserted in step 1.
    shifted_end = len(start_indicator) + answer_end_idx
    return with_start[:shifted_end] + end_indicator + with_start[shifted_end:]

def get_indexes_from_augmented_string(string, tokenizer=None):
    """
    Tokenize an augmented string and recover the answer span in token indexes.

    Parameters:
        string: text that already contains both answer indicators
            (see ``augment_string``).
        tokenizer: object exposing ``tokenize(str) -> list``. Defaults to a
            ``TreebankWordTokenizer``, so the function no longer depends on a
            global ``tokenizer`` variable that no cell defines. The tokenizer
            must split the indicators into the pieces registered on ``mwe``
            ('<', 'ANS_START', '>'), otherwise they cannot be re-joined.

    Returns:
        ``(tokens, (start, end))`` where ``tokens`` is the token list with the
        two indicators removed and ``start``/``end`` are the inclusive token
        indexes of the answer in that list.

    Raises:
        ValueError: if an indicator token is not found after tokenization.
    """
    if tokenizer is None:
        tokenizer = TreebankWordTokenizer()
    tokenized_aug = mwe.tokenize(tokenizer.tokenize(string))
    # get start of answer span index
    index_of_start_indicator = tokenized_aug.index(start_indicator)
    # drop the indicator: its old position now holds the first answer token
    tokenized_aug.pop(index_of_start_indicator)
    # same procedure for the end of span
    index_of_end_indicator = tokenized_aug.index(end_indicator)
    tokenized_aug.pop(index_of_end_indicator)
    # the end indicator sat right after the last answer token -> step back by one
    index_of_end_indicator -= 1
    return tokenized_aug, (index_of_start_indicator, index_of_end_indicator)

In [125]:
# Smoke test on a toy sentence: mark characters [5, 7) (the word "is") as the
# answer, then check that the recovered token span selects exactly that word.
text = "This is good"
ans_start_idx = 5
ans_end_idx = 7 # start + len
au = augment_string(text, ans_start_idx, ans_end_idx)
tokenized_str, ans_span_idxs = get_indexes_from_augmented_string(au)
# The returned span is inclusive on both ends, hence the +1 on the slice stop.
print(tokenized_str[ans_span_idxs[0]:ans_span_idxs[1]+1])

['is']


In [115]:
#augment_string(pp, aa["answer_start"], aa["answer_start"] + len(aa["answer_text"]))

'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to <ANS_START>Saint Bernadette Soubirous<ANS_END> in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.'

In [107]:
# Hand-written example of an already augmented string
# (not referenced by any other visible cell).
eee = "This <ANS_START>is<ANS_END> good"

In [6]:
def preprocess_text(text_dict: Dict[str, Any], text_key: Union[str, None] = None, tokenizer: Any = None) -> Any:
    """
    Return a tokenized deep copy of ``text_dict``; the input is left untouched.

    Only tokenization is performed: no punctuation is removed and no
    normalization is applied.

    Parameters:
        text_dict: mapping ``id -> text`` or, when ``text_key`` is given,
            ``id -> dict`` whose ``text_key`` entry holds the text.
        text_key: key of the text inside each value, or ``None`` when the
            values are the texts themselves.
        tokenizer: object exposing ``tokenize(str) -> list``. Defaults to
            ``SpaceTokenizer()`` (the previous hard-coded behaviour), so a
            different tokenizer can be tried without editing this function.

    Returns:
        A new dictionary with the same keys where each text has been replaced
        by its list of tokens.
    """
    if tokenizer is None:
        tokenizer = SpaceTokenizer()
    # Work on a copy so nested dictionaries of the caller are never mutated.
    tokenized = copy.deepcopy(text_dict)
    for key, entry in tokenized.items():
        if text_key is None:
            # Replacing the value of an existing key is safe while iterating.
            tokenized[key] = tokenizer.tokenize(entry)
        else:
            entry[text_key] = tokenizer.tokenize(entry[text_key])
    return tokenized

In [7]:
# Replace every raw text of the two mappers with its token list.
# NOTE(review): the names are rebound to the tokenized versions, so this cell is
# not idempotent — presumably a second run would hand token lists to the
# tokenizer; confirm, and consider distinct names for the tokenized mappers.
paragraphs_mapper = preprocess_text(paragraphs_mapper)
questions_mapper = preprocess_text(questions_mapper)

In [8]:
# Extend the paragraphs mapper to include spans
# (the next cell reads a 'text' and a 'spans' entry from each value).
paragraphs_spans_mapper = add_paragraphs_spans(paragraphs_mapper)

In [9]:
# Inspect the tokens and the spans stored for the first paragraph ('0_0').
print(paragraphs_spans_mapper['0_0']['text'])
print(paragraphs_spans_mapper['0_0']['spans'])

['Architecturally,', 'the', 'school', 'has', 'a', 'Catholic', 'character.', 'Atop', 'the', 'Main', "Building's", 'gold', 'dome', 'is', 'a', 'golden', 'statue', 'of', 'the', 'Virgin', 'Mary.', 'Immediately', 'in', 'front', 'of', 'the', 'Main', 'Building', 'and', 'facing', 'it,', 'is', 'a', 'copper', 'statue', 'of', 'Christ', 'with', 'arms', 'upraised', 'with', 'the', 'legend', '"Venite', 'Ad', 'Me', 'Omnes".', 'Next', 'to', 'the', 'Main', 'Building', 'is', 'the', 'Basilica', 'of', 'the', 'Sacred', 'Heart.', 'Immediately', 'behind', 'the', 'basilica', 'is', 'the', 'Grotto,', 'a', 'Marian', 'place', 'of', 'prayer', 'and', 'reflection.', 'It', 'is', 'a', 'replica', 'of', 'the', 'grotto', 'at', 'Lourdes,', 'France', 'where', 'the', 'Virgin', 'Mary', 'reputedly', 'appeared', 'to', 'Saint', 'Bernadette', 'Soubirous', 'in', '1858.', 'At', 'the', 'end', 'of', 'the', 'main', 'drive', '(and', 'in', 'a', 'direct', 'line', 'that', 'connects', 'through', '3', 'statues', 'and', 'the', 'Gold', 'Dome),

### DataConverter and CustomQADataset

In [6]:
from data_loading.utils import DataConverter, padder_collate_fn
from data_loading.qa_dataset import CustomQADataset

# Wire the project dataset into a DataLoader; padder_collate_fn is the project
# collate function (presumably it pads each batch to a common length — confirm).
data_converter = DataConverter(embedding_model, paragraphs_mapper)
datasetQA = CustomQADataset(data_converter, df, paragraphs_mapper, questions_mapper)
data_loader = torch.utils.data.DataLoader(datasetQA, collate_fn = padder_collate_fn, batch_size=10, shuffle=True)

# Pull a single batch and check the tensor shapes: per the printed output,
# "paragraph_emb" is (batch, tokens, embedding_dim) and "y_gt" is (batch, 2).
test_batch = next(iter(data_loader))
print(test_batch["paragraph_emb"].shape)
print(test_batch["y_gt"].shape)

torch.Size([10, 226, 300])
torch.Size([10, 2])


In [7]:
test_batch["y_gt"]

tensor([[ 83,  91],
        [ 96, 101],
        [ 62,  64],
        [ 17,  18],
        [  3,   3],
        [ 37,  42],
        [  4,   5],
        [ 87,  91],
        [ 65,  76],
        [  8,   8]])

# Model train

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from timeit import default_timer as timer

In [9]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"The device is {device}")

The device is cuda


Model:

(paragraph_emb, question_emb) -> (answer_start, answer_end) // for each token in paragraph_emb

In [10]:
def train_step(model, optimizer, loss_function, dataloader, device="cpu"):
    """
    Train ``model`` for one pass over ``dataloader`` and report epoch statistics.

    Parameters:
        model: module called as ``model(paragraph_emb, question_emb)`` that
            returns ``(start_scores, end_scores)`` and exposes
            ``estimate_best_span(start_scores, end_scores)``.
        optimizer: optimizer stepped once per batch.
        loss_function: callable applied separately to the start and to the end
            scores; the two losses are summed.
        dataloader: iterable of batches, each a mapping with the keys
            ``"paragraph_emb"``, ``"question_emb"`` and ``"y_gt"`` (column 0 is
            the start index, column 1 the end index).
        device: device the batch tensors are moved to.

    Returns:
        dict with ``"loss"``, ``"accuracy_start"``, ``"accuracy_end"`` (each the
        mean of the per-batch values) and ``"time"`` (seconds spent in the
        loop). When ``dataloader`` yields no batch the three metrics are NaN
        instead of raising ``ZeroDivisionError``.
    """
    acc_loss = 0.0
    acc_start_accuracy = 0.0
    acc_end_accuracy = 0.0
    count = 0

    time_start = timer()

    model.train()
    for batch in dataloader:
        # Place to right device
        paragraph_in = batch["paragraph_emb"].to(device)
        question_in = batch["question_emb"].to(device)
        answer_spans_start = batch["y_gt"][:, 0].to(device)
        answer_spans_end = batch["y_gt"][:, 1].to(device)
        # Clear gradients
        model.zero_grad()
        # Run forward pass
        pred_answer_start_scores, pred_answer_end_scores = model(paragraph_in, question_in)
        # Sum of the start loss and of the end loss
        loss = loss_function(pred_answer_start_scores, answer_spans_start) + loss_function(pred_answer_end_scores, answer_spans_end)
        # Compute gradients
        loss.backward()
        # Optimizer step
        optimizer.step()
        # --- Compute metrics ---
        # Span decoding is only needed for reporting: disable autograd so that
        # no graph is recorded for the intermediate tensors it creates.
        with torch.no_grad():
            pred_span_start_idxs, pred_span_end_idxs = model.estimate_best_span(pred_answer_start_scores, pred_answer_end_scores)
        gt_start_idxs = answer_spans_start.detach().cpu()
        gt_end_idxs = answer_spans_end.detach().cpu()
        # Fraction of exact index matches; the float mean avoids relying on the
        # division semantics of integer tensors.
        start_accuracy = (gt_start_idxs == pred_span_start_idxs).float().mean()
        end_accuracy = (gt_end_idxs == pred_span_end_idxs).float().mean()
        # Gather stats
        acc_loss += loss.item()
        acc_start_accuracy += start_accuracy.item()
        acc_end_accuracy += end_accuracy.item()
        count += 1
    time_end = timer()

    if count == 0:
        # Nothing was processed: report "undefined" metrics rather than crash.
        nan = float("nan")
        return {
            "loss": nan,
            "accuracy_start": nan,
            "accuracy_end": nan,
            "time": time_end - time_start
        }
    return {
        "loss": acc_loss / count,
        "accuracy_start": acc_start_accuracy / count,
        "accuracy_end": acc_end_accuracy / count,
        "time": time_end - time_start
    }

In [11]:
# create Evaluator object
# (built from the parsed documents; used below for the ExactMatch and F1 scores)
evaluator = Evaluator(documents_list=data)

In [12]:
def evaluate_model_on_data(model, evaluator, dataloader, paragraphs_mapper, questions_mapper, device, debug=False):
    """
    Score the predictions of ``model`` on ``dataloader``.

    The predictions are collected with ``build_evaluation_dict`` and then
    passed to ``evaluator.ExactMatch`` and ``evaluator.F1``. With
    ``debug=True`` the collected dictionary is printed first.

    Returns a dict with the keys ``'exact_match'`` and ``'f1'``.
    """
    predictions = build_evaluation_dict(model, dataloader, paragraphs_mapper, questions_mapper, device)
    if debug:
        print(f"DEBUG: Eval_dict: {predictions}")
    # Dict displays evaluate in order: ExactMatch runs before F1, as before.
    return {
        'exact_match': evaluator.ExactMatch(predictions),
        'f1': evaluator.F1(predictions),
    }

In [13]:
class WeightedSum(nn.Module):
    """
    Collapse a sequence of vectors into one vector with learned attention.

    A single learned vector ``w`` scores every timestep (``w . q_t``), the
    scores are normalized with a softmax over the time axis and the output is
    the resulting weighted average of the timestep vectors.
    """

    def __init__(self, input_dim):
        """
        General idea, given a random dummy weights vector,
        learn to weight it based on query

        input_dim: size of the last dimension of the sequences to pool.
        """
        super(WeightedSum, self).__init__()
        self.weights = nn.Parameter(torch.randn(input_dim))

    def forward(self, input_emb, mask=None):
        """
        Pool ``input_emb`` over its time axis.

        Parameters:
            input_emb: tensor of shape (batch, timesteps, input_dim).
            mask: optional tensor of shape (batch, timesteps); positions that
                are ``False``/0 (e.g. padding) get zero weight. Every row needs
                at least one kept position. ``None`` keeps all timesteps.

        Returns:
            Tensor of shape (batch, input_dim).

        Raises:
            ValueError: if ``input_emb`` is not 3-dimensional.
        """
        if input_emb.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, timesteps, dim), got {tuple(input_emb.shape)}"
            )
        # w dot q_j -> (batch, timesteps)
        dot_prods = torch.matmul(input_emb, self.weights)
        if mask is not None:
            keep = mask.to(device=dot_prods.device, dtype=torch.bool)
            # -inf scores receive exactly zero weight after the softmax
            dot_prods = dot_prods.masked_fill(~keep, float("-inf"))
        # b_j = exp(w dot q_j) / sum_t exp(w dot q_t); torch.softmax subtracts
        # the row maximum first, so large scores no longer overflow to inf/NaN
        b = torch.softmax(dot_prods, dim=1)
        # q (embedding) = sum_t(b_t * q_t)
        return torch.sum(input_emb * b.unsqueeze(-1), dim=1)

**Compatibility functions**

**Multiplicative (dot)**:

p = paragraph emb shape: [B, T, E] (Query)

q = question weighted shape: [B, E] reshaped to [B, E, 1] (Keys)

scores = p @ q (of shape: [B, T, 1])

**General bilinear**:

p = paragraph emb shape: [B, T, Ep] (Query)

q = question weighted shape: [B, Eq] reshaped to [B, Eq, 1] (Keys)

W = parameter matrix of shape: [Ep, Eq]

scores = p @ W @ q (of shape: [B, T, 1])

In [14]:
class BilinearCompatibility(nn.Module):
    def __init__(self, query_dim, keys_dim):
        """
        Bilinear compatibility function f(q, K) = q.T @ W @ K, with W a
        learned (query_dim, keys_dim) matrix. The multiplicative/dot
        compatibility is the special case without W: f(q, K) = q.T @ K.

        Where:
            q -> embedded paragraphs (p in DrQA)
            K -> embedded question (q in DrQA)
        """
        super().__init__()
        self.weights = nn.Parameter(torch.randn(query_dim, keys_dim))

    def forward(self, query, keys):
        """
        query: batch of shape (batch, seq_len, query_dim) (Query)
        keys: batch of shape (batch, key_dim), turned into a column
              of shape (batch, key_dim, 1) before the product (Keys)

        Returns one score per query timestep, shape (batch, seq_len, 1).
        """
        # (batch, seq_len, query_dim) @ (query_dim, keys_dim)
        projected_query = torch.matmul(query, self.weights)
        keys_column = keys.unsqueeze(2)
        # (batch, seq_len, keys_dim) @ (batch, keys_dim, 1)
        return torch.matmul(projected_query, keys_column)

In [83]:
class LSTM_QA(nn.Module):
    """
    BiLSTM question-answering model that scores every paragraph token as a
    candidate answer start and as a candidate answer end.

    The paragraph and the question are encoded by two separate bidirectional
    LSTMs. The question states are pooled into one vector (``WeightedSum``),
    compared with every paragraph state through two bilinear forms (one for
    the start, one for the end), and each similarity is mapped to a logit by
    a 1 -> 1 linear layer.

    NOTE(review): padded timesteps are not masked anywhere in this class, so a
    predicted index may fall on padding — confirm how callers handle that.
    """

    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        """
        embedding_dim: size of the input token embeddings.
        hidden_dim: hidden size of each LSTM direction (encodings are 2x this).
        tagset_size: only stored on the instance; nothing in this class reads it.
        """
        super(LSTM_QA, self).__init__()
        self.tagset_size = tagset_size
        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.paragraph_embedder = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.question_embedder = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.weighted_sum = WeightedSum(hidden_dim * 2)
        # used to compute similarity scores
        self.general_bilinear_start = BilinearCompatibility(hidden_dim * 2, hidden_dim * 2)
        self.general_bilinear_end = BilinearCompatibility(hidden_dim * 2, hidden_dim * 2)
        # to classify from similarity to prob of start and prob of end
        self.sim_to_start = nn.Linear(1, 1) # given a similarity score, predict P(start)
        self.sim_to_end = nn.Linear(1, 1) # given a similarity score, predict P(end)

    def estimate_best_span(self, start_scores, end_scores):
        """
        Pick, for every sample, the most likely span with ``start <= end``.

        Parameters:
            start_scores, end_scores: logits of shape (batch, timesteps).

        Returns:
            ``(start_idxs, end_idxs)``: two detached CPU integer tensors of
            shape (batch,).

        The joint score of a span is computed in log-space
        (``log P(start) + log P(end)``). Multiplying probabilities instead
        underflows to 0 for saturated logits, and whenever the end peak lies
        before the start peak the whole valid region becomes zero, which made
        the arg-max silently fall back to the span (0, 0).
        """
        batch_dim, timestep_dim = start_scores.shape

        # log of the marginal distributions for start and end
        start_log_probs = torch.nn.functional.log_softmax(start_scores, dim=1)
        end_log_probs = torch.nn.functional.log_softmax(end_scores, dim=1)
        # log of the joint distribution: entry [b, i, j] scores the span (i, j)
        joint_log_probs = start_log_probs[:, :, None] + end_log_probs[:, None, :]
        # keep only the spans whose end is not before their start
        positions = torch.arange(timestep_dim, device=start_scores.device)
        valid_span = positions[:, None] <= positions[None, :]
        constrained_joint = joint_log_probs.masked_fill(~valid_span, float("-inf"))
        # arg-max over the flattened (start, end) grid, then unravel the index
        flat_argmax = constrained_joint.reshape(batch_dim, -1).argmax(dim=1)
        start_idxs = (flat_argmax // timestep_dim).detach().cpu()
        end_idxs = (flat_argmax % timestep_dim).detach().cpu()
        return start_idxs, end_idxs

    def forward(self, paragraphs, questions):
        """
        Parameters:
            paragraphs: embeddings of shape (batch, paragraph_len, embedding_dim).
            questions: embeddings of shape (batch, question_len, embedding_dim).

        Returns:
            ``(start_logits, end_logits)``, each of shape (batch, paragraph_len).
            They are raw logits: a softmax over the token axis (e.g. inside
            ``nn.CrossEntropyLoss``) turns them into distributions.
        """
        batch_size, seq_len, n_feat = paragraphs.shape
        # As we assume batch_first true, then our sentence_embeddings will have correct shape
        paragraphs_seq_emb, _ = self.paragraph_embedder(paragraphs) # (batch, seq_len, n_feats * n_dirs)
        questions_seq_emb, _ = self.question_embedder(questions) # (batch, seq_len, n_feats * n_dirs)
        # weighted sum -> one vector per question
        questions_state_repr = self.weighted_sum(questions_seq_emb)
        # compute similarities -> (batch, timestep, 1)
        similarities_start = self.general_bilinear_start(paragraphs_seq_emb, questions_state_repr)
        similarities_end = self.general_bilinear_end(paragraphs_seq_emb, questions_state_repr)
        # --- Given a similarity score, predict the start / end logit ---
        # nn.Linear acts on the last (size-1) axis, so no flattening is needed
        start_logits = self.sim_to_start(similarities_start).reshape(batch_size, seq_len)
        end_logits = self.sim_to_end(similarities_end).reshape(batch_size, seq_len)

        # if we view each sequence of tokens as a feature vector
        # we can interpret the start/end assignation problem as
        # a classification with a variable number of classes
        # thus assume that our model outputs logits that will just be passed
        # to a softmax, to build a probable distribution of the start token
        return start_logits, end_logits

In [78]:
#torch.nn.functional.softmax(outs_mod[0])

In [84]:
# Define baseline model
# Arguments: embedding size, LSTM hidden size per direction (128) and tagset_size
# (10) — the latter is only stored by LSTM_QA.__init__ and not used in forward.
model = LSTM_QA(embedding_model.vector_size, 128, 10).to(device)
# Applied separately to the start and to the end logits inside train_step.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001, amsgrad=True)

In [85]:
# Rebuild the converter and the dataset for training
# (same construction as in the earlier DataLoader smoke-test cell).
data_converter = DataConverter(embedding_model, paragraphs_mapper)
datasetQA = CustomQADataset(data_converter, df, paragraphs_mapper, questions_mapper)

In [86]:
# Training loader: shuffled batches of 64 samples collated by padder_collate_fn.
train_data_loader = torch.utils.data.DataLoader(datasetQA, collate_fn = padder_collate_fn, batch_size=64, shuffle=True)

In [88]:
# Tokenizer used by the manual evaluation cell below to re-tokenize paragraphs.
treebank_tokenizer = TreebankWordTokenizer()

In [89]:
# Grab one (shuffled) training batch for the manual forward pass below.
batch = next(iter(train_data_loader))

In [90]:
# Manual forward pass on a single batch, mirroring the steps of train_step
# without the loss / backward / optimizer part.
paragraph_in = batch["paragraph_emb"]
question_in = batch["question_emb"]
# Column 0 of y_gt is the ground-truth start index, column 1 the end index.
answer_spans_start = batch["y_gt"][:, 0]
answer_spans_end = batch["y_gt"][:, 1]
# Ids are kept to map the predictions back to paragraph / question texts.
paragraph_id = batch["paragraph_id"]
question_id = batch["question_id"]
# Place to right device
paragraph_in = paragraph_in.to(device)
question_in = question_in.to(device)
answer_spans_start = answer_spans_start.to(device)
answer_spans_end = answer_spans_end.to(device)
# Run forward pass
pred_answer_start_scores, pred_answer_end_scores = model(paragraph_in, question_in)
# Get span indexes
pred_span_start_idxs, pred_span_end_idxs = model.estimate_best_span(pred_answer_start_scores, pred_answer_end_scores)

In [91]:
# Build {question_id: predicted answer text} for the batch predicted above.
# NOTE(review): this re-tokenizes paragraphs_mapper entries with the Treebank
# tokenizer, i.e. it expects raw strings, whereas an earlier cell rebinds
# paragraphs_mapper to SpaceTokenizer token lists — confirm which state is
# intended; the predicted indexes also refer to the training tokenization.
eval_dict = {}
for sample_idx in range(len(paragraph_id)):
    paragraph_sample_id = paragraph_id[sample_idx]
    question_sample_id = question_id[sample_idx]
    pred_span_start_sample = pred_span_start_idxs[sample_idx]
    pred_span_end_sample = pred_span_end_idxs[sample_idx]
    par_tokens = treebank_tokenizer.tokenize(paragraphs_mapper[paragraph_sample_id])
    pred_answer_text = extract_answer(par_tokens,
                                      pred_span_start_sample,
                                      pred_span_end_sample)
    # add new (question_id, pred_answer_text) to the eval dict:
    eval_dict[question_sample_id] = pred_answer_text

In [94]:
questions_mapper['572820d23acd2414000df517']

'What were the protesters on September 2 demonstrating against?'

In [92]:
eval_dict

{'571a7b2b10f8ca1400305099': 'Hasidic and',
 '572c9ab3dfb02c14005c6bae': '`` living law',
 '573223cce17f3d14004226b5': 'independent',
 '572ed854dfa6aa1500f8d446': "Several sources indicate that during Muhammad 's lifetime a large number of his companions had memorized the",
 '573425624776f41900661959': '13',
 '5727f2f53acd2414000df09b': 'Latin',
 '572f21aea23a5019007fc4ac': 'revolutionary',
 '56d62ca31c85041400946f74': "trying to achieve status '' is characteristic of dog–human interactions. Pet dogs play an active role in family",
 '57094c819928a81400471502': "industry–led boom in 1970. A succession of skyscrapers were built throughout the 1970s—many by real estate developer Gerald D. Hines—culminating with Houston 's tallest skyscraper , the 75-floor , 1,002-foot ( 305 m ) -tall JPMorgan Chase Tower ( formerly the Texas Commerce",
 '5726866b5951b619008f758f': 'me (',
 '56df7fd95ca0a614008f9b6f': 'Street',
 '571a87714faf5e1900b8aa17': 'Persians.',
 '57095ba39928a81400471548': 'foreclo

In [33]:
answer_spans_start

tensor([149, 122,  15, 160,  49,   3,  66,  88, 116, 102,  42,  83,  34,  25,
         75, 197,  16,  66,  97,   0,  27,  42, 160,   3,  74, 163,  68,  58,
         76,  71, 110,   8,  67,   8,  89, 121,  27,  34,  46,  82,  98, 146,
          7,  28,  21,  40,  73,  47,  25,  16,  10,  20,  23,  16,  88,  99,
        115,   4,  27,  62,  79,  92,  82,   2], device='cuda:0')

In [32]:
answer_spans_end

tensor([149, 124,  17, 161,  64,   3,  67,  97, 124, 108,  44,  83,  37,  26,
         75, 200,  17,  69,  98,  12,  27,  42, 160,   3,  75, 166,  70,  60,
         79,  76, 122,   8,  71,  14,  89, 121,  28,  34,  50,  83, 100, 147,
          9,  29,  22,  46,  75,  48,  30,  21,  11,  20,  25,  19,  88, 101,
        125,   8,  27,  72,  81,  92,  84,   2], device='cuda:0')

In [29]:
pred_span_start_idxs

tensor([149, 122,   4, 161,  64,   3,  37,  14, 124, 108,  85,  36,  37,  16,
         75,  71,   5,  69,  12, 122,  12,   7, 160,   7,  33,  46,  70,  10,
         79,  43,  37,  19,   4,  14, 106, 121, 130,  34,   2,  82,  10, 147,
         58,  29,  10,  52,   9,  77,  30,  29,  30,   4,   1, 104, 106,  86,
         87,   8,  60,  99,  60,  79,  28,   3])

In [30]:
pred_span_end_idxs

tensor([149, 122,   4, 161,  64,   3,  37,  14, 124, 108,  85,  36,  37,  16,
         75,  71,   5,  69,  12, 122,  12,   7, 160,   7,  33,  46,  70,  10,
         79,  43,  37,  19,   4,  14, 106, 121, 130,  34,   2,  82,  10, 147,
         58,  29,  10,  52,   9,  77,  30,  29,  30,   4,   1, 104, 106,  86,
         87,   8,  60,  99,  60,  79,  28,   3])

In [87]:
# Per-epoch training metrics, one list entry per completed epoch.
history = {"train_loss": [], "train_acc_start": [], "train_acc_end": []}
loop_start = timer()
# lr scheduler
# NOTE(review): the scheduler is created but scheduler.step(...) is never called
# (there is no validation loss yet), so the learning rate stays constant.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5, threshold=0.01)
for epoch in range(50):
    train_dict = train_step(model, optimizer, loss_function, train_data_loader, device=device)
    #eval_results = evaluate_model_on_data(model, evaluator, train_data_loader, paragraphs_mapper, questions_mapper, device, debug=False)
    cur_lr = optimizer.param_groups[0]['lr']
    print(f'Epoch: {epoch}, lr: {cur_lr}, Train loss: {train_dict["loss"]:.4f},  Train acc start: {train_dict["accuracy_start"]:.4f}, Train acc end: {train_dict["accuracy_end"]:.4f}, Time: {train_dict["time"]:.4f}')
    # Record this epoch's statistics
    history["train_loss"].append(train_dict["loss"])
    history["train_acc_start"].append(train_dict["accuracy_start"])
    history["train_acc_end"].append(train_dict["accuracy_end"])
    #history["val_loss"].append(val_dict["loss"]);history["val_acc"].append(val_dict["accuracy"]);
    #scheduler.step(val_dict["loss"])
    #print(f"Evaluation Results: {eval_results}")
loop_end = timer()
print(f"Elapsed time: {(loop_end - loop_start):.4f}")

Epoch: 0, lr: 0.01, Train loss: 6.8148,  Train acc start: 0.1455, Train acc end: 0.2215, Time: 448.5750
Epoch: 1, lr: 0.01, Train loss: 6.4083,  Train acc start: 0.1675, Train acc end: 0.2437, Time: 448.5415
Epoch: 2, lr: 0.01, Train loss: 5.7889,  Train acc start: 0.2327, Train acc end: 0.3104, Time: 459.5925
Epoch: 3, lr: 0.01, Train loss: 5.2566,  Train acc start: 0.2760, Train acc end: 0.3550, Time: 461.8417
Epoch: 4, lr: 0.01, Train loss: 4.7785,  Train acc start: 0.3265, Train acc end: 0.4022, Time: 482.4080
Epoch: 5, lr: 0.01, Train loss: 4.5512,  Train acc start: 0.3528, Train acc end: 0.4252, Time: 496.4858
Epoch: 6, lr: 0.01, Train loss: 4.4082,  Train acc start: 0.3696, Train acc end: 0.4419, Time: 430.5539
Epoch: 7, lr: 0.01, Train loss: 4.3214,  Train acc start: 0.3789, Train acc end: 0.4522, Time: 428.6056


KeyboardInterrupt: 

In [35]:
# Collect the model predictions over the whole training loader
# (the cells below index the result by question id).
eval_dict = build_evaluation_dict(model, train_data_loader, paragraphs_mapper, questions_mapper, device)

Might fail ['General', 'Electric', 'heavily', 'contaminated', 'the', 'Hudson', 'River', 'with', 'polychlorinated', 'biphenyls', '(PCBs)', 'between', '1947-77.', 'This', 'pollution', 'caused', 'a', 'range', 'of', 'harmful', 'effects', 'to', 'wildlife', 'and', 'people', 'who', 'eat', 'fish', 'from', 'the', 'river', 'or', 'drink', 'the', 'water.', 'In', 'response', 'to', 'this', 'contamination,', 'activists', 'protested', 'in', 'various', 'ways.', 'Musician', 'Pete', 'Seeger', 'founded', 'the', 'Hudson', 'River', 'Sloop', 'Clearwater', 'and', 'the', 'Clearwater', 'Festival', 'to', 'draw', 'attention', 'to', 'the', 'problem.', 'The', 'activism', 'led', 'to', 'the', 'site', 'being', 'designated', 'by', 'the', 'EPA', 'as', 'one', 'of', 'the', 'superfund', 'sites', 'requiring', 'extensive', 'cleanup.', 'Other', 'sources', 'of', 'pollution,', 'including', 'mercury', 'contamination', 'and', 'sewage', 'dumping,', 'have', 'also', 'contributed', 'to', 'problems', 'in', 'the', 'Hudson', 'River', 'w

In [47]:
# Spot-check one question: print its tokens next to the predicted answer.
qid = "5725b40389a1e219009abd0a"
print(questions_mapper[qid], eval_dict[qid])

['Of', 'what', 'ancestry', 'was', 'Napoleon?'] Corsica


In [36]:
eval_dict

{'56f8eee09e9bad19000a0724': 'Medicine',
 '5733a8d44776f41900660f81': 'threat.[citation',
 '56bf99aca10cfb14005511ab': 'million.',
 '5733fa9a4776f41900661628': 'May',
 '56de2cf1cffd8e1900b4b61f': 'Commensal',
 '56e8d85b0b45c0140094cd0c': 'Westminster,',
 '5733b49a4776f419006610c0': 'Beira,',
 '572fafeca23a5019007fc89d': 'model.',
 '56e7b57900c9c71400d7753d': '400,000',
 '572ac493111d821400f38d49': 'Philadelphia',
 '5725c35889a1e219009abe13': 'Cross',
 '572b63a8be1ee31400cb834d': 'Airport,',
 '572fa02704bcaa1900d76b0c': 'coiled,',
 '570e45630b85d914000d7dd5': '800,000',
 '5726a652f1498d1400e8e61a': '"As',
 '57294dda1d0469140077926e': 'JP).',
 '5731a004e99e3014001e6192': 'Obama,',
 '57284e224b864d190016490c': 'Vedanta',
 '5726d340dd62a815002e9174': 'William',
 '5726a73df1498d1400e8e630': '167.950.',
 '5726b8a5f1498d1400e8e8e1': 'Korea.',
 '56e032247aa994140058e34b': 'Cartoonists',
 '5729b86a3f37b31900478542': 'Lenovo',
 '570db4b716d0071400510d15': 'Emperor.',
 '57296bd76aef051400154e4c':

In [25]:
# Exact-match / F1 on the training loader, printing the prediction dict (debug=True).
# NOTE(review): the saved output of this cell is an IndexError; possibly a
# predicted index lands beyond the real paragraph length (padding) — confirm
# inside build_evaluation_dict / extract_answer.
eval_results = evaluate_model_on_data(model, evaluator, train_data_loader, paragraphs_mapper, questions_mapper, device, debug=True)

IndexError: list index out of range