# Question answering on the SQuAD dataset

In [6]:
import sys
import random
from functools import partial

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
import transformers
import tokenizers

from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.normalizers import Sequence, StripAccents, Lowercase, Strip
from tokenizers.pre_tokenizers import Sequence as PreSequence
from tokenizers.pre_tokenizers import Whitespace, Punctuation
from tokenizers import BertWordPieceTokenizer

import dataset
import model
import training
import utils

%load_ext autoreload
%autoreload 2
%matplotlib inline

  return torch._C._cuda_getDeviceCount() > 0


In [2]:
# Notebook-wide matplotlib defaults: figure size/resolution and axis margins.
plt.rcParams.update(
    {
        'figure.figsize': [8, 6],
        'figure.dpi': 100,
        'axes.xmargin': .05,
        'axes.ymargin': .05,
    }
)
# The ggplot style sheet is applied after the explicit settings above.
plt.style.use('ggplot')

In [45]:
# Weights & Biases settings shared by every experiment in this notebook.
WANDB_PROJECT = "squad-qa"
WANDB_ENTITY = "wadaboa"
WANDB_MODE = "online"
WANDB_RESUME = "never"

# Pre-bind the shared settings so each experiment only passes its own
# run-specific arguments (e.g. name and group) when opening a run.
_wandb_defaults = {
    "project": WANDB_PROJECT,
    "entity": WANDB_ENTITY,
    "mode": WANDB_MODE,
    "resume": WANDB_RESUME,
}
init_wandb = partial(wandb.init, **_wandb_defaults)

In [2]:
# Turn W&B off through its CLI.
# NOTE(review): this sits next to WANDB_MODE = "online" in the configuration
# cell — confirm which of the two is the intended behaviour.
!wandb disabled

W&B disabled.


## Preliminaries

### Raw data loading

In [7]:
# Instantiate the project's SQuAD dataset wrapper; its raw train/test
# dataframes are previewed in the following cells.
squad_dataset = dataset.SquadDataset()

In [8]:
# Preview the raw training dataframe.
squad_dataset.raw_train_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,5733be284776f41900661182,To whom did the Virgin Mary allegedly appear i...,University_of_Notre_Dame,0,"Architecturally, the school has a Catholic cha...",Saint Bernadette Soubirous,515,541
1,5733be284776f4190066117f,What is in front of the Notre Dame Main Building?,University_of_Notre_Dame,0,"Architecturally, the school has a Catholic cha...",a copper statue of Christ,188,213
2,5733be284776f41900661180,The Basilica of the Sacred heart at Notre Dame...,University_of_Notre_Dame,0,"Architecturally, the school has a Catholic cha...",the Main Building,279,296
3,5733be284776f41900661181,What is the Grotto at Notre Dame?,University_of_Notre_Dame,0,"Architecturally, the school has a Catholic cha...",a Marian place of prayer and reflection,381,420
4,5733be284776f4190066117e,What sits on top of the Main Building at Notre...,University_of_Notre_Dame,0,"Architecturally, the school has a Catholic cha...",a golden statue of the Virgin Mary,92,126
...,...,...,...,...,...,...,...,...
87594,5735d259012e2f140011a09d,In what US state did Kathmandu first establish...,Kathmandu,18890,"Kathmandu Metropolitan City (KMC), in order to...",Oregon,229,235
87595,5735d259012e2f140011a09e,What was Yangon previously known as?,Kathmandu,18890,"Kathmandu Metropolitan City (KMC), in order to...",Rangoon,414,421
87596,5735d259012e2f140011a09f,With what Belorussian city does Kathmandu have...,Kathmandu,18890,"Kathmandu Metropolitan City (KMC), in order to...",Minsk,476,481
87597,5735d259012e2f140011a0a0,In what year did Kathmandu create its initial ...,Kathmandu,18890,"Kathmandu Metropolitan City (KMC), in order to...",1975,199,203


In [9]:
# Preview the raw test dataframe.
squad_dataset.raw_test_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56be4db0acb8001400a502ec,Which NFL team represented the AFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,Denver Broncos,177,191
1,56be4db0acb8001400a502ed,Which NFL team represented the NFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,Carolina Panthers,249,266
2,56be4db0acb8001400a502ee,Where did Super Bowl 50 take place?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,"Santa Clara, California",403,426
3,56be4db0acb8001400a502ee,Where did Super Bowl 50 take place?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,Levi's Stadium,355,369
4,56be4db0acb8001400a502ee,Where did Super Bowl 50 take place?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,Levi's Stadium in the San Francisco Bay Area a...,355,427
...,...,...,...,...,...,...,...,...
18211,5737aafd1c456719005744fd,What is a very seldom used unit of mass in the...,Force,2066,"The pound-force has a metric counterpart, less...",slug,274,278
18212,5737aafd1c456719005744fd,What is a very seldom used unit of mass in the...,Force,2066,"The pound-force has a metric counterpart, less...",metric slug,267,278
18213,5737aafd1c456719005744fd,What is a very seldom used unit of mass in the...,Force,2066,"The pound-force has a metric counterpart, less...",the metric slug,263,278
18214,5737aafd1c456719005744fe,What seldom used term of a unit of force equal...,Force,2066,"The pound-force has a metric counterpart, less...",kip,712,715


### Embeddings

In [96]:
# Special tokens registered in the embedding vocabulary:
# UNK_TOKEN is the tokenizers' unknown-word token, PAD_TOKEN is their padding token.
UNK_TOKEN = "[UNK]"
PAD_TOKEN = "[PAD]"

- FastText: 
    - _fasttext-wiki-news-subwords_ (dimensions: 300)
- GloVe:
    - _glove-twitter_ (dimensions: 25, 50, 100, 200)
    - _glove-wiki-gigaword_ (dimensions: 50, 100, 200, 300)
- Word2Vec:
    - _word2vec-google-news_ (dimensions: 300)
    - _word2vec-ruscorpora_ (dimensions: 300)

In [97]:
# Pretrained GloVe vectors.
# See https://github.com/RaRe-Technologies/gensim-data
GLOVE_MODEL_NAME = "glove-twitter"
GLOVE_EMBEDDING_DIMENSION = 50
glove_embedding_model = utils.load_embedding_model(
    GLOVE_MODEL_NAME,
    embedding_dimension=GLOVE_EMBEDDING_DIMENSION,
)

In [98]:
# Use the mean of all pretrained vectors as the embedding of the unknown token.
glove_unk = glove_embedding_model.vectors.mean(axis=0)
glove_embedding_model.add(UNK_TOKEN, glove_unk)

In [99]:
# Display the vector now stored for the unknown token.
glove_embedding_model[UNK_TOKEN]

array([-0.21896735,  0.17269313, -0.05617283,  0.06307325,  0.00960657,
       -0.23461065, -0.16731773, -0.25613925,  0.12990713, -0.34179848,
       -0.07411992,  0.00533567,  0.7090377 , -0.1139018 ,  0.10613882,
        0.09186497,  0.15880948,  0.03158554,  0.2241412 ,  0.20387109,
        0.05305386,  0.04961218,  0.11807557, -0.10199773, -0.18345806,
        0.56560194,  0.07183363,  0.04322447, -0.39442873,  0.06828266,
        0.39542177,  0.08794834,  0.41605434, -0.27820984, -0.5106833 ,
       -0.16443801,  0.0973425 ,  0.02233286,  0.19346187,  0.15909852,
        0.886585  , -0.01498107,  0.10211241, -0.12959567, -0.328366  ,
        0.13014658, -0.02061043,  0.05735753,  0.14008364,  0.22588447],
      dtype=float32)

In [100]:
# Show the last key of the vocabulary (expected to be the token just added).
list(glove_embedding_model.vocab.keys())[-1]

'[UNK]'

In [101]:
# Is there any embedding row made up entirely of zeros?
zero_rows = (glove_embedding_model.vectors == 0).all(axis=1)
bool(zero_rows.any())

False

In [102]:
# Register the padding token with an all-zero vector, then display it.
glove_embedding_model.add(PAD_TOKEN, np.zeros((1, GLOVE_EMBEDDING_DIMENSION)))
glove_embedding_model[PAD_TOKEN]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

In [103]:
# Show the last key of the vocabulary again (expected to be the padding token).
list(glove_embedding_model.vocab.keys())[-1]

'[PAD]'

In [104]:
# Shape of the embedding matrix after adding the special tokens.
glove_embedding_model.vectors.shape

(1193516, 50)

In [105]:
# Token -> row index lookup, following the embedding model's own word order.
glove_vocab = {
    word: index for index, word in enumerate(glove_embedding_model.index2word)
}

In [106]:
# Build the embedding layer straight from the pretrained GloVe matrix.
# `from_pretrained` sizes the layer from the matrix itself, so no throw-away
# randomly-initialised weight of the same (large) size is allocated first.
glove_embedding_layer = nn.Embedding.from_pretrained(
    torch.from_numpy(glove_embedding_model.vectors),
    freeze=True,  # pretrained vectors are not updated during training
    padding_idx=glove_vocab[PAD_TOKEN],
)

### Standard tokenizer and preprocessing

In [107]:
# Number of tokens every context is padded/truncated to by the context tokenizer;
# also passed to the baseline model.
MAX_CONTEXT_TOKENS = 300

In [127]:
def build_standard_tokenizer(max_tokens=None):
    """Create a word-level tokenizer over the GloVe vocabulary.

    Text is accent-stripped, lowercased and trimmed, then split on whitespace
    and punctuation. Unknown words map to UNK_TOKEN and sequences are
    right-padded with PAD_TOKEN.

    Args:
        max_tokens: When given, padding uses this fixed length and truncation
            to the same length is enabled. When None, padding is enabled
            without a fixed length and truncation is left disabled.

    Returns:
        The configured `Tokenizer` instance.
    """
    tokenizer = Tokenizer(WordLevel(glove_vocab, unk_token=UNK_TOKEN))
    tokenizer.normalizer = Sequence([StripAccents(), Lowercase(), Strip()])
    tokenizer.pre_tokenizer = PreSequence([Whitespace(), Punctuation()])
    tokenizer.enable_padding(
        direction="right",
        pad_id=glove_vocab[PAD_TOKEN],
        pad_type_id=1,
        pad_token=PAD_TOKEN,
        length=max_tokens,
    )
    if max_tokens is not None:
        tokenizer.enable_truncation(max_tokens)
    return tokenizer


# Questions keep their natural length; contexts are fixed to MAX_CONTEXT_TOKENS.
standard_question_tokenizer = build_standard_tokenizer()
standard_context_tokenizer = build_standard_tokenizer(max_tokens=MAX_CONTEXT_TOKENS)

In [128]:
# Combine the question and context tokenizers into the project's
# SQuAD tokenizer wrapper.
standard_tokenizer = dataset.StandardSquadTokenizer(
    standard_question_tokenizer, standard_context_tokenizer
)

In [129]:
# Data manager built from the raw dataset and the standard tokenizer; it exposes
# the train/val/test dataframes and datasets used in the rest of the notebook.
standard_dm = dataset.SquadDataManager(squad_dataset, standard_tokenizer)

In [121]:
# Re-attach the tokenizer to the existing data manager.
# NOTE(review): the manager is already constructed with this tokenizer in the
# previous cell; presumably this exists to swap in a rebuilt tokenizer without
# recreating the manager — confirm it is still needed.
standard_dm.tokenizer = standard_tokenizer

In [111]:
# Preview the training dataframe held by the data manager.
standard_dm.train_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56cc239e6d243a140015eeb7,Who were Wang Jiawei and Nyima Gyaincain?,Sino-Tibetan_relations_during_the_Ming_dynasty,299,The exact nature of relations between Tibet an...,[Mainland Chinese scholars],[274],[299]
1,56cc27346d243a140015eeba,What important trade did the Ming Dynasty have...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[horse trade],[338],[349]
2,56cc27346d243a140015eebb,During what years did the Mongol leader Kublai...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[1402–1424],[739],[748]
3,56cc27346d243a140015eebc,Who did the Yongle Emperor try to build a reli...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[Deshin Shekpa],[821],[834]
4,56cc27346d243a140015eebd,Deshin Shekpa was the head of what school?,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[the Karma Kagyu school],[863],[885]
...,...,...,...,...,...,...,...,...
69864,573636bf9c79961900ff7e06,What Botswana was resently forced to do?,Hunting,18832,"In contrast, Botswana has recently been forced...",[ban trophy hunting],[50],[68]
69865,573636bf9c79961900ff7e07,What animal declined across Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[antelope],[126],[134]
69866,573636bf9c79961900ff7e08,What animal numbers have increased in Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[hippopotamus],[251],[263]
69867,573636bf9c79961900ff7e09,What animal numbers remain stable in Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[elephant],[214],[222]


In [112]:
# Preview the validation dataframe held by the data manager.
standard_dm.val_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56be85543aeaaa14008c9063,When did Beyonce start becoming popular?,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[in the late 1990s],[269],[286]
1,56be85543aeaaa14008c9065,What areas did Beyonce compete in when she was...,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[singing and dancing],[207],[226]
2,56be85543aeaaa14008c9066,When did Beyonce leave Destiny's Child and bec...,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[2003],[526],[530]
3,56be86cf3aeaaa14008c9076,"After her second solo album, what other entert...",Beyoncé,56,Following the disbandment of Destiny's Child i...,[acting],[207],[213]
4,56be86cf3aeaaa14008c9078,Which artist did Beyonce marry?,Beyoncé,56,Following the disbandment of Destiny's Child i...,[Jay Z],[369],[374]
...,...,...,...,...,...,...,...,...
17282,573445bbacc1501500babd6d,Why is cycling popular in Tucson?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[its flat terrain and dry climate],[36],[68]
17283,573445bbacc1501500babd6e,What is The Loop?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[a network of seven linear parks],[254],[285]
17284,573445bbacc1501500babd6f,How many miles of trails are in The Loop?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[over 100],[297],[305]
17285,573445bbacc1501500babd70,What organization advises the Tucson governmen...,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[Tucson-Pima County Bicycle Advisory Committee],[429],[474]


In [113]:
# Preview the test dataframe held by the data manager.
standard_dm.test_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56be4db0acb8001400a502ec,Which NFL team represented the AFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Denver Broncos],[177],[191]
1,56be4db0acb8001400a502ed,Which NFL team represented the NFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Carolina Panthers],[249],[266]
2,56be4db0acb8001400a502ee,Where did Super Bowl 50 take place?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,"[Santa Clara, California, Levi's Stadium, Levi...","[403, 355, 355]","[426, 369, 427]"
3,56be4db0acb8001400a502ef,Which NFL team won Super Bowl 50?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Denver Broncos],[177],[191]
4,56be4db0acb8001400a502f0,What color was used to emphasize the 50th anni...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[gold],[521],[525]
...,...,...,...,...,...,...,...,...
10525,5737aafd1c456719005744fb,What is the metric term less used than the New...,Force,2066,"The pound-force has a metric counterpart, less...","[kilogram-force, pound-force, kilogram-force (...","[82, 4, 82, 78]","[96, 15, 102, 98]"
10526,5737aafd1c456719005744fc,What is the kilogram-force sometimes reffered ...,Force,2066,"The pound-force has a metric counterpart, less...",[kilopond],[114],[122]
10527,5737aafd1c456719005744fd,What is a very seldom used unit of mass in the...,Force,2066,"The pound-force has a metric counterpart, less...","[slug, metric slug, the metric slug]","[274, 267, 263]","[278, 278, 278]"
10528,5737aafd1c456719005744fe,What seldom used term of a unit of force equal...,Force,2066,"The pound-force has a metric counterpart, less...",[kip],[712],[715]


## Baseline model

In [130]:
# Baseline QA model built on the GloVe embedding layer and the maximum context
# length; the second line displays the result of count_parameters().
baseline_model = model.QABaselineModel(glove_embedding_layer, MAX_CONTEXT_TOKENS)
baseline_model.count_parameters()

101400

In [131]:
# Training configuration for the baseline model.
# NOTE(review): output_dir is shared with the other experiments in this notebook
# and overwrite_output_dir is on — confirm that reusing the directory is intended.
baseline_args = transformers.TrainingArguments(
    # Output locations
    output_dir="./checkpoints",
    overwrite_output_dir=True,
    logging_dir="./runs",
    # Logging and evaluation cadence
    logging_first_step=True,
    logging_steps=5,
    evaluation_strategy="epoch",
    # Optimisation
    learning_rate=1e-3,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=64,
    # Dataset columns: keep them all and use "answers" as the label field
    remove_unused_columns=False,
    label_names=["answers"],
)

In [132]:
# Wire the baseline model, its training arguments, the train/validation datasets
# and the metric function into the project's trainer. The data manager's
# tokenizer object is passed as the batch collator.
baseline_trainer = training.SquadTrainer(
    model=baseline_model,
    args=baseline_args,
    data_collator=standard_dm.tokenizer,
    train_dataset=standard_dm.train_dataset,
    eval_dataset=standard_dm.val_dataset,
    compute_metrics=training.compute_metrics,
)

In [125]:
# Open a W&B run for the baseline experiment, grouped under "baseline".
# Requires the configuration cell defining `init_wandb` to have been executed.
baseline_run_name = utils.get_run_name()
baseline_wandb_logger = init_wandb(
    name=baseline_run_name, group="baseline", reinit=True,
)

NameError: name 'init_wandb' is not defined

In [133]:
# Train the baseline model with the trainer configured above.
baseline_trainer.train()

./checkpoints
tensor([[129]]) tensor([[140]])
tensor([[22]]) tensor([[25]])
tensor([[45]]) tensor([[47]])
tensor([[16]]) tensor([[17]])
{'57317a2505b4da19006bd1e0': tensor([   7450,       4,      13,   82602,    3155,  133035,    7016,      58,
             13,     122,    4493,       4,   63600, 1193514,      13,  126345,
             39,    5718,   32478,    5154,       1,      53,   84608,      35,
             11,   11647,    8162,      35,    6865,    1781,   26959,      45,
         202769,   26183,     133,      13,     291]), '572804ce2ca10214002d9bb4': tensor([    325,    4768,      39,      13,    1296,   36609,   12519,      93,
           6110,      35,      13,   36609,       1,     107,    1527,      13,
        1193514,   37224,  782555,    5354,       4,     965,   13790,     124,
             13,    3650,    7747,      37,      13,   12519,       1,      13,
            125,      93,    3208,  211455,    5154]), '5727e53bff5b5019007d97c8': tensor([   489,   3187,     1

Epoch,Training Loss,Validation Loss


tensor([[145]]) tensor([[148]])
tensor([[38]]) tensor([[55]])
tensor([[13]]) tensor([[14]])
tensor([[62]]) tensor([[62]])
{'570b57586b8089140040f8aa': tensor([     26,      13,   50979,  173784,    6394,      11,    5920,      35,
        1193514,      92,      13,    4417,   19907,      13,   36275,      39,
          32744,    1477,   48467,    3187,     393,      13,   10278,   76943,
          12211,     109,  114776,     124,      11,   49497,      39,     350,
          54665,    6244,     124,  471622,       1]), '56dfc460231d4119001abdc5': tensor([    329,    9375,  268376,  790591,      66,  275564,    2552,     153,
           1158,   11605,    2552,      17, 1081460,      20,       4,   17317,
         114366,      39,    3825,    1471,    1174,    4285,       4,     663,
          17069,      96,     598,      37,      13,    3825,  162561,     169,
           3825,      45,     196,    8570,      64]), '56e0838f231d4119001ac232': tensor([   4, 1012,   39,  965,   70, 2821]

KeyboardInterrupt: 

In [610]:
# Close the W&B run opened for the baseline experiment.
baseline_wandb_logger.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

## BiDAF

In [596]:
# BiDAF model built on the GloVe embedding layer; the second line displays the
# result of count_parameters().
bidaf_model = model.BiDAFModel(glove_embedding_layer)
bidaf_model.count_parameters()

314350

In [597]:
# Training configuration for the BiDAF model.
# NOTE(review): the BiDAF trainer is given an explicit optimizer/scheduler pair,
# so the learning_rate set here is presumably not the one in effect — confirm.
bidaf_args = transformers.TrainingArguments(
    # Output locations
    output_dir="./checkpoints",
    overwrite_output_dir=True,
    logging_dir="./runs",
    # Logging and evaluation cadence
    logging_first_step=True,
    logging_steps=5,
    evaluation_strategy="epoch",
    # Optimisation
    learning_rate=1e-3,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=64,
    # Dataset columns: keep them all and use "answers" as the label field
    remove_unused_columns=False,
    label_names=["answers"],
)

In [598]:
# Adadelta optimizer for BiDAF, paired with an exponential learning-rate decay.
bidaf_optimizer = optim.Adadelta(params=bidaf_model.parameters(), lr=0.5)
bidaf_lr_scheduler = optim.lr_scheduler.ExponentialLR(
    optimizer=bidaf_optimizer,
    gamma=0.999,
)

In [599]:
# Wire the BiDAF model, its training arguments, the train/validation datasets,
# the custom optimizer/scheduler pair and the metric function into the
# project's trainer. The data manager's tokenizer object is the batch collator.
bidaf_trainer = training.SquadTrainer(
    model=bidaf_model,
    args=bidaf_args,
    data_collator=standard_dm.tokenizer,
    train_dataset=standard_dm.train_dataset,
    eval_dataset=standard_dm.val_dataset,
    optimizers=(bidaf_optimizer, bidaf_lr_scheduler),
    compute_metrics=training.compute_metrics,
)

In [602]:
# Open a W&B run for the BiDAF experiment, grouped under "bidaf".
# Requires the configuration cell defining `init_wandb` to have been executed.
bidaf_run_name = utils.get_run_name()
bidaf_wandb_logger = init_wandb(name=bidaf_run_name, group="bidaf", reinit=True,)

[34m[1mwandb[0m: wandb version 0.10.14 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [603]:
# Train the BiDAF model with the trainer configured above.
bidaf_trainer.train()

./checkpoints


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [601]:
# Close the W&B run opened for the BiDAF experiment.
bidaf_wandb_logger.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

## BERT

In [73]:
# Truncation length (in tokens) applied by the BERT WordPiece tokenizer.
MAX_BERT_TOKENS = 512

In [89]:
# Instantiate the project's BERT-based QA model with its default arguments.
bert_model = model.QABertModel()

In [90]:
# Lowercasing WordPiece tokenizer loaded from a local vocabulary file, with
# right-side padding and truncation to MAX_BERT_TOKENS.
bert_wp_tokenizer = BertWordPieceTokenizer(
    "data/bert-base-uncased-vocab.txt",
    lowercase=True,
)
bert_wp_tokenizer.enable_padding(direction="right", pad_type_id=1)
bert_wp_tokenizer.enable_truncation(MAX_BERT_TOKENS)

In [91]:
# Wrap the WordPiece tokenizer in the project's SQuAD-specific BERT tokenizer.
bert_tokenizer = dataset.BertSquadTokenizer(bert_wp_tokenizer)

In [14]:
# Data manager built from the raw dataset and the BERT tokenizer; it exposes the
# train/val/test dataframes and datasets used by the BERT experiment.
bert_dm = dataset.SquadDataManager(squad_dataset, bert_tokenizer)

In [92]:
# Re-attach the tokenizer to the existing data manager.
# NOTE(review): the manager is already constructed with this tokenizer in the
# previous cell; presumably this exists to swap in a rebuilt tokenizer without
# recreating the manager — confirm it is still needed.
bert_dm.tokenizer = bert_tokenizer

In [16]:
# Preview the training dataframe held by the BERT data manager.
bert_dm.train_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56cc239e6d243a140015eeb7,Who were Wang Jiawei and Nyima Gyaincain?,Sino-Tibetan_relations_during_the_Ming_dynasty,299,The exact nature of relations between Tibet an...,[Mainland Chinese scholars],[274],[299]
1,56cc27346d243a140015eeba,What important trade did the Ming Dynasty have...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[horse trade],[338],[349]
2,56cc27346d243a140015eebb,During what years did the Mongol leader Kublai...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[1402–1424],[739],[748]
3,56cc27346d243a140015eebc,Who did the Yongle Emperor try to build a reli...,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[Deshin Shekpa],[821],[834]
4,56cc27346d243a140015eebd,Deshin Shekpa was the head of what school?,Sino-Tibetan_relations_during_the_Ming_dynasty,300,Some scholars note that Tibetan leaders during...,[the Karma Kagyu school],[863],[885]
...,...,...,...,...,...,...,...,...
70022,573636bf9c79961900ff7e06,What Botswana was resently forced to do?,Hunting,18832,"In contrast, Botswana has recently been forced...",[ban trophy hunting],[50],[68]
70023,573636bf9c79961900ff7e07,What animal declined across Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[antelope],[126],[134]
70024,573636bf9c79961900ff7e08,What animal numbers have increased in Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[hippopotamus],[251],[263]
70025,573636bf9c79961900ff7e09,What animal numbers remain stable in Botswana?,Hunting,18832,"In contrast, Botswana has recently been forced...",[elephant],[214],[222]


In [17]:
# Preview the validation dataframe held by the BERT data manager.
bert_dm.val_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56be85543aeaaa14008c9063,When did Beyonce start becoming popular?,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[in the late 1990s],[269],[286]
1,56be85543aeaaa14008c9065,What areas did Beyonce compete in when she was...,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[singing and dancing],[207],[226]
2,56be85543aeaaa14008c9066,When did Beyonce leave Destiny's Child and bec...,Beyoncé,55,Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...,[2003],[526],[530]
3,56be86cf3aeaaa14008c9076,"After her second solo album, what other entert...",Beyoncé,56,Following the disbandment of Destiny's Child i...,[acting],[207],[213]
4,56be86cf3aeaaa14008c9078,Which artist did Beyonce marry?,Beyoncé,56,Following the disbandment of Destiny's Child i...,[Jay Z],[369],[374]
...,...,...,...,...,...,...,...,...
17332,573445bbacc1501500babd6d,Why is cycling popular in Tucson?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[its flat terrain and dry climate],[36],[68]
17333,573445bbacc1501500babd6e,What is The Loop?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[a network of seven linear parks],[254],[285]
17334,573445bbacc1501500babd6f,How many miles of trails are in The Loop?,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[over 100],[297],[305]
17335,573445bbacc1501500babd70,What organization advises the Tucson governmen...,"Tucson,_Arizona",16649,Cycling is popular in Tucson due to its flat t...,[Tucson-Pima County Bicycle Advisory Committee],[429],[474]


In [18]:
# Preview the test dataframe held by the BERT data manager.
bert_dm.test_df

Unnamed: 0,question_id,question,title,context_id,context,answer,answer_start,answer_end
0,56be4db0acb8001400a502ec,Which NFL team represented the AFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Denver Broncos],[177],[191]
1,56be4db0acb8001400a502ed,Which NFL team represented the NFC at Super Bo...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Carolina Panthers],[249],[266]
2,56be4db0acb8001400a502ee,Where did Super Bowl 50 take place?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,"[Santa Clara, California, Levi's Stadium, Levi...","[403, 355, 355]","[426, 369, 427]"
3,56be4db0acb8001400a502ef,Which NFL team won Super Bowl 50?,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[Denver Broncos],[177],[191]
4,56be4db0acb8001400a502f0,What color was used to emphasize the 50th anni...,Super_Bowl_50,0,Super Bowl 50 was an American football game to...,[gold],[521],[525]
...,...,...,...,...,...,...,...,...
10559,5737aafd1c456719005744fb,What is the metric term less used than the New...,Force,2066,"The pound-force has a metric counterpart, less...","[kilogram-force, pound-force, kilogram-force (...","[82, 4, 82, 78]","[96, 15, 102, 98]"
10560,5737aafd1c456719005744fc,What is the kilogram-force sometimes reffered ...,Force,2066,"The pound-force has a metric counterpart, less...",[kilopond],[114],[122]
10561,5737aafd1c456719005744fd,What is a very seldom used unit of mass in the...,Force,2066,"The pound-force has a metric counterpart, less...","[slug, metric slug, the metric slug]","[274, 267, 263]","[278, 278, 278]"
10562,5737aafd1c456719005744fe,What seldom used term of a unit of force equal...,Force,2066,"The pound-force has a metric counterpart, less...",[kip],[712],[715]


In [93]:
# Training configuration for the BERT model.
# NOTE(review): 1e-3 is far above the learning rates commonly used when
# fine-tuning BERT weights — confirm whether QABertModel freezes the encoder
# or whether a smaller rate is intended.
bert_args = transformers.TrainingArguments(
    # Output locations
    output_dir="./checkpoints",
    overwrite_output_dir=True,
    logging_dir="./runs",
    # Logging and evaluation cadence
    logging_first_step=True,
    logging_steps=5,
    evaluation_strategy="epoch",
    # Optimisation
    learning_rate=1e-3,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=64,
    # Dataset columns: keep them all and use "answers" as the label field
    remove_unused_columns=False,
    label_names=["answers"],
)

In [94]:
# Wire the BERT model, its training arguments, the train/validation datasets and
# the metric function into the project's trainer. The BERT data manager's
# tokenizer object is passed as the batch collator.
bert_trainer = training.SquadTrainer(
    model=bert_model,
    args=bert_args,
    data_collator=bert_dm.tokenizer,
    train_dataset=bert_dm.train_dataset,
    eval_dataset=bert_dm.val_dataset,
    compute_metrics=training.compute_metrics,
)

In [87]:
# Open a W&B run for the BERT experiment, grouped under "bert".
# Requires the configuration cell defining `init_wandb` to have been executed.
bert_run_name = utils.get_run_name()
bert_wandb_logger = init_wandb(name=bert_run_name, group="bert", reinit=True,)

NameError: name 'init_wandb' is not defined

In [95]:
# Train the BERT model with the trainer configured above.
bert_trainer.train()

./checkpoints
tensor([[80]]) tensor([[81]])
tensor([[86]]) tensor([[93]])
tensor([[20]]) tensor([[25]])
tensor([[43]]) tensor([[43]])
tensor([[-28.2279, -27.9948, -27.9620, -28.0944, -27.6715, -27.4422, -27.8649,
         -27.4422, -27.7152, -27.8789, -27.4151, -28.2127, -28.3322, -27.5018,
         -27.8195, -27.8419, -28.3313, -27.4956,  -4.7595,  -4.3950,  -4.7152,
          -5.5549,  -5.4272,  -5.2651, -28.0315,  -4.7880, -28.4162, -27.1522,
          -4.5501,  -4.9171,  -5.3770,  -4.9928, -28.0852, -27.9848,  -4.4202,
          -4.4163,  -5.3207,  -4.0691,  -4.4017,  -4.9169,  -5.0050,  -5.1494,
          -4.4633,  -4.4163,  -5.0150,  -4.7157,  -4.6039,  -4.6479,  -4.8473,
          -4.0797,  -5.0550,  -4.9860,  -5.0138,  -4.5126,  -4.1783,  -4.4163,
          -4.7556,  -4.4163,  -4.9390,  -4.3240,  -4.3448,  -4.5085,  -4.5084,
          -4.4163,  -4.5745,  -4.6542,  -5.0019,  -4.3031,  -4.5936,  -4.5363,
          -4.4266,  -4.4163, -27.7494,  -4.9956, -27.3273, -27.3743,  -4.845

Epoch,Training Loss,Validation Loss


tensor([[83]]) tensor([[85]])
tensor([[134]]) tensor([[134]])
tensor([[61]]) tensor([[61]])
tensor([[12]]) tensor([[13]])
tensor([[-26.9452, -26.3145, -25.9474, -25.8639, -25.8632, -26.2330, -25.8189,
         -26.4178, -23.6844, -25.9225, -25.8274, -26.0865, -26.3029, -25.5052,
         -23.6844, -25.4123, -25.7873, -26.0918,  -3.4331,  -3.3809,  -3.3076,
          -3.1896,  -3.5927,  -3.6631, -26.3475, -26.5378, -25.9717,  -3.2106,
          -3.4700,  -3.5464,  -3.4658,  -3.5342,  -0.6585,  -2.7186,  -2.8398,
         -23.6844, -26.3392, -26.3266, -27.0548, -26.4039, -26.3406, -26.4131,
         -25.9988, -25.8577, -23.6844, -26.0394, -26.0538, -25.9302, -23.6844,
         -26.2055, -25.0170, -26.0796, -25.4455, -26.3164, -23.6844, -26.1473,
         -25.8210, -23.6844, -25.8622, -24.9576, -26.0664, -26.4875, -26.5129,
         -23.6844, -25.5965, -23.6844, -26.4094, -26.7691, -25.8726, -25.7941,
         -23.6844, -26.1499, -25.8083, -25.5834, -25.9957, -25.8189, -25.4700,
         

KeyboardInterrupt: 

In [40]:
# Close the W&B run opened for the BERT experiment.
bert_wandb_logger.finish()