In [1]:
import numpy as np
import os
import json
import dataset_utils as du
import eval_utils as eu
import torch
import torch.distributed as dist
from datetime import datetime
from transformers import BertTokenizer, BertForQuestionAnswering, AdamW
from tokenizers import BertWordPieceTokenizer
from torch.utils.data import DataLoader, DistributedSampler
from torch.nn.parallel import DistributedDataParallel
from tqdm import tqdm
from tqdm.notebook import tqdm

In [2]:
# Pretrained checkpoint identifier; also used to name the cache sub-directories.
hf_model = 'bert-base-uncased'

# Cache root lives under the directory the notebook is launched from.
bert_cache = os.path.join(os.getcwd(), 'cache')

# Directory holding the saved tokenizer files for this checkpoint.
tokenizer_dir_name = f'{hf_model}-tokenizer'
save_path = os.path.join(bert_cache, tokenizer_dir_name)

In [3]:
# Build the fast WordPiece tokenizer from the vocabulary file stored under
# save_path, lowercasing input text.
vocab_path = os.path.join(save_path, 'vocab.txt')
tokenizer = BertWordPieceTokenizer(vocab_path, lowercase=True)

In [4]:
# Instantiate the question-answering model from the pretrained checkpoint,
# keeping downloaded files in a checkpoint-specific cache directory.
qa_cache_dir = os.path.join(bert_cache, f'{hf_model}_qa')
model = BertForQuestionAnswering.from_pretrained(hf_model, cache_dir=qa_cache_dir)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased a

In [5]:
# Read the evaluation set (dev-v1.1.json) from the local cache.
eval_path = os.path.join(bert_cache, 'data', 'dev-v1.1.json')

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which can mis-decode or fail on non-ASCII text.
with open(eval_path, encoding='utf-8') as f:
    raw_eval_data = json.load(f)

In [6]:
# Length limit handed to the example builder (presumably the maximum
# tokenized sequence length -- confirm in dataset_utils).
max_len = 384

# Turn the raw JSON into example objects, then into model inputs and targets.
eval_squad_examples = du.create_squad_examples(raw_eval_data, max_len, tokenizer)
eval_inputs_and_targets = du.create_inputs_targets(eval_squad_examples)
x_eval, y_eval = eval_inputs_and_targets

print(f"{len(eval_squad_examples)} evaluation points created.")

10331 evaluation points created.


In [7]:
# Suffix identifying which saved checkpoint to evaluate. The value looks like
# a timestamp in datetime.now().strftime("%Y-%m-%d-%H%M%S") format.
model_hash = '2021-12-23-095243'
model_path_name = f'./cache/model_trained_2_nodes_{model_hash}'

# Deserialize the saved weights onto the CPU so no GPU is required here.
# (Dropping map_location leaves tensors on the device they were saved from,
# e.g. for loading on a GPU machine.)
cpu_state_dict = torch.load(model_path_name, map_location=torch.device('cpu'))

# Last expression of the cell: its return value is displayed as the output.
model.load_state_dict(cpu_state_dict)

<All keys matched successfully>

In [8]:
import importlib

# Re-import eval_utils so edits made to the module since the kernel started
# are picked up without restarting.
importlib.reload(eu)

# Evaluate on a small random subset. The seed is fixed and a fresh generator
# is built on every run, so re-executing this cell (or Restart & Run All)
# scores exactly the same rows.
EVAL_SEED = 42
N_EVAL_SAMPLES = 50

n_eval_rows = len(x_eval[0])
rng = np.random.default_rng(EVAL_SEED)
# Cap the sample size: choice(..., replace=False) raises if asked for more
# items than exist.
samples = rng.choice(
    n_eval_rows,
    size=min(N_EVAL_SAMPLES, n_eval_rows),
    replace=False,
)

# NOTE(review): indexing eval_squad_examples with an index array assumes it is
# a NumPy array whose rows line up one-to-one with x_eval -- confirm against
# dataset_utils (a plain list would raise TypeError here).
eu.EvalUtility(
    (x_eval[0][samples], x_eval[1][samples], x_eval[2][samples]),
    model,
    eval_squad_examples[samples]
).results()