In [3]:
%load_ext autoreload
%autoreload 2

import torch
from config import conf
from torch.utils.data import DataLoader, Subset
from torch.optim import Adam
from transformers import DistilBertTokenizerFast, DistilBertForQuestionAnswering
from tqdm import tqdm
from dataset import SquadDataset
from pathlib import Path
from datetime import datetime
from training import train_loop, evaluate

# Last expression of the cell, so its boolean result is displayed as the cell
# output: a quick check of whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [4]:
# Maximum number of examples kept from each split. Presumably a small cap to
# keep experiments fast -- TODO confirm whether full-size runs are intended.
N_TRAIN_SAMPLES = 500
N_VAL_SAMPLES = 100

# Tokenizer for the same checkpoint name as the model loaded below.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Build the dataset from the configured JSON file and split it using the
# configured train ratio.
dataset = SquadDataset.from_json(conf['DATASET_FILE'], tokenizer)
train_dataset, val_dataset = dataset.train_val_split(conf['TRAIN_RATIO'])

# Keep only the leading examples of each split. The cap is clamped to the
# split's length: Subset does not validate its indices up front, so an
# oversized range would only fail later with an IndexError during iteration.
train_dataset = Subset(train_dataset, range(min(N_TRAIN_SAMPLES, len(train_dataset))))
val_dataset = Subset(val_dataset, range(min(N_VAL_SAMPLES, len(val_dataset))))

# The question-answering head (qa_outputs.*) is not part of this checkpoint and
# is newly initialized, as the warning printed below this cell reports.
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Training batches are reshuffled; validation batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=conf['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=conf['BATCH_SIZE'])

# Adam over every model parameter with a fixed learning rate of 5e-5.
opt = Adam(model.parameters(), lr=5e-5)

In [6]:
# Either train the model (optionally saving its weights) or load previously
# saved weights and report validation metrics, depending on conf['TRAIN_MODEL'].
if conf['TRAIN_MODEL']:
    train_loss, train_acc, val_loss, val_acc = train_loop(model, train_loader, val_loader, opt, device)

    if conf['SAVE_MODEL']:
        # Create the output folder (and any missing parents) if needed.
        Path(conf['MODELS_FOLDER']).mkdir(parents=True, exist_ok=True)
        # The file name carries only month and day, so a second save on the
        # same date overwrites the earlier file.
        filepath = f"{conf['MODELS_FOLDER']}/model_{datetime.today().strftime('%m%d')}.pt"
        torch.save(model.state_dict(), filepath)
        print(f'Model saved in {filepath}')

else:
    filepath = conf['MODELS_FOLDER'] + '/' + conf['MODEL_LOAD_NAME']
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU still loads when this notebook runs on CPU.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    model.load_state_dict(torch.load(filepath, map_location=device))
    print(f'Loaded model at {filepath}')

    # Loss and accuracy are divided by n_val below; presumably evaluate()
    # returns them as sums over n_val items -- confirm against training.evaluate.
    n_val, val_loss, val_acc = evaluate(model, val_loader, device)
    val_loss /= n_val
    val_acc /= n_val
    print(f'Validation loss: {val_loss:.3f}')
    print(f'Validation accuracy: {val_acc:.3f}')

Loaded model at ./models/model_0125.pt
Validation loss: 0.224
Validation accuracy: 0.917
