In [1]:
import torch
from torch import nn
from transformers import BertConfig, BertTokenizer, BertForSequenceClassification, AdamW

from data import PaddedSentiDataset, get_dataloader
from learner import SentimentLearner

# Seed PyTorch's global RNG so that randomly initialised weights are
# repeatable from one run of the notebook to the next.
torch.manual_seed(41)
# BCEWithLogitsLoss fuses the sigmoid with binary cross-entropy, so it is fed
# raw logits rather than probabilities.
loss_fn = nn.BCEWithLogitsLoss()

# Read the architecture settings from the checkpoint that is fine-tuned below
# instead of relying on BertConfig's library defaults happening to match it;
# only the head size is overridden (a single output logit for the binary
# sentiment target).
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=1)

### Hyperparameters

In [2]:
# Mini-batch size shared by the train, dev and test data loaders.
BATCH_SIZE = 64
# Optimizer *class* (not an instance); it is handed to SentimentLearner as
# `optim_cls`.
# NOTE(review): this is the AdamW exported by `transformers`, which newer
# releases deprecate in favour of torch.optim.AdamW -- confirm against the
# installed version before upgrading (the two have different defaults).
OPTIM_CLS = AdamW

### Load data

In [3]:
# Load the tokenizer published with the 'bert-base-uncased' checkpoint; the
# same instance is passed to every PaddedSentiDataset built below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [4]:
%%time
FILENAME = 'senti.{}.tsv'

train_set = PaddedSentiDataset(FILENAME.format('train'), tokenizer)
valid_set = PaddedSentiDataset(FILENAME.format('dev'), tokenizer)
test_set = PaddedSentiDataset(FILENAME.format('test'), tokenizer)

train_loader = get_dataloader(train_set, batch_size=BATCH_SIZE)
valid_loader = get_dataloader(valid_set, batch_size=BATCH_SIZE)
test_loader = get_dataloader(test_set, batch_size=BATCH_SIZE)

CPU times: user 22.1 s, sys: 110 ms, total: 22.2 s
Wall time: 22.2 s


# Fine-tune BERT

### Load model and learner

In [5]:
# Instantiate a sequence-classification BERT from the 'bert-base-uncased'
# checkpoint using the `config` defined in the first cell. The warnings
# printed below report that the checkpoint's pre-training head weights
# (`cls.*`) are discarded and that some weights of the classification model
# are not initialised from the checkpoint, so they have to be learned during
# fine-tuning.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [6]:
# Bundle the model, its optimisation settings and the data loaders into the
# project's training helper.
learner = SentimentLearner(
    # what is trained
    model=model,
    # how it is optimised
    loss_fn=loss_fn,
    optim_cls=OPTIM_CLS,
    lr=1e-6,
    # what it is trained and validated on
    train_loader=train_loader,
    valid_loader=valid_loader,
)

### Train model

In [7]:
# File the learner writes model parameters to while training (see the
# "Model parameters saved to bert.pt" lines in the log below); the same path
# is reloaded before evaluation.
bert_filename = 'bert.pt'
# Fine-tune for five epochs. In the recorded run each epoch took roughly
# 11-13 minutes of wall time.
learner.train(epochs=5, filename=bert_filename)

Epoch : 01	Wall time : 730.374s
	Train Loss: 0.458 | Train Acc: 77.80%
	Valid Loss: 0.311 | Valid Acc: 88.30%
	Model parameters saved to bert.pt
Epoch : 02	Wall time : 784.042s
	Train Loss: 0.273 | Train Acc: 89.65%
	Valid Loss: 0.267 | Valid Acc: 89.56%
	Model parameters saved to bert.pt
Epoch : 03	Wall time : 773.579s
	Train Loss: 0.232 | Train Acc: 91.12%
	Valid Loss: 0.253 | Valid Acc: 90.25%
	Model parameters saved to bert.pt
Epoch : 04	Wall time : 711.095s
	Train Loss: 0.207 | Train Acc: 92.18%
	Valid Loss: 0.251 | Valid Acc: 90.71%
	Model parameters saved to bert.pt
Epoch : 05	Wall time : 688.702s
	Train Loss: 0.189 | Train Acc: 92.89%
	Valid Loss: 0.243 | Valid Acc: 91.51%
	Model parameters saved to bert.pt


### Load best model to evaluate

In [8]:
# Restore the parameters written to `bert_filename` during training before
# scoring the test set.
# NOTE(review): whether that file holds the best epoch or simply the most
# recent one depends on SentimentLearner.train -- confirm.
learner.load_model_params(bert_filename)

In [9]:
%%time
# Score the restored model on the test loader; evaluate() yields a
# (loss, accuracy) pair.
test_loss, test_acc = learner.evaluate(test_loader)
# Accuracy is multiplied by 100 for display -- presumably evaluate() returns
# it as a fraction in [0, 1]; confirm against the learner.
print(f'\t Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')

	 Test Loss: 0.203 | Test Acc: 91.54%
CPU times: user 3.37 s, sys: 2.04 s, total: 5.41 s
Wall time: 5.41 s
