In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import FullVocabModel
from utils import count_params
from full_vocab_utils import train_model, evaluate, show_mistakes

# Seed PyTorch's global random number generator so that randomness drawn
# through torch is repeatable from run to run. Only torch is seeded in this
# cell; Python's `random` and NumPy are not touched here.
# As the last expression of the cell, the call's return value (a torch
# Generator object) is what the notebook echoes below.
torch.manual_seed(41)

<torch._C.Generator at 0x7f39e2672180>

## Hyperparameters

In [2]:
from torch.optim import Adam

# --- Model architecture ---
EMBEDDING_DIM = 200   # passed to FullVocabModel as embedding_dim
HIDDEN_SIZE = 200     # passed to FullVocabModel as hidden_size

# --- Regularisation ---
EMBED_DROPOUT = 0.5   # passed to FullVocabModel as embed_dropout
RNN_DROPOUT = 0.5     # passed to FullVocabModel as rnn_dropout

# --- Optimisation ---
OPTIM = Adam          # optimizer class (not an instance); handed to train_model
LR = 1e-3             # learning rate handed to train_model
EPOCHS = 10           # number of epochs handed to train_model

# --- Data ---
BATCH_SIZE = 64       # batch size used for the train, dev and test loaders

## Load data

In [3]:
# Filename template: '{}' is filled in with the split name.
filename = 'bobsue.prevsent.{}.tsv'

# A single Vocabulary instance is shared by all three loaders.
vocab = Vocabulary()

# One loader per split, built in train -> dev -> test order. Tuple unpacking
# consumes the generator sequentially, so the calls happen in exactly the
# order they would if written out one per line.
train_loader, val_loader, test_loader = (
    get_dataloader(filename.format(split), vocab, batch_size=BATCH_SIZE)
    for split in ('train', 'dev', 'test')
)

# Log loss training w/o context

## Load model

In [4]:
# Build the model used for the "without context" run. Sizes and dropout rates
# come from the hyperparameter constants defined earlier; the vocabulary
# supplies both the vocabulary size and the padding index.
model_wo_context = FullVocabModel(
    vocab_size=len(vocab),
    embedding_dim=EMBEDDING_DIM,
    hidden_size=HIDDEN_SIZE,
    padding_idx=vocab.pad_idx,
    embed_dropout=EMBED_DROPOUT,
    rnn_dropout=RNN_DROPOUT
)
# Last expression of the cell, so its result (the integer echoed below) is
# displayed. Presumably the model's parameter count -- confirm in
# utils.count_params.
count_params(model_wo_context)

922699

## Train model

In [5]:
# Train the no-context model (read_prev=False) with the optimizer class,
# learning rate and epoch count from the hyperparameter cell. The log printed
# below shows per-epoch train/validation loss and accuracy and reports
# "Model parameters saved to wo_context.pt", i.e. the `filename` given here.
# The return value is kept so it can be passed to show_mistakes() afterwards.
# NOTE(review): read_prev presumably controls whether the previous sentence is
# fed to the model -- confirm in full_vocab_utils.train_model.
mistakes_wo_context = train_model(
    model=model_wo_context,
    filename='wo_context.pt', 
    train_loader=train_loader,
    val_loader=val_loader,
    pad_idx=vocab.pad_idx,
    optim=OPTIM,
    lr=LR,
    epochs=EPOCHS,
    read_prev=False
)

Epoch:  1
	 Wall Time:  1.738 s
	Train Loss:  4.474 | Train Acc:  1.16%
	 Wall Time:  0.187 s
	 Val. Loss:  3.479 |  Val. Acc:  5.13%
	Model parameters saved to wo_context.pt
Epoch:  2
	 Wall Time:  1.772 s
	Train Loss:  3.197 | Train Acc:  13.48%
	 Wall Time:  0.210 s
	 Val. Loss:  2.958 |  Val. Acc:  23.30%
	Model parameters saved to wo_context.pt
Epoch:  3
	 Wall Time:  1.667 s
	Train Loss:  2.882 | Train Acc:  22.64%
	 Wall Time:  0.172 s
	 Val. Loss:  2.791 |  Val. Acc:  24.12%
	Model parameters saved to wo_context.pt
Epoch:  4
	 Wall Time:  1.693 s
	Train Loss:  2.753 | Train Acc:  23.74%
	 Wall Time:  0.171 s
	 Val. Loss:  2.630 |  Val. Acc:  25.02%
	Model parameters saved to wo_context.pt
Epoch:  5
	 Wall Time:  1.677 s
	Train Loss:  2.645 | Train Acc:  24.88%
	 Wall Time:  0.175 s
	 Val. Loss:  2.559 |  Val. Acc:  26.69%
	Model parameters saved to wo_context.pt
Epoch:  6
	 Wall Time:  1.715 s
	Train Loss:  2.573 | Train Acc:  26.17%
	 Wall Time:  0.173 s
	 Val. Loss:  2.496 | 

## Show top mistakes

In [6]:
# Render the mistakes returned by the no-context training run as a table of
# (prediction, ground truth) pairs, using `vocab` alongside the recorded
# mistakes. `top=35` asks for the top 35 entries -- presumably the most
# frequent confusions; confirm the ranking in full_vocab_utils.show_mistakes.
show_mistakes(mistakes_wo_context, vocab, top=35)

Unnamed: 0,prediction,ground truth
0,He,Bob
1,He,She
2,He,Sue
3,the,his
4,the,a
5,.,and
6,was,had
7,the,her
8,.,to
9,he,she


# Log loss training w/ context

## Load model

In [7]:
# Build a second, separate model instance for the "with context" run. The
# constructor arguments are identical to those used for model_wo_context; in
# this notebook the two runs differ in the read_prev flag passed to
# train_model() and evaluate().
model_w_context = FullVocabModel(
    vocab_size=len(vocab),
    embedding_dim=EMBEDDING_DIM,
    hidden_size=HIDDEN_SIZE,
    padding_idx=vocab.pad_idx,
    embed_dropout=EMBED_DROPOUT,
    rnn_dropout=RNN_DROPOUT
)
# Last expression of the cell, so its result (the integer echoed below) is
# displayed. Presumably the model's parameter count -- confirm in
# utils.count_params.
count_params(model_w_context)

922699

## Train model

In [8]:
# Train the with-context model (read_prev=True) on the same loaders and with
# the same optimizer settings as the no-context run. The log printed below
# reports "Model parameters saved to w_context.pt", i.e. the `filename` given
# here. The return value is kept so it can be passed to show_mistakes().
# NOTE(review): read_prev=True presumably makes the model also read the
# previous sentence -- confirm in full_vocab_utils.train_model.
mistakes_w_context = train_model(
    model=model_w_context,
    filename='w_context.pt', 
    train_loader=train_loader,
    val_loader=val_loader,
    pad_idx=vocab.pad_idx,
    optim=OPTIM,
    lr=LR,
    epochs=EPOCHS,
    read_prev=True
)

Epoch:  1
	 Wall Time:  2.013 s
	Train Loss:  4.503 | Train Acc:  0.07%
	 Wall Time:  0.214 s
	 Val. Loss:  3.746 |  Val. Acc:  0.00%
	Model parameters saved to w_context.pt
Epoch:  2
	 Wall Time:  1.806 s
	Train Loss:  3.314 | Train Acc:  12.56%
	 Wall Time:  0.174 s
	 Val. Loss:  3.038 |  Val. Acc:  20.45%
	Model parameters saved to w_context.pt
Epoch:  3
	 Wall Time:  1.926 s
	Train Loss:  2.938 | Train Acc:  21.89%
	 Wall Time:  0.205 s
	 Val. Loss:  2.785 |  Val. Acc:  23.88%
	Model parameters saved to w_context.pt
Epoch:  4
	 Wall Time:  1.861 s
	Train Loss:  2.765 | Train Acc:  24.01%
	 Wall Time:  0.196 s
	 Val. Loss:  2.652 |  Val. Acc:  25.47%
	Model parameters saved to w_context.pt
Epoch:  5
	 Wall Time:  1.883 s
	Train Loss:  2.656 | Train Acc:  25.58%
	 Wall Time:  0.179 s
	 Val. Loss:  2.548 |  Val. Acc:  27.17%
	Model parameters saved to w_context.pt
Epoch:  6
	 Wall Time:  1.988 s
	Train Loss:  2.559 | Train Acc:  26.67%
	 Wall Time:  0.187 s
	 Val. Loss:  2.493 |  Val.

## Show top mistakes

In [9]:
# Render the mistakes returned by the with-context training run as a table of
# (prediction, ground truth) pairs, using `vocab` alongside the recorded
# mistakes. `top=35` asks for the top 35 entries -- presumably the most
# frequent confusions; confirm the ranking in full_vocab_utils.show_mistakes.
show_mistakes(mistakes_w_context, vocab, top=35)

Unnamed: 0,prediction,ground truth
0,He,Bob
1,He,She
2,He,Sue
3,.,and
4,the,his
5,was,had
6,the,a
7,.,to
8,he,she
9,to,.


# Evaluate trained models

## Load models

In [10]:
# Restore each model from the checkpoint file written during its training run,
# so the test-set evaluation below uses the saved parameters rather than
# whatever state the in-memory model was left in when training finished.
#
# map_location='cpu' makes torch.load deserialize every tensor onto the CPU no
# matter which device it was saved from. load_state_dict then copies the values
# into the model's existing parameters (on whatever device those live), so the
# restored weights are the same, but a checkpoint written on a GPU machine can
# now also be loaded on a CPU-only one instead of failing during
# deserialization.
model_wo_context.load_state_dict(torch.load('wo_context.pt', map_location='cpu'))
# Last expression of the cell: its return value (the key-matching report) is
# what the notebook echoes below.
model_w_context.load_state_dict(torch.load('w_context.pt', map_location='cpu'))

<All keys matched successfully>

## Evaluate models on test set

In [11]:
from torch import nn
# Loss criterion handed to evaluate() for both models.
# NOTE(review): created with default arguments (no ignore_index); pad_idx is
# passed to evaluate() separately -- confirm how padding positions are treated
# inside full_vocab_utils.evaluate.
loss_fn = nn.CrossEntropyLoss()

# Evaluate the no-context model on the test loader. evaluate() returns three
# values; only the first two (loss, accuracy) are kept. The "Wall Time" line
# in the output is not printed by this cell's own code, so it is emitted from
# inside evaluate(), between the header line and the results line.
print('Model trained w/o context')
test_loss_wo_context, test_acc_wo_context, _ = evaluate(
    model_wo_context,
    test_loader,
    loss_fn,
    pad_idx=vocab.pad_idx,
    read_prev=False
)
# Accuracy is multiplied by 100 so it is displayed as a percentage.
print(f'\t Test Loss: {test_loss_wo_context: .3f} | Test Acc: {test_acc_wo_context * 100: .2f}%')

# Same evaluation for the with-context model; read_prev=True matches the flag
# that model was trained with.
print('Model trained w/ context')
test_loss_w_context, test_acc_w_context, _ = evaluate(
    model_w_context,
    test_loader,
    loss_fn,
    pad_idx=vocab.pad_idx,
    read_prev=True
)
# Accuracy is multiplied by 100 so it is displayed as a percentage.
print(f'\t Test Loss: {test_loss_w_context: .3f} | Test Acc: {test_acc_w_context * 100: .2f}%')

Model trained w/o context
	 Wall Time:  0.179 s
	 Test Loss:  2.324 | Test Acc:  29.73%
Model trained w/ context
	 Wall Time:  0.190 s
	 Test Loss:  2.286 | Test Acc:  30.02%
