In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import FullVocabModel
from utils import count_params
from full_vocab_utils import train_model, evaluate, show_mistakes

# Seed PyTorch's default random number generator so that a fresh
# "Restart & Run All" repeats the same torch-driven randomness.
# The value is named so it can be found and changed in one place.
# NOTE(review): only torch is seeded here; if the data pipeline draws from
# `random` or NumPy, those need their own seeds — confirm in data.py.
SEED = 41
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f45b1459180>

## Hyperparameters

In [2]:
from torch.optim import Adam

# --- Data ---
BATCH_SIZE = 64                          # passed as batch_size to every get_dataloader call

# --- Model (forwarded to the FullVocabModel constructor) ---
EMBEDDING_DIM, HIDDEN_SIZE = 200, 200
EMBED_DROPOUT, RNN_DROPOUT = 0.5, 0.5

# --- Optimisation (forwarded to train_model) ---
OPTIM = Adam                             # the optimiser class itself, not an instance
LR = 0.001
EPOCHS = 10

## Load data

In [3]:
# Path template for the dataset files; '{}' is replaced by the split name.
filename = 'bobsue.prevsent.{}.tsv'

# A single Vocabulary instance is handed to all three loaders.
vocab = Vocabulary()

# Build the loaders in train -> dev -> test order. The order is kept on
# purpose in case get_dataloader updates `vocab` as it reads each file
# (not visible from this notebook).
train_loader, val_loader, test_loader = [
    get_dataloader(filename.format(split), vocab, batch_size=BATCH_SIZE)
    for split in ('train', 'dev', 'test')
]

# Log loss training w/o context

## Load model

In [4]:
# Constructor settings for the model that is trained with read_prev=False.
# All values come from the hyperparameter cell and the shared vocabulary.
wo_context_config = {
    'vocab_size': len(vocab),
    'embedding_dim': EMBEDDING_DIM,
    'hidden_size': HIDDEN_SIZE,
    'padding_idx': vocab.pad_idx,
    'embed_dropout': EMBED_DROPOUT,
    'rnn_dropout': RNN_DROPOUT,
}
model_wo_context = FullVocabModel(**wo_context_config)

# Last expression of the cell: its value is displayed as the cell output.
count_params(model_wo_context)

922699

## Train model

In [5]:
# Training run for the model without context (read_prev=False).
# The training log reports the parameters being saved to `filename`;
# the same file is read back in the evaluation section.
wo_context_run = {
    'model': model_wo_context,
    'filename': 'wo_context.pt',
    'train_loader': train_loader,
    'val_loader': val_loader,
    'optim': OPTIM,
    'lr': LR,
    'epochs': EPOCHS,
    'read_prev': False,
}

# The return value is kept so it can be passed to show_mistakes afterwards.
mistakes_wo_context = train_model(**wo_context_run)

Epoch:  1
	 Wall Time:  2.043 s
	Train Loss:  4.461 | Train Acc:  35.26%
	 Wall Time:  0.198 s
	 Val. Loss:  3.462 |  Val. Acc:  40.07%
	Model parameters saved to wo_context.pt
Epoch:  2
	 Wall Time:  2.131 s
	Train Loss:  3.190 | Train Acc:  45.92%
	 Wall Time:  0.197 s
	 Val. Loss:  2.957 |  Val. Acc:  51.36%
	Model parameters saved to wo_context.pt
Epoch:  3
	 Wall Time:  2.041 s
	Train Loss:  2.883 | Train Acc:  51.51%
	 Wall Time:  0.190 s
	 Val. Loss:  2.796 |  Val. Acc:  51.72%
	Model parameters saved to wo_context.pt
Epoch:  4
	 Wall Time:  2.027 s
	Train Loss:  2.759 | Train Acc:  51.99%
	 Wall Time:  0.206 s
	 Val. Loss:  2.632 |  Val. Acc:  53.39%
	Model parameters saved to wo_context.pt
Epoch:  5
	 Wall Time:  2.019 s
	Train Loss:  2.648 | Train Acc:  52.96%
	 Wall Time:  0.203 s
	 Val. Loss:  2.564 |  Val. Acc:  53.87%
	Model parameters saved to wo_context.pt
Epoch:  6
	 Wall Time:  2.054 s
	Train Loss:  2.574 | Train Acc:  53.44%
	 Wall Time:  0.191 s
	 Val. Loss:  2.497 

## Show top mistakes

In [6]:
# Tabulate prediction / ground-truth pairs returned by the no-context
# training run, limited by top=35 (presumably the most frequent
# confusions — confirm against show_mistakes in full_vocab_utils).
show_mistakes(mistakes_wo_context, vocab, top=35)

Unnamed: 0,prediction,ground truth
0,Bob,He
1,Bob,Sue
2,Bob,She
3,the,his
4,.,and
5,the,a
6,was,had
7,.,to
8,he,she
9,the,her


# Log loss training w/ context

## Load model

In [7]:
# Constructor settings for the model that is trained with read_prev=True.
# The architecture and sizes are identical to the no-context model; only
# the training/evaluation calls differ (via read_prev).
w_context_config = {
    'vocab_size': len(vocab),
    'embedding_dim': EMBEDDING_DIM,
    'hidden_size': HIDDEN_SIZE,
    'padding_idx': vocab.pad_idx,
    'embed_dropout': EMBED_DROPOUT,
    'rnn_dropout': RNN_DROPOUT,
}
model_w_context = FullVocabModel(**w_context_config)

# Last expression of the cell: its value is displayed as the cell output.
count_params(model_w_context)

922699

## Train model

In [8]:
# Training run for the model with context (read_prev=True).
# The training log reports the parameters being saved to `filename`;
# the same file is read back in the evaluation section.
w_context_run = {
    'model': model_w_context,
    'filename': 'w_context.pt',
    'train_loader': train_loader,
    'val_loader': val_loader,
    'optim': OPTIM,
    'lr': LR,
    'epochs': EPOCHS,
    'read_prev': True,
}

# The return value is kept so it can be passed to show_mistakes afterwards.
mistakes_w_context = train_model(**w_context_run)

Epoch:  1
	 Wall Time:  2.510 s
	Train Loss:  4.499 | Train Acc:  36.57%
	 Wall Time:  0.235 s
	 Val. Loss:  3.717 |  Val. Acc:  36.96%
	Model parameters saved to w_context.pt
Epoch:  2
	 Wall Time:  2.479 s
	Train Loss:  3.308 | Train Acc:  45.08%
	 Wall Time:  0.238 s
	 Val. Loss:  3.023 |  Val. Acc:  49.76%
	Model parameters saved to w_context.pt
Epoch:  3
	 Wall Time:  2.437 s
	Train Loss:  2.925 | Train Acc:  51.08%
	 Wall Time:  0.215 s
	 Val. Loss:  2.768 |  Val. Acc:  52.45%
	Model parameters saved to w_context.pt
Epoch:  4
	 Wall Time:  2.507 s
	Train Loss:  2.748 | Train Acc:  52.45%
	 Wall Time:  0.227 s
	 Val. Loss:  2.638 |  Val. Acc:  53.18%
	Model parameters saved to w_context.pt
Epoch:  5
	 Wall Time:  2.470 s
	Train Loss:  2.652 | Train Acc:  53.08%
	 Wall Time:  0.218 s
	 Val. Loss:  2.545 |  Val. Acc:  54.06%
	Model parameters saved to w_context.pt
Epoch:  6
	 Wall Time:  2.459 s
	Train Loss:  2.556 | Train Acc:  53.96%
	 Wall Time:  0.233 s
	 Val. Loss:  2.490 |  Va

## Show top mistakes

In [9]:
# Tabulate prediction / ground-truth pairs returned by the with-context
# training run, limited by top=35 (presumably the most frequent
# confusions — confirm against show_mistakes in full_vocab_utils).
show_mistakes(mistakes_w_context, vocab, top=35)

Unnamed: 0,prediction,ground truth
0,He,Bob
1,He,She
2,He,Sue
3,.,and
4,the,his
5,was,had
6,the,her
7,the,a
8,.,to
9,he,she


# Evaluate trained models

## Load models

In [10]:
# Restore the parameters that the training runs wrote to disk.
# map_location='cpu' makes the checkpoints deserialise regardless of the
# device they were saved from (e.g. a GPU that is not present now);
# load_state_dict then copies the values into each model's existing
# parameters, so the models stay on whatever device they already occupy.
model_wo_context.load_state_dict(torch.load('wo_context.pt', map_location='cpu'))
# Last expression of the cell: its return value is shown as the output.
model_w_context.load_state_dict(torch.load('w_context.pt', map_location='cpu'))

<All keys matched successfully>

## Evaluate models on test set

In [11]:
from torch import nn

# Loss used only for reporting on the test set.
# NOTE(review): this is CrossEntropyLoss with default arguments, so no
# ignore_index is set for the padding token here — confirm this matches the
# loss train_model uses, otherwise test and validation losses are not
# directly comparable.
loss_fn = nn.CrossEntropyLoss()

# Each model is evaluated with the same read_prev flag it was trained with.
# evaluate returns three values; the third is not needed here.
print('Model trained w/o context')
test_loss_wo_context, test_acc_wo_context, _ = evaluate(
    model_wo_context,
    test_loader,
    loss_fn,
    read_prev=False
)
print(f'\t Test Loss: {test_loss_wo_context: .3f} | Test Acc: {test_acc_wo_context * 100: .2f}%')

print('Model trained w/ context')
test_loss_w_context, test_acc_w_context, _ = evaluate(
    model_w_context,
    test_loader,
    loss_fn,
    read_prev=True
)
# Same label as the first report line (leading space after the tab), so the
# two results line up with each other and with the "Wall Time" line.
print(f'\t Test Loss: {test_loss_w_context: .3f} | Test Acc: {test_acc_w_context * 100: .2f}%')

Model trained w/o context
	 Wall Time:  0.211 s
	 Test Loss:  2.329 | Test Acc:  56.31%
Model trained w/ context
	 Wall Time:  0.237 s
	Test Loss:  2.286 | Test Acc:  56.90%
