In [1]:
import torch
from data import Vocabulary, get_dataloader
from models import FullVocabModel
from utils import count_params
from full_vocab_utils import train_model, evaluate, show_mistakes

# Seed PyTorch's default random number generator so every fresh run of the
# notebook starts from the same random state.
SEED = 41
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fc38d0e8180>

## Hyperparameters

In [2]:
from torch.optim import Adam

# --- Data loading ---
BATCH_SIZE = 64        # batch_size given to every get_dataloader(...) call

# --- Model architecture (FullVocabModel constructor arguments) ---
EMBEDDING_DIM = 200    # embedding_dim
HIDDEN_SIZE = 200      # hidden_size

# --- Regularization (FullVocabModel constructor arguments) ---
EMBED_DROPOUT = 0.5    # embed_dropout
RNN_DROPOUT = 0.5      # rnn_dropout

# --- Optimization (train_model arguments) ---
OPTIM = Adam           # optimizer class (not an instance), passed as optim=
LR = 1e-3              # passed as lr=
EPOCHS = 10            # passed as epochs=

## Load data

In [3]:
# Path template for the dataset files; '{}' is filled with the split name.
filename = 'bobsue.prevsent.{}.tsv'

# One Vocabulary instance is shared by all three loaders.
vocab = Vocabulary()

# Build the loaders in train -> dev -> test order (the generator is consumed
# left to right during unpacking, so the call order matches the split order).
# NOTE(review): presumably get_dataloader populates `vocab` as it reads each
# file, which would make this order significant — confirm in data.py.
train_loader, val_loader, test_loader = (
    get_dataloader(filename.format(split), vocab, batch_size=BATCH_SIZE)
    for split in ('train', 'dev', 'test')
)

# Log loss training w/o context

## Build model

In [4]:
# Constructor arguments for the model trained without context. They are the
# same values used for the with-context model later in the notebook; the two
# runs differ only in the read_prev flag given to train_model / evaluate.
wo_context_config = {
    'vocab_size': len(vocab),
    'embedding_dim': EMBEDDING_DIM,
    'hidden_size': HIDDEN_SIZE,
    'padding_idx': vocab.pad_idx,
    'embed_dropout': EMBED_DROPOUT,
    'rnn_dropout': RNN_DROPOUT,
}
model_wo_context = FullVocabModel(**wo_context_config)

# Last expression of the cell: its return value is shown as the cell output.
count_params(model_wo_context)

922699

## Train model

In [5]:
# Train the model for the "w/o context" setting (read_prev=False).
# The value returned by train_model is kept so it can be handed to
# show_mistakes in the next section.
mis_preds_wo_context = train_model(
    model=model_wo_context,
    # data
    train_loader=train_loader,
    val_loader=val_loader,
    # optimization
    optim=OPTIM,
    lr=LR,
    epochs=EPOCHS,
    # setting under test
    read_prev=False,
    # checkpoint path (the log prints "Model parameters saved to <filename>")
    filename='wo_context.pt',
)

Epoch:  1
	Train time: 0m 1.69s
	Train Loss:  4.474 | Train Acc:  35.16%
	 Val. time: 0m 0.173s
	 Val. Loss:  3.479 |  Val. Acc:  40.19%
	Model parameters saved to wo_context.pt
Epoch:  2
	Train time: 0m 1.742s
	Train Loss:  3.197 | Train Acc:  45.66%
	 Val. time: 0m 0.188s
	 Val. Loss:  2.958 |  Val. Acc:  51.37%
	Model parameters saved to wo_context.pt
Epoch:  3
	Train time: 0m 1.744s
	Train Loss:  2.882 | Train Acc:  51.43%
	 Val. time: 0m 0.172s
	 Val. Loss:  2.791 |  Val. Acc:  51.69%
	Model parameters saved to wo_context.pt
Epoch:  4
	Train time: 0m 1.735s
	Train Loss:  2.753 | Train Acc:  52.00%
	 Val. time: 0m 0.184s
	 Val. Loss:  2.630 |  Val. Acc:  53.31%
	Model parameters saved to wo_context.pt
Epoch:  5
	Train time: 0m 1.737s
	Train Loss:  2.645 | Train Acc:  52.93%
	 Val. time: 0m 0.195s
	 Val. Loss:  2.559 |  Val. Acc:  54.00%
	Model parameters saved to wo_context.pt
Epoch:  6
	Train time: 0m 1.724s
	Train Loss:  2.573 | Train Acc:  53.49%
	 Val. time: 0m 0.172s
	 Val. Lo

## Show top mistakes

In [6]:
# Display the most frequent (prediction, ground truth) confusions for the
# w/o-context model, using the value returned by train_model above; `vocab`
# is passed along as well (presumably to map indices back to tokens — confirm
# in full_vocab_utils).
show_mistakes(mis_preds_wo_context, vocab)

Unnamed: 0,prediction,ground truth,count
0,He,Bob,164
1,He,She,96
2,He,Sue,93
3,the,his,70
4,the,a,65
5,.,and,65
6,was,had,58
7,the,her,50
8,.,to,48
9,he,she,47


# Log loss training w/ context

## Build model

In [7]:
# Constructor arguments for the model trained with context. They are the same
# values used for the w/o-context model earlier in the notebook; the two runs
# differ only in the read_prev flag given to train_model / evaluate.
w_context_config = {
    'vocab_size': len(vocab),
    'embedding_dim': EMBEDDING_DIM,
    'hidden_size': HIDDEN_SIZE,
    'padding_idx': vocab.pad_idx,
    'embed_dropout': EMBED_DROPOUT,
    'rnn_dropout': RNN_DROPOUT,
}
model_w_context = FullVocabModel(**w_context_config)

# Last expression of the cell: its return value is shown as the cell output.
count_params(model_w_context)

922699

## Train model

In [8]:
# Train the model for the "w/ context" setting (read_prev=True).
# The value returned by train_model is kept so it can be handed to
# show_mistakes in the next section.
mis_preds_w_context = train_model(
    model=model_w_context,
    # data
    train_loader=train_loader,
    val_loader=val_loader,
    # optimization
    optim=OPTIM,
    lr=LR,
    epochs=EPOCHS,
    # setting under test
    read_prev=True,
    # checkpoint path (the log prints "Model parameters saved to <filename>")
    filename='w_context.pt',
)

Epoch:  1
	Train time: 0m 1.885s
	Train Loss:  4.503 | Train Acc:  36.59%
	 Val. time: 0m 0.194s
	 Val. Loss:  3.746 |  Val. Acc:  36.96%
	Model parameters saved to w_context.pt
Epoch:  2
	Train time: 0m 1.954s
	Train Loss:  3.314 | Train Acc:  45.00%
	 Val. time: 0m 0.195s
	 Val. Loss:  3.038 |  Val. Acc:  49.68%
	Model parameters saved to w_context.pt
Epoch:  3
	Train time: 0m 1.901s
	Train Loss:  2.938 | Train Acc:  51.05%
	 Val. time: 0m 0.181s
	 Val. Loss:  2.785 |  Val. Acc:  52.26%
	Model parameters saved to w_context.pt
Epoch:  4
	Train time: 0m 2.025s
	Train Loss:  2.765 | Train Acc:  52.40%
	 Val. time: 0m 0.183s
	 Val. Loss:  2.652 |  Val. Acc:  53.26%
	Model parameters saved to w_context.pt
Epoch:  5
	Train time: 0m 1.869s
	Train Loss:  2.656 | Train Acc:  53.17%
	 Val. time: 0m 0.191s
	 Val. Loss:  2.548 |  Val. Acc:  54.32%
	Model parameters saved to w_context.pt
Epoch:  6
	Train time: 0m 1.891s
	Train Loss:  2.559 | Train Acc:  53.94%
	 Val. time: 0m 0.183s
	 Val. Loss: 

## Show top mistakes

In [9]:
# Display the most frequent (prediction, ground truth) confusions for the
# w/-context model, using the value returned by train_model above; `vocab`
# is passed along as well (presumably to map indices back to tokens — confirm
# in full_vocab_utils).
show_mistakes(mis_preds_w_context, vocab)

Unnamed: 0,prediction,ground truth,count
0,He,Bob,163
1,He,She,96
2,He,Sue,93
3,.,and,62
4,the,his,61
5,was,had,58
6,the,a,48
7,.,to,47
8,he,she,46
9,to,.,43


# Evaluate trained models

## Load models

In [10]:
# Restore each model from the checkpoint file passed as `filename` to its
# train_model call.
#
# map_location='cpu' makes torch.load deserialize the tensors onto the CPU, so
# a checkpoint saved from a CUDA device can still be read on a CPU-only
# machine. load_state_dict then copies the values into the model's existing
# parameters, so the model stays on whatever device it already occupies.
model_wo_context.load_state_dict(torch.load('wo_context.pt', map_location='cpu'))
model_w_context.load_state_dict(torch.load('w_context.pt', map_location='cpu'))

<All keys matched successfully>

## Evaluate models on test set

In [11]:
from torch import nn
# Loss criterion handed to evaluate() for both models.
# NOTE(review): constructed with default arguments (no ignore_index) — confirm
# that evaluate() and the training loss treat padding positions the same way.
loss_fn = nn.CrossEntropyLoss()

# evaluate() returns a pair; only its second element, a
# (loss, accuracy, mispredictions) triple, is used here.
_, metrics_wo_context = evaluate(model_wo_context, test_loader, loss_fn, read_prev=False)
test_loss_wo_context, test_acc_wo_context, mis_preds_test_wo_context = metrics_wo_context

_, metrics_w_context = evaluate(model_w_context, test_loader, loss_fn, read_prev=True)
test_loss_w_context, test_acc_w_context, mis_preds_test_w_context = metrics_w_context

# Report both results; accuracy is scaled by 100 to print it as a percentage.
print(
    'Model trained w/o context',
    f'\tTest Loss: {test_loss_wo_context: .3f} | Test Acc: {test_acc_wo_context * 100: .2f}%',
    'Model trained w/ context',
    f'\tTest Loss: {test_loss_w_context: .3f} | Test Acc: {test_acc_w_context * 100: .2f}%',
    sep='\n',
)

Model trained w/o context
	Test Loss:  2.324 | Test Acc:  56.20%
Model trained w/ context
	Test Loss:  2.286 | Test Acc:  57.02%
