In [1]:
import os

# Step up to the parent directory (presumably the repository root that holds
# the `src` package) only when `src` is not already visible from the current
# working directory. An unconditional os.chdir('..') climbs one more level
# every time this cell is re-run, which can break the `src` imports.
if not os.path.isdir('src'):
    os.chdir('..')

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import datetime

import torch

import torch.nn as nn

from src.consts import *
from src.main import main, setup_torch, get_corpus
from src.model import RNNModel
from src.training import train, evaluate
from src.split_cross_entropy_loss import SplitCrossEntropyLoss
from src.utils import summary, check_cuda_mem, get_latest_model_file

ModuleNotFoundError: No module named 'src'

In [3]:
# Notebook-local switch for data parallelism: when truthy, the model is wrapped
# in CustomDataParallel even if the USE_DATA_PARALLELIZATION constant is off.
# (The spelling of the name is kept as-is because a later cell reads it.)
use_data_paralellization = False

In [4]:
# Project-level torch setup, device selection and corpus loading.
setup_torch()
device_name = "cuda" if USE_CUDA else "cpu"
device = torch.device(device_name)
corpus = get_corpus()
ntokens = len(corpus.dictionary)

# TODO remove these two lines
assert ntokens == 602755
assert corpus.valid.size()[0] == 11606861
# The largest token id in every split must be a valid dictionary index.
for split_ids in (corpus.train, corpus.valid, corpus.test):
    assert split_ids.max() < ntokens

# code below is not updated, should get updated version on main.py
model = RNNModel(
    MODEL_TYPE,
    ntokens,
    EMBEDDINGS_SIZE,
    HIDDEN_UNIT_COUNT,
    LAYER_COUNT,
    DROPOUT_PROB,
    TIED,
)
wants_data_parallel = use_data_paralellization or USE_DATA_PARALLELIZATION
if not wants_data_parallel:
    model.to(device)
else:
    # NOTE(review): CustomDataParallel is not imported by any cell visible in
    # this notebook; unless `from src.consts import *` happens to export it,
    # this branch raises NameError -- confirm where it should come from.
    model = CustomDataParallel(model)
criterion = nn.CrossEntropyLoss()

# Print the model summary (architecture and parameter shapes are shown in the
# cell output).
summary(model, criterion)

RNNModel(
  (drop): Dropout(p=0.2)
  (encoder): Embedding(602755, 200)
  (rnn): LSTM(200, 200, num_layers=2, dropout=0.2)
  (decoder): Linear(in_features=200, out_features=602755, bias=True)
)

encoder.weight torch.Size([602755, 200])
rnn.weight_ih_l0 torch.Size([800, 200])
rnn.weight_hh_l0 torch.Size([800, 200])
rnn.bias_ih_l0 torch.Size([800])
rnn.bias_hh_l0 torch.Size([800])
rnn.weight_ih_l1 torch.Size([800, 200])
rnn.weight_hh_l1 torch.Size([800, 200])
rnn.bias_ih_l1 torch.Size([800])
rnn.bias_hh_l1 torch.Size([800])
decoder.weight torch.Size([602755, 200])
decoder.bias torch.Size([602755])

Total Parameters: 121,796,955


In [None]:
# Start training `model` on `corpus` with the configured criterion and device.
# Progress (epoch, batch, lr, ms/batch, loss, ppl) is reported through the
# logger, as the captured cell output shows.
train(model, corpus, criterion, device)

INFO 2019-05-24 17:20:20,605: | epoch   1 |   200/23759 batches | lr 20.00 | ms/batch 168.17 | loss  9.71 | ppl 16518.85
INFO 2019-05-24 17:20:54,223: | epoch   1 |   400/23759 batches | lr 20.00 | ms/batch 168.09 | loss  8.27 | ppl  3900.39
INFO 2019-05-24 17:21:28,045: | epoch   1 |   600/23759 batches | lr 20.00 | ms/batch 169.11 | loss  7.50 | ppl  1814.86
INFO 2019-05-24 17:22:01,898: | epoch   1 |   800/23759 batches | lr 20.00 | ms/batch 169.26 | loss  7.18 | ppl  1311.43
INFO 2019-05-24 17:22:35,704: | epoch   1 |  1000/23759 batches | lr 20.00 | ms/batch 169.03 | loss  6.97 | ppl  1066.40
INFO 2019-05-24 17:23:09,617: | epoch   1 |  1200/23759 batches | lr 20.00 | ms/batch 169.56 | loss  6.74 | ppl   841.74
INFO 2019-05-24 17:23:43,440: | epoch   1 |  1400/23759 batches | lr 20.00 | ms/batch 169.11 | loss  6.60 | ppl   735.89
INFO 2019-05-24 17:24:17,229: | epoch   1 |  1600/23759 batches | lr 20.00 | ms/batch 168.94 | loss  6.48 | ppl   653.93
INFO 2019-05-24 17:24:51,161: | 

In [6]:
# timestamp = datetime.datetime.now()
# with open(MODEL_FILE_NAME.format(timestamp), 'wb') as f:
#     torch.save(model, f)

In [6]:
# with open(MODEL_FILE_NAME.format(timestamp), 'rb') as f:
with open('models/trained_models/model-2019-05-24 17:19:46.971655.pt', 'rb') as f:
    # map_location pins the restored tensors to `device`, so a checkpoint
    # written on a GPU still loads on a CPU-only host and the model ends up on
    # the same device that is later passed to evaluate().
    # NOTE: torch.load unpickles arbitrary Python objects -- only load
    # checkpoint files from a trusted source.
    model = torch.load(f, map_location=device)

# After loading, the RNN parameters are no longer one contiguous chunk of
# memory; flatten_parameters() compacts them again, which speeds up the
# forward pass.
model.rnn.flatten_parameters()

In [7]:
# Evaluate the loaded model with use_test_data=True. The returned value is
# displayed as the cell output (presumably the mean loss -- a later cell
# exponentiates it to report test perplexity).
evaluate(model, corpus, criterion, device, use_test_data=True)

INFO 2019-05-27 10:57:05,109: -----------------------------------------------------------------------------------------
INFO 2019-05-27 10:57:05,110: Running eval
INFO 2019-05-27 10:57:05,110: -----------------------------------------------------------------------------------------
INFO 2019-05-27 10:57:31,470: |  1000/42211 batches | loss 175.47
INFO 2019-05-27 10:57:57,898: |  2000/42211 batches | loss 175.93
INFO 2019-05-27 10:58:24,412: |  3000/42211 batches | loss 176.24
INFO 2019-05-27 10:58:50,994: |  4000/42211 batches | loss 176.12
INFO 2019-05-27 10:59:17,711: |  5000/42211 batches | loss 175.86
INFO 2019-05-27 10:59:44,515: |  6000/42211 batches | loss 175.90
INFO 2019-05-27 11:00:11,298: |  7000/42211 batches | loss 175.92
INFO 2019-05-27 11:00:38,091: |  8000/42211 batches | loss 176.16
INFO 2019-05-27 11:01:04,898: |  9000/42211 batches | loss 176.03
INFO 2019-05-27 11:01:31,771: | 10000/42211 batches | loss 176.04
INFO 2019-05-27 11:01:58,692: | 11000/42211 batches | los

5.038112595037704

In [8]:
import math

# Loss value copied from the output of the test-set evaluation cell;
# perplexity is its exponential.
test_loss = 5.038112595037704
print('Test perplexity:')
math.exp(test_loss)

Test perplexity:


154.1787425140638

In [5]:
# Restore the checkpoint whose path get_latest_model_file() returns.
model_file_name = get_latest_model_file()
with open(model_file_name, 'rb') as f:
    # map_location pins the restored tensors to `device`, so a checkpoint
    # written on a GPU still loads on a CPU-only host and the model ends up on
    # the same device that is later passed to evaluate().
    # NOTE: torch.load unpickles arbitrary Python objects -- only load
    # checkpoint files from a trusted source.
    model = torch.load(f, map_location=device)

# Re-compact the RNN weights after loading.
model.rnn.flatten_parameters()



In [6]:
# Evaluate with use_test_data=False. The returned value is displayed as the
# cell output; a later cell reports its exponential as validation perplexity.
evaluate(model, corpus, criterion, device, use_test_data=False)

INFO 2019-06-17 14:23:18,863: -----------------------------------------------------------------------------------------
INFO 2019-06-17 14:23:18,864: Running eval
INFO 2019-06-17 14:23:18,864: -----------------------------------------------------------------------------------------
33163it [15:50, 34.89it/s]


4.516977660576491

In [9]:
import math

# Loss value copied from the output of the validation evaluation cell;
# perplexity is its exponential.
valid_loss = 4.516977660576491
print('Validation perplexity:')
math.exp(valid_loss)

Validation perplexity:


91.55845864543299

In [16]:
# Evaluate with use_train_data=True. The returned value is displayed as the
# cell output; a later cell reports its exponential as training perplexity.
evaluate(model, corpus, criterion, device, use_train_data=True)

INFO 2019-06-17 14:48:39,366: -----------------------------------------------------------------------------------------
INFO 2019-06-17 14:48:39,366: Running eval
INFO 2019-06-17 14:48:39,367: -----------------------------------------------------------------------------------------
23760it [21:39, 18.28it/s]


3.921935869550003

In [18]:
import math

# Loss value copied from the output of the training-set evaluation cell;
# perplexity is its exponential.
train_loss = 3.921935869550003
print('Training perplexity:')
math.exp(train_loss)

Training perplexity:


50.498107965513185