In [1]:
import numpy as np
import csv
import torch

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/data/rgur/courses/cs_7643_deep_learning/hw4/')

# Just run this block. Please do not modify the following code.
import math
import time

# Pytorch package
import torch
import torch.nn as nn
import torch.optim as optim

# Torchtest package
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator, Example, TabularDataset, interleave_keys

# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm

# Code provide to you for training and evaluation
from hw4_code.utils import train, evaluate, set_seed_nb, unit_test_values

# Check device availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("You are using device: %s" % device)

You are using device: cuda


# En -> Ro

In [2]:
# You don't need to modify any code in this block

# Define the maximum length of the sentence. Shorter sentences will be padded to that length and longer sentences will be croped. Given that the average length of the sentence in the corpus is around 13, we can set it to 20
MAX_LEN = 20

# Define the source and target language
SRC = Field(tokenize = "spacy",
            tokenizer_language="en",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)

TRG = Field(tokenize = "spacy",
            tokenizer_language="ro_core_news_sm",
            #tokenizer_language="de",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)



In [11]:
train_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/train.roen.df.short.tsv',format='TSV',fields={'translation':('src',SRC),
'original':('trg',TRG)})

val_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/dev.roen.df.short.tsv',format='TSV',fields={'translation':('src',SRC),
'original':('trg',TRG)})

In [12]:
# Define Batchsize
BATCH_SIZE = 128

# Build the vocabulary associated with each language
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

# Get the padding index to be ignored later in loss calculation
PAD_IDX = TRG.vocab.stoi['<pad>']

In [13]:
# Get data-loaders using BucketIterator
train_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    train_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

val_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    val_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

# Get the input and the output sizes for model
input_size = len(SRC.vocab)
output_size = len(TRG.vocab)

In [14]:
input_size, output_size

(6477, 9549)

In [15]:
from hw4_code.models.Transformer import TransformerTranslator

In [16]:
# Hyperparameters
learning_rate = .001
EPOCHS = 250

# Model
trans_model = TransformerTranslator(input_size, output_size, device, max_length = MAX_LEN).to(device)

# optimizer = optim.Adam(model.parameters(), lr = learning_rate)
optimizer = torch.optim.Adam(trans_model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [17]:
best_avg_val_loss = np.inf
for epoch_idx in range(EPOCHS):
    print("-----------------------------------")
    print("Epoch %d" % (epoch_idx+1))
    print("-----------------------------------")
    
    train_loss, avg_train_loss = train(trans_model, train_loader, optimizer, criterion)
    scheduler.step(train_loss)

    val_loss, avg_val_loss = evaluate(trans_model, val_loader, criterion)
    avg_train_loss = avg_train_loss.item()
    avg_val_loss = avg_val_loss.item()
    print("Training Loss: %.4f. Validation Loss: %.4f. " % (avg_train_loss, avg_val_loss))
    print("Training Perplexity: %.4f. Validation Perplexity: %.4f. " % (np.exp(avg_train_loss), np.exp(avg_val_loss)))

    if avg_val_loss < best_avg_val_loss:
        best_avg_val_loss = avg_val_loss
        torch.save(trans_model.state_dict(), 'en_ro.pt')
        print('Best Model Saved')

-----------------------------------
Epoch 1
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]



  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 6.3294. Validation Loss: 5.1448. 
Training Perplexity: 560.8393. Validation Perplexity: 171.5452. 
Best Model Saved
-----------------------------------
Epoch 2
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 5.4818. Validation Loss: 5.0411. 
Training Perplexity: 240.2684. Validation Perplexity: 154.6373. 
Best Model Saved
-----------------------------------
Epoch 3
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 5.2412. Validation Loss: 4.9325. 
Training Perplexity: 188.8956. Validation Perplexity: 138.7287. 
Best Model Saved
-----------------------------------
Epoch 4
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.9912. Validation Loss: 4.8188. 
Training Perplexity: 147.1119. Validation Perplexity: 123.8215. 
Best Model Saved
-----------------------------------
Epoch 5
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.7313. Validation Loss: 4.8636. 
Training Perplexity: 113.4393. Validation Perplexity: 129.4906. 
-----------------------------------
Epoch 6
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.4602. Validation Loss: 4.7377. 
Training Perplexity: 86.5046. Validation Perplexity: 114.1733. 
Best Model Saved
-----------------------------------
Epoch 7
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.2039. Validation Loss: 4.7403. 
Training Perplexity: 66.9489. Validation Perplexity: 114.4674. 
-----------------------------------
Epoch 8
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.9582. Validation Loss: 4.7206. 
Training Perplexity: 52.3608. Validation Perplexity: 112.2347. 
Best Model Saved
-----------------------------------
Epoch 9
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.7377. Validation Loss: 4.7994. 
Training Perplexity: 42.0029. Validation Perplexity: 121.4348. 
-----------------------------------
Epoch 10
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.5332. Validation Loss: 4.8081. 
Training Perplexity: 34.2334. Validation Perplexity: 122.4937. 
-----------------------------------
Epoch 11
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.3366. Validation Loss: 4.7575. 
Training Perplexity: 28.1229. Validation Perplexity: 116.4544. 
-----------------------------------
Epoch 12
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.1715. Validation Loss: 4.8655. 
Training Perplexity: 23.8423. Validation Perplexity: 129.7311. 
-----------------------------------
Epoch 13
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.9719. Validation Loss: 4.8649. 
Training Perplexity: 19.5287. Validation Perplexity: 129.6574. 
-----------------------------------
Epoch 14
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.7656. Validation Loss: 4.9214. 
Training Perplexity: 15.8884. Validation Perplexity: 137.2012. 
-----------------------------------
Epoch 15
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.6214. Validation Loss: 5.0989. 
Training Perplexity: 13.7550. Validation Perplexity: 163.8413. 
-----------------------------------
Epoch 16
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.4926. Validation Loss: 4.9682. 
Training Perplexity: 12.0933. Validation Perplexity: 143.7731. 
-----------------------------------
Epoch 17
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.3636. Validation Loss: 5.0725. 
Training Perplexity: 10.6291. Validation Perplexity: 159.5801. 
-----------------------------------
Epoch 18
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.2390. Validation Loss: 5.0871. 
Training Perplexity: 9.3836. Validation Perplexity: 161.9193. 
-----------------------------------
Epoch 19
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.1688. Validation Loss: 5.1670. 
Training Perplexity: 8.7475. Validation Perplexity: 175.3949. 
-----------------------------------
Epoch 20
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.9693. Validation Loss: 5.2146. 
Training Perplexity: 7.1656. Validation Perplexity: 183.9296. 
-----------------------------------
Epoch 21
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.8608. Validation Loss: 5.2788. 
Training Perplexity: 6.4289. Validation Perplexity: 196.1285. 
-----------------------------------
Epoch 22
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.7589. Validation Loss: 5.4169. 
Training Perplexity: 5.8062. Validation Perplexity: 225.1873. 
-----------------------------------
Epoch 23
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.6607. Validation Loss: 5.4616. 
Training Perplexity: 5.2630. Validation Perplexity: 235.4764. 
-----------------------------------
Epoch 24
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.5709. Validation Loss: 5.5329. 
Training Perplexity: 4.8110. Validation Perplexity: 252.8869. 
-----------------------------------
Epoch 25
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.4768. Validation Loss: 5.6652. 
Training Perplexity: 4.3791. Validation Perplexity: 288.6471. 
-----------------------------------
Epoch 26
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.4092. Validation Loss: 5.6408. 
Training Perplexity: 4.0928. Validation Perplexity: 281.6858. 
-----------------------------------
Epoch 27
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.3197. Validation Loss: 5.7284. 
Training Perplexity: 3.7424. Validation Perplexity: 307.4762. 
-----------------------------------
Epoch 28
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.2487. Validation Loss: 5.8631. 
Training Perplexity: 3.4859. Validation Perplexity: 351.8134. 
-----------------------------------
Epoch 29
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.1558. Validation Loss: 6.0547. 
Training Perplexity: 3.1765. Validation Perplexity: 426.1297. 
-----------------------------------
Epoch 30
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.0851. Validation Loss: 6.0434. 
Training Perplexity: 2.9596. Validation Perplexity: 421.3349. 
-----------------------------------
Epoch 31
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.0132. Validation Loss: 6.2311. 
Training Perplexity: 2.7543. Validation Perplexity: 508.2986. 
-----------------------------------
Epoch 32
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.9466. Validation Loss: 6.1360. 
Training Perplexity: 2.5769. Validation Perplexity: 462.2060. 
-----------------------------------
Epoch 33
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.8834. Validation Loss: 6.3617. 
Training Perplexity: 2.4191. Validation Perplexity: 579.2518. 
-----------------------------------
Epoch 34
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.8526. Validation Loss: 6.4666. 
Training Perplexity: 2.3458. Validation Perplexity: 643.2621. 
-----------------------------------
Epoch 35
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.8079. Validation Loss: 6.4398. 
Training Perplexity: 2.2433. Validation Perplexity: 626.2742. 
-----------------------------------
Epoch 36
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.7365. Validation Loss: 6.6477. 
Training Perplexity: 2.0886. Validation Perplexity: 770.9978. 
-----------------------------------
Epoch 37
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.6298. Validation Loss: 6.7586. 
Training Perplexity: 1.8773. Validation Perplexity: 861.4224. 
-----------------------------------
Epoch 38
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.5725. Validation Loss: 7.0998. 
Training Perplexity: 1.7726. Validation Perplexity: 1211.7670. 
-----------------------------------
Epoch 39
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.5348. Validation Loss: 7.0344. 
Training Perplexity: 1.7071. Validation Perplexity: 1134.9872. 
-----------------------------------
Epoch 40
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.5251. Validation Loss: 7.2262. 
Training Perplexity: 1.6907. Validation Perplexity: 1374.9405. 
-----------------------------------
Epoch 41
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.4992. Validation Loss: 7.3300. 
Training Perplexity: 1.6474. Validation Perplexity: 1525.4151. 
-----------------------------------
Epoch 42
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.4602. Validation Loss: 7.4751. 
Training Perplexity: 1.5843. Validation Perplexity: 1763.5495. 
-----------------------------------
Epoch 43
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.4162. Validation Loss: 7.5383. 
Training Perplexity: 1.5162. Validation Perplexity: 1878.6955. 
-----------------------------------
Epoch 44
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3834. Validation Loss: 7.7078. 
Training Perplexity: 1.4673. Validation Perplexity: 2225.5628. 
-----------------------------------
Epoch 45
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3412. Validation Loss: 7.7282. 
Training Perplexity: 1.4067. Validation Perplexity: 2271.5422. 
-----------------------------------
Epoch 46
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3059. Validation Loss: 7.9956. 
Training Perplexity: 1.3578. Validation Perplexity: 2967.8727. 
-----------------------------------
Epoch 47
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2806. Validation Loss: 8.0212. 
Training Perplexity: 1.3239. Validation Perplexity: 3044.7830. 
-----------------------------------
Epoch 48
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2450. Validation Loss: 8.1517. 
Training Perplexity: 1.2777. Validation Perplexity: 3469.1131. 
-----------------------------------
Epoch 49
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2575. Validation Loss: 8.2715. 
Training Perplexity: 1.2937. Validation Perplexity: 3910.8392. 
-----------------------------------
Epoch 50
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2330. Validation Loss: 8.4004. 
Training Perplexity: 1.2623. Validation Perplexity: 4448.7703. 
-----------------------------------
Epoch 51
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2211. Validation Loss: 8.5558. 
Training Perplexity: 1.2474. Validation Perplexity: 5196.6623. 
-----------------------------------
Epoch 52
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2071. Validation Loss: 8.4978. 
Training Perplexity: 1.2301. Validation Perplexity: 4903.9583. 
-----------------------------------
Epoch 53
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2067. Validation Loss: 8.6265. 
Training Perplexity: 1.2296. Validation Perplexity: 5577.2469. 
-----------------------------------
Epoch 54
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1972. Validation Loss: 8.7075. 
Training Perplexity: 1.2180. Validation Perplexity: 6047.8811. 
-----------------------------------
Epoch 55
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1718. Validation Loss: 8.9810. 
Training Perplexity: 1.1874. Validation Perplexity: 7950.2523. 
-----------------------------------
Epoch 56
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1696. Validation Loss: 8.9530. 
Training Perplexity: 1.1848. Validation Perplexity: 7730.9328. 
-----------------------------------
Epoch 57
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1635. Validation Loss: 9.1856. 
Training Perplexity: 1.1776. Validation Perplexity: 9755.5041. 
-----------------------------------
Epoch 58
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1502. Validation Loss: 9.3434. 
Training Perplexity: 1.1620. Validation Perplexity: 11422.7018. 
-----------------------------------
Epoch 59
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1570. Validation Loss: 9.3120. 
Training Perplexity: 1.1700. Validation Perplexity: 11069.9530. 
-----------------------------------
Epoch 60
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1531. Validation Loss: 9.5330. 
Training Perplexity: 1.1655. Validation Perplexity: 13808.1240. 
-----------------------------------
Epoch 61
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1396. Validation Loss: 9.5185. 
Training Perplexity: 1.1499. Validation Perplexity: 13609.1477. 
-----------------------------------
Epoch 62
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1335. Validation Loss: 9.6027. 
Training Perplexity: 1.1428. Validation Perplexity: 14805.0144. 
-----------------------------------
Epoch 63
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1339. Validation Loss: 9.6222. 
Training Perplexity: 1.1433. Validation Perplexity: 15096.7868. 
-----------------------------------
Epoch 64
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1317. Validation Loss: 9.7290. 
Training Perplexity: 1.1408. Validation Perplexity: 16798.1642. 
-----------------------------------
Epoch 65
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1353. Validation Loss: 9.8066. 
Training Perplexity: 1.1449. Validation Perplexity: 18153.1197. 
-----------------------------------
Epoch 66
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1261. Validation Loss: 9.9172. 
Training Perplexity: 1.1344. Validation Perplexity: 20275.7723. 
-----------------------------------
Epoch 67
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1240. Validation Loss: 9.9088. 
Training Perplexity: 1.1320. Validation Perplexity: 20106.2657. 
-----------------------------------
Epoch 68
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1179. Validation Loss: 9.9565. 
Training Perplexity: 1.1251. Validation Perplexity: 21088.2532. 
-----------------------------------
Epoch 69
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

KeyboardInterrupt: 