In [1]:
import numpy as np
import csv
import torch

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/data/rgur/courses/cs_7643_deep_learning/hw4/')

# Just run this block. Please do not modify the following code.
import math
import time

# Pytorch package
import torch
import torch.nn as nn
import torch.optim as optim

# Torchtest package
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator, Example, TabularDataset, interleave_keys

# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm

# Code provide to you for training and evaluation
from hw4_code.utils import train, evaluate, set_seed_nb, unit_test_values

# Check device availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("You are using device: %s" % device)

You are using device: cuda


# Ro -> En

In [2]:
# You don't need to modify any code in this block

# Define the maximum length of the sentence. Shorter sentences will be padded to that length and longer sentences will be croped. Given that the average length of the sentence in the corpus is around 13, we can set it to 20
MAX_LEN = 20

# Define the source and target language
SRC = Field(tokenize = "spacy",
            tokenizer_language="ro_core_news_sm",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)

TRG = Field(tokenize = "spacy",
            tokenizer_language="en",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)



In [3]:
train_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/train.roen.df.short.tsv',format='TSV',fields={'original':('src',SRC),
'translation':('trg',TRG)})

val_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/dev.roen.df.short.tsv',format='TSV',fields={'original':('src',SRC),
'translation':('trg',TRG)})



In [4]:
# Define Batchsize
BATCH_SIZE = 128

# Build the vocabulary associated with each language
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

# Get the padding index to be ignored later in loss calculation
PAD_IDX = TRG.vocab.stoi['<pad>']

In [5]:
# Get data-loaders using BucketIterator
train_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    train_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

val_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    val_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

# Get the input and the output sizes for model
input_size = len(SRC.vocab)
output_size = len(TRG.vocab)



In [6]:
input_size, output_size

(9549, 6477)

In [7]:
from hw4_code.models.Transformer import TransformerTranslator

In [8]:
# Hyperparameters
learning_rate = .001
EPOCHS = 250

# Model
trans_model = TransformerTranslator(input_size, output_size, device, max_length = MAX_LEN).to(device)

# optimizer = optim.Adam(model.parameters(), lr = learning_rate)
optimizer = torch.optim.Adam(trans_model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [9]:
best_avg_val_loss = np.inf
for epoch_idx in range(EPOCHS):
    print("-----------------------------------")
    print("Epoch %d" % (epoch_idx+1))
    print("-----------------------------------")
    
    train_loss, avg_train_loss = train(trans_model, train_loader, optimizer, criterion)
    scheduler.step(train_loss)

    val_loss, avg_val_loss = evaluate(trans_model, val_loader, criterion)
    avg_train_loss = avg_train_loss.item()
    avg_val_loss = avg_val_loss.item()
    print("Training Loss: %.4f. Validation Loss: %.4f. " % (avg_train_loss, avg_val_loss))
    print("Training Perplexity: %.4f. Validation Perplexity: %.4f. " % (np.exp(avg_train_loss), np.exp(avg_val_loss)))

    if avg_val_loss < best_avg_val_loss:
        best_avg_val_loss = avg_val_loss
        torch.save(trans_model.state_dict(), 'ro_en.pt')
        print('Best Model Saved')

-----------------------------------
Epoch 1
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]



  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 6.1094. Validation Loss: 5.1847. 
Training Perplexity: 450.0550. Validation Perplexity: 178.5206. 
Best Model Saved
-----------------------------------
Epoch 2
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 5.3185. Validation Loss: 5.0626. 
Training Perplexity: 204.0871. Validation Perplexity: 157.9966. 
Best Model Saved
-----------------------------------
Epoch 3
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 5.1032. Validation Loss: 4.9885. 
Training Perplexity: 164.5528. Validation Perplexity: 146.7177. 
Best Model Saved
-----------------------------------
Epoch 4
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.8874. Validation Loss: 4.9035. 
Training Perplexity: 132.6148. Validation Perplexity: 134.7553. 
Best Model Saved
-----------------------------------
Epoch 5
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.6598. Validation Loss: 4.8588. 
Training Perplexity: 105.6195. Validation Perplexity: 128.8713. 
Best Model Saved
-----------------------------------
Epoch 6
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.4275. Validation Loss: 4.8292. 
Training Perplexity: 83.7210. Validation Perplexity: 125.1090. 
Best Model Saved
-----------------------------------
Epoch 7
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.2001. Validation Loss: 4.8558. 
Training Perplexity: 66.6950. Validation Perplexity: 128.4819. 
-----------------------------------
Epoch 8
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.9884. Validation Loss: 4.8006. 
Training Perplexity: 53.9685. Validation Perplexity: 121.5862. 
Best Model Saved
-----------------------------------
Epoch 9
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.7809. Validation Loss: 4.8829. 
Training Perplexity: 43.8559. Validation Perplexity: 132.0130. 
-----------------------------------
Epoch 10
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.5819. Validation Loss: 4.8501. 
Training Perplexity: 35.9408. Validation Perplexity: 127.7569. 
-----------------------------------
Epoch 11
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.4039. Validation Loss: 4.8925. 
Training Perplexity: 30.0812. Validation Perplexity: 133.2927. 
-----------------------------------
Epoch 12
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.2519. Validation Loss: 4.9338. 
Training Perplexity: 25.8400. Validation Perplexity: 138.9046. 
-----------------------------------
Epoch 13
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.0822. Validation Loss: 4.9455. 
Training Perplexity: 21.8071. Validation Perplexity: 140.5364. 
-----------------------------------
Epoch 14
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.9050. Validation Loss: 5.0513. 
Training Perplexity: 18.2657. Validation Perplexity: 156.2230. 
-----------------------------------
Epoch 15
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.7604. Validation Loss: 5.1188. 
Training Perplexity: 15.8066. Validation Perplexity: 167.1293. 
-----------------------------------
Epoch 16
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.6090. Validation Loss: 5.1467. 
Training Perplexity: 13.5854. Validation Perplexity: 171.8656. 
-----------------------------------
Epoch 17
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.4284. Validation Loss: 5.2224. 
Training Perplexity: 11.3406. Validation Perplexity: 185.3865. 
-----------------------------------
Epoch 18
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.3364. Validation Loss: 5.3329. 
Training Perplexity: 10.3442. Validation Perplexity: 207.0322. 
-----------------------------------
Epoch 19
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.1738. Validation Loss: 5.3445. 
Training Perplexity: 8.7916. Validation Perplexity: 209.4604. 
-----------------------------------
Epoch 20
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.0640. Validation Loss: 5.4773. 
Training Perplexity: 7.8771. Validation Perplexity: 239.2064. 
-----------------------------------
Epoch 21
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.9722. Validation Loss: 5.5480. 
Training Perplexity: 7.1866. Validation Perplexity: 256.7128. 
-----------------------------------
Epoch 22
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.8719. Validation Loss: 5.6652. 
Training Perplexity: 6.5004. Validation Perplexity: 288.6582. 
-----------------------------------
Epoch 23
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.7797. Validation Loss: 5.6382. 
Training Perplexity: 5.9281. Validation Perplexity: 280.9551. 
-----------------------------------
Epoch 24
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.6858. Validation Loss: 5.5888. 
Training Perplexity: 5.3966. Validation Perplexity: 267.4064. 
-----------------------------------
Epoch 25
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.5872. Validation Loss: 5.8548. 
Training Perplexity: 4.8899. Validation Perplexity: 348.9138. 
-----------------------------------
Epoch 26
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.5049. Validation Loss: 5.9487. 
Training Perplexity: 4.5036. Validation Perplexity: 383.2544. 
-----------------------------------
Epoch 27
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.4339. Validation Loss: 6.0053. 
Training Perplexity: 4.1948. Validation Perplexity: 405.5680. 
-----------------------------------
Epoch 28
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.3195. Validation Loss: 6.0241. 
Training Perplexity: 3.7417. Validation Perplexity: 413.2543. 
-----------------------------------
Epoch 29
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.2473. Validation Loss: 6.2751. 
Training Perplexity: 3.4810. Validation Perplexity: 531.1943. 
-----------------------------------
Epoch 30
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.1680. Validation Loss: 6.3137. 
Training Perplexity: 3.2156. Validation Perplexity: 552.0698. 
-----------------------------------
Epoch 31
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.0860. Validation Loss: 6.3148. 
Training Perplexity: 2.9623. Validation Perplexity: 552.6737. 
-----------------------------------
Epoch 32
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.0207. Validation Loss: 6.4479. 
Training Perplexity: 2.7751. Validation Perplexity: 631.3788. 
-----------------------------------
Epoch 33
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.9768. Validation Loss: 6.5113. 
Training Perplexity: 2.6559. Validation Perplexity: 672.6889. 
-----------------------------------
Epoch 34
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.8730. Validation Loss: 6.6660. 
Training Perplexity: 2.3941. Validation Perplexity: 785.2419. 
-----------------------------------
Epoch 35
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.8146. Validation Loss: 6.7563. 
Training Perplexity: 2.2582. Validation Perplexity: 859.4481. 
-----------------------------------
Epoch 36
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.7732. Validation Loss: 6.8393. 
Training Perplexity: 2.1666. Validation Perplexity: 933.8158. 
-----------------------------------
Epoch 37
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.7170. Validation Loss: 7.1590. 
Training Perplexity: 2.0483. Validation Perplexity: 1285.5853. 
-----------------------------------
Epoch 38
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.6693. Validation Loss: 7.1932. 
Training Perplexity: 1.9530. Validation Perplexity: 1330.3841. 
-----------------------------------
Epoch 39
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.6114. Validation Loss: 7.2844. 
Training Perplexity: 1.8430. Validation Perplexity: 1457.4198. 
-----------------------------------
Epoch 40
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.5635. Validation Loss: 7.3785. 
Training Perplexity: 1.7568. Validation Perplexity: 1601.1778. 
-----------------------------------
Epoch 41
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.4792. Validation Loss: 7.5516. 
Training Perplexity: 1.6148. Validation Perplexity: 1903.7718. 
-----------------------------------
Epoch 42
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.4642. Validation Loss: 7.6629. 
Training Perplexity: 1.5907. Validation Perplexity: 2127.9772. 
-----------------------------------
Epoch 43
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3942. Validation Loss: 7.8501. 
Training Perplexity: 1.4832. Validation Perplexity: 2566.1183. 
-----------------------------------
Epoch 44
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3807. Validation Loss: 8.0324. 
Training Perplexity: 1.4634. Validation Perplexity: 3079.1084. 
-----------------------------------
Epoch 45
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3434. Validation Loss: 8.1687. 
Training Perplexity: 1.4097. Validation Perplexity: 3528.8587. 
-----------------------------------
Epoch 46
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.3245. Validation Loss: 8.4166. 
Training Perplexity: 1.3834. Validation Perplexity: 4521.4966. 
-----------------------------------
Epoch 47
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2673. Validation Loss: 8.2931. 
Training Perplexity: 1.3065. Validation Perplexity: 3996.1399. 
-----------------------------------
Epoch 48
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2546. Validation Loss: 8.4336. 
Training Perplexity: 1.2900. Validation Perplexity: 4598.9805. 
-----------------------------------
Epoch 49
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2193. Validation Loss: 8.6043. 
Training Perplexity: 1.2452. Validation Perplexity: 5454.8815. 
-----------------------------------
Epoch 50
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2285. Validation Loss: 8.6995. 
Training Perplexity: 1.2568. Validation Perplexity: 5999.7461. 
-----------------------------------
Epoch 51
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.2092. Validation Loss: 8.8340. 
Training Perplexity: 1.2327. Validation Perplexity: 6863.9853. 
-----------------------------------
Epoch 52
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1991. Validation Loss: 8.9846. 
Training Perplexity: 1.2203. Validation Perplexity: 7979.2377. 
-----------------------------------
Epoch 53
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1768. Validation Loss: 9.1110. 
Training Perplexity: 1.1934. Validation Perplexity: 9054.0517. 
-----------------------------------
Epoch 54
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1672. Validation Loss: 9.2934. 
Training Perplexity: 1.1820. Validation Perplexity: 10866.3645. 
-----------------------------------
Epoch 55
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1627. Validation Loss: 9.2530. 
Training Perplexity: 1.1766. Validation Perplexity: 10435.4608. 
-----------------------------------
Epoch 56
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1538. Validation Loss: 9.5527. 
Training Perplexity: 1.1663. Validation Perplexity: 14083.0432. 
-----------------------------------
Epoch 57
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1430. Validation Loss: 9.6493. 
Training Perplexity: 1.1537. Validation Perplexity: 15511.1870. 
-----------------------------------
Epoch 58
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1341. Validation Loss: 9.6033. 
Training Perplexity: 1.1435. Validation Perplexity: 14813.8274. 
-----------------------------------
Epoch 59
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1290. Validation Loss: 9.8164. 
Training Perplexity: 1.1377. Validation Perplexity: 18332.5934. 
-----------------------------------
Epoch 60
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1193. Validation Loss: 9.9650. 
Training Perplexity: 1.1267. Validation Perplexity: 21267.9855. 
-----------------------------------
Epoch 61
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1108. Validation Loss: 10.1291. 
Training Perplexity: 1.1171. Validation Perplexity: 25061.4602. 
-----------------------------------
Epoch 62
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1119. Validation Loss: 10.3628. 
Training Perplexity: 1.1184. Validation Perplexity: 31660.8104. 
-----------------------------------
Epoch 63
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1068. Validation Loss: 10.3448. 
Training Perplexity: 1.1127. Validation Perplexity: 31096.1734. 
-----------------------------------
Epoch 64
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1024. Validation Loss: 10.3084. 
Training Perplexity: 1.1078. Validation Perplexity: 29982.2570. 
-----------------------------------
Epoch 65
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.1005. Validation Loss: 10.5104. 
Training Perplexity: 1.1057. Validation Perplexity: 36696.2679. 
-----------------------------------
Epoch 66
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0944. Validation Loss: 10.4540. 
Training Perplexity: 1.0989. Validation Perplexity: 34682.3492. 
-----------------------------------
Epoch 67
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0890. Validation Loss: 10.7195. 
Training Perplexity: 1.0931. Validation Perplexity: 45227.2367. 
-----------------------------------
Epoch 68
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0916. Validation Loss: 10.7579. 
Training Perplexity: 1.0959. Validation Perplexity: 46997.9485. 
-----------------------------------
Epoch 69
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0878. Validation Loss: 10.7162. 
Training Perplexity: 1.0917. Validation Perplexity: 45081.1691. 
-----------------------------------
Epoch 70
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0828. Validation Loss: 10.8311. 
Training Perplexity: 1.0863. Validation Perplexity: 50568.4096. 
-----------------------------------
Epoch 71
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0831. Validation Loss: 11.0396. 
Training Perplexity: 1.0866. Validation Perplexity: 62295.3669. 
-----------------------------------
Epoch 72
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0790. Validation Loss: 11.0271. 
Training Perplexity: 1.0822. Validation Perplexity: 61520.0664. 
-----------------------------------
Epoch 73
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0766. Validation Loss: 11.1171. 
Training Perplexity: 1.0797. Validation Perplexity: 67311.6306. 
-----------------------------------
Epoch 74
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0776. Validation Loss: 10.9618. 
Training Perplexity: 1.0807. Validation Perplexity: 57628.5232. 
-----------------------------------
Epoch 75
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0746. Validation Loss: 11.2948. 
Training Perplexity: 1.0774. Validation Perplexity: 80400.8299. 
-----------------------------------
Epoch 76
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0719. Validation Loss: 11.4180. 
Training Perplexity: 1.0745. Validation Perplexity: 90939.6687. 
-----------------------------------
Epoch 77
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0722. Validation Loss: 11.5092. 
Training Perplexity: 1.0749. Validation Perplexity: 99624.7358. 
-----------------------------------
Epoch 78
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0629. Validation Loss: 11.6560. 
Training Perplexity: 1.0649. Validation Perplexity: 115380.6053. 
-----------------------------------
Epoch 79
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0688. Validation Loss: 11.6288. 
Training Perplexity: 1.0712. Validation Perplexity: 112283.7221. 
-----------------------------------
Epoch 80
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0636. Validation Loss: 11.6694. 
Training Perplexity: 1.0657. Validation Perplexity: 116943.9228. 
-----------------------------------
Epoch 81
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0641. Validation Loss: 11.6998. 
Training Perplexity: 1.0662. Validation Perplexity: 120549.0419. 
-----------------------------------
Epoch 82
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0656. Validation Loss: 11.8360. 
Training Perplexity: 1.0678. Validation Perplexity: 138138.9997. 
-----------------------------------
Epoch 83
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0637. Validation Loss: 11.6121. 
Training Perplexity: 1.0657. Validation Perplexity: 110426.1785. 
-----------------------------------
Epoch 84
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0632. Validation Loss: 12.0082. 
Training Perplexity: 1.0652. Validation Perplexity: 164099.5118. 
-----------------------------------
Epoch 85
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0585. Validation Loss: 11.8211. 
Training Perplexity: 1.0603. Validation Perplexity: 136094.0117. 
-----------------------------------
Epoch 86
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0591. Validation Loss: 12.1832. 
Training Perplexity: 1.0609. Validation Perplexity: 195476.9943. 
-----------------------------------
Epoch 87
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0604. Validation Loss: 12.2494. 
Training Perplexity: 1.0622. Validation Perplexity: 208859.9504. 
-----------------------------------
Epoch 88
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0587. Validation Loss: 12.3432. 
Training Perplexity: 1.0605. Validation Perplexity: 229391.4989. 
-----------------------------------
Epoch 89
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0546. Validation Loss: 12.2881. 
Training Perplexity: 1.0561. Validation Perplexity: 217094.8757. 
-----------------------------------
Epoch 90
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0549. Validation Loss: 12.2197. 
Training Perplexity: 1.0564. Validation Perplexity: 202738.6838. 
-----------------------------------
Epoch 91
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0527. Validation Loss: 12.5092. 
Training Perplexity: 1.0541. Validation Perplexity: 270816.6317. 
-----------------------------------
Epoch 92
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0539. Validation Loss: 12.5437. 
Training Perplexity: 1.0554. Validation Perplexity: 280333.5727. 
-----------------------------------
Epoch 93
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0496. Validation Loss: 12.7060. 
Training Perplexity: 1.0508. Validation Perplexity: 329716.0065. 
-----------------------------------
Epoch 94
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0504. Validation Loss: 12.4730. 
Training Perplexity: 1.0517. Validation Perplexity: 261194.4828. 
-----------------------------------
Epoch 95
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0550. Validation Loss: 12.7731. 
Training Perplexity: 1.0566. Validation Perplexity: 352612.3927. 
-----------------------------------
Epoch 96
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0493. Validation Loss: 12.6987. 
Training Perplexity: 1.0505. Validation Perplexity: 327335.8257. 
-----------------------------------
Epoch 97
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0484. Validation Loss: 12.7328. 
Training Perplexity: 1.0495. Validation Perplexity: 338691.6232. 
-----------------------------------
Epoch 98
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0470. Validation Loss: 13.1955. 
Training Perplexity: 1.0481. Validation Perplexity: 537965.1237. 
-----------------------------------
Epoch 99
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0483. Validation Loss: 12.9910. 
Training Perplexity: 1.0495. Validation Perplexity: 438428.2880. 
-----------------------------------
Epoch 100
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0460. Validation Loss: 12.9686. 
Training Perplexity: 1.0470. Validation Perplexity: 428734.7025. 
-----------------------------------
Epoch 101
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0460. Validation Loss: 12.9714. 
Training Perplexity: 1.0470. Validation Perplexity: 429951.5975. 
-----------------------------------
Epoch 102
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0472. Validation Loss: 12.9526. 
Training Perplexity: 1.0484. Validation Perplexity: 421945.3139. 
-----------------------------------
Epoch 103
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0435. Validation Loss: 13.0201. 
Training Perplexity: 1.0444. Validation Perplexity: 451398.7236. 
-----------------------------------
Epoch 104
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0430. Validation Loss: 13.1423. 
Training Perplexity: 1.0439. Validation Perplexity: 510087.1362. 
-----------------------------------
Epoch 105
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0437. Validation Loss: 13.2000. 
Training Perplexity: 1.0446. Validation Perplexity: 540381.8404. 
-----------------------------------
Epoch 106
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0449. Validation Loss: 13.2776. 
Training Perplexity: 1.0460. Validation Perplexity: 583963.9710. 
-----------------------------------
Epoch 107
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0435. Validation Loss: 13.1425. 
Training Perplexity: 1.0445. Validation Perplexity: 510145.5144. 
-----------------------------------
Epoch 108
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0411. Validation Loss: 13.2131. 
Training Perplexity: 1.0420. Validation Perplexity: 547499.9227. 
-----------------------------------
Epoch 109
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0415. Validation Loss: 13.4584. 
Training Perplexity: 1.0423. Validation Perplexity: 699721.6693. 
-----------------------------------
Epoch 110
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0434. Validation Loss: 13.3233. 
Training Perplexity: 1.0443. Validation Perplexity: 611255.7934. 
-----------------------------------
Epoch 111
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0396. Validation Loss: 13.6693. 
Training Perplexity: 1.0404. Validation Perplexity: 863987.3710. 
-----------------------------------
Epoch 112
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0397. Validation Loss: 13.3926. 
Training Perplexity: 1.0405. Validation Perplexity: 655160.3838. 
-----------------------------------
Epoch 113
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0416. Validation Loss: 13.7732. 
Training Perplexity: 1.0425. Validation Perplexity: 958549.2219. 
-----------------------------------
Epoch 114
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0396. Validation Loss: 13.7132. 
Training Perplexity: 1.0404. Validation Perplexity: 902750.5269. 
-----------------------------------
Epoch 115
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0405. Validation Loss: 13.6757. 
Training Perplexity: 1.0413. Validation Perplexity: 869533.8874. 
-----------------------------------
Epoch 116
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0376. Validation Loss: 13.7417. 
Training Perplexity: 1.0384. Validation Perplexity: 928893.8638. 
-----------------------------------
Epoch 117
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0372. Validation Loss: 13.8891. 
Training Perplexity: 1.0379. Validation Perplexity: 1076322.7994. 
-----------------------------------
Epoch 118
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0391. Validation Loss: 13.8844. 
Training Perplexity: 1.0398. Validation Perplexity: 1071325.3057. 
-----------------------------------
Epoch 119
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0372. Validation Loss: 14.0481. 
Training Perplexity: 1.0379. Validation Perplexity: 1261822.9847. 
-----------------------------------
Epoch 120
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0354. Validation Loss: 14.1411. 
Training Perplexity: 1.0361. Validation Perplexity: 1384824.1435. 
-----------------------------------
Epoch 121
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0362. Validation Loss: 14.0027. 
Training Perplexity: 1.0369. Validation Perplexity: 1205818.7255. 
-----------------------------------
Epoch 122
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0353. Validation Loss: 14.0109. 
Training Perplexity: 1.0359. Validation Perplexity: 1215816.2825. 
-----------------------------------
Epoch 123
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0372. Validation Loss: 13.8237. 
Training Perplexity: 1.0379. Validation Perplexity: 1008240.3255. 
-----------------------------------
Epoch 124
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0364. Validation Loss: 14.1242. 
Training Perplexity: 1.0371. Validation Perplexity: 1361580.8221. 
-----------------------------------
Epoch 125
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0367. Validation Loss: 13.9724. 
Training Perplexity: 1.0374. Validation Perplexity: 1169865.9313. 
-----------------------------------
Epoch 126
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0349. Validation Loss: 14.1030. 
Training Perplexity: 1.0356. Validation Perplexity: 1333044.3179. 
-----------------------------------
Epoch 127
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0366. Validation Loss: 14.2404. 
Training Perplexity: 1.0372. Validation Perplexity: 1529446.5972. 
-----------------------------------
Epoch 128
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0311. Validation Loss: 14.2661. 
Training Perplexity: 1.0316. Validation Perplexity: 1569215.8255. 
-----------------------------------
Epoch 129
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0348. Validation Loss: 14.4647. 
Training Perplexity: 1.0354. Validation Perplexity: 1914064.9785. 
-----------------------------------
Epoch 130
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0345. Validation Loss: 14.5431. 
Training Perplexity: 1.0351. Validation Perplexity: 2070029.9923. 
-----------------------------------
Epoch 131
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0327. Validation Loss: 14.3110. 
Training Perplexity: 1.0333. Validation Perplexity: 1641288.6294. 
-----------------------------------
Epoch 132
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0334. Validation Loss: 14.5871. 
Training Perplexity: 1.0340. Validation Perplexity: 2163137.9176. 
-----------------------------------
Epoch 133
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0311. Validation Loss: 14.4041. 
Training Perplexity: 1.0316. Validation Perplexity: 1801525.3616. 
-----------------------------------
Epoch 134
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0339. Validation Loss: 14.3700. 
Training Perplexity: 1.0345. Validation Perplexity: 1741106.4446. 
-----------------------------------
Epoch 135
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0311. Validation Loss: 14.4734. 
Training Perplexity: 1.0316. Validation Perplexity: 1930628.6765. 
-----------------------------------
Epoch 136
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0329. Validation Loss: 14.7264. 
Training Perplexity: 1.0335. Validation Perplexity: 2486647.9377. 
-----------------------------------
Epoch 137
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0332. Validation Loss: 14.7947. 
Training Perplexity: 1.0337. Validation Perplexity: 2662301.6171. 
-----------------------------------
Epoch 138
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0318. Validation Loss: 14.4685. 
Training Perplexity: 1.0323. Validation Perplexity: 1921276.0590. 
-----------------------------------
Epoch 139
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0310. Validation Loss: 14.5894. 
Training Perplexity: 1.0315. Validation Perplexity: 2168131.8358. 
-----------------------------------
Epoch 140
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0311. Validation Loss: 14.6321. 
Training Perplexity: 1.0316. Validation Perplexity: 2262678.9627. 
-----------------------------------
Epoch 141
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0301. Validation Loss: 14.6008. 
Training Perplexity: 1.0306. Validation Perplexity: 2192984.1752. 
-----------------------------------
Epoch 142
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0304. Validation Loss: 14.5896. 
Training Perplexity: 1.0309. Validation Perplexity: 2168576.4351. 
-----------------------------------
Epoch 143
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0296. Validation Loss: 14.8812. 
Training Perplexity: 1.0301. Validation Perplexity: 2902864.0754. 
-----------------------------------
Epoch 144
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0295. Validation Loss: 14.9381. 
Training Perplexity: 1.0300. Validation Perplexity: 3072729.9930. 
-----------------------------------
Epoch 145
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0281. Validation Loss: 14.8683. 
Training Perplexity: 1.0285. Validation Perplexity: 2865714.0079. 
-----------------------------------
Epoch 146
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0293. Validation Loss: 14.7288. 
Training Perplexity: 1.0297. Validation Perplexity: 2492545.6080. 
-----------------------------------
Epoch 147
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0293. Validation Loss: 15.1915. 
Training Perplexity: 1.0297. Validation Perplexity: 3959051.2408. 
-----------------------------------
Epoch 148
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0298. Validation Loss: 15.1603. 
Training Perplexity: 1.0303. Validation Perplexity: 3837492.8924. 
-----------------------------------
Epoch 149
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0299. Validation Loss: 15.2646. 
Training Perplexity: 1.0303. Validation Perplexity: 4259339.2290. 
-----------------------------------
Epoch 150
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0271. Validation Loss: 15.2408. 
Training Perplexity: 1.0275. Validation Perplexity: 4158950.0185. 
-----------------------------------
Epoch 151
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0300. Validation Loss: 14.7886. 
Training Perplexity: 1.0305. Validation Perplexity: 2646033.5720. 
-----------------------------------
Epoch 152
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0279. Validation Loss: 15.1677. 
Training Perplexity: 1.0283. Validation Perplexity: 3865816.9981. 
-----------------------------------
Epoch 153
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0283. Validation Loss: 15.3494. 
Training Perplexity: 1.0287. Validation Perplexity: 4636149.8140. 
-----------------------------------
Epoch 154
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0278. Validation Loss: 15.2759. 
Training Perplexity: 1.0282. Validation Perplexity: 4307673.2631. 
-----------------------------------
Epoch 155
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0284. Validation Loss: 15.2352. 
Training Perplexity: 1.0288. Validation Perplexity: 4135953.8563. 
-----------------------------------
Epoch 156
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0276. Validation Loss: 15.3606. 
Training Perplexity: 1.0279. Validation Perplexity: 4688245.6108. 
-----------------------------------
Epoch 157
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0274. Validation Loss: 15.3037. 
Training Perplexity: 1.0278. Validation Perplexity: 4428923.3151. 
-----------------------------------
Epoch 158
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0274. Validation Loss: 15.3854. 
Training Perplexity: 1.0277. Validation Perplexity: 4805946.3518. 
-----------------------------------
Epoch 159
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0255. Validation Loss: 15.3105. 
Training Perplexity: 1.0259. Validation Perplexity: 4459243.3384. 
-----------------------------------
Epoch 160
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0258. Validation Loss: 15.5243. 
Training Perplexity: 1.0262. Validation Perplexity: 5522438.8380. 
-----------------------------------
Epoch 161
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0266. Validation Loss: 15.3105. 
Training Perplexity: 1.0270. Validation Perplexity: 4459455.9768. 
-----------------------------------
Epoch 162
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0272. Validation Loss: 15.5308. 
Training Perplexity: 1.0276. Validation Perplexity: 5558060.7055. 
-----------------------------------
Epoch 163
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0258. Validation Loss: 15.4730. 
Training Perplexity: 1.0261. Validation Perplexity: 5246101.3466. 
-----------------------------------
Epoch 164
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0262. Validation Loss: 15.4483. 
Training Perplexity: 1.0266. Validation Perplexity: 5118079.7161. 
-----------------------------------
Epoch 165
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0262. Validation Loss: 15.5915. 
Training Perplexity: 1.0266. Validation Perplexity: 5906157.4741. 
-----------------------------------
Epoch 166
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0258. Validation Loss: 15.6438. 
Training Perplexity: 1.0261. Validation Perplexity: 6223355.6260. 
-----------------------------------
Epoch 167
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0257. Validation Loss: 15.5106. 
Training Perplexity: 1.0260. Validation Perplexity: 5446962.2111. 
-----------------------------------
Epoch 168
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0265. Validation Loss: 15.8134. 
Training Perplexity: 1.0269. Validation Perplexity: 7373366.9839. 
-----------------------------------
Epoch 169
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0259. Validation Loss: 15.2455. 
Training Perplexity: 1.0262. Validation Perplexity: 4178844.7356. 
-----------------------------------
Epoch 170
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0261. Validation Loss: 15.8980. 
Training Perplexity: 1.0265. Validation Perplexity: 8024449.1123. 
-----------------------------------
Epoch 171
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0096. Validation Loss: 15.8996. 
Training Perplexity: 1.0097. Validation Perplexity: 8037193.3328. 
-----------------------------------
Epoch 172
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0017. Validation Loss: 15.4749. 
Training Perplexity: 1.0017. Validation Perplexity: 5256127.0648. 
-----------------------------------
Epoch 173
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0012. Validation Loss: 15.5535. 
Training Perplexity: 1.0012. Validation Perplexity: 5685684.1930. 
-----------------------------------
Epoch 174
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0010. Validation Loss: 15.6932. 
Training Perplexity: 1.0010. Validation Perplexity: 6538418.2307. 
-----------------------------------
Epoch 175
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0009. Validation Loss: 15.4667. 
Training Perplexity: 1.0009. Validation Perplexity: 5213060.4814. 
-----------------------------------
Epoch 176
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0008. Validation Loss: 15.6888. 
Training Perplexity: 1.0008. Validation Perplexity: 6509512.0832. 
-----------------------------------
Epoch 177
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0007. Validation Loss: 15.5100. 
Training Perplexity: 1.0007. Validation Perplexity: 5443747.6853. 
-----------------------------------
Epoch 178
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0007. Validation Loss: 15.4246. 
Training Perplexity: 1.0007. Validation Perplexity: 4998132.2569. 
-----------------------------------
Epoch 179
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0006. Validation Loss: 15.3636. 
Training Perplexity: 1.0006. Validation Perplexity: 4702547.9462. 
-----------------------------------
Epoch 180
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0006. Validation Loss: 15.6608. 
Training Perplexity: 1.0006. Validation Perplexity: 6329629.7206. 
-----------------------------------
Epoch 181
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0006. Validation Loss: 15.5547. 
Training Perplexity: 1.0006. Validation Perplexity: 5692889.5572. 
-----------------------------------
Epoch 182
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 0.0005. Validation Loss: 15.3387. 
Training Perplexity: 1.0005. Validation Perplexity: 4586718.9419. 
-----------------------------------
Epoch 183
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]