In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from torchtext.legacy.data import Field, TabularDataset, BucketIterator,ReversibleField
import matplotlib.pyplot as plt
from ast import literal_eval
import remi_utils as utils
import twoencodertransformer as kk
import pickle
source_folder = "solo_generation_dataset_augmented_mag"
folder = "dynamic_mag_models/2enc_2nd"
destination_folder = folder + "/solo_generation_weights"
generated_outputs = folder +  "/generated_samples"
dissimilar_interpolation = folder + "/interpolation"
vocab = folder + "/vocab"

In [2]:
from pathlib import Path
Path(destination_folder).mkdir(parents=True, exist_ok=True)
Path(generated_outputs).mkdir(parents=True, exist_ok=True)
Path(dissimilar_interpolation).mkdir(parents=True, exist_ok=True)
Path(vocab).mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/main").mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/piano").mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/solo").mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/piano_predict").mkdir(parents=True, exist_ok=True)
Path(dissimilar_interpolation+"/intro").mkdir(parents=True, exist_ok=True)
Path(dissimilar_interpolation+"/outro").mkdir(parents=True, exist_ok=True)
Path(dissimilar_interpolation+"/predict").mkdir(parents=True, exist_ok=True)

In [3]:
event2word, word2event = pickle.load(open('dictionary_augmented.pkl', 'rb'))

In [4]:
if torch.cuda.is_available():  
    dev = "cuda:1" 
else:  
    dev = "cpu" 
print(dev)
device = torch.device(dev)
print(device)

cuda:1
cuda:1


In [5]:
# Fields

main_field = Field(tokenize=None, lower=True, include_lengths=True, batch_first=True, init_token="<sos>", eos_token="<eos>")
piano_field = Field(tokenize=None, lower=True, include_lengths=True, batch_first=True, init_token="<sos>", eos_token="<eos>")
fields = [('main', main_field), ('piano', piano_field)]

# TabularDataset

train, valid, test = TabularDataset.splits(path=source_folder, train='train_torchtext.csv', validation='val_torchtext.csv', test='test_torchtext.csv',
                                           format='CSV', fields=fields, skip_header=True)

# Iterators
BATCH_SIZE = 1
train_iter = BucketIterator(train, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.main),
                            device=device, sort=False, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.main),
                            device=device, sort=False, sort_within_batch=True)
test_iter = BucketIterator(test, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.main),
                            device=device, sort=False, sort_within_batch=True)

# Vocabulary

main_field.build_vocab(train, min_freq=1)
piano_field.build_vocab(train, min_freq=1)

In [6]:
big = []
for ((main, main_len), (piano, piano_len)), _ in (train_iter):
    #print(intro.transpose(1,0).size(0))
    print(piano_len.cpu().item())

1318
3668
1303
1071
945
1307
1660
1638
1435
2699
687
1278
1314
2699
1610
396
1059
682
2193
1335
798
929
1720
2023
1633
1890
1033
1765
1216
1141
602
1174
1250
1020
2332
1236
787
787
1941
843
1019
1250
800
1178
1084
1743
1690
1547
1715
1111
554
3783
967
1470
899
1425
1237
1176
1178
798
1313
1539
2792
1725
1303
898
1020
1335
1720
1246
895
1303
1030
1546
1644
1329
3528
687
2639
1768
758
906
1558
1542
858
1509
1075
816
1324
1399
705
1830
1960
1633
2117
1855
636
1598
1483
709
1271
1026
682
1355
1504
1047
1167
1335
2307
1086
838
1061
906
1135
1091
1351
439
914
2699
1207
1313
1376
1183
1199
1968
878
1542
1122
1086
931
869
1178
1180
1207
2112
2193
2087
914
2395
1429
868
1478
1419
905
1000
1313
839
822
1598
1180
1770
739
1144
1725
1278
727
1353
1086
1116
839
1442
804
802
1304
928
1019
1459
1113
2088
1318
1941
3668
1644
1774
355
1536
1323
1968
1250
726
1334
1643
2474
1116
1447
1443
1001
1478
929
1394
1509
1419
1638
530
1823
1026
757
1799
945
1607
549
1216
993
2418
794
1320
1935
1365
1113
794
1473

1171
1643
1116
2094
1456
1047
1355
705
1046
914
1180
1662
1558
1061
2087
2151
836
1900
1569
1166
1502
816
323
1999
1548
1470
1086
1798
1355
3783
1439
1274
854
821
1071
1365
1314
1010
1246
1654
439
1545
883
664
858
439
355
556
1443
1558
986
1166
2456
1004
836
1059
942
1046
588
1335
1275
1876
1557
929
1643
1211
1099
1320
530
1798
1026
2892
793
1718
1456
1439
1856
800
964
1478
1141
1443
1763
1387
1790
1274
745
1662
1656
942
1504
530
1322
1252
794
1355
672
1515
1790
1113
355
1376
1456
1303
2879
355
789
1496
1010
1395
1304
1956
1309
1246
2090
1178
687
1927
1508
914
1271
1508
859
854
940
2090
1383
1559
1998
1543
2398
1231
588
1746
1447
1329
828
1174
541
2639
1676
757
589
1438
1144
1353
810
1374
1380
1511
1180
931
1098
1504
874
1108
1920
1937
2117
878
793
1731
1084
1746
1963
1644
1351
787
1374
1001
1725
1595
1284
1134
1502
1319
1798
1539
1502
1509
2193
1086
1673
2094
2032
853
942
1928
1141
1037
1676
556
804
1542
467
934
1187
905
1167
1147
1124
2395
1211
1135
1900
1638
1690
845
1111
1309
638
1

In [7]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
torch.backends.cudnn.enabled=False

In [8]:
#https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/seq2seq_transformer/seq2seq_transformer.py
class Transformer(nn.Module):
    def __init__(
        self,
        embedding_size,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        forward_expansion,
        dropout,
        max_len,
        device,
    ):
        super(Transformer, self).__init__()
        self.src_word_embedding = nn.Embedding(src_vocab_size, embedding_size)
        self.src_position_embedding = nn.Embedding(max_len, embedding_size)
        self.trg_word_embedding = nn.Embedding(trg_vocab_size, embedding_size)
        self.trg_position_embedding = nn.Embedding(max_len, embedding_size)

        self.device = device
        self.transformer = nn.Transformer(
            embedding_size,
            num_heads,
            num_encoder_layers,
            num_decoder_layers,
            forward_expansion,
            dropout,
        )
        self.fc_out = nn.Linear(embedding_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)
        self.src_pad_idx = src_pad_idx

    def make_src_mask(self, src):
        src_mask = src.transpose(0, 1) == self.src_pad_idx

        # (N, src_len)
        return src_mask.to(self.device)

    def forward(self, src, trg):
        src_seq_length, N = src.shape
        trg_seq_length, N = trg.shape

        src_positions = (
            torch.arange(0, src_seq_length)
            .unsqueeze(1)
            .expand(src_seq_length, N)
            .to(self.device)
        )

        trg_positions = (
            torch.arange(0, trg_seq_length)
            .unsqueeze(1)
            .expand(trg_seq_length, N)
            .to(self.device)
        )

        embed_src = self.dropout(
            (self.src_word_embedding(src) + self.src_position_embedding(src_positions))
        )
        embed_trg = self.dropout(
            (self.trg_word_embedding(trg) + self.trg_position_embedding(trg_positions))
        )

        src_padding_mask = self.make_src_mask(src)
        trg_mask = self.transformer.generate_square_subsequent_mask(trg_seq_length).to(
            self.device
        )

        out = self.transformer(
            embed_src,
            embed_trg,
            src_key_padding_mask=src_padding_mask,
            tgt_mask=trg_mask,
        )
        out = self.fc_out(out)
        return out


In [9]:
src_vocab_size = len(main_field.vocab)
trg_vocab_size = len(piano_field.vocab)
embedding_size = 512
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
dropout = 0.10
max_len = 3000
forward_expansion = 4
src_pad_idx = 1 #english.vocab.stoi["<pad>"]

model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device,
)
model = model.to(device)


In [10]:
def init_weights(m: nn.Module):
    for name, param in m.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)


model.apply(init_weights)

optimizer = optim.Adam(model.parameters(), lr=1e-5) #non augmented 3e-4


def count_parameters(model: nn.Module):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model):,} trainable parameters')


def save_best_checkpoint(state, nth,filename="_checkpoint.pt"):
    print("=> Saving checkpoint")
#     torch.save(state, destination_folder + str(nth)+filename)
    torch.save(state, destination_folder + '/metrics.pt')

def save_final_checkpoint(state, nth,filename="_checkpoint.pt"):
    print("=> Saving checkpoint")
    torch.save(state, destination_folder + "/" + str(nth)+filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

The model has 12,969,244 trainable parameters


In [11]:
# stoi input str get int
# intro_field.vocab.stoi
# itos input into get token/str
# intro_field.vocab.itos[4]

In [12]:
PAD_IDX = 1

criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)
#criterion = nn.CrossEntropyLoss()

In [13]:
import math
import time


def train(model: nn.Module,
          iterator: torch.utils.data.DataLoader,
          optimizer: optim.Optimizer,
          criterion: nn.Module,
          clip: float):

    model.train()

    epoch_loss = 0

    #for _, (src, _,trg,_) in enumerate(iterator):
    for ((main, main_len), (piano, piano_len)), _ in (iterator):
        if piano_len.cpu().item()>=3000:
            continue
        src, trg = main.transpose(1,0), piano.transpose(1,0)
        src, trg = src.to(device), trg.to(device)

        optimizer.zero_grad()
        output = model(src, trg[:-1, :])
        
#         print(output.size())
#         print(trg.size())
        
        output = output.view(-1, output.shape[-1])
        trg = trg[1:].reshape(-1)
        loss = criterion(output, trg)
#         print(torch.isfinite(trg).all().cpu().item())
#         print(torch.isfinite(output).all().cpu().item())
#         print(torch.isfinite(loss).all().cpu().item())
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.cpu().detach().item()

    return epoch_loss / len(iterator)


def evaluate(model: nn.Module,
             iterator: torch.utils.data.DataLoader,
             criterion: nn.Module):

    model.eval()

    epoch_loss = 0

    with torch.no_grad():

        #for _, (src, _,trg,_) in enumerate(iterator):
        for ((main, main_len), (piano, piano_len)), _ in (iterator):
            if piano_len.cpu().item()>=3000:
                continue
            src, trg = main.transpose(1,0), piano.transpose(1,0)
            src, trg = src.to(device), trg.to(device)

            output = model(src, trg[:-1, :]) #turn off teacher forcing

            output = output.view(-1, output.shape[-1])
            trg = trg[1:].reshape(-1)

            loss = criterion(output, trg)

            epoch_loss += loss.cpu().detach().item()

    return epoch_loss / len(iterator)


def epoch_time(start_time: int,
               end_time: int):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs



In [14]:
def translate_sentence(model, sentence, german, english, device, max_length=1200):

    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    tokens = [token.lower() for token in sentence.split(' ')]
    # print(tokens)

    # sys.exit()
    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each german token and convert to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    outputs = [english.vocab.stoi["<sos>"]]
    
    for i in range(max_length):
        trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)

        with torch.no_grad():
            output = model(sentence_tensor, trg_tensor)

        best_guess = output.argmax(2)[-1, :].item()
        outputs.append(best_guess)

        if best_guess == english.vocab.stoi["<eos>"]:
            break
    # print(outputs)
    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence


In [15]:
df_intro = pd.read_csv(source_folder + '/val_torchtext.csv')
val_main = df_intro['main'].values
val_piano = df_intro['piano'].values
val_data=[]
for i in range(len(val_main)):
    temp_dict = {}
    temp_dict['main'] = val_main[i]
    temp_dict['piano'] = val_piano[i]
    val_data.append(temp_dict)
print(len(val_piano))

112


In [16]:
def check_mode_collapse(model):
    count = 0
    translations = []
    for i in range(3):
        main = val_main[i]
        piano = val_piano[i]
        #print(intro)
        list_main = [int(x) for x in main.split(' ')]
        list_piano = [int(x) for x in piano.split(' ')]
        translated_sentence = translate_sentence(model, main, main_field, piano_field, device, max_length=1200)
        
        translated_sentence = [int(x) for x in translated_sentence if x != '<pad>' and x != '<sos>' and x != '<eos>' and x != '<unk>']
        print(translated_sentence)
        translations.append(translated_sentence)
        if i > 0:
            if translations[i-1] == translations[i]:
                count += 1
    return count


In [None]:
N_EPOCHS = 2000
S_EPOCH = 0
CLIP = 1

train_loss_log = []
valid_loss_log = []
best_valid_loss = float('inf')
#torch.autograd.set_detect_anomaly(True)
#model = nn.DataParallel(model, device_ids=[0,1]).to(device)
for epoch in range(S_EPOCH, N_EPOCHS):
    
    start_time = time.time()

    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iter, criterion)
    
    
    train_loss_log.append(train_loss)
    valid_loss_log.append(valid_loss)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        checkpoint = {'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'valid_loss': valid_loss}
        save_best_checkpoint(checkpoint,N_EPOCHS)
    if (epoch+1) % 20 == 0 or (epoch) % 20 == 0:
        save_final_checkpoint(checkpoint,epoch)
    if (epoch+1) % 25 ==0:
        if check_mode_collapse(model) > 1:
            print("model is mode collapsing")
save_final_checkpoint(checkpoint,N_EPOCHS)
test_loss = evaluate(model, test_iter, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

Epoch: 01 | Time: 5m 33s
	Train Loss: 4.891 | Train PPL: 133.078
	 Val. Loss: 4.393 |  Val. PPL:  80.906
=> Saving checkpoint
=> Saving checkpoint
Epoch: 02 | Time: 5m 37s
	Train Loss: 4.150 | Train PPL:  63.456
	 Val. Loss: 3.786 |  Val. PPL:  44.088
=> Saving checkpoint
Epoch: 03 | Time: 5m 38s
	Train Loss: 3.531 | Train PPL:  34.148
	 Val. Loss: 3.172 |  Val. PPL:  23.848
=> Saving checkpoint
Epoch: 04 | Time: 5m 38s
	Train Loss: 3.108 | Train PPL:  22.366
	 Val. Loss: 2.908 |  Val. PPL:  18.324
=> Saving checkpoint
Epoch: 05 | Time: 5m 39s
	Train Loss: 2.925 | Train PPL:  18.627
	 Val. Loss: 2.768 |  Val. PPL:  15.929
=> Saving checkpoint
Epoch: 06 | Time: 5m 39s
	Train Loss: 2.818 | Train PPL:  16.744
	 Val. Loss: 2.669 |  Val. PPL:  14.420
=> Saving checkpoint
Epoch: 07 | Time: 5m 39s
	Train Loss: 2.734 | Train PPL:  15.387
	 Val. Loss: 2.585 |  Val. PPL:  13.261
=> Saving checkpoint
Epoch: 08 | Time: 5m 39s
	Train Loss: 2.659 | Train PPL:  14.281
	 Val. Loss: 2.531 |  Val. PPL: 

[0, 1, 2, 114, 1, 51, 111, 15, 1, 51, 111, 15, 1, 51, 33, 15, 4, 51, 33, 15, 8, 51, 111, 15, 8, 51, 111, 15, 8, 51, 111, 15, 13, 51, 33, 15, 13, 51, 33, 15, 13, 51, 33, 15, 13, 51, 33, 15, 17, 51, 33, 15, 17, 51, 33, 15, 17, 51, 33, 15, 17, 51, 33, 15, 17, 51, 6, 15, 0, 1, 51, 33, 15, 1, 51, 33, 15, 1, 51, 6, 15, 1, 51, 33, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 8, 51, 6, 15, 8, 51, 6, 15, 8, 51, 6, 15, 8, 51, 6, 15, 8, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 13, 51, 6, 15, 16, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 6, 15, 17, 51, 33, 15, 0, 1, 51, 6, 15, 0, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1, 51, 6, 15, 1

[0, 1, 2, 130, 67, 54, 128, 15, 67, 54, 128, 15, 23, 54, 100, 15, 23, 54, 128, 15, 72, 54, 128, 15, 72, 54, 128, 15, 91, 54, 128, 15, 91, 54, 128, 15, 70, 54, 128, 15, 70, 54, 128, 15, 70, 54, 128, 15, 78, 54, 128, 15, 78, 54, 128, 15, 90, 54, 128, 15, 74, 54, 128, 15, 74, 54, 128, 15, 74, 54, 128, 15, 74, 54, 128, 15, 74, 54, 128, 15, 23, 54, 128, 15, 72, 54, 128, 15, 72, 54, 128, 15, 91, 54, 128, 15, 91, 54, 128, 15, 13, 54, 128, 15, 70, 54, 128, 15, 70, 54, 128, 15, 17, 54, 128, 15, 78, 54, 128, 15, 74, 51, 128, 15, 74, 54, 128, 15, 91, 54, 128, 15, 91, 51, 128, 15, 78, 51, 128, 15, 78, 54, 128, 15, 74, 51, 128, 15, 17, 54, 128, 7, 74, 51, 128, 15, 74, 51, 128, 15, 74, 54, 128, 15, 23, 54, 128, 15, 23, 54, 128, 15, 72, 51, 128, 15, 72, 51, 128, 15, 72, 51, 128, 15, 91, 54, 128, 15, 70, 54, 128, 15, 70, 51, 128, 15, 91, 54, 128, 15, 70, 51, 128, 15, 70, 51, 128, 15, 78, 54, 128, 15, 78, 51, 128, 15, 74, 51, 128, 15, 74, 54, 128, 15, 74, 54, 128, 15, 74, 51, 128, 15, 74, 51, 128, 15, 

Epoch: 51 | Time: 5m 40s
	Train Loss: 2.077 | Train PPL:   7.981
	 Val. Loss: 2.523 |  Val. PPL:  12.465
Epoch: 52 | Time: 5m 40s
	Train Loss: 2.072 | Train PPL:   7.937
	 Val. Loss: 2.532 |  Val. PPL:  12.584
Epoch: 53 | Time: 5m 40s
	Train Loss: 2.066 | Train PPL:   7.892
	 Val. Loss: 2.553 |  Val. PPL:  12.844
Epoch: 54 | Time: 5m 39s
	Train Loss: 2.060 | Train PPL:   7.849
	 Val. Loss: 2.547 |  Val. PPL:  12.763
Epoch: 55 | Time: 5m 40s
	Train Loss: 2.055 | Train PPL:   7.807
	 Val. Loss: 2.556 |  Val. PPL:  12.885
Epoch: 56 | Time: 5m 40s
	Train Loss: 2.050 | Train PPL:   7.771
	 Val. Loss: 2.560 |  Val. PPL:  12.933
Epoch: 57 | Time: 5m 40s
	Train Loss: 2.045 | Train PPL:   7.728
	 Val. Loss: 2.576 |  Val. PPL:  13.139
Epoch: 58 | Time: 5m 40s
	Train Loss: 2.039 | Train PPL:   7.685
	 Val. Loss: 2.565 |  Val. PPL:  12.995
Epoch: 59 | Time: 5m 40s
	Train Loss: 2.034 | Train PPL:   7.644
	 Val. Loss: 2.580 |  Val. PPL:  13.198
Epoch: 60 | Time: 5m 40s
	Train Loss: 2.029 | Train PPL

[0, 1, 2, 69, 1, 51, 111, 52, 4, 51, 111, 52, 4, 51, 33, 52, 4, 51, 33, 52, 8, 51, 33, 52, 8, 51, 33, 52, 10, 51, 33, 52, 10, 51, 33, 52, 10, 51, 33, 52, 13, 51, 33, 52, 13, 51, 33, 52, 16, 51, 33, 52, 16, 51, 33, 52, 17, 51, 33, 52, 17, 51, 33, 52, 17, 51, 33, 52, 27, 51, 33, 52, 0, 1, 51, 33, 31, 1, 51, 33, 52, 4, 51, 33, 52, 4, 51, 33, 52, 8, 51, 33, 52, 8, 51, 33, 52, 8, 51, 33, 52, 10, 51, 33, 52, 13, 51, 33, 52, 13, 51, 33, 52, 13, 51, 33, 52, 16, 51, 33, 52, 16, 51, 33, 52, 78, 51, 33, 52, 17, 51, 33, 52, 17, 51, 33, 52, 27, 51, 33, 52, 0, 1, 51, 33, 52, 4, 51, 33, 52, 4, 51, 33, 52, 8, 51, 33, 52, 8, 51, 33, 52, 8, 51, 33, 52, 13, 51, 33, 52, 13, 51, 33, 52, 16, 51, 6, 31, 17, 51, 6, 52, 17, 51, 33, 52, 0, 1, 51, 33, 52, 1, 51, 33, 52, 4, 51, 6, 52, 8, 51, 33, 52, 8, 51, 33, 52, 8, 51, 6, 52, 8, 51, 33, 52, 13, 51, 33, 31, 13, 51, 33, 52, 13, 51, 33, 31, 16, 51, 6, 52, 17, 51, 33, 52, 17, 51, 33, 52, 17, 51, 33, 52, 17, 51, 33, 52, 0, 1, 51, 6, 52, 4, 51, 33, 52, 4, 51, 33, 31,

[0, 1, 43, 117, 4, 51, 128, 7, 4, 51, 128, 7, 4, 51, 128, 7, 23, 51, 128, 7, 23, 51, 128, 7, 23, 51, 128, 7, 23, 51, 128, 15, 23, 51, 128, 7, 23, 51, 128, 7, 91, 51, 128, 7, 91, 51, 128, 7, 91, 51, 128, 7, 91, 51, 128, 7, 91, 51, 128, 15, 91, 51, 128, 7, 78, 51, 128, 15, 78, 51, 128, 7, 78, 51, 128, 15, 78, 51, 128, 7, 78, 51, 128, 15, 74, 51, 128, 7, 74, 51, 128, 7, 74, 51, 128, 7, 74, 51, 128, 15, 0, 1, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 7, 23, 51, 128, 7, 91, 51, 128, 15, 91, 51, 128, 15, 91, 51, 128, 7, 78, 51, 128, 7, 78, 51, 128, 15, 78, 51, 128, 15, 78, 51, 128, 15, 78, 51, 128, 7, 74, 51, 128, 15, 74, 51, 128, 15, 74, 51, 128, 15, 74, 51, 128, 15, 0, 1, 51, 185, 42, 23, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 15, 23, 51, 128, 7, 23, 51, 128, 15, 23, 51, 128, 15, 91, 51, 128, 15, 91, 51, 128, 15, 91, 51, 128, 15, 91, 51, 128, 15, 70, 51, 128, 15, 70, 51, 128, 7, 78, 51, 128, 15, 78, 51, 128, 15, 78, 51, 128, 15, 78, 5

Epoch: 101 | Time: 5m 41s
	Train Loss: 1.668 | Train PPL:   5.302
	 Val. Loss: 2.377 |  Val. PPL:  10.775
=> Saving checkpoint
Epoch: 102 | Time: 5m 41s
	Train Loss: 1.662 | Train PPL:   5.268
	 Val. Loss: 2.409 |  Val. PPL:  11.128
Epoch: 103 | Time: 5m 40s
	Train Loss: 1.655 | Train PPL:   5.231
	 Val. Loss: 2.383 |  Val. PPL:  10.834
Epoch: 104 | Time: 5m 41s
	Train Loss: 1.649 | Train PPL:   5.202
	 Val. Loss: 2.426 |  Val. PPL:  11.312
Epoch: 105 | Time: 5m 41s
	Train Loss: 1.643 | Train PPL:   5.170
	 Val. Loss: 2.388 |  Val. PPL:  10.893
Epoch: 106 | Time: 5m 41s
	Train Loss: 1.636 | Train PPL:   5.136
	 Val. Loss: 2.405 |  Val. PPL:  11.080
Epoch: 107 | Time: 5m 41s
	Train Loss: 1.631 | Train PPL:   5.109
	 Val. Loss: 2.424 |  Val. PPL:  11.293
Epoch: 108 | Time: 5m 41s
	Train Loss: 1.624 | Train PPL:   5.073
	 Val. Loss: 2.410 |  Val. PPL:  11.130
Epoch: 109 | Time: 5m 41s
	Train Loss: 1.618 | Train PPL:   5.045
	 Val. Loss: 2.384 |  Val. PPL:  10.845
Epoch: 110 | Time: 5m 41s

[0, 1, 2, 162, 1, 51, 55, 15, 67, 51, 33, 15, 23, 51, 50, 15, 23, 51, 50, 15, 8, 51, 57, 15, 72, 51, 57, 15, 10, 51, 57, 15, 91, 51, 57, 15, 91, 51, 57, 15, 70, 51, 57, 15, 70, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 90, 51, 57, 15, 90, 51, 57, 15, 74, 51, 57, 15, 74, 51, 57, 15, 0, 1, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 72, 51, 57, 15, 72, 51, 57, 15, 91, 51, 57, 15, 91, 51, 57, 15, 91, 51, 57, 15, 70, 51, 57, 15, 70, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 90, 51, 57, 15, 90, 51, 57, 15, 74, 51, 57, 15, 0, 1, 51, 57, 15, 67, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 15, 72, 51, 57, 15, 72, 51, 57, 15, 91, 51, 57, 15, 91, 51, 57, 15, 91, 51, 57, 15, 70, 51, 57, 15, 70, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 15, 90, 51, 57, 15, 90, 51, 57, 15, 74, 51, 57, 15, 74, 51, 57, 15, 0, 1, 51, 57, 15, 23, 51, 57, 15, 23, 5

[0, 1, 2, 146, 67, 51, 55, 15, 67, 51, 48, 15, 23, 51, 185, 77, 23, 51, 48, 34, 8, 51, 48, 34, 10, 51, 48, 7, 91, 51, 55, 15, 91, 51, 48, 7, 70, 51, 48, 7, 78, 51, 48, 7, 78, 51, 6, 7, 78, 51, 6, 7, 90, 51, 48, 7, 74, 51, 6, 15, 74, 51, 48, 7, 0, 1, 51, 6, 7, 23, 51, 129, 36, 23, 51, 55, 7, 23, 51, 48, 52, 23, 51, 48, 7, 72, 51, 55, 7, 91, 51, 50, 7, 91, 51, 50, 7, 91, 51, 55, 7, 70, 51, 50, 7, 78, 51, 55, 7, 78, 51, 48, 7, 78, 51, 48, 7, 90, 51, 48, 7, 90, 51, 6, 7, 74, 51, 6, 7, 0, 1, 51, 185, 36, 67, 51, 48, 7, 67, 51, 48, 7, 23, 51, 6, 7, 23, 51, 6, 7, 72, 51, 48, 7, 10, 51, 6, 7, 91, 51, 48, 7, 91, 51, 129, 42, 91, 51, 55, 7, 70, 51, 55, 7, 78, 51, 48, 7, 78, 51, 48, 7, 78, 51, 48, 7, 90, 51, 55, 15, 74, 51, 50, 7, 74, 51, 55, 7, 0, 1, 51, 129, 36, 23, 51, 55, 7, 23, 51, 48, 7, 23, 51, 48, 7, 72, 51, 50, 7, 72, 51, 55, 7, 91, 51, 50, 7, 91, 51, 50, 7, 91, 51, 55, 7, 70, 51, 50, 7, 78, 51, 55, 7, 78, 51, 55, 7, 78, 51, 185, 137, 90, 51, 48, 7, 90, 51, 48, 7, 90, 51, 48, 52, 27, 51,

Epoch: 155 | Time: 5m 40s
	Train Loss: 1.428 | Train PPL:   4.172
	 Val. Loss: 2.535 |  Val. PPL:  12.621
Epoch: 156 | Time: 5m 40s
	Train Loss: 1.425 | Train PPL:   4.158
	 Val. Loss: 2.572 |  Val. PPL:  13.087
Epoch: 157 | Time: 5m 41s
	Train Loss: 1.422 | Train PPL:   4.146
	 Val. Loss: 2.537 |  Val. PPL:  12.636
Epoch: 158 | Time: 5m 41s
	Train Loss: 1.418 | Train PPL:   4.130
	 Val. Loss: 2.553 |  Val. PPL:  12.851
Epoch: 159 | Time: 5m 41s
	Train Loss: 1.416 | Train PPL:   4.119
	 Val. Loss: 2.568 |  Val. PPL:  13.043
Epoch: 160 | Time: 5m 41s
	Train Loss: 1.412 | Train PPL:   4.106
	 Val. Loss: 2.579 |  Val. PPL:  13.190
=> Saving checkpoint
Epoch: 161 | Time: 5m 41s
	Train Loss: 1.409 | Train PPL:   4.091
	 Val. Loss: 2.548 |  Val. PPL:  12.778
=> Saving checkpoint
Epoch: 162 | Time: 5m 40s
	Train Loss: 1.405 | Train PPL:   4.077
	 Val. Loss: 2.575 |  Val. PPL:  13.137
Epoch: 163 | Time: 5m 40s
	Train Loss: 1.403 | Train PPL:   4.067
	 Val. Loss: 2.549 |  Val. PPL:  12.792
Epoc

[0, 1, 2, 69, 1, 51, 55, 15, 67, 54, 33, 15, 23, 51, 50, 15, 23, 51, 57, 15, 23, 51, 55, 7, 23, 51, 33, 7, 23, 51, 50, 15, 23, 51, 57, 7, 91, 51, 57, 7, 91, 51, 129, 36, 13, 2, 57, 7, 70, 65, 33, 7, 78, 51, 57, 7, 78, 51, 57, 7, 78, 51, 57, 7, 78, 51, 57, 7, 78, 51, 57, 7, 78, 64, 57, 7, 78, 51, 57, 7, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 33, 7, 78, 51, 57, 7, 78, 51, 57, 15, 78, 51, 57, 7, 90, 51, 57, 15, 74, 51, 33, 15, 74, 54, 57, 15, 74, 35, 129, 28, 74, 51, 57, 15, 0, 1, 51, 57, 15, 23, 51, 57, 15, 23, 51, 57, 7, 23, 51, 57, 7, 23, 54, 57, 7, 23, 51, 57, 31, 23, 51, 57, 15, 23, 51, 57, 7, 91, 51, 21, 7, 91, 51, 21, 7, 91, 51, 21, 7, 78, 51, 21, 15, 78, 54, 21, 7, 17, 2, 69, 1, 54, 21, 7, 23, 51, 21, 31, 23, 51, 21, 15, 23, 51, 21, 7, 23, 54, 21, 31, 23, 51, 21, 15, 91, 51, 21, 15, 91, 51, 21, 7, 78, 51, 21, 15, 78, 51, 21, 7, 78, 51, 21, 31, 17, 51, 21, 7, 74, 35, 21, 7, 74, 51, 21, 15, 74, 51, 21, 15, 0, 1, 2, 69, 23, 51, 21, 15, 23, 51, 21, 7, 23, 51, 21, 15, 23, 51, 21, 7, 2

[0, 1, 2, 146, 67, 51, 100, 15, 67, 51, 30, 15, 67, 51, 48, 15, 23, 51, 128, 37, 23, 51, 55, 37, 72, 51, 48, 7, 72, 51, 48, 7, 91, 51, 6, 7, 70, 51, 55, 7, 70, 51, 48, 7, 78, 51, 48, 7, 78, 51, 58, 7, 78, 51, 128, 7, 90, 51, 55, 7, 90, 51, 48, 7, 74, 51, 48, 15, 74, 51, 6, 7, 0, 1, 51, 185, 36, 23, 51, 48, 19, 23, 51, 48, 7, 23, 51, 6, 7, 23, 51, 6, 7, 72, 51, 48, 19, 91, 51, 6, 19, 91, 51, 6, 19, 91, 51, 128, 19, 91, 51, 55, 19, 78, 51, 48, 19, 78, 51, 48, 19, 78, 51, 6, 19, 90, 51, 48, 19, 74, 51, 6, 19, 74, 51, 48, 19, 74, 51, 6, 19, 0, 67, 51, 185, 60, 23, 51, 48, 19, 23, 51, 48, 19, 23, 51, 6, 52, 23, 51, 55, 52, 72, 187, 128, 52, 91, 51, 48, 52, 91, 51, 48, 52, 91, 51, 6, 52, 78, 51, 128, 7, 78, 51, 55, 7, 78, 51, 48, 7, 78, 64, 48, 52, 90, 51, 6, 31, 74, 51, 55, 7, 0, 67, 51, 48, 7, 23, 51, 132, 36, 23, 51, 48, 19, 23, 51, 58, 19, 23, 51, 48, 19, 72, 51, 58, 19, 91, 51, 58, 19, 91, 51, 30, 19, 91, 51, 48, 19, 78, 51, 58, 19, 78, 51, 48, 19, 78, 51, 58, 19, 78, 51, 30, 19, 78, 51

Epoch: 207 | Time: 5m 41s
	Train Loss: 1.276 | Train PPL:   3.583
	 Val. Loss: 2.742 |  Val. PPL:  15.521
Epoch: 208 | Time: 5m 40s
	Train Loss: 1.273 | Train PPL:   3.571
	 Val. Loss: 2.752 |  Val. PPL:  15.680
Epoch: 209 | Time: 5m 41s
	Train Loss: 1.271 | Train PPL:   3.565
	 Val. Loss: 2.761 |  Val. PPL:  15.815
Epoch: 210 | Time: 5m 41s
	Train Loss: 1.268 | Train PPL:   3.555
	 Val. Loss: 2.776 |  Val. PPL:  16.061
Epoch: 211 | Time: 5m 40s
	Train Loss: 1.266 | Train PPL:   3.545
	 Val. Loss: 2.772 |  Val. PPL:  15.988
Epoch: 212 | Time: 5m 40s
	Train Loss: 1.263 | Train PPL:   3.535
	 Val. Loss: 2.776 |  Val. PPL:  16.047
Epoch: 213 | Time: 5m 41s
	Train Loss: 1.260 | Train PPL:   3.525
	 Val. Loss: 2.773 |  Val. PPL:  16.007
Epoch: 214 | Time: 5m 41s
	Train Loss: 1.257 | Train PPL:   3.516
	 Val. Loss: 2.777 |  Val. PPL:  16.063
Epoch: 215 | Time: 5m 41s
	Train Loss: 1.254 | Train PPL:   3.505
	 Val. Loss: 2.757 |  Val. PPL:  15.757
Epoch: 216 | Time: 5m 40s
	Train Loss: 1.252 |

[0, 1, 2, 175, 1, 54, 55, 15, 4, 54, 33, 15, 23, 54, 50, 15, 8, 51, 55, 15, 72, 51, 33, 15, 10, 54, 50, 15, 91, 51, 57, 15, 91, 35, 129, 28, 70, 54, 33, 15, 16, 51, 50, 28, 78, 54, 55, 15, 78, 53, 33, 7, 17, 51, 50, 7, 90, 51, 57, 15, 27, 51, 57, 7, 74, 51, 57, 7, 74, 51, 33, 7, 0, 1, 51, 57, 7, 4, 54, 129, 28, 23, 35, 33, 31, 23, 51, 50, 31, 23, 51, 57, 7, 23, 51, 57, 31, 72, 54, 33, 7, 91, 51, 57, 7, 91, 51, 57, 7, 91, 51, 33, 7, 70, 54, 50, 7, 70, 54, 57, 7, 16, 51, 57, 7, 78, 51, 57, 15, 78, 51, 57, 15, 78, 51, 57, 7, 78, 51, 57, 7, 78, 51, 33, 7, 90, 51, 50, 7, 74, 51, 57, 15, 74, 51, 57, 7, 0, 1, 51, 57, 15, 67, 51, 129, 28, 23, 51, 33, 7, 23, 51, 50, 7, 23, 51, 57, 7, 23, 51, 57, 7, 23, 51, 57, 7, 91, 51, 57, 7, 91, 51, 57, 7, 91, 51, 21, 31, 13, 64, 21, 31, 70, 53, 33, 7, 78, 64, 57, 31, 78, 51, 57, 31, 78, 51, 57, 7, 78, 51, 57, 7, 78, 51, 21, 7, 17, 2, 190, 90, 51, 21, 15, 74, 54, 57, 15, 74, 54, 33, 15, 0, 1, 54, 57, 15, 67, 53, 33, 28, 4, 51, 6, 15, 23, 64, 57, 31, 23, 54, 

[0, 1, 2, 147, 67, 51, 55, 15, 23, 64, 48, 7, 23, 51, 6, 7, 23, 51, 128, 36, 72, 51, 48, 52, 91, 51, 6, 7, 91, 51, 55, 7, 91, 51, 48, 7, 91, 51, 48, 7, 91, 51, 6, 19, 78, 51, 185, 125, 78, 51, 48, 7, 78, 51, 6, 7, 78, 51, 6, 7, 78, 51, 48, 7, 78, 51, 6, 31, 90, 51, 6, 7, 90, 51, 128, 103, 90, 51, 48, 7, 74, 51, 6, 15, 74, 51, 6, 7, 0, 67, 51, 128, 36, 23, 51, 48, 52, 23, 51, 6, 52, 72, 51, 6, 52, 91, 51, 6, 52, 91, 51, 128, 52, 91, 51, 48, 52, 91, 51, 6, 52, 91, 51, 128, 52, 91, 51, 48, 31, 91, 51, 6, 31, 78, 51, 48, 31, 78, 51, 6, 31, 78, 51, 128, 94, 78, 51, 48, 31, 78, 51, 6, 31, 90, 51, 55, 31, 27, 51, 48, 31, 74, 51, 6, 7, 74, 51, 128, 22, 0, 23, 51, 55, 31, 23, 51, 48, 31, 72, 51, 132, 112, 91, 51, 48, 52, 91, 51, 128, 52, 91, 51, 100, 31, 91, 51, 48, 52, 78, 51, 58, 52, 78, 51, 128, 52, 78, 51, 128, 52, 78, 11, 48, 52, 78, 51, 48, 52, 90, 51, 55, 52, 74, 51, 48, 52, 0, 67, 51, 200, 102, 67, 51, 100, 31, 23, 51, 30, 52, 23, 51, 48, 52, 72, 51, 100, 52, 72, 51, 48, 52, 72, 51, 100

Epoch: 260 | Time: 5m 40s
	Train Loss: 1.135 | Train PPL:   3.110
	 Val. Loss: 2.979 |  Val. PPL:  19.671
=> Saving checkpoint
Epoch: 261 | Time: 5m 40s
	Train Loss: 1.132 | Train PPL:   3.102
	 Val. Loss: 2.925 |  Val. PPL:  18.627
=> Saving checkpoint
Epoch: 262 | Time: 5m 41s
	Train Loss: 1.130 | Train PPL:   3.095
	 Val. Loss: 2.955 |  Val. PPL:  19.208
Epoch: 263 | Time: 5m 40s
	Train Loss: 1.127 | Train PPL:   3.086
	 Val. Loss: 2.972 |  Val. PPL:  19.525
Epoch: 264 | Time: 5m 41s
	Train Loss: 1.124 | Train PPL:   3.079
	 Val. Loss: 3.018 |  Val. PPL:  20.446
Epoch: 265 | Time: 5m 40s
	Train Loss: 1.122 | Train PPL:   3.070
	 Val. Loss: 3.006 |  Val. PPL:  20.208
Epoch: 266 | Time: 5m 41s
	Train Loss: 1.119 | Train PPL:   3.062
	 Val. Loss: 2.979 |  Val. PPL:  19.671
Epoch: 267 | Time: 5m 41s
	Train Loss: 1.116 | Train PPL:   3.054
	 Val. Loss: 2.979 |  Val. PPL:  19.677
Epoch: 268 | Time: 5m 41s
	Train Loss: 1.114 | Train PPL:   3.045
	 Val. Loss: 3.020 |  Val. PPL:  20.499
Epoc

[0, 1, 2, 162, 1, 54, 111, 15, 67, 54, 30, 15, 4, 51, 33, 15, 23, 54, 50, 15, 23, 54, 57, 15, 8, 51, 21, 15, 10, 51, 21, 15, 10, 51, 21, 15, 91, 51, 21, 15, 91, 54, 21, 15, 91, 54, 21, 15, 91, 51, 21, 15, 91, 51, 21, 15, 70, 35, 129, 28, 16, 51, 33, 28, 78, 51, 57, 28, 90, 54, 50, 15, 74, 54, 57, 15, 0, 1, 54, 21, 7, 4, 54, 122, 28, 23, 54, 57, 15, 23, 51, 57, 7, 23, 54, 57, 7, 72, 54, 33, 7, 91, 51, 57, 7, 91, 51, 21, 7, 13, 51, 21, 7, 70, 54, 55, 7, 70, 54, 21, 7, 16, 51, 21, 31, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 7, 90, 51, 21, 15, 90, 51, 33, 15, 74, 35, 92, 28, 0, 1, 51, 50, 31, 67, 35, 57, 7, 23, 51, 21, 15, 23, 54, 21, 7, 23, 54, 21, 7, 23, 54, 33, 7, 23, 51, 21, 7, 23, 51, 21, 15, 23, 54, 21, 31, 72, 54, 21, 31, 10, 51, 21, 31, 91, 51, 21, 7, 91, 51, 21, 31, 91, 51, 21, 31, 91, 51, 21, 15, 91, 51, 21, 7, 70, 35, 21, 15, 70, 51, 21, 7, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 15, 78, 51, 21, 15, 17, 54, 21, 15, 90, 53, 

[0, 1, 2, 147, 23, 51, 128, 7, 23, 54, 128, 7, 23, 51, 55, 7, 72, 63, 48, 52, 72, 54, 128, 7, 91, 54, 48, 7, 91, 51, 100, 7, 91, 51, 48, 31, 91, 51, 55, 15, 91, 51, 48, 31, 78, 51, 200, 125, 78, 35, 128, 7, 78, 51, 100, 19, 78, 51, 30, 19, 78, 54, 128, 7, 78, 54, 100, 31, 90, 54, 48, 52, 74, 54, 100, 52, 0, 23, 54, 132, 47, 23, 35, 128, 52, 72, 54, 100, 7, 91, 51, 128, 22, 91, 51, 200, 137, 91, 51, 100, 52, 91, 54, 128, 31, 78, 51, 100, 19, 78, 51, 128, 52, 78, 51, 100, 52, 78, 51, 30, 31, 78, 51, 128, 31, 90, 35, 100, 31, 74, 51, 48, 31, 0, 23, 54, 128, 7, 23, 51, 100, 31, 23, 51, 30, 31, 72, 51, 48, 31, 91, 51, 200, 60, 91, 51, 128, 31, 91, 51, 100, 41, 91, 51, 30, 19, 78, 51, 100, 15, 78, 51, 128, 15, 78, 51, 100, 15, 78, 51, 30, 15, 90, 51, 48, 15, 74, 51, 185, 137, 0, 67, 51, 128, 52, 23, 51, 111, 19, 23, 51, 128, 19, 23, 51, 128, 7, 23, 51, 111, 52, 72, 51, 55, 31, 91, 51, 48, 52, 91, 51, 128, 15, 91, 51, 128, 31, 91, 51, 200, 94, 91, 51, 100, 41, 78, 51, 128, 19, 78, 51, 100, 19

Epoch: 311 | Time: 5m 40s
	Train Loss: 0.996 | Train PPL:   2.707
	 Val. Loss: 3.258 |  Val. PPL:  26.009
Epoch: 312 | Time: 5m 41s
	Train Loss: 0.994 | Train PPL:   2.701
	 Val. Loss: 3.287 |  Val. PPL:  26.752
Epoch: 313 | Time: 5m 40s
	Train Loss: 0.990 | Train PPL:   2.692
	 Val. Loss: 3.270 |  Val. PPL:  26.323
Epoch: 314 | Time: 5m 41s
	Train Loss: 0.989 | Train PPL:   2.687
	 Val. Loss: 3.290 |  Val. PPL:  26.839
Epoch: 315 | Time: 5m 41s
	Train Loss: 0.985 | Train PPL:   2.679
	 Val. Loss: 3.310 |  Val. PPL:  27.393
Epoch: 316 | Time: 5m 41s
	Train Loss: 0.983 | Train PPL:   2.671
	 Val. Loss: 3.286 |  Val. PPL:  26.729
Epoch: 317 | Time: 5m 40s
	Train Loss: 0.980 | Train PPL:   2.664
	 Val. Loss: 3.270 |  Val. PPL:  26.305
Epoch: 318 | Time: 5m 41s
	Train Loss: 0.977 | Train PPL:   2.656
	 Val. Loss: 3.328 |  Val. PPL:  27.896
Epoch: 319 | Time: 5m 41s
	Train Loss: 0.975 | Train PPL:   2.650
	 Val. Loss: 3.276 |  Val. PPL:  26.466
Epoch: 320 | Time: 5m 41s
	Train Loss: 0.972 |

[0, 1, 2, 175, 67, 54, 30, 15, 4, 64, 50, 15, 23, 32, 9, 15, 8, 54, 149, 28, 8, 32, 30, 15, 72, 123, 48, 52, 10, 54, 6, 15, 91, 53, 111, 66, 91, 54, 30, 15, 13, 54, 6, 7, 70, 54, 30, 15, 70, 54, 48, 7, 16, 54, 6, 7, 78, 64, 57, 15, 78, 54, 33, 28, 78, 54, 50, 7, 90, 54, 33, 7, 74, 54, 6, 7, 74, 51, 57, 28, 0, 1, 54, 33, 15, 4, 54, 6, 15, 23, 51, 57, 15, 23, 35, 33, 31, 23, 54, 6, 31, 23, 64, 57, 7, 72, 51, 21, 15, 91, 54, 33, 15, 91, 64, 6, 15, 70, 45, 57, 7, 16, 54, 33, 7, 78, 51, 57, 7, 78, 64, 21, 15, 17, 51, 50, 7, 90, 54, 57, 7, 74, 54, 21, 7, 74, 54, 21, 31, 0, 1, 54, 33, 15, 67, 53, 6, 15, 4, 32, 57, 31, 23, 51, 21, 31, 23, 51, 21, 31, 23, 54, 6, 15, 72, 64, 21, 7, 72, 54, 33, 7, 10, 51, 57, 7, 91, 51, 21, 7, 91, 51, 21, 7, 91, 54, 21, 15, 91, 51, 21, 15, 70, 49, 50, 7, 78, 64, 21, 15, 78, 51, 21, 7, 78, 51, 50, 15, 78, 51, 12, 7, 78, 51, 21, 15, 17, 123, 33, 34, 90, 123, 50, 31, 74, 51, 57, 31, 74, 54, 33, 31, 74, 54, 57, 31, 74, 54, 21, 31, 0, 67, 54, 149, 28, 23, 51, 6, 52, 2

[0, 1, 2, 147, 23, 51, 128, 7, 23, 54, 128, 7, 23, 51, 55, 7, 72, 63, 48, 7, 72, 54, 128, 15, 91, 51, 48, 7, 91, 51, 55, 7, 91, 51, 48, 15, 91, 51, 55, 15, 91, 51, 48, 15, 78, 51, 185, 77, 78, 35, 128, 7, 78, 51, 111, 15, 78, 54, 55, 15, 78, 51, 48, 7, 78, 51, 128, 31, 90, 49, 128, 7, 90, 54, 111, 41, 74, 51, 55, 31, 74, 51, 48, 15, 74, 51, 6, 7, 0, 67, 49, 128, 22, 23, 51, 185, 56, 23, 51, 55, 19, 23, 49, 128, 52, 72, 51, 48, 19, 91, 54, 128, 52, 91, 51, 111, 52, 91, 51, 55, 31, 91, 54, 128, 15, 91, 51, 48, 31, 70, 49, 128, 22, 78, 51, 55, 15, 78, 51, 48, 31, 78, 51, 128, 7, 78, 54, 128, 15, 78, 51, 111, 15, 78, 51, 55, 7, 90, 54, 185, 47, 74, 54, 128, 19, 74, 51, 111, 19, 74, 51, 55, 19, 0, 67, 54, 128, 19, 23, 51, 111, 31, 23, 51, 55, 31, 23, 51, 185, 47, 72, 49, 128, 31, 91, 51, 111, 31, 91, 51, 55, 31, 91, 51, 48, 31, 91, 51, 128, 15, 91, 51, 111, 7, 91, 51, 55, 7, 78, 51, 48, 7, 78, 51, 128, 15, 17, 51, 111, 7, 90, 51, 55, 22, 74, 51, 48, 22, 74, 51, 128, 66, 74, 51, 48, 66, 74, 

Epoch: 360 | Time: 5m 41s
	Train Loss: 0.868 | Train PPL:   2.381
	 Val. Loss: 3.563 |  Val. PPL:  35.280
=> Saving checkpoint
Epoch: 361 | Time: 5m 41s
	Train Loss: 0.865 | Train PPL:   2.374
	 Val. Loss: 3.560 |  Val. PPL:  35.179
=> Saving checkpoint
Epoch: 362 | Time: 5m 42s
	Train Loss: 0.862 | Train PPL:   2.369
	 Val. Loss: 3.548 |  Val. PPL:  34.735
Epoch: 363 | Time: 5m 42s
	Train Loss: 0.860 | Train PPL:   2.362
	 Val. Loss: 3.619 |  Val. PPL:  37.295
Epoch: 364 | Time: 5m 41s
	Train Loss: 0.857 | Train PPL:   2.357
	 Val. Loss: 3.606 |  Val. PPL:  36.830
Epoch: 365 | Time: 5m 41s
	Train Loss: 0.855 | Train PPL:   2.351
	 Val. Loss: 3.606 |  Val. PPL:  36.826
Epoch: 366 | Time: 5m 41s
	Train Loss: 0.852 | Train PPL:   2.345
	 Val. Loss: 3.597 |  Val. PPL:  36.505
Epoch: 367 | Time: 5m 42s
	Train Loss: 0.849 | Train PPL:   2.338
	 Val. Loss: 3.598 |  Val. PPL:  36.537
Epoch: 368 | Time: 5m 42s
	Train Loss: 0.848 | Train PPL:   2.335
	 Val. Loss: 3.609 |  Val. PPL:  36.931
Epoc

[0, 1, 2, 162, 1, 54, 33, 7, 67, 54, 50, 15, 4, 51, 9, 15, 23, 54, 33, 15, 8, 32, 50, 15, 8, 51, 57, 7, 10, 54, 33, 15, 10, 51, 50, 15, 91, 35, 57, 7, 13, 54, 21, 28, 70, 54, 33, 7, 16, 54, 50, 7, 78, 51, 57, 7, 78, 54, 33, 15, 17, 51, 33, 28, 74, 51, 50, 15, 0, 1, 51, 33, 7, 67, 51, 50, 7, 4, 54, 57, 52, 8, 51, 33, 15, 8, 54, 92, 36, 10, 51, 33, 31, 10, 54, 33, 31, 91, 54, 50, 31, 13, 64, 50, 7, 70, 35, 33, 31, 16, 54, 50, 15, 78, 64, 33, 7, 78, 35, 50, 15, 17, 51, 57, 34, 27, 51, 33, 15, 27, 51, 50, 15, 0, 1, 64, 33, 15, 67, 51, 50, 7, 4, 53, 57, 7, 23, 51, 21, 41, 8, 51, 33, 15, 8, 51, 33, 15, 10, 64, 50, 15, 10, 51, 57, 7, 13, 64, 33, 31, 13, 51, 50, 7, 70, 54, 57, 7, 16, 51, 33, 15, 78, 51, 50, 15, 78, 51, 57, 15, 17, 54, 33, 15, 17, 51, 50, 15, 27, 51, 57, 7, 27, 51, 33, 7, 74, 64, 21, 15, 0, 1, 64, 21, 7, 1, 51, 33, 15, 4, 54, 50, 15, 8, 54, 57, 7, 8, 64, 21, 31, 10, 51, 33, 15, 10, 51, 50, 15, 13, 51, 33, 7, 13, 51, 57, 7, 13, 51, 21, 15, 16, 51, 33, 7, 78, 51, 50, 7, 78, 51, 3

[0, 1, 2, 147, 23, 51, 128, 15, 23, 54, 48, 7, 23, 51, 6, 7, 72, 63, 128, 77, 72, 54, 48, 7, 91, 54, 6, 7, 91, 51, 55, 7, 91, 51, 48, 15, 91, 51, 48, 15, 91, 51, 6, 15, 78, 51, 185, 36, 78, 35, 128, 52, 78, 51, 55, 31, 78, 54, 128, 31, 78, 51, 111, 52, 78, 54, 55, 31, 90, 53, 48, 7, 74, 51, 6, 31, 74, 35, 55, 31, 0, 23, 35, 128, 7, 72, 54, 48, 15, 91, 51, 128, 28, 91, 51, 6, 52, 91, 51, 55, 7, 91, 54, 48, 7, 91, 51, 6, 7, 70, 54, 128, 7, 78, 51, 48, 7, 78, 51, 6, 15, 78, 51, 55, 15, 78, 51, 185, 66, 78, 54, 48, 15, 78, 51, 6, 19, 74, 51, 128, 19, 74, 51, 55, 7, 74, 54, 48, 31, 0, 23, 51, 128, 22, 23, 51, 111, 15, 23, 51, 55, 15, 72, 49, 128, 19, 91, 64, 48, 31, 91, 51, 128, 15, 91, 51, 200, 42, 70, 35, 128, 15, 78, 51, 100, 31, 78, 51, 75, 52, 78, 51, 48, 19, 78, 51, 58, 19, 74, 51, 128, 19, 74, 35, 100, 7, 74, 54, 30, 19, 74, 35, 128, 7, 0, 67, 54, 100, 7, 23, 54, 128, 7, 23, 35, 100, 31, 23, 51, 200, 47, 23, 51, 128, 41, 91, 51, 100, 19, 91, 51, 30, 41, 91, 51, 48, 19, 78, 54, 128, 1

Epoch: 401 | Time: 5m 41s
	Train Loss: 0.771 | Train PPL:   2.161
	 Val. Loss: 3.873 |  Val. PPL:  48.086
=> Saving checkpoint
Epoch: 402 | Time: 5m 41s
	Train Loss: 0.769 | Train PPL:   2.158
	 Val. Loss: 3.876 |  Val. PPL:  48.220
Epoch: 403 | Time: 5m 41s
	Train Loss: 0.767 | Train PPL:   2.154
	 Val. Loss: 3.848 |  Val. PPL:  46.920
Epoch: 404 | Time: 5m 41s
	Train Loss: 0.764 | Train PPL:   2.147
	 Val. Loss: 3.894 |  Val. PPL:  49.083
Epoch: 405 | Time: 5m 41s
	Train Loss: 0.763 | Train PPL:   2.144
	 Val. Loss: 3.907 |  Val. PPL:  49.740
Epoch: 406 | Time: 5m 41s
	Train Loss: 0.761 | Train PPL:   2.140
	 Val. Loss: 3.891 |  Val. PPL:  48.967
Epoch: 407 | Time: 5m 42s
	Train Loss: 0.759 | Train PPL:   2.135
	 Val. Loss: 3.936 |  Val. PPL:  51.220
Epoch: 408 | Time: 5m 41s
	Train Loss: 0.756 | Train PPL:   2.130
	 Val. Loss: 3.906 |  Val. PPL:  49.695
Epoch: 409 | Time: 5m 41s
	Train Loss: 0.755 | Train PPL:   2.127
	 Val. Loss: 3.868 |  Val. PPL:  47.848
Epoch: 410 | Time: 5m 41s

[0, 1, 2, 162, 1, 54, 33, 7, 67, 54, 50, 15, 4, 51, 57, 15, 23, 54, 122, 28, 8, 32, 33, 15, 72, 32, 57, 7, 10, 64, 30, 15, 91, 35, 50, 7, 91, 35, 30, 15, 13, 54, 50, 15, 16, 54, 33, 7, 78, 54, 50, 7, 78, 51, 9, 7, 78, 64, 33, 15, 17, 51, 33, 28, 90, 54, 50, 41, 74, 51, 33, 7, 0, 1, 64, 33, 7, 1, 51, 50, 15, 67, 54, 57, 15, 4, 51, 33, 7, 23, 51, 111, 28, 8, 35, 33, 31, 8, 54, 33, 31, 10, 64, 6, 52, 10, 51, 33, 31, 91, 51, 6, 31, 13, 54, 33, 52, 70, 54, 6, 7, 16, 51, 57, 7, 78, 51, 33, 7, 17, 54, 50, 15, 17, 51, 33, 7, 90, 54, 92, 36, 27, 54, 33, 41, 74, 54, 50, 31, 0, 1, 54, 29, 15, 67, 54, 33, 15, 4, 51, 33, 7, 23, 51, 50, 7, 8, 51, 9, 7, 10, 51, 29, 15, 91, 54, 33, 15, 13, 51, 50, 7, 70, 53, 57, 15, 16, 51, 92, 28, 78, 51, 33, 31, 78, 49, 50, 31, 17, 51, 9, 31, 90, 54, 33, 7, 27, 51, 50, 15, 74, 35, 33, 7, 0, 1, 51, 6, 7, 67, 54, 33, 7, 4, 53, 50, 31, 23, 51, 57, 52, 8, 54, 33, 15, 10, 54, 50, 22, 10, 51, 33, 36, 13, 35, 55, 7, 70, 54, 33, 15, 16, 54, 50, 7, 78, 35, 33, 7, 78, 54, 50,

[0, 1, 2, 147, 23, 51, 128, 15, 23, 64, 48, 31, 23, 51, 128, 7, 72, 49, 128, 7, 72, 54, 100, 15, 72, 54, 48, 7, 91, 51, 55, 7, 91, 54, 128, 66, 91, 54, 48, 15, 70, 54, 55, 31, 78, 51, 185, 36, 78, 54, 48, 52, 78, 51, 128, 31, 78, 54, 128, 15, 78, 51, 111, 52, 90, 54, 55, 31, 74, 51, 48, 7, 0, 72, 51, 128, 15, 91, 51, 185, 77, 91, 51, 128, 19, 91, 64, 55, 19, 91, 51, 128, 22, 78, 51, 128, 7, 78, 51, 111, 31, 78, 49, 128, 7, 90, 51, 55, 31, 74, 54, 128, 31, 0, 67, 49, 128, 19, 23, 32, 100, 52, 23, 51, 48, 19, 72, 35, 128, 22, 91, 49, 128, 15, 91, 35, 185, 47, 91, 51, 111, 31, 91, 51, 55, 31, 78, 35, 128, 31, 78, 51, 128, 15, 78, 51, 111, 15, 78, 51, 185, 36, 78, 35, 128, 31, 78, 35, 128, 52, 90, 49, 111, 15, 74, 51, 55, 19, 74, 51, 128, 15, 0, 67, 51, 128, 41, 23, 35, 111, 19, 23, 51, 185, 36, 72, 49, 128, 19, 91, 51, 111, 15, 91, 51, 55, 19, 91, 54, 128, 7, 91, 51, 111, 15, 91, 51, 55, 7, 78, 54, 185, 7, 78, 35, 128, 31, 78, 51, 200, 47, 78, 35, 128, 19, 78, 51, 100, 19, 90, 35, 185, 37

Epoch: 454 | Time: 5m 40s
	Train Loss: 0.668 | Train PPL:   1.951
	 Val. Loss: 4.163 |  Val. PPL:  64.244
Epoch: 455 | Time: 5m 41s
	Train Loss: 0.667 | Train PPL:   1.949
	 Val. Loss: 4.194 |  Val. PPL:  66.306
Epoch: 456 | Time: 5m 41s
	Train Loss: 0.666 | Train PPL:   1.946
	 Val. Loss: 4.208 |  Val. PPL:  67.233
Epoch: 457 | Time: 5m 40s
	Train Loss: 0.663 | Train PPL:   1.940
	 Val. Loss: 4.206 |  Val. PPL:  67.078
Epoch: 458 | Time: 5m 41s
	Train Loss: 0.661 | Train PPL:   1.937
	 Val. Loss: 4.191 |  Val. PPL:  66.081
Epoch: 459 | Time: 5m 41s
	Train Loss: 0.659 | Train PPL:   1.933
	 Val. Loss: 4.226 |  Val. PPL:  68.475
Epoch: 460 | Time: 5m 41s
	Train Loss: 0.658 | Train PPL:   1.931
	 Val. Loss: 4.197 |  Val. PPL:  66.513
=> Saving checkpoint
Epoch: 461 | Time: 5m 41s
	Train Loss: 0.657 | Train PPL:   1.928
	 Val. Loss: 4.213 |  Val. PPL:  67.579
=> Saving checkpoint
Epoch: 462 | Time: 5m 41s
	Train Loss: 0.655 | Train PPL:   1.925
	 Val. Loss: 4.202 |  Val. PPL:  66.849
Epoc

[0, 1, 2, 159, 1, 32, 55, 15, 67, 54, 33, 15, 4, 51, 50, 7, 23, 51, 149, 66, 8, 54, 30, 7, 72, 32, 6, 7, 91, 51, 30, 15, 91, 11, 30, 15, 91, 51, 48, 7, 91, 51, 30, 15, 13, 32, 6, 15, 16, 51, 30, 15, 78, 51, 149, 28, 78, 64, 30, 15, 78, 54, 26, 41, 78, 51, 30, 31, 90, 32, 6, 52, 74, 51, 30, 41, 74, 51, 30, 31, 0, 1, 51, 26, 7, 23, 51, 6, 31, 23, 51, 30, 22, 8, 54, 30, 7, 72, 51, 26, 7, 10, 64, 6, 7, 91, 51, 30, 15, 91, 32, 48, 7, 91, 64, 6, 7, 70, 38, 12, 7, 16, 64, 30, 7, 78, 51, 6, 7, 78, 64, 12, 31, 17, 54, 30, 22, 78, 51, 50, 7, 78, 64, 9, 7, 90, 54, 30, 15, 27, 51, 33, 15, 74, 51, 50, 7, 74, 51, 9, 15, 74, 51, 30, 15, 0, 1, 54, 50, 15, 23, 51, 33, 15, 8, 51, 57, 7, 8, 51, 50, 15, 8, 51, 57, 7, 72, 214, 55, 15, 10, 51, 33, 15, 91, 51, 50, 15, 91, 51, 57, 7, 91, 51, 33, 7, 13, 38, 6, 15, 70, 35, 33, 7, 16, 54, 50, 22, 78, 51, 55, 15, 78, 64, 33, 15, 17, 38, 6, 7, 90, 11, 33, 22, 27, 51, 50, 7, 74, 51, 55, 7, 0, 1, 38, 33, 15, 67, 53, 50, 15, 4, 54, 6, 15, 23, 51, 33, 7, 23, 51, 55, 7

[0, 1, 2, 147, 67, 54, 55, 52, 23, 54, 48, 41, 23, 51, 128, 7, 72, 49, 128, 7, 72, 54, 55, 31, 72, 54, 48, 7, 91, 54, 55, 7, 91, 51, 48, 15, 91, 51, 48, 15, 70, 54, 6, 66, 78, 53, 128, 52, 78, 35, 48, 7, 78, 51, 55, 15, 78, 64, 128, 31, 78, 64, 48, 52, 90, 51, 55, 19, 74, 51, 6, 52, 0, 72, 51, 128, 7, 91, 51, 185, 77, 91, 51, 55, 31, 91, 64, 48, 31, 78, 51, 128, 66, 78, 54, 111, 52, 78, 51, 55, 52, 90, 53, 128, 7, 74, 51, 48, 15, 74, 54, 128, 52, 74, 51, 55, 52, 0, 67, 49, 128, 15, 23, 51, 48, 41, 23, 32, 185, 60, 23, 54, 128, 31, 23, 54, 111, 19, 91, 51, 128, 94, 91, 51, 55, 15, 78, 54, 48, 19, 78, 51, 128, 19, 78, 54, 111, 31, 78, 51, 185, 36, 78, 35, 128, 19, 78, 54, 111, 52, 90, 49, 128, 52, 74, 51, 55, 19, 0, 23, 51, 128, 15, 23, 51, 111, 52, 23, 35, 128, 31, 23, 51, 55, 52, 72, 49, 185, 103, 72, 35, 128, 31, 91, 51, 111, 19, 91, 51, 128, 52, 91, 51, 55, 52, 70, 54, 128, 52, 78, 54, 111, 52, 78, 35, 128, 7, 78, 51, 55, 52, 78, 54, 48, 19, 74, 51, 111, 19, 74, 51, 55, 19, 0, 23, 51

Epoch: 523 | Time: 5m 40s
	Train Loss: 0.566 | Train PPL:   1.760
	 Val. Loss: 4.570 |  Val. PPL:  96.551
Epoch: 524 | Time: 5m 40s
	Train Loss: 0.565 | Train PPL:   1.759
	 Val. Loss: 4.626 |  Val. PPL: 102.115
Epoch: 525 | Time: 5m 40s
	Train Loss: 0.563 | Train PPL:   1.756
	 Val. Loss: 4.553 |  Val. PPL:  94.887
[0, 1, 2, 130, 67, 32, 127, 15, 4, 32, 199, 28, 23, 71, 132, 41, 23, 38, 127, 41, 8, 45, 127, 31, 72, 38, 100, 41, 10, 49, 132, 85, 91, 71, 127, 7, 13, 123, 132, 15, 70, 53, 127, 31, 16, 32, 100, 31, 78, 53, 199, 37, 90, 38, 127, 52, 74, 32, 100, 52, 0, 1, 63, 132, 28, 67, 63, 127, 15, 4, 54, 127, 31, 23, 32, 100, 19, 8, 35, 127, 15, 72, 32, 127, 15, 10, 35, 185, 66, 91, 45, 104, 52, 70, 49, 55, 52, 78, 32, 48, 52, 78, 49, 129, 15, 17, 49, 128, 15, 90, 11, 104, 15, 27, 11, 55, 31, 74, 49, 185, 36, 0, 1, 49, 129, 31, 67, 63, 128, 31, 4, 63, 129, 19, 23, 54, 128, 31, 8, 35, 199, 28, 72, 32, 100, 7, 10, 63, 132, 7, 91, 32, 127, 41, 13, 32, 100, 7, 70, 32, 127, 15, 78, 35, 199,

Epoch: 526 | Time: 5m 40s
	Train Loss: 0.561 | Train PPL:   1.753
	 Val. Loss: 4.531 |  Val. PPL:  92.818
Epoch: 527 | Time: 5m 40s
	Train Loss: 0.560 | Train PPL:   1.752
	 Val. Loss: 4.566 |  Val. PPL:  96.115
Epoch: 528 | Time: 5m 41s
	Train Loss: 0.559 | Train PPL:   1.749
	 Val. Loss: 4.594 |  Val. PPL:  98.858
Epoch: 529 | Time: 5m 40s
	Train Loss: 0.559 | Train PPL:   1.748
	 Val. Loss: 4.575 |  Val. PPL:  97.031
Epoch: 530 | Time: 5m 40s
	Train Loss: 0.557 | Train PPL:   1.745
	 Val. Loss: 4.623 |  Val. PPL: 101.765
Epoch: 531 | Time: 5m 40s
	Train Loss: 0.556 | Train PPL:   1.743
	 Val. Loss: 4.605 |  Val. PPL:  99.990
Epoch: 532 | Time: 5m 41s
	Train Loss: 0.554 | Train PPL:   1.741
	 Val. Loss: 4.601 |  Val. PPL:  99.616
Epoch: 533 | Time: 5m 40s
	Train Loss: 0.553 | Train PPL:   1.738
	 Val. Loss: 4.622 |  Val. PPL: 101.646
Epoch: 534 | Time: 5m 41s
	Train Loss: 0.551 | Train PPL:   1.735
	 Val. Loss: 4.630 |  Val. PPL: 102.499
Epoch: 535 | Time: 5m 40s
	Train Loss: 0.550 |

[0, 1, 2, 159, 1, 32, 33, 7, 67, 64, 50, 15, 4, 51, 57, 7, 23, 53, 92, 28, 8, 54, 33, 7, 72, 53, 50, 7, 10, 54, 33, 15, 91, 51, 50, 15, 91, 51, 33, 7, 91, 54, 50, 31, 13, 54, 92, 42, 70, 53, 33, 7, 16, 51, 50, 7, 78, 64, 9, 15, 17, 53, 33, 41, 90, 51, 50, 31, 74, 51, 57, 52, 0, 1, 51, 33, 7, 67, 51, 129, 28, 4, 53, 33, 52, 4, 51, 50, 31, 23, 51, 57, 41, 8, 54, 33, 31, 72, 35, 50, 31, 10, 32, 57, 7, 91, 51, 33, 15, 13, 64, 57, 31, 16, 54, 33, 19, 16, 51, 50, 7, 78, 54, 57, 41, 17, 35, 92, 42, 27, 51, 33, 7, 74, 51, 50, 7, 74, 51, 9, 7, 0, 1, 54, 33, 7, 4, 54, 50, 7, 4, 54, 9, 31, 23, 54, 33, 15, 8, 51, 57, 31, 8, 54, 33, 15, 72, 35, 50, 15, 10, 32, 57, 15, 91, 54, 33, 15, 13, 64, 50, 15, 70, 35, 57, 15, 16, 51, 33, 15, 78, 51, 57, 15, 78, 49, 55, 7, 17, 54, 33, 7, 90, 51, 57, 7, 27, 51, 92, 36, 74, 35, 33, 7, 74, 51, 33, 31, 0, 1, 51, 50, 7, 67, 54, 9, 31, 4, 64, 33, 31, 4, 51, 33, 31, 8, 51, 50, 7, 72, 51, 33, 7, 10, 51, 50, 31, 91, 51, 55, 31, 91, 51, 33, 31, 13, 51, 50, 31, 70, 53, 5

[0, 1, 2, 147, 1, 54, 55, 15, 67, 54, 48, 7, 23, 51, 128, 7, 23, 64, 55, 7, 8, 54, 48, 7, 72, 51, 6, 7, 91, 54, 55, 7, 91, 51, 48, 15, 70, 54, 6, 7, 70, 54, 55, 31, 78, 51, 48, 7, 78, 54, 6, 7, 78, 51, 55, 19, 90, 51, 128, 31, 27, 51, 55, 52, 0, 1, 51, 48, 7, 23, 54, 128, 22, 23, 51, 55, 15, 23, 51, 48, 19, 91, 51, 6, 15, 91, 51, 55, 15, 91, 51, 48, 7, 91, 51, 128, 77, 70, 49, 55, 7, 70, 53, 48, 7, 78, 51, 6, 7, 78, 64, 55, 31, 78, 51, 48, 15, 90, 51, 6, 66, 74, 51, 55, 31, 74, 51, 48, 31, 0, 67, 49, 55, 15, 23, 54, 48, 15, 23, 51, 132, 60, 23, 54, 100, 19, 23, 51, 48, 41, 72, 51, 58, 41, 91, 51, 100, 31, 91, 51, 48, 7, 70, 49, 100, 41, 78, 64, 30, 15, 78, 51, 48, 15, 90, 51, 58, 19, 74, 51, 100, 15, 0, 67, 51, 100, 36, 23, 51, 128, 7, 23, 51, 55, 41, 23, 51, 185, 7, 91, 51, 128, 15, 91, 51, 111, 15, 91, 32, 55, 52, 70, 51, 48, 52, 78, 51, 128, 52, 78, 54, 185, 37, 78, 35, 111, 31, 78, 54, 55, 31, 78, 35, 128, 66, 74, 51, 55, 19, 74, 51, 48, 19, 0, 67, 51, 132, 36, 23, 51, 100, 7, 23, 

Epoch: 596 | Time: 5m 41s
	Train Loss: 0.484 | Train PPL:   1.622
	 Val. Loss: 4.925 |  Val. PPL: 137.627
Epoch: 597 | Time: 5m 41s
	Train Loss: 0.483 | Train PPL:   1.621
	 Val. Loss: 4.880 |  Val. PPL: 131.621
Epoch: 598 | Time: 5m 42s
	Train Loss: 0.482 | Train PPL:   1.619
	 Val. Loss: 4.912 |  Val. PPL: 135.846
Epoch: 599 | Time: 5m 42s
	Train Loss: 0.481 | Train PPL:   1.618
	 Val. Loss: 4.926 |  Val. PPL: 137.859
Epoch: 600 | Time: 5m 41s
	Train Loss: 0.480 | Train PPL:   1.617
	 Val. Loss: 4.912 |  Val. PPL: 135.977
=> Saving checkpoint
[0, 1, 2, 130, 67, 32, 127, 22, 4, 32, 199, 77, 23, 71, 132, 41, 23, 38, 127, 41, 8, 53, 132, 41, 72, 38, 127, 7, 10, 11, 122, 41, 91, 45, 132, 15, 70, 38, 127, 15, 78, 38, 93, 7, 78, 32, 100, 31, 17, 11, 46, 19, 90, 11, 199, 37, 74, 32, 132, 19, 0, 67, 38, 127, 19, 67, 38, 100, 31, 4, 32, 127, 15, 23, 32, 122, 7, 8, 35, 199, 28, 72, 32, 127, 31, 10, 64, 127, 31, 91, 32, 122, 34, 13, 49, 132, 31, 70, 123, 132, 31, 78, 49, 127, 19, 78, 38, 127, 1

Epoch: 601 | Time: 5m 42s
	Train Loss: 0.479 | Train PPL:   1.615
	 Val. Loss: 4.993 |  Val. PPL: 147.325
=> Saving checkpoint
Epoch: 602 | Time: 5m 41s
	Train Loss: 0.479 | Train PPL:   1.614
	 Val. Loss: 4.932 |  Val. PPL: 138.604
Epoch: 603 | Time: 5m 41s
	Train Loss: 0.478 | Train PPL:   1.613
	 Val. Loss: 4.959 |  Val. PPL: 142.456
Epoch: 604 | Time: 5m 42s
	Train Loss: 0.477 | Train PPL:   1.611
	 Val. Loss: 4.909 |  Val. PPL: 135.533
Epoch: 605 | Time: 5m 41s
	Train Loss: 0.477 | Train PPL:   1.610
	 Val. Loss: 4.881 |  Val. PPL: 131.782
Epoch: 606 | Time: 5m 42s
	Train Loss: 0.475 | Train PPL:   1.608
	 Val. Loss: 4.923 |  Val. PPL: 137.434
Epoch: 607 | Time: 5m 41s
	Train Loss: 0.474 | Train PPL:   1.607
	 Val. Loss: 4.943 |  Val. PPL: 140.130
Epoch: 608 | Time: 5m 41s
	Train Loss: 0.474 | Train PPL:   1.606
	 Val. Loss: 4.955 |  Val. PPL: 141.946
Epoch: 609 | Time: 5m 41s
	Train Loss: 0.473 | Train PPL:   1.604
	 Val. Loss: 4.999 |  Val. PPL: 148.203
Epoch: 610 | Time: 5m 41s

[0, 1, 2, 147, 1, 54, 33, 15, 67, 54, 57, 15, 4, 53, 50, 7, 23, 49, 9, 7, 8, 54, 21, 7, 72, 53, 50, 22, 10, 54, 57, 15, 91, 51, 30, 47, 91, 53, 50, 7, 91, 54, 57, 15, 13, 54, 21, 7, 70, 53, 50, 7, 16, 51, 6, 15, 78, 54, 57, 15, 78, 54, 50, 7, 78, 64, 30, 66, 90, 51, 6, 7, 27, 51, 30, 41, 74, 51, 48, 15, 0, 1, 54, 6, 7, 23, 51, 57, 15, 8, 51, 30, 7, 72, 64, 6, 7, 10, 64, 33, 31, 91, 51, 55, 22, 91, 51, 57, 15, 13, 64, 57, 15, 70, 64, 33, 22, 16, 51, 57, 22, 78, 51, 21, 15, 78, 51, 57, 15, 90, 64, 21, 15, 27, 51, 33, 15, 27, 64, 57, 7, 74, 54, 33, 7, 74, 51, 33, 15, 0, 1, 54, 92, 42, 67, 53, 33, 31, 4, 54, 62, 15, 23, 51, 9, 52, 23, 54, 21, 7, 72, 49, 29, 28, 72, 64, 21, 7, 10, 51, 50, 15, 91, 35, 33, 7, 91, 51, 9, 15, 13, 54, 33, 15, 70, 54, 50, 15, 16, 51, 57, 15, 78, 64, 21, 7, 17, 35, 111, 28, 90, 51, 33, 31, 27, 51, 6, 31, 74, 54, 57, 15, 74, 35, 33, 7, 0, 67, 64, 57, 52, 4, 54, 33, 15, 23, 51, 57, 7, 23, 51, 33, 7, 8, 35, 111, 66, 72, 54, 33, 31, 72, 54, 55, 19, 91, 51, 33, 52, 91,

[0, 1, 2, 147, 67, 54, 55, 15, 23, 54, 48, 7, 23, 51, 128, 7, 72, 49, 55, 7, 72, 54, 48, 7, 91, 51, 6, 7, 91, 51, 55, 22, 70, 54, 48, 15, 70, 49, 50, 7, 70, 54, 55, 31, 78, 53, 128, 41, 78, 54, 48, 52, 78, 51, 6, 19, 90, 51, 55, 52, 90, 51, 48, 52, 74, 51, 55, 31, 0, 67, 54, 48, 15, 23, 51, 55, 7, 23, 51, 132, 47, 72, 53, 100, 19, 91, 64, 75, 15, 91, 51, 48, 7, 70, 35, 100, 41, 78, 51, 48, 52, 90, 64, 48, 7, 74, 51, 100, 22, 0, 67, 54, 55, 15, 23, 51, 132, 66, 23, 54, 48, 7, 23, 51, 100, 41, 91, 49, 128, 52, 91, 54, 100, 52, 91, 51, 48, 19, 70, 53, 128, 66, 78, 64, 48, 7, 78, 51, 100, 31, 78, 54, 132, 28, 90, 54, 128, 19, 74, 51, 100, 31, 74, 35, 128, 19, 0, 67, 54, 100, 31, 23, 54, 128, 7, 72, 38, 100, 15, 72, 51, 48, 7, 91, 51, 100, 52, 91, 51, 48, 52, 78, 51, 128, 41, 78, 51, 55, 7, 78, 51, 48, 7, 78, 35, 129, 66, 90, 51, 122, 52, 74, 51, 55, 52, 0, 1, 54, 55, 19, 23, 51, 48, 34, 72, 51, 55, 19, 91, 51, 55, 19, 91, 51, 48, 19, 78, 51, 132, 19, 78, 51, 100, 31, 78, 51, 30, 7, 90, 32,

Epoch: 659 | Time: 5m 41s
	Train Loss: 0.431 | Train PPL:   1.539
	 Val. Loss: 5.134 |  Val. PPL: 169.637
Epoch: 660 | Time: 5m 42s
	Train Loss: 0.430 | Train PPL:   1.538
	 Val. Loss: 5.164 |  Val. PPL: 174.776
=> Saving checkpoint
Epoch: 661 | Time: 5m 42s
	Train Loss: 0.430 | Train PPL:   1.538
	 Val. Loss: 5.138 |  Val. PPL: 170.354
=> Saving checkpoint
Epoch: 662 | Time: 5m 42s
	Train Loss: 0.429 | Train PPL:   1.535
	 Val. Loss: 5.149 |  Val. PPL: 172.324
Epoch: 663 | Time: 5m 42s
	Train Loss: 0.428 | Train PPL:   1.534
	 Val. Loss: 5.168 |  Val. PPL: 175.601
Epoch: 664 | Time: 5m 42s
	Train Loss: 0.427 | Train PPL:   1.532
	 Val. Loss: 5.167 |  Val. PPL: 175.435
Epoch: 665 | Time: 5m 42s
	Train Loss: 0.427 | Train PPL:   1.532
	 Val. Loss: 5.143 |  Val. PPL: 171.210
Epoch: 666 | Time: 5m 42s
	Train Loss: 0.426 | Train PPL:   1.531
	 Val. Loss: 5.229 |  Val. PPL: 186.549
Epoch: 667 | Time: 5m 42s
	Train Loss: 0.425 | Train PPL:   1.529
	 Val. Loss: 5.187 |  Val. PPL: 178.865
Epoc

[0, 1, 2, 162, 1, 54, 33, 15, 67, 45, 50, 15, 4, 51, 57, 15, 4, 54, 33, 31, 4, 49, 50, 7, 8, 53, 6, 7, 10, 54, 57, 15, 10, 51, 55, 52, 10, 54, 33, 7, 91, 54, 50, 15, 13, 54, 92, 36, 16, 54, 50, 7, 16, 51, 9, 7, 78, 64, 33, 15, 78, 54, 50, 7, 78, 64, 33, 41, 27, 51, 50, 7, 74, 51, 57, 31, 0, 1, 51, 92, 47, 67, 54, 33, 7, 4, 35, 50, 31, 4, 51, 33, 7, 8, 35, 33, 31, 8, 49, 50, 7, 10, 54, 55, 42, 10, 51, 33, 15, 13, 64, 50, 31, 16, 54, 33, 15, 16, 51, 57, 15, 78, 54, 50, 7, 17, 35, 92, 42, 27, 35, 33, 7, 27, 51, 50, 15, 27, 51, 9, 7, 74, 64, 33, 22, 0, 1, 54, 50, 22, 67, 35, 55, 15, 4, 54, 33, 15, 23, 51, 57, 7, 8, 51, 92, 28, 10, 51, 50, 15, 10, 49, 29, 52, 91, 53, 33, 15, 13, 64, 50, 15, 70, 53, 9, 7, 16, 38, 33, 15, 78, 64, 50, 52, 17, 49, 29, 41, 27, 51, 50, 7, 27, 51, 9, 7, 0, 1, 51, 92, 37, 4, 53, 33, 15, 4, 54, 50, 7, 23, 53, 33, 7, 8, 54, 50, 7, 72, 51, 9, 52, 10, 51, 33, 15, 10, 35, 50, 22, 10, 51, 33, 7, 13, 35, 50, 7, 16, 51, 57, 7, 16, 123, 33, 31, 16, 51, 50, 15, 78, 64, 92, 1

[0, 1, 2, 142, 23, 32, 55, 15, 72, 49, 128, 7, 72, 49, 128, 7, 91, 32, 55, 41, 91, 32, 48, 41, 70, 54, 128, 7, 78, 51, 55, 7, 78, 51, 48, 31, 78, 38, 6, 31, 90, 51, 128, 66, 74, 49, 55, 19, 0, 67, 49, 128, 52, 23, 54, 55, 19, 23, 51, 128, 52, 72, 51, 132, 66, 91, 51, 100, 19, 91, 32, 30, 19, 91, 51, 128, 7, 78, 51, 132, 47, 78, 51, 100, 66, 90, 64, 30, 19, 74, 35, 128, 37, 0, 67, 32, 100, 15, 67, 54, 30, 7, 23, 51, 48, 22, 72, 51, 132, 41, 91, 51, 100, 52, 91, 51, 128, 7, 91, 54, 185, 28, 70, 53, 111, 34, 78, 64, 55, 7, 78, 54, 128, 7, 78, 54, 55, 31, 90, 35, 128, 37, 74, 64, 48, 7, 0, 67, 49, 128, 7, 23, 32, 55, 7, 23, 51, 48, 31, 91, 51, 128, 15, 91, 51, 55, 52, 70, 53, 129, 42, 78, 64, 55, 7, 78, 64, 122, 19, 78, 51, 55, 22, 90, 51, 46, 52, 74, 51, 122, 31, 0, 67, 63, 129, 22, 67, 51, 55, 15, 23, 51, 55, 19, 23, 51, 48, 41, 72, 51, 129, 34, 91, 51, 122, 52, 91, 51, 55, 19, 70, 51, 33, 41, 78, 64, 55, 19, 78, 51, 50, 19, 78, 51, 55, 19, 78, 51, 185, 19, 90, 51, 128, 19, 74, 51, 55, 5

Epoch: 724 | Time: 5m 42s
	Train Loss: 0.387 | Train PPL:   1.473
	 Val. Loss: 5.376 |  Val. PPL: 216.165
Epoch: 725 | Time: 5m 42s
	Train Loss: 0.386 | Train PPL:   1.472
	 Val. Loss: 5.346 |  Val. PPL: 209.712
[0, 1, 2, 130, 67, 32, 127, 22, 4, 32, 104, 31, 23, 11, 55, 7, 23, 38, 179, 15, 23, 32, 127, 15, 8, 38, 199, 28, 8, 38, 127, 41, 72, 38, 122, 52, 10, 38, 100, 31, 91, 38, 127, 7, 13, 79, 100, 7, 70, 38, 46, 41, 16, 32, 199, 19, 78, 64, 127, 15, 78, 32, 199, 28, 90, 38, 127, 41, 74, 11, 100, 41, 0, 67, 64, 127, 15, 67, 11, 122, 15, 23, 64, 100, 31, 23, 35, 185, 66, 23, 64, 128, 52, 8, 54, 129, 31, 72, 32, 128, 52, 10, 32, 128, 19, 91, 38, 104, 7, 13, 32, 129, 15, 70, 32, 185, 37, 78, 54, 129, 52, 78, 54, 128, 7, 17, 38, 55, 41, 90, 32, 128, 15, 27, 38, 128, 7, 74, 32, 104, 7, 0, 67, 38, 127, 22, 23, 53, 199, 22, 23, 11, 93, 41, 8, 32, 100, 41, 72, 32, 127, 15, 10, 11, 93, 15, 91, 32, 199, 36, 91, 38, 127, 7, 13, 53, 132, 66, 70, 38, 127, 15, 78, 38, 127, 52, 78, 38, 93, 15, 17, 

Epoch: 726 | Time: 5m 42s
	Train Loss: 0.386 | Train PPL:   1.470
	 Val. Loss: 5.357 |  Val. PPL: 212.032
Epoch: 727 | Time: 5m 41s
	Train Loss: 0.385 | Train PPL:   1.470
	 Val. Loss: 5.340 |  Val. PPL: 208.511
Epoch: 728 | Time: 5m 42s
	Train Loss: 0.385 | Train PPL:   1.469
	 Val. Loss: 5.360 |  Val. PPL: 212.800
Epoch: 729 | Time: 5m 41s
	Train Loss: 0.384 | Train PPL:   1.469
	 Val. Loss: 5.355 |  Val. PPL: 211.637
Epoch: 730 | Time: 5m 41s
	Train Loss: 0.383 | Train PPL:   1.466
	 Val. Loss: 5.371 |  Val. PPL: 215.022
Epoch: 731 | Time: 5m 41s
	Train Loss: 0.383 | Train PPL:   1.467
	 Val. Loss: 5.355 |  Val. PPL: 211.652
Epoch: 732 | Time: 5m 41s
	Train Loss: 0.382 | Train PPL:   1.465
	 Val. Loss: 5.368 |  Val. PPL: 214.527
Epoch: 733 | Time: 5m 41s
	Train Loss: 0.382 | Train PPL:   1.465
	 Val. Loss: 5.376 |  Val. PPL: 216.114
Epoch: 734 | Time: 5m 41s
	Train Loss: 0.381 | Train PPL:   1.464
	 Val. Loss: 5.356 |  Val. PPL: 211.922
Epoch: 735 | Time: 5m 41s
	Train Loss: 0.381 |

[0, 1, 2, 162, 1, 54, 33, 15, 67, 54, 50, 15, 4, 51, 57, 15, 23, 54, 50, 31, 8, 53, 57, 7, 72, 53, 50, 7, 10, 54, 57, 15, 91, 63, 55, 52, 13, 53, 33, 7, 70, 49, 50, 15, 16, 54, 57, 7, 78, 54, 57, 7, 17, 64, 50, 7, 90, 53, 57, 15, 27, 54, 50, 15, 74, 35, 30, 12, 15, 0, 1, 54, 50, 15, 67, 53, 57, 15, 4, 53, 30, 47, 23, 51, 6, 52, 8, 54, 48, 7, 10, 53, 57, 15, 91, 51, 6, 7, 91, 51, 30, 66, 70, 54, 50, 7, 16, 54, 57, 7, 78, 54, 50, 15, 17, 51, 57, 7, 90, 54, 96, 7, 27, 54, 50, 7, 74, 35, 30, 47, 0, 1, 54, 50, 7, 67, 51, 9, 15, 4, 49, 30, 66, 23, 35, 50, 15, 8, 51, 50, 7, 72, 51, 30, 41, 10, 51, 6, 15, 13, 49, 50, 7, 13, 35, 30, 15, 13, 49, 111, 47, 13, 54, 50, 7, 16, 35, 55, 66, 78, 64, 57, 19, 78, 51, 33, 7, 17, 35, 50, 52, 27, 32, 57, 7, 0, 1, 32, 55, 15, 1, 51, 33, 7, 1, 51, 50, 15, 67, 51, 57, 15, 4, 49, 33, 15, 23, 35, 50, 7, 8, 63, 6, 7, 72, 64, 57, 31, 10, 35, 55, 15, 13, 51, 57, 7, 70, 51, 57, 15, 16, 51, 92, 36, 78, 54, 50, 52, 78, 51, 9, 7, 17, 35, 33, 41, 90, 35, 50, 7, 27, 51, 

[0, 1, 2, 147, 67, 54, 55, 15, 23, 51, 185, 7, 23, 51, 128, 7, 23, 64, 55, 7, 72, 53, 128, 31, 72, 54, 48, 7, 91, 51, 55, 22, 91, 49, 128, 7, 91, 54, 48, 7, 70, 54, 129, 66, 78, 54, 55, 7, 78, 54, 48, 52, 78, 64, 55, 15, 90, 51, 122, 52, 74, 51, 55, 7, 0, 67, 51, 55, 7, 23, 54, 132, 77, 23, 51, 100, 15, 72, 35, 128, 19, 91, 51, 100, 15, 91, 64, 48, 15, 91, 51, 128, 41, 91, 51, 100, 7, 78, 51, 48, 7, 90, 51, 132, 47, 74, 51, 100, 22, 74, 54, 128, 52, 74, 51, 100, 15, 0, 67, 32, 48, 15, 23, 51, 100, 15, 72, 35, 128, 19, 91, 54, 55, 15, 91, 51, 185, 47, 91, 51, 128, 15, 70, 54, 55, 15, 78, 54, 48, 19, 78, 51, 132, 15, 78, 51, 128, 15, 78, 51, 200, 102, 90, 51, 100, 19, 74, 54, 48, 52, 0, 67, 54, 100, 15, 23, 51, 100, 15, 23, 51, 30, 7, 23, 51, 48, 52, 72, 35, 128, 7, 91, 51, 100, 7, 91, 51, 185, 19, 91, 51, 48, 7, 78, 35, 128, 15, 78, 51, 55, 52, 78, 51, 48, 52, 90, 51, 48, 52, 74, 54, 178, 37, 74, 35, 100, 7, 0, 67, 49, 122, 19, 23, 51, 100, 19, 23, 51, 149, 19, 72, 51, 100, 19, 91, 51, 

Epoch: 793 | Time: 5m 42s
	Train Loss: 0.349 | Train PPL:   1.417
	 Val. Loss: 5.534 |  Val. PPL: 253.115
Epoch: 794 | Time: 5m 42s
	Train Loss: 0.348 | Train PPL:   1.416
	 Val. Loss: 5.588 |  Val. PPL: 267.077
Epoch: 795 | Time: 5m 42s
	Train Loss: 0.348 | Train PPL:   1.416
	 Val. Loss: 5.563 |  Val. PPL: 260.568
Epoch: 796 | Time: 5m 42s
	Train Loss: 0.347 | Train PPL:   1.415
	 Val. Loss: 5.588 |  Val. PPL: 267.228
Epoch: 797 | Time: 5m 43s
	Train Loss: 0.346 | Train PPL:   1.414
	 Val. Loss: 5.575 |  Val. PPL: 263.866
Epoch: 798 | Time: 5m 42s
	Train Loss: 0.346 | Train PPL:   1.414
	 Val. Loss: 5.580 |  Val. PPL: 265.195
Epoch: 799 | Time: 5m 43s
	Train Loss: 0.345 | Train PPL:   1.412
	 Val. Loss: 5.523 |  Val. PPL: 250.441
Epoch: 800 | Time: 5m 42s
	Train Loss: 0.345 | Train PPL:   1.413
	 Val. Loss: 5.558 |  Val. PPL: 259.258
=> Saving checkpoint
[0, 1, 2, 130, 67, 32, 127, 22, 4, 32, 104, 31, 23, 38, 55, 7, 23, 38, 199, 52, 8, 2, 130, 72, 38, 127, 41, 10, 38, 122, 7, 91, 65,

[0, 1, 2, 162, 1, 54, 50, 15, 67, 54, 57, 15, 4, 51, 21, 15, 23, 54, 50, 31, 8, 54, 57, 7, 8, 53, 50, 7, 10, 54, 57, 15, 91, 63, 30, 52, 13, 54, 50, 7, 70, 53, 9, 15, 16, 51, 149, 36, 78, 54, 6, 7, 17, 54, 12, 7, 90, 53, 30, 31, 27, 54, 6, 7, 74, 35, 30, 31, 0, 1, 54, 6, 7, 67, 51, 48, 15, 4, 35, 6, 31, 4, 54, 30, 7, 23, 35, 6, 15, 8, 51, 57, 7, 8, 35, 30, 31, 10, 64, 50, 7, 91, 51, 30, 47, 70, 51, 50, 15, 16, 64, 9, 15, 78, 54, 50, 15, 17, 51, 57, 15, 90, 51, 57, 15, 27, 51, 21, 15, 0, 1, 35, 33, 7, 67, 51, 50, 15, 4, 64, 149, 37, 23, 35, 30, 41, 8, 51, 48, 7, 10, 35, 111, 42, 91, 51, 30, 15, 13, 54, 6, 15, 70, 51, 12, 19, 16, 35, 30, 15, 17, 53, 48, 15, 17, 54, 6, 15, 27, 32, 12, 7, 0, 1, 35, 6, 7, 67, 64, 12, 31, 4, 54, 33, 52, 23, 35, 57, 31, 8, 54, 50, 7, 8, 64, 149, 66, 10, 54, 6, 52, 10, 54, 12, 15, 91, 51, 12, 7, 13, 32, 30, 52, 70, 54, 50, 15, 16, 54, 12, 15, 16, 51, 30, 15, 16, 35, 48, 22, 78, 51, 12, 52, 17, 54, 6, 31, 17, 51, 12, 31, 90, 54, 30, 52, 27, 51, 50, 7, 74, 51, 1

[0, 1, 2, 142, 67, 49, 100, 15, 67, 51, 48, 15, 23, 54, 128, 7, 23, 32, 55, 7, 8, 51, 48, 15, 72, 54, 48, 7, 91, 51, 55, 22, 72, 35, 128, 47, 91, 64, 48, 15, 70, 49, 128, 7, 78, 54, 55, 19, 78, 49, 48, 19, 78, 64, 6, 52, 90, 35, 128, 19, 74, 35, 55, 52, 0, 67, 51, 48, 7, 23, 54, 128, 7, 23, 32, 55, 7, 72, 53, 48, 19, 91, 51, 6, 52, 70, 54, 132, 37, 78, 64, 100, 19, 78, 35, 30, 19, 78, 51, 48, 52, 90, 54, 128, 47, 74, 51, 100, 7, 0, 67, 54, 100, 15, 67, 51, 128, 7, 23, 54, 48, 15, 23, 51, 48, 7, 72, 35, 55, 15, 91, 53, 48, 52, 91, 51, 6, 52, 70, 53, 128, 66, 78, 64, 48, 31, 78, 51, 6, 31, 90, 54, 132, 37, 74, 54, 100, 19, 0, 67, 49, 128, 15, 67, 51, 48, 52, 23, 49, 100, 19, 23, 32, 132, 19, 72, 49, 128, 41, 72, 51, 100, 19, 91, 51, 30, 52, 13, 35, 100, 7, 78, 54, 48, 52, 78, 64, 132, 42, 78, 51, 100, 15, 78, 63, 128, 66, 90, 53, 30, 52, 74, 51, 48, 52, 0, 1, 54, 100, 15, 67, 49, 48, 7, 23, 51, 132, 68, 8, 54, 100, 19, 72, 54, 48, 19, 91, 35, 100, 15, 91, 54, 100, 15, 13, 51, 30, 7, 70, 

Epoch: 840 | Time: 5m 42s
	Train Loss: 0.325 | Train PPL:   1.384
	 Val. Loss: 5.721 |  Val. PPL: 305.084
=> Saving checkpoint
Epoch: 841 | Time: 5m 42s
	Train Loss: 0.325 | Train PPL:   1.384
	 Val. Loss: 5.649 |  Val. PPL: 284.112
=> Saving checkpoint
Epoch: 842 | Time: 5m 42s
	Train Loss: 0.324 | Train PPL:   1.383
	 Val. Loss: 5.663 |  Val. PPL: 288.016
Epoch: 843 | Time: 5m 43s
	Train Loss: 0.324 | Train PPL:   1.383
	 Val. Loss: 5.690 |  Val. PPL: 295.929
Epoch: 844 | Time: 5m 42s
	Train Loss: 0.323 | Train PPL:   1.382
	 Val. Loss: 5.702 |  Val. PPL: 299.371
Epoch: 845 | Time: 5m 42s
	Train Loss: 0.324 | Train PPL:   1.383
	 Val. Loss: 5.710 |  Val. PPL: 301.826
Epoch: 846 | Time: 5m 43s
	Train Loss: 0.323 | Train PPL:   1.381
	 Val. Loss: 5.707 |  Val. PPL: 300.936
Epoch: 847 | Time: 5m 42s
	Train Loss: 0.323 | Train PPL:   1.381
	 Val. Loss: 5.694 |  Val. PPL: 297.079
Epoch: 848 | Time: 5m 43s
	Train Loss: 0.322 | Train PPL:   1.380
	 Val. Loss: 5.702 |  Val. PPL: 299.386
Epoc

[0, 1, 2, 162, 1, 54, 30, 15, 67, 54, 50, 31, 4, 54, 57, 15, 23, 54, 50, 31, 8, 53, 57, 7, 8, 53, 50, 7, 72, 54, 57, 15, 10, 51, 30, 7, 91, 54, 50, 7, 91, 54, 57, 15, 13, 64, 128, 47, 70, 53, 6, 7, 16, 51, 57, 7, 78, 54, 48, 15, 78, 54, 6, 15, 78, 54, 57, 15, 17, 64, 21, 31, 90, 35, 6, 15, 27, 51, 57, 7, 74, 54, 33, 22, 74, 35, 50, 7, 0, 67, 53, 55, 15, 4, 35, 33, 7, 4, 35, 50, 7, 23, 51, 57, 7, 23, 51, 33, 15, 8, 54, 50, 15, 72, 51, 57, 15, 10, 54, 57, 15, 91, 54, 21, 15, 91, 51, 96, 15, 13, 35, 50, 15, 70, 51, 57, 15, 16, 51, 96, 7, 78, 64, 50, 22, 78, 64, 57, 15, 90, 51, 33, 15, 27, 35, 50, 15, 74, 54, 57, 15, 0, 67, 51, 92, 28, 4, 51, 21, 7, 23, 54, 33, 7, 23, 35, 62, 31, 72, 54, 9, 7, 10, 51, 33, 15, 91, 35, 50, 15, 13, 54, 9, 52, 70, 35, 33, 15, 16, 51, 9, 7, 78, 54, 50, 7, 17, 54, 9, 15, 17, 35, 33, 7, 27, 51, 50, 15, 27, 51, 57, 15, 74, 35, 33, 15, 0, 67, 54, 57, 15, 4, 54, 33, 15, 4, 35, 50, 7, 23, 51, 57, 7, 8, 54, 57, 7, 72, 51, 149, 66, 10, 54, 30, 52, 91, 35, 48, 52, 70, 5

[0, 1, 2, 147, 67, 54, 55, 15, 23, 51, 132, 7, 23, 54, 128, 7, 23, 64, 100, 7, 8, 51, 30, 7, 72, 53, 48, 7, 91, 51, 100, 22, 91, 64, 48, 15, 70, 54, 132, 37, 70, 54, 128, 31, 78, 54, 100, 31, 78, 54, 48, 52, 78, 64, 48, 19, 90, 51, 128, 19, 74, 51, 100, 52, 0, 67, 51, 100, 52, 23, 54, 128, 15, 72, 51, 100, 7, 72, 51, 132, 28, 72, 51, 100, 31, 91, 64, 48, 15, 91, 51, 100, 7, 70, 49, 100, 7, 78, 51, 48, 52, 90, 51, 132, 47, 90, 51, 100, 7, 74, 54, 100, 31, 0, 67, 54, 128, 15, 23, 54, 100, 15, 23, 51, 48, 19, 72, 35, 128, 52, 72, 54, 100, 52, 91, 54, 48, 52, 70, 51, 100, 15, 78, 64, 48, 15, 78, 51, 132, 66, 90, 54, 100, 19, 74, 51, 128, 31, 0, 67, 51, 100, 15, 67, 51, 30, 7, 23, 32, 132, 42, 23, 54, 100, 15, 72, 49, 128, 19, 72, 51, 100, 15, 91, 35, 30, 52, 13, 35, 100, 31, 70, 51, 48, 7, 16, 54, 100, 15, 78, 51, 48, 15, 78, 63, 128, 15, 78, 51, 100, 52, 90, 51, 48, 52, 74, 51, 48, 52, 0, 67, 51, 100, 7, 23, 51, 48, 41, 23, 35, 100, 19, 23, 35, 48, 19, 72, 51, 129, 31, 91, 51, 55, 19, 70,

Epoch: 885 | Time: 5m 42s
	Train Loss: 0.306 | Train PPL:   1.357
	 Val. Loss: 5.765 |  Val. PPL: 319.003
Epoch: 886 | Time: 5m 43s
	Train Loss: 0.305 | Train PPL:   1.357
	 Val. Loss: 5.794 |  Val. PPL: 328.393
Epoch: 887 | Time: 5m 42s
	Train Loss: 0.304 | Train PPL:   1.356
	 Val. Loss: 5.819 |  Val. PPL: 336.633
Epoch: 888 | Time: 5m 43s
	Train Loss: 0.306 | Train PPL:   1.357
	 Val. Loss: 5.790 |  Val. PPL: 326.930
Epoch: 889 | Time: 5m 43s
	Train Loss: 0.304 | Train PPL:   1.355
	 Val. Loss: 5.802 |  Val. PPL: 331.004
Epoch: 890 | Time: 5m 43s
	Train Loss: 0.304 | Train PPL:   1.355
	 Val. Loss: 5.776 |  Val. PPL: 322.520
Epoch: 891 | Time: 5m 42s
	Train Loss: 0.303 | Train PPL:   1.354
	 Val. Loss: 5.853 |  Val. PPL: 348.406
Epoch: 892 | Time: 5m 43s
	Train Loss: 0.303 | Train PPL:   1.354
	 Val. Loss: 5.899 |  Val. PPL: 364.736
Epoch: 893 | Time: 5m 43s
	Train Loss: 0.303 | Train PPL:   1.354
	 Val. Loss: 5.788 |  Val. PPL: 326.439
Epoch: 894 | Time: 5m 44s
	Train Loss: 0.302 |

[0, 1, 2, 162, 1, 54, 30, 15, 67, 54, 50, 31, 4, 54, 57, 31, 23, 54, 50, 31, 8, 54, 57, 7, 8, 53, 50, 7, 72, 54, 57, 15, 10, 51, 30, 52, 10, 54, 50, 7, 91, 54, 57, 15, 13, 64, 128, 47, 70, 53, 6, 7, 16, 51, 57, 7, 78, 54, 48, 15, 17, 53, 55, 52, 90, 35, 6, 31, 27, 51, 57, 31, 74, 51, 149, 137, 74, 35, 30, 37, 0, 1, 35, 48, 7, 67, 54, 6, 15, 4, 54, 48, 7, 23, 35, 30, 7, 8, 51, 48, 7, 72, 51, 6, 7, 10, 51, 30, 15, 91, 64, 6, 15, 13, 54, 48, 15, 70, 54, 57, 15, 16, 64, 12, 15, 78, 51, 96, 15, 17, 64, 96, 15, 90, 51, 50, 15, 27, 51, 57, 7, 27, 64, 50, 22, 74, 51, 55, 7, 0, 1, 64, 55, 15, 1, 64, 33, 15, 1, 54, 57, 15, 67, 51, 92, 28, 4, 51, 50, 52, 23, 51, 29, 52, 72, 64, 21, 7, 10, 54, 50, 15, 10, 51, 29, 15, 91, 51, 92, 66, 91, 51, 33, 15, 13, 32, 57, 31, 16, 51, 21, 7, 78, 64, 50, 7, 17, 54, 9, 19, 27, 51, 33, 7, 27, 51, 50, 15, 0, 1, 51, 149, 36, 1, 54, 30, 7, 67, 64, 6, 52, 4, 51, 30, 15, 23, 51, 6, 7, 8, 51, 30, 15, 72, 51, 6, 31, 10, 54, 30, 15, 91, 32, 6, 31, 13, 51, 149, 36, 70, 51

[0, 1, 2, 147, 67, 54, 55, 15, 23, 51, 132, 7, 23, 54, 128, 7, 23, 64, 100, 7, 72, 53, 30, 19, 72, 53, 48, 7, 91, 54, 100, 22, 91, 49, 128, 52, 91, 54, 132, 47, 91, 54, 128, 31, 91, 49, 100, 7, 91, 49, 48, 52, 70, 51, 48, 7, 78, 51, 128, 19, 78, 54, 100, 7, 78, 54, 30, 52, 90, 51, 48, 52, 74, 53, 100, 15, 74, 51, 30, 52, 74, 51, 128, 15, 0, 67, 45, 132, 77, 23, 51, 128, 19, 23, 54, 100, 19, 23, 35, 30, 52, 72, 53, 128, 7, 91, 51, 100, 22, 91, 54, 30, 22, 91, 51, 48, 15, 78, 51, 132, 37, 78, 54, 100, 31, 78, 51, 48, 19, 90, 35, 100, 7, 74, 35, 30, 7, 74, 51, 48, 31, 74, 54, 100, 22, 74, 54, 128, 31, 0, 67, 49, 129, 15, 23, 54, 122, 15, 23, 51, 55, 52, 72, 35, 122, 31, 91, 54, 132, 47, 91, 51, 100, 52, 91, 54, 30, 19, 78, 54, 128, 15, 78, 54, 100, 15, 78, 54, 132, 19, 74, 51, 100, 15, 74, 51, 30, 7, 0, 67, 51, 132, 66, 23, 35, 100, 15, 23, 54, 30, 52, 72, 51, 100, 22, 91, 51, 30, 15, 91, 51, 132, 37, 91, 54, 100, 31, 91, 54, 30, 52, 91, 51, 100, 15, 78, 54, 100, 52, 78, 64, 48, 7, 78, 64

Epoch: 933 | Time: 5m 42s
	Train Loss: 0.286 | Train PPL:   1.331
	 Val. Loss: 5.953 |  Val. PPL: 384.937
Epoch: 934 | Time: 5m 43s
	Train Loss: 0.286 | Train PPL:   1.332
	 Val. Loss: 5.871 |  Val. PPL: 354.772
Epoch: 935 | Time: 5m 43s
	Train Loss: 0.286 | Train PPL:   1.332
	 Val. Loss: 5.929 |  Val. PPL: 375.822
Epoch: 936 | Time: 5m 42s
	Train Loss: 0.286 | Train PPL:   1.331
	 Val. Loss: 5.931 |  Val. PPL: 376.472
Epoch: 937 | Time: 5m 43s
	Train Loss: 0.286 | Train PPL:   1.331
	 Val. Loss: 5.948 |  Val. PPL: 382.949
Epoch: 938 | Time: 5m 43s
	Train Loss: 0.285 | Train PPL:   1.330
	 Val. Loss: 5.897 |  Val. PPL: 364.090
Epoch: 939 | Time: 5m 42s
	Train Loss: 0.285 | Train PPL:   1.330
	 Val. Loss: 5.925 |  Val. PPL: 374.296
Epoch: 940 | Time: 5m 43s
	Train Loss: 0.284 | Train PPL:   1.329
	 Val. Loss: 5.960 |  Val. PPL: 387.625
=> Saving checkpoint
Epoch: 941 | Time: 5m 43s
	Train Loss: 0.284 | Train PPL:   1.329
	 Val. Loss: 5.912 |  Val. PPL: 369.556
=> Saving checkpoint
Epoc

[0, 1, 2, 162, 1, 54, 30, 31, 67, 54, 50, 15, 4, 54, 57, 15, 23, 54, 12, 31, 8, 54, 96, 7, 8, 51, 50, 7, 72, 54, 57, 15, 10, 51, 30, 52, 10, 54, 50, 7, 91, 54, 57, 31, 91, 51, 128, 47, 70, 53, 48, 7, 16, 51, 57, 7, 78, 54, 6, 15, 78, 54, 48, 7, 78, 54, 57, 15, 17, 64, 86, 15, 90, 51, 149, 137, 27, 51, 30, 37, 74, 51, 48, 7, 74, 35, 6, 15, 0, 1, 51, 30, 7, 67, 64, 48, 7, 4, 35, 30, 52, 23, 51, 50, 7, 8, 54, 30, 15, 8, 54, 48, 15, 10, 51, 6, 15, 10, 51, 12, 15, 91, 54, 48, 15, 91, 51, 12, 15, 13, 35, 6, 15, 70, 51, 149, 28, 70, 51, 30, 19, 16, 64, 48, 7, 78, 54, 30, 7, 78, 51, 48, 15, 17, 51, 6, 31, 90, 54, 30, 31, 27, 51, 50, 31, 74, 51, 6, 15, 0, 67, 64, 30, 22, 67, 51, 12, 15, 4, 64, 50, 31, 23, 51, 30, 15, 23, 51, 50, 7, 72, 54, 30, 15, 10, 51, 129, 96, 7, 10, 54, 50, 7, 91, 51, 122, 66, 91, 51, 55, 41, 91, 51, 50, 52, 13, 49, 55, 15, 70, 51, 33, 7, 16, 35, 50, 7, 78, 51, 57, 7, 17, 54, 55, 15, 90, 51, 129, 36, 27, 51, 33, 7, 74, 54, 50, 7, 74, 51, 57, 31, 0, 1, 51, 55, 28, 67, 51, 3

[0, 1, 2, 147, 67, 54, 55, 15, 23, 51, 132, 7, 23, 54, 128, 7, 23, 54, 100, 7, 72, 53, 128, 19, 72, 49, 100, 7, 91, 54, 55, 22, 91, 49, 128, 7, 91, 54, 132, 7, 70, 53, 128, 31, 70, 53, 100, 7, 78, 49, 48, 7, 78, 64, 30, 19, 90, 51, 128, 31, 74, 53, 100, 22, 0, 67, 51, 100, 7, 23, 54, 132, 22, 23, 51, 128, 7, 23, 51, 100, 7, 72, 51, 30, 15, 91, 54, 48, 15, 91, 54, 128, 7, 91, 54, 100, 7, 70, 49, 30, 7, 78, 53, 128, 22, 78, 51, 100, 7, 90, 54, 132, 31, 74, 51, 100, 15, 74, 51, 48, 15, 0, 1, 54, 100, 15, 67, 63, 128, 15, 23, 54, 100, 15, 23, 54, 48, 7, 72, 53, 128, 19, 91, 51, 100, 7, 91, 53, 55, 31, 70, 35, 128, 15, 78, 54, 132, 7, 78, 54, 100, 31, 78, 51, 48, 52, 74, 54, 199, 59, 74, 54, 100, 19, 0, 67, 54, 100, 15, 23, 51, 46, 7, 23, 51, 100, 52, 72, 35, 122, 7, 91, 51, 100, 7, 91, 54, 46, 7, 78, 54, 100, 15, 78, 63, 132, 19, 90, 53, 48, 52, 74, 64, 100, 52, 0, 1, 54, 100, 15, 67, 49, 185, 19, 23, 51, 55, 7, 23, 35, 128, 15, 23, 53, 55, 15, 72, 53, 48, 19, 91, 45, 128, 19, 91, 54, 55, 

Epoch: 991 | Time: 5m 43s
	Train Loss: 0.265 | Train PPL:   1.304
	 Val. Loss: 6.067 |  Val. PPL: 431.204
Epoch: 992 | Time: 5m 42s
	Train Loss: 0.266 | Train PPL:   1.304
	 Val. Loss: 6.067 |  Val. PPL: 431.461
Epoch: 993 | Time: 5m 42s
	Train Loss: 0.265 | Train PPL:   1.303
	 Val. Loss: 6.078 |  Val. PPL: 436.118
Epoch: 994 | Time: 5m 42s
	Train Loss: 0.265 | Train PPL:   1.304
	 Val. Loss: 6.095 |  Val. PPL: 443.637
Epoch: 995 | Time: 5m 43s
	Train Loss: 0.265 | Train PPL:   1.303
	 Val. Loss: 6.069 |  Val. PPL: 432.178
Epoch: 996 | Time: 5m 43s
	Train Loss: 0.264 | Train PPL:   1.302
	 Val. Loss: 6.088 |  Val. PPL: 440.596
Epoch: 997 | Time: 5m 43s
	Train Loss: 0.264 | Train PPL:   1.302
	 Val. Loss: 6.054 |  Val. PPL: 425.994
Epoch: 998 | Time: 5m 43s
	Train Loss: 0.263 | Train PPL:   1.301
	 Val. Loss: 6.084 |  Val. PPL: 438.688
Epoch: 999 | Time: 5m 42s
	Train Loss: 0.263 | Train PPL:   1.300
	 Val. Loss: 6.144 |  Val. PPL: 466.028
Epoch: 1000 | Time: 5m 42s
	Train Loss: 0.263 

Epoch: 1001 | Time: 5m 43s
	Train Loss: 0.262 | Train PPL:   1.300
	 Val. Loss: 6.100 |  Val. PPL: 446.001
=> Saving checkpoint
Epoch: 1002 | Time: 5m 43s
	Train Loss: 0.263 | Train PPL:   1.301
	 Val. Loss: 6.063 |  Val. PPL: 429.699
Epoch: 1003 | Time: 5m 43s
	Train Loss: 0.262 | Train PPL:   1.299
	 Val. Loss: 6.084 |  Val. PPL: 438.945
Epoch: 1004 | Time: 5m 43s
	Train Loss: 0.261 | Train PPL:   1.298
	 Val. Loss: 6.114 |  Val. PPL: 452.275
Epoch: 1005 | Time: 5m 43s
	Train Loss: 0.261 | Train PPL:   1.298
	 Val. Loss: 6.097 |  Val. PPL: 444.510
Epoch: 1006 | Time: 5m 43s
	Train Loss: 0.261 | Train PPL:   1.298
	 Val. Loss: 6.070 |  Val. PPL: 432.673
Epoch: 1007 | Time: 5m 43s
	Train Loss: 0.261 | Train PPL:   1.298
	 Val. Loss: 6.049 |  Val. PPL: 423.618
Epoch: 1008 | Time: 5m 42s
	Train Loss: 0.260 | Train PPL:   1.297
	 Val. Loss: 6.060 |  Val. PPL: 428.398
Epoch: 1009 | Time: 5m 44s
	Train Loss: 0.260 | Train PPL:   1.296
	 Val. Loss: 6.086 |  Val. PPL: 439.495
Epoch: 1010 | Ti

[0, 1, 2, 162, 1, 54, 30, 31, 67, 54, 50, 15, 4, 64, 57, 15, 23, 54, 50, 31, 8, 54, 57, 7, 8, 51, 128, 47, 72, 53, 30, 15, 10, 38, 6, 15, 91, 54, 57, 15, 70, 54, 96, 15, 16, 51, 128, 36, 78, 53, 48, 7, 17, 35, 6, 7, 90, 53, 6, 31, 27, 54, 55, 52, 74, 51, 6, 7, 0, 1, 54, 48, 31, 67, 51, 6, 31, 4, 53, 55, 28, 8, 53, 33, 7, 72, 35, 6, 15, 10, 53, 57, 7, 10, 51, 33, 7, 91, 54, 6, 7, 13, 38, 55, 42, 70, 35, 33, 15, 16, 64, 50, 15, 78, 54, 6, 15, 17, 51, 57, 15, 90, 51, 21, 15, 27, 51, 57, 15, 74, 64, 0, 1, 51, 128, 66, 67, 51, 6, 31, 4, 35, 57, 7, 23, 51, 48, 15, 8, 53, 6, 7, 8, 32, 21, 7, 72, 53, 6, 15, 10, 54, 57, 15, 10, 35, 57, 7, 13, 38, 55, 66, 70, 53, 50, 15, 16, 35, 57, 15, 78, 35, 50, 22, 17, 32, 21, 52, 27, 38, 57, 15, 27, 51, 50, 19, 74, 32, 57, 15, 0, 1, 54, 50, 7, 67, 64, 55, 47, 67, 35, 33, 7, 67, 54, 50, 31, 4, 35, 6, 7, 8, 51, 57, 7, 10, 64, 50, 31, 10, 51, 57, 15, 13, 32, 21, 15, 16, 54, 33, 7, 78, 35, 50, 15, 17, 64, 55, 7, 27, 51, 33, 7, 27, 64, 55, 15, 74, 51, 50, 15, 0,

[0, 1, 2, 147, 67, 54, 55, 15, 23, 51, 185, 77, 23, 54, 128, 7, 23, 64, 55, 7, 72, 53, 128, 47, 91, 49, 55, 7, 91, 51, 55, 22, 70, 49, 48, 15, 78, 49, 128, 7, 78, 54, 128, 19, 78, 54, 55, 19, 78, 49, 48, 52, 90, 64, 55, 52, 74, 51, 128, 7, 0, 67, 11, 55, 15, 23, 51, 132, 59, 23, 54, 128, 31, 72, 49, 100, 19, 91, 54, 128, 7, 91, 64, 100, 31, 70, 54, 128, 31, 78, 64, 100, 7, 78, 54, 30, 52, 90, 64, 48, 52, 74, 51, 132, 47, 74, 51, 128, 52, 0, 67, 53, 100, 15, 23, 54, 128, 7, 23, 54, 100, 15, 72, 51, 30, 15, 72, 53, 128, 19, 91, 54, 100, 15, 91, 54, 48, 19, 78, 51, 132, 94, 78, 54, 100, 7, 78, 53, 30, 31, 90, 54, 48, 52, 74, 35, 128, 31, 0, 67, 49, 132, 59, 67, 54, 100, 52, 23, 49, 128, 19, 23, 54, 100, 15, 72, 49, 128, 15, 91, 51, 48, 7, 70, 53, 100, 52, 78, 35, 128, 31, 78, 54, 129, 68, 78, 54, 55, 19, 78, 51, 48, 7, 78, 63, 129, 56, 90, 51, 122, 52, 74, 51, 55, 52, 0, 67, 49, 122, 15, 23, 54, 48, 7, 23, 51, 55, 52, 72, 53, 55, 15, 91, 49, 48, 7, 91, 49, 129, 19, 91, 54, 122, 19, 78, 54

Epoch: 1063 | Time: 5m 43s
	Train Loss: 0.244 | Train PPL:   1.276
	 Val. Loss: 6.193 |  Val. PPL: 489.546
Epoch: 1064 | Time: 5m 43s
	Train Loss: 0.243 | Train PPL:   1.275
	 Val. Loss: 6.204 |  Val. PPL: 494.560
Epoch: 1065 | Time: 5m 43s
	Train Loss: 0.242 | Train PPL:   1.274
	 Val. Loss: 6.241 |  Val. PPL: 513.360
Epoch: 1066 | Time: 5m 42s
	Train Loss: 0.242 | Train PPL:   1.274
	 Val. Loss: 6.250 |  Val. PPL: 517.802
Epoch: 1067 | Time: 5m 43s
	Train Loss: 0.241 | Train PPL:   1.273
	 Val. Loss: 6.215 |  Val. PPL: 500.223
Epoch: 1068 | Time: 5m 43s
	Train Loss: 0.242 | Train PPL:   1.273
	 Val. Loss: 6.249 |  Val. PPL: 517.391
Epoch: 1069 | Time: 5m 43s
	Train Loss: 0.240 | Train PPL:   1.272
	 Val. Loss: 6.211 |  Val. PPL: 498.217
Epoch: 1070 | Time: 5m 42s
	Train Loss: 0.241 | Train PPL:   1.272
	 Val. Loss: 6.190 |  Val. PPL: 487.916
Epoch: 1071 | Time: 5m 42s
	Train Loss: 0.240 | Train PPL:   1.272
	 Val. Loss: 6.218 |  Val. PPL: 501.620
Epoch: 1072 | Time: 5m 41s
	Train Los

[0, 1, 2, 162, 1, 54, 30, 31, 67, 54, 50, 31, 4, 54, 6, 7, 23, 54, 57, 31, 8, 54, 12, 7, 8, 51, 128, 137, 8, 54, 48, 15, 10, 51, 6, 15, 10, 53, 55, 7, 91, 35, 6, 15, 13, 54, 48, 7, 70, 53, 6, 7, 16, 51, 57, 15, 78, 54, 48, 15, 17, 51, 55, 52, 27, 51, 6, 7, 27, 51, 86, 15, 74, 51, 48, 15, 0, 1, 51, 128, 36, 1, 51, 30, 41, 67, 54, 48, 7, 4, 53, 6, 7, 8, 54, 30, 7, 10, 51, 48, 7, 10, 54, 6, 7, 13, 54, 30, 15, 13, 32, 50, 15, 16, 54, 6, 15, 16, 51, 57, 15, 78, 51, 48, 15, 78, 51, 6, 15, 17, 51, 57, 31, 27, 51, 30, 15, 27, 51, 50, 41, 27, 51, 9, 7, 74, 51, 30, 31, 74, 51, 50, 31, 0, 1, 64, 9, 7, 67, 64, 12, 31, 4, 51, 50, 52, 23, 51, 9, 52, 23, 54, 30, 22, 8, 54, 50, 31, 8, 54, 9, 31, 10, 51, 50, 7, 91, 51, 9, 7, 13, 64, 30, 15, 13, 54, 50, 15, 16, 51, 12, 7, 78, 64, 50, 7, 17, 64, 30, 28, 17, 51, 6, 31, 27, 51, 48, 15, 27, 51, 6, 15, 0, 1, 54, 30, 52, 67, 32, 9, 52, 4, 54, 30, 15, 23, 51, 50, 7, 8, 54, 9, 7, 8, 54, 30, 31, 10, 64, 50, 19, 10, 54, 9, 15, 10, 51, 30, 15, 10, 51, 50, 7, 13, 6

[0, 1, 2, 147, 67, 53, 55, 15, 67, 51, 132, 7, 23, 54, 128, 7, 23, 54, 100, 7, 8, 54, 30, 15, 72, 53, 48, 7, 91, 54, 100, 22, 72, 35, 128, 52, 91, 54, 132, 47, 91, 54, 128, 31, 91, 54, 100, 15, 70, 49, 48, 7, 78, 51, 48, 7, 90, 51, 128, 19, 90, 51, 100, 7, 74, 53, 128, 19, 74, 51, 48, 52, 0, 67, 53, 128, 22, 23, 51, 132, 68, 72, 53, 100, 15, 91, 64, 30, 15, 91, 54, 128, 47, 91, 54, 100, 15, 91, 54, 48, 7, 70, 53, 128, 7, 78, 51, 100, 7, 78, 54, 30, 7, 78, 53, 48, 15, 90, 51, 132, 47, 74, 54, 100, 15, 74, 51, 30, 15, 74, 35, 100, 15, 0, 67, 49, 128, 15, 23, 54, 100, 15, 23, 54, 30, 22, 72, 51, 48, 7, 91, 54, 129, 15, 91, 54, 122, 31, 91, 49, 55, 22, 70, 53, 46, 15, 78, 64, 55, 15, 78, 35, 178, 47, 78, 54, 100, 19, 78, 54, 30, 19, 90, 54, 122, 15, 90, 32, 100, 19, 74, 51, 30, 15, 74, 51, 149, 52, 0, 67, 51, 122, 15, 67, 53, 100, 15, 67, 63, 30, 22, 23, 51, 100, 22, 72, 49, 149, 42, 72, 51, 122, 7, 72, 35, 30, 7, 91, 54, 30, 7, 91, 51, 100, 15, 91, 54, 122, 52, 70, 53, 30, 7, 78, 64, 100,

Epoch: 1101 | Time: 5m 42s
	Train Loss: 0.232 | Train PPL:   1.261
	 Val. Loss: 6.299 |  Val. PPL: 544.217
=> Saving checkpoint
Epoch: 1102 | Time: 5m 42s
	Train Loss: 0.231 | Train PPL:   1.260
	 Val. Loss: 6.269 |  Val. PPL: 528.064
Epoch: 1103 | Time: 5m 42s
	Train Loss: 0.231 | Train PPL:   1.260
	 Val. Loss: 6.327 |  Val. PPL: 559.690
Epoch: 1104 | Time: 5m 41s
	Train Loss: 0.230 | Train PPL:   1.259
	 Val. Loss: 6.306 |  Val. PPL: 547.579
Epoch: 1105 | Time: 5m 41s
	Train Loss: 0.230 | Train PPL:   1.259
	 Val. Loss: 6.335 |  Val. PPL: 563.842
Epoch: 1106 | Time: 5m 42s
	Train Loss: 0.231 | Train PPL:   1.259
	 Val. Loss: 6.242 |  Val. PPL: 514.027
Epoch: 1107 | Time: 5m 42s
	Train Loss: 0.229 | Train PPL:   1.258
	 Val. Loss: 6.303 |  Val. PPL: 546.010
Epoch: 1108 | Time: 5m 42s
	Train Loss: 0.230 | Train PPL:   1.258
	 Val. Loss: 6.302 |  Val. PPL: 545.734
Epoch: 1109 | Time: 5m 42s
	Train Loss: 0.229 | Train PPL:   1.258
	 Val. Loss: 6.296 |  Val. PPL: 542.256
Epoch: 1110 | Ti

[0, 1, 2, 162, 1, 54, 30, 31, 67, 54, 48, 15, 4, 51, 6, 15, 4, 54, 57, 31, 23, 54, 12, 7, 8, 51, 128, 36, 8, 54, 48, 15, 8, 51, 6, 15, 72, 35, 55, 7, 10, 54, 6, 31, 91, 51, 48, 7, 13, 32, 57, 7, 13, 64, 96, 31, 16, 54, 55, 31, 78, 51, 33, 7, 78, 54, 62, 15, 17, 38, 57, 15, 90, 51, 149, 103, 27, 51, 48, 37, 74, 51, 30, 52, 74, 51, 6, 15, 0, 1, 54, 48, 7, 4, 54, 30, 7, 4, 51, 48, 7, 23, 51, 6, 7, 8, 54, 30, 15, 10, 64, 48, 15, 13, 54, 6, 15, 16, 51, 12, 15, 16, 51, 128, 28, 78, 51, 6, 15, 17, 35, 57, 7, 27, 51, 48, 31, 74, 51, 6, 31, 0, 1, 54, 55, 15, 1, 32, 48, 22, 1, 54, 6, 15, 67, 54, 149, 47, 4, 51, 48, 15, 4, 51, 6, 7, 23, 35, 30, 15, 8, 54, 48, 15, 72, 54, 6, 15, 10, 54, 6, 15, 10, 51, 30, 15, 91, 51, 6, 15, 13, 64, 30, 15, 70, 51, 50, 31, 16, 64, 149, 36, 16, 35, 30, 7, 78, 51, 48, 19, 78, 51, 6, 52, 17, 35, 6, 31, 90, 53, 30, 15, 27, 54, 26, 7, 27, 51, 6, 15, 74, 35, 12, 7, 0, 1, 51, 30, 36, 1, 51, 6, 7, 67, 54, 9, 15, 4, 54, 30, 15, 4, 54, 6, 15, 8, 54, 50, 15, 8, 49, 30, 22, 8,

[0, 1, 2, 134, 23, 49, 55, 15, 23, 54, 132, 77, 72, 49, 128, 52, 72, 49, 100, 7, 91, 54, 48, 19, 91, 53, 100, 22, 91, 53, 55, 22, 70, 53, 128, 52, 70, 53, 100, 15, 78, 54, 132, 68, 78, 54, 100, 19, 78, 53, 48, 7, 90, 51, 48, 7, 74, 51, 128, 59, 0, 67, 49, 100, 15, 67, 11, 30, 15, 23, 45, 128, 7, 72, 54, 100, 15, 91, 54, 132, 19, 91, 53, 100, 15, 70, 49, 48, 15, 78, 64, 128, 7, 90, 54, 100, 52, 74, 54, 48, 52, 74, 53, 100, 22, 74, 51, 128, 7, 0, 67, 53, 100, 15, 23, 54, 55, 7, 23, 54, 48, 7, 72, 54, 100, 15, 72, 53, 30, 7, 91, 54, 48, 15, 70, 53, 50, 15, 78, 49, 129, 94, 78, 54, 55, 7, 78, 53, 55, 7, 90, 54, 48, 19, 74, 35, 122, 15, 0, 67, 49, 55, 15, 67, 54, 48, 7, 23, 49, 122, 15, 23, 54, 132, 19, 72, 54, 100, 15, 72, 51, 48, 7, 91, 53, 100, 52, 91, 49, 30, 7, 78, 54, 128, 52, 78, 54, 48, 52, 74, 51, 100, 15, 0, 67, 49, 55, 15, 23, 51, 132, 19, 23, 54, 30, 52, 72, 35, 48, 19, 91, 49, 100, 52, 78, 64, 48, 7, 78, 35, 100, 15, 90, 35, 48, 19, 74, 51, 129, 19, 0, 67, 51, 55, 7, 67, 54, 12

Epoch: 1158 | Time: 5m 42s
	Train Loss: 0.216 | Train PPL:   1.241
	 Val. Loss: 6.467 |  Val. PPL: 643.529
Epoch: 1159 | Time: 5m 42s
	Train Loss: 0.215 | Train PPL:   1.240
	 Val. Loss: 6.403 |  Val. PPL: 603.785
Epoch: 1160 | Time: 5m 42s
	Train Loss: 0.216 | Train PPL:   1.241
	 Val. Loss: 6.431 |  Val. PPL: 620.975
=> Saving checkpoint
Epoch: 1161 | Time: 5m 41s
	Train Loss: 0.215 | Train PPL:   1.239
	 Val. Loss: 6.437 |  Val. PPL: 624.500
=> Saving checkpoint
Epoch: 1162 | Time: 5m 42s
	Train Loss: 0.215 | Train PPL:   1.239
	 Val. Loss: 6.461 |  Val. PPL: 639.560
Epoch: 1163 | Time: 5m 42s
	Train Loss: 0.214 | Train PPL:   1.239
	 Val. Loss: 6.469 |  Val. PPL: 645.020
Epoch: 1164 | Time: 5m 42s
	Train Loss: 0.214 | Train PPL:   1.239
	 Val. Loss: 6.484 |  Val. PPL: 654.414
Epoch: 1165 | Time: 5m 42s
	Train Loss: 0.214 | Train PPL:   1.239
	 Val. Loss: 6.441 |  Val. PPL: 627.083
Epoch: 1166 | Time: 5m 42s
	Train Loss: 0.214 | Train PPL:   1.239
	 Val. Loss: 6.469 |  Val. PPL: 644

[0, 1, 2, 162, 1, 54, 30, 31, 67, 54, 50, 15, 4, 51, 57, 15, 4, 54, 50, 31, 23, 54, 57, 7, 8, 51, 30, 7, 72, 51, 50, 15, 10, 38, 9, 15, 91, 54, 12, 7, 70, 54, 50, 15, 16, 51, 128, 28, 78, 53, 6, 7, 17, 35, 57, 15, 90, 53, 55, 31, 27, 51, 55, 47, 74, 35, 6, 7, 0, 1, 54, 48, 31, 67, 51, 55, 22, 4, 35, 6, 15, 23, 51, 48, 7, 8, 54, 6, 15, 8, 51, 57, 7, 8, 54, 48, 7, 10, 51, 6, 7, 10, 51, 128, 36, 13, 54, 30, 15, 13, 64, 48, 31, 16, 51, 6, 15, 16, 51, 57, 15, 78, 35, 55, 52, 17, 54, 48, 31, 27, 51, 6, 7, 74, 51, 55, 19, 74, 51, 50, 41, 0, 1, 54, 6, 15, 4, 51, 128, 137, 23, 35, 48, 19, 23, 54, 6, 15, 8, 35, 6, 15, 10, 51, 57, 15, 91, 54, 48, 15, 13, 53, 6, 15, 16, 49, 55, 22, 17, 35, 6, 15, 90, 35, 48, 7, 27, 51, 6, 15, 0, 1, 51, 55, 34, 4, 35, 6, 15, 23, 51, 57, 7, 8, 35, 55, 15, 10, 35, 48, 15, 10, 51, 6, 15, 91, 51, 57, 15, 13, 54, 152, 28, 70, 54, 30, 52, 16, 49, 29, 52, 78, 53, 30, 15, 78, 51, 26, 22, 17, 54, 9, 22, 17, 54, 152, 66, 27, 54, 30, 19, 74, 54, 26, 19, 74, 51, 9, 19, 0, 1, 3

[0, 1, 2, 147, 67, 53, 55, 15, 23, 54, 132, 19, 23, 54, 128, 7, 23, 49, 100, 7, 72, 53, 128, 19, 72, 53, 100, 7, 91, 54, 55, 22, 91, 49, 128, 52, 91, 54, 132, 47, 91, 54, 128, 19, 91, 49, 100, 15, 70, 53, 128, 15, 70, 51, 100, 7, 78, 51, 128, 7, 78, 54, 100, 15, 78, 54, 30, 19, 90, 51, 199, 137, 90, 51, 122, 52, 74, 51, 100, 19, 74, 51, 100, 7, 0, 67, 49, 127, 19, 23, 51, 122, 22, 23, 54, 100, 52, 23, 35, 46, 52, 72, 53, 127, 7, 91, 51, 122, 7, 91, 54, 100, 15, 91, 51, 46, 52, 70, 51, 50, 19, 78, 54, 100, 7, 78, 51, 48, 19, 90, 35, 100, 7, 74, 51, 46, 7, 0, 67, 49, 122, 15, 23, 51, 132, 15, 72, 51, 100, 7, 91, 64, 132, 15, 91, 54, 128, 15, 91, 32, 100, 7, 91, 54, 30, 31, 91, 64, 132, 47, 70, 51, 100, 15, 78, 54, 30, 7, 78, 54, 48, 41, 90, 49, 128, 31, 74, 51, 100, 15, 74, 51, 132, 47, 74, 54, 100, 7, 0, 67, 51, 100, 15, 67, 53, 30, 15, 67, 123, 48, 22, 23, 51, 100, 22, 72, 35, 128, 22, 72, 51, 132, 66, 72, 53, 100, 31, 91, 54, 30, 52, 91, 54, 100, 15, 91, 54, 128, 52, 70, 53, 100, 22, 

Epoch: 1210 | Time: 5m 43s
	Train Loss: 0.203 | Train PPL:   1.225
	 Val. Loss: 6.535 |  Val. PPL: 688.851
Epoch: 1211 | Time: 5m 42s
	Train Loss: 0.203 | Train PPL:   1.225
	 Val. Loss: 6.545 |  Val. PPL: 695.545
Epoch: 1212 | Time: 5m 43s
	Train Loss: 0.202 | Train PPL:   1.224
	 Val. Loss: 6.551 |  Val. PPL: 699.623
Epoch: 1213 | Time: 5m 42s
	Train Loss: 0.202 | Train PPL:   1.224
	 Val. Loss: 6.516 |  Val. PPL: 676.098
Epoch: 1214 | Time: 5m 42s
	Train Loss: 0.202 | Train PPL:   1.224
	 Val. Loss: 6.585 |  Val. PPL: 724.239
Epoch: 1215 | Time: 5m 42s
	Train Loss: 0.202 | Train PPL:   1.223
	 Val. Loss: 6.604 |  Val. PPL: 737.882
Epoch: 1216 | Time: 5m 42s
	Train Loss: 0.201 | Train PPL:   1.223
	 Val. Loss: 6.518 |  Val. PPL: 676.977
Epoch: 1217 | Time: 5m 42s
	Train Loss: 0.201 | Train PPL:   1.222
	 Val. Loss: 6.550 |  Val. PPL: 699.122
Epoch: 1218 | Time: 5m 43s
	Train Loss: 0.201 | Train PPL:   1.223
	 Val. Loss: 6.529 |  Val. PPL: 684.716
Epoch: 1219 | Time: 5m 42s
	Train Los

[0, 1, 2, 162, 1, 54, 55, 31, 67, 54, 33, 31, 4, 51, 50, 15, 4, 54, 57, 31, 23, 54, 50, 7, 8, 51, 128, 47, 8, 64, 48, 15, 8, 51, 6, 15, 72, 51, 55, 7, 10, 54, 48, 15, 13, 32, 128, 15, 13, 32, 129, 36, 13, 64, 57, 31, 16, 54, 33, 31, 78, 51, 55, 7, 78, 54, 50, 7, 17, 49, 57, 15, 27, 51, 55, 15, 74, 51, 33, 41, 74, 51, 33, 15, 0, 1, 54, 50, 15, 67, 51, 129, 28, 4, 54, 33, 7, 4, 51, 55, 7, 23, 51, 50, 7, 8, 54, 55, 15, 72, 64, 33, 15, 10, 51, 50, 15, 10, 51, 57, 15, 91, 54, 33, 15, 13, 51, 6, 7, 70, 51, 149, 47, 16, 51, 30, 31, 16, 51, 48, 7, 78, 64, 6, 7, 17, 54, 30, 31, 90, 51, 6, 15, 27, 51, 30, 31, 74, 51, 26, 15, 74, 51, 6, 15, 74, 51, 30, 15, 0, 1, 64, 26, 7, 67, 51, 9, 7, 4, 54, 6, 15, 8, 51, 129, 36, 8, 49, 55, 31, 72, 35, 33, 31, 10, 51, 50, 7, 10, 54, 57, 7, 13, 54, 55, 28, 13, 51, 33, 19, 16, 35, 50, 52, 17, 35, 55, 66, 17, 53, 33, 7, 27, 51, 50, 7, 0, 1, 54, 33, 52, 67, 51, 55, 15, 4, 54, 50, 7, 8, 54, 9, 7, 8, 54, 33, 7, 8, 32, 55, 15, 10, 54, 50, 15, 13, 51, 128, 36, 13, 51,

[0, 1, 2, 147, 67, 49, 55, 15, 23, 54, 132, 19, 23, 54, 128, 7, 23, 54, 100, 7, 72, 53, 128, 19, 72, 53, 100, 7, 91, 54, 55, 22, 72, 35, 128, 52, 91, 51, 48, 31, 91, 54, 132, 47, 70, 53, 100, 15, 70, 53, 48, 15, 78, 35, 48, 7, 90, 51, 128, 19, 90, 51, 100, 52, 74, 51, 48, 19, 74, 51, 58, 52, 0, 67, 51, 100, 15, 67, 49, 128, 7, 23, 51, 55, 15, 72, 64, 48, 15, 91, 54, 132, 59, 70, 54, 100, 52, 78, 54, 30, 52, 90, 51, 48, 7, 74, 51, 100, 7, 0, 67, 54, 100, 15, 23, 51, 128, 7, 72, 54, 30, 7, 91, 51, 48, 7, 91, 51, 100, 15, 70, 54, 48, 15, 78, 54, 55, 7, 78, 51, 132, 19, 78, 54, 100, 7, 90, 51, 48, 52, 74, 51, 48, 19, 0, 67, 51, 100, 7, 23, 54, 48, 7, 8, 51, 100, 7, 72, 53, 128, 19, 91, 64, 132, 19, 91, 32, 100, 15, 70, 64, 30, 7, 78, 51, 48, 52, 90, 51, 58, 19, 74, 51, 100, 52, 0, 67, 49, 129, 19, 23, 54, 55, 19, 72, 49, 33, 19, 72, 51, 128, 7, 91, 54, 55, 52, 70, 54, 48, 52, 78, 51, 128, 7, 78, 64, 55, 52, 90, 51, 48, 52, 74, 63, 129, 47, 0, 67, 35, 55, 7, 23, 51, 48, 52, 72, 64, 122, 7, 

Epoch: 1259 | Time: 5m 43s
	Train Loss: 0.191 | Train PPL:   1.211
	 Val. Loss: 6.645 |  Val. PPL: 768.835
Epoch: 1260 | Time: 5m 43s
	Train Loss: 0.191 | Train PPL:   1.210
	 Val. Loss: 6.625 |  Val. PPL: 753.472
=> Saving checkpoint
Epoch: 1261 | Time: 5m 42s
	Train Loss: 0.191 | Train PPL:   1.210
	 Val. Loss: 6.705 |  Val. PPL: 816.471
=> Saving checkpoint
Epoch: 1262 | Time: 5m 42s
	Train Loss: 0.191 | Train PPL:   1.210
	 Val. Loss: 6.641 |  Val. PPL: 765.915
Epoch: 1263 | Time: 5m 43s
	Train Loss: 0.190 | Train PPL:   1.209
	 Val. Loss: 6.685 |  Val. PPL: 800.242
Epoch: 1264 | Time: 5m 43s
	Train Loss: 0.190 | Train PPL:   1.209
	 Val. Loss: 6.630 |  Val. PPL: 757.146
Epoch: 1265 | Time: 5m 43s
	Train Loss: 0.190 | Train PPL:   1.209
	 Val. Loss: 6.616 |  Val. PPL: 746.778
Epoch: 1266 | Time: 5m 42s
	Train Loss: 0.189 | Train PPL:   1.208
	 Val. Loss: 6.621 |  Val. PPL: 750.849
Epoch: 1267 | Time: 5m 43s
	Train Loss: 0.190 | Train PPL:   1.209
	 Val. Loss: 6.644 |  Val. PPL: 768

[0, 1, 2, 162, 1, 54, 55, 31, 67, 54, 33, 31, 4, 51, 50, 7, 4, 54, 57, 31, 23, 54, 50, 15, 8, 51, 128, 47, 8, 64, 6, 15, 8, 51, 55, 15, 72, 24, 57, 15, 10, 64, 128, 28, 91, 51, 55, 7, 13, 32, 6, 7, 13, 64, 57, 31, 70, 53, 55, 15, 16, 51, 55, 15, 78, 54, 62, 7, 17, 38, 57, 15, 90, 51, 55, 15, 27, 51, 50, 41, 74, 51, 33, 22, 74, 35, 50, 15, 0, 1, 54, 6, 15, 67, 64, 55, 7, 4, 51, 33, 7, 4, 54, 50, 7, 23, 51, 55, 15, 8, 54, 50, 7, 72, 51, 55, 15, 10, 51, 57, 15, 91, 38, 129, 47, 13, 35, 33, 15, 70, 51, 50, 7, 16, 51, 55, 15, 16, 51, 50, 7, 17, 54, 57, 7, 27, 64, 33, 31, 27, 51, 50, 15, 0, 1, 64, 129, 103, 1, 54, 33, 31, 4, 51, 50, 52, 4, 51, 57, 31, 8, 54, 55, 7, 72, 51, 33, 31, 10, 54, 50, 31, 13, 54, 57, 31, 70, 51, 33, 15, 16, 51, 55, 15, 78, 51, 129, 31, 17, 54, 50, 7, 27, 64, 55, 19, 27, 51, 128, 28, 0, 1, 53, 48, 19, 4, 51, 6, 41, 4, 35, 55, 31, 8, 54, 33, 31, 10, 51, 62, 31, 10, 51, 55, 15, 10, 51, 33, 15, 13, 51, 6, 7, 13, 35, 33, 31, 16, 51, 55, 19, 16, 51, 50, 15, 17, 51, 33, 19,

[0, 1, 2, 147, 67, 49, 100, 15, 67, 51, 132, 15, 23, 54, 128, 7, 23, 54, 100, 7, 72, 54, 48, 7, 72, 53, 128, 7, 91, 53, 100, 22, 72, 35, 132, 47, 91, 51, 100, 52, 91, 54, 128, 19, 91, 54, 100, 15, 70, 53, 128, 31, 78, 64, 100, 19, 90, 51, 128, 19, 90, 51, 100, 52, 74, 53, 128, 52, 74, 51, 48, 52, 0, 67, 51, 128, 15, 67, 49, 132, 22, 23, 51, 100, 15, 72, 64, 30, 15, 72, 64, 128, 7, 91, 54, 100, 52, 91, 32, 48, 52, 70, 53, 128, 22, 78, 51, 227, 102, 78, 54, 55, 41, 90, 51, 46, 19, 74, 51, 55, 31, 0, 67, 51, 122, 15, 67, 35, 55, 31, 23, 54, 55, 31, 23, 54, 46, 31, 72, 51, 50, 31, 91, 51, 185, 19, 91, 54, 55, 7, 70, 35, 128, 15, 78, 54, 132, 15, 78, 54, 100, 52, 78, 51, 128, 19, 78, 54, 48, 52, 90, 54, 100, 19, 74, 51, 48, 52, 0, 67, 51, 100, 19, 67, 54, 30, 52, 23, 51, 48, 19, 72, 51, 100, 52, 91, 54, 185, 52, 70, 51, 111, 15, 78, 53, 129, 7, 78, 54, 55, 52, 90, 51, 48, 7, 74, 54, 55, 7, 0, 67, 51, 132, 19, 23, 51, 100, 7, 8, 54, 30, 15, 72, 54, 48, 31, 91, 51, 111, 15, 70, 51, 55, 15, 78

Epoch: 1301 | Time: 5m 42s
	Train Loss: 0.182 | Train PPL:   1.200
	 Val. Loss: 6.773 |  Val. PPL: 873.781
=> Saving checkpoint
Epoch: 1302 | Time: 5m 42s
	Train Loss: 0.182 | Train PPL:   1.200
	 Val. Loss: 6.726 |  Val. PPL: 833.693
Epoch: 1303 | Time: 5m 42s
	Train Loss: 0.182 | Train PPL:   1.199
	 Val. Loss: 6.678 |  Val. PPL: 794.352
Epoch: 1304 | Time: 5m 42s
	Train Loss: 0.182 | Train PPL:   1.199
	 Val. Loss: 6.708 |  Val. PPL: 819.070
Epoch: 1305 | Time: 5m 41s
	Train Loss: 0.181 | Train PPL:   1.199
	 Val. Loss: 6.730 |  Val. PPL: 837.006
Epoch: 1306 | Time: 5m 42s
	Train Loss: 0.181 | Train PPL:   1.198
	 Val. Loss: 6.704 |  Val. PPL: 815.396
Epoch: 1307 | Time: 5m 43s
	Train Loss: 0.181 | Train PPL:   1.198
	 Val. Loss: 6.746 |  Val. PPL: 850.296
Epoch: 1308 | Time: 5m 42s
	Train Loss: 0.180 | Train PPL:   1.198
	 Val. Loss: 6.697 |  Val. PPL: 809.919
Epoch: 1309 | Time: 5m 42s
	Train Loss: 0.180 | Train PPL:   1.197
	 Val. Loss: 6.722 |  Val. PPL: 830.483
Epoch: 1310 | Ti

[0, 1, 2, 162, 1, 49, 55, 31, 67, 54, 33, 31, 4, 54, 50, 22, 4, 54, 57, 31, 23, 54, 96, 7, 8, 51, 128, 47, 72, 51, 55, 15, 10, 38, 48, 15, 72, 24, 57, 15, 91, 64, 57, 15, 13, 32, 128, 36, 70, 53, 48, 7, 16, 64, 57, 15, 16, 54, 6, 31, 78, 51, 55, 22, 17, 54, 48, 31, 27, 51, 57, 15, 27, 51, 128, 28, 74, 51, 48, 41, 74, 51, 55, 15, 0, 1, 54, 6, 15, 67, 54, 57, 7, 4, 54, 48, 7, 23, 35, 55, 7, 8, 54, 6, 7, 8, 54, 48, 15, 10, 64, 6, 15, 13, 54, 48, 15, 70, 35, 6, 15, 16, 64, 149, 47, 17, 51, 48, 52, 17, 51, 6, 7, 27, 51, 30, 31, 27, 51, 48, 7, 74, 54, 6, 7, 0, 1, 32, 30, 7, 4, 54, 48, 15, 4, 54, 6, 15, 8, 51, 129, 103, 72, 51, 33, 41, 10, 53, 50, 15, 10, 54, 33, 15, 13, 49, 55, 7, 13, 53, 50, 15, 70, 49, 33, 7, 16, 51, 50, 7, 78, 54, 55, 7, 78, 51, 57, 31, 17, 54, 57, 7, 27, 51, 33, 31, 27, 51, 128, 36, 74, 54, 6, 52, 0, 1, 54, 48, 77, 4, 35, 55, 7, 4, 54, 48, 15, 4, 54, 62, 7, 8, 54, 48, 41, 10, 51, 55, 52, 10, 54, 62, 52, 13, 35, 33, 31, 16, 51, 50, 52, 17, 51, 57, 52, 27, 51, 55, 15, 0, 1

[0, 1, 2, 147, 67, 49, 55, 15, 67, 51, 185, 15, 23, 54, 128, 7, 23, 49, 55, 7, 72, 54, 132, 19, 72, 53, 128, 22, 72, 54, 100, 22, 72, 35, 128, 52, 91, 51, 55, 19, 91, 54, 132, 47, 91, 54, 128, 15, 70, 53, 100, 15, 70, 51, 48, 7, 78, 54, 128, 19, 78, 54, 100, 15, 78, 54, 48, 19, 90, 51, 199, 137, 90, 51, 122, 7, 90, 51, 100, 52, 74, 51, 100, 7, 74, 53, 132, 19, 74, 51, 100, 15, 74, 35, 46, 15, 0, 67, 51, 100, 15, 67, 49, 122, 22, 23, 64, 100, 15, 23, 54, 30, 7, 72, 51, 48, 52, 72, 51, 100, 15, 91, 54, 30, 15, 91, 51, 48, 15, 70, 53, 100, 7, 78, 51, 127, 52, 78, 51, 132, 59, 90, 51, 100, 52, 74, 51, 30, 52, 0, 67, 35, 100, 15, 67, 35, 128, 15, 23, 32, 30, 7, 8, 51, 132, 19, 72, 51, 132, 47, 91, 54, 100, 31, 91, 54, 30, 19, 70, 54, 128, 7, 78, 64, 100, 15, 90, 51, 185, 56, 74, 51, 128, 31, 74, 51, 55, 7, 0, 67, 51, 128, 31, 23, 35, 111, 15, 23, 49, 129, 19, 72, 51, 128, 19, 72, 51, 111, 31, 91, 51, 128, 7, 70, 54, 55, 7, 78, 54, 48, 52, 90, 51, 128, 7, 74, 51, 111, 22, 74, 51, 55, 22, 74,

Epoch: 1357 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.186
	 Val. Loss: 6.824 |  Val. PPL: 919.924
Epoch: 1358 | Time: 5m 42s
	Train Loss: 0.171 | Train PPL:   1.186
	 Val. Loss: 6.827 |  Val. PPL: 922.568
Epoch: 1359 | Time: 5m 41s
	Train Loss: 0.170 | Train PPL:   1.185
	 Val. Loss: 6.797 |  Val. PPL: 895.368
Epoch: 1360 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.185
	 Val. Loss: 6.853 |  Val. PPL: 947.185
=> Saving checkpoint
Epoch: 1361 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.185
	 Val. Loss: 6.786 |  Val. PPL: 885.733
=> Saving checkpoint
Epoch: 1362 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.186
	 Val. Loss: 6.869 |  Val. PPL: 962.101
Epoch: 1363 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.185
	 Val. Loss: 6.779 |  Val. PPL: 879.015
Epoch: 1364 | Time: 5m 42s
	Train Loss: 0.170 | Train PPL:   1.185
	 Val. Loss: 6.858 |  Val. PPL: 951.524
Epoch: 1365 | Time: 5m 42s
	Train Loss: 0.169 | Train PPL:   1.185
	 Val. Loss: 6.841 |  Val. PPL: 934

[0, 1, 2, 162, 1, 49, 55, 31, 67, 54, 33, 31, 4, 54, 50, 15, 4, 54, 57, 31, 23, 54, 96, 15, 8, 51, 128, 47, 72, 51, 48, 15, 10, 38, 6, 15, 72, 35, 55, 7, 10, 54, 48, 15, 91, 51, 6, 15, 13, 32, 57, 7, 13, 54, 6, 15, 70, 53, 55, 31, 16, 51, 33, 7, 78, 54, 62, 7, 17, 54, 57, 7, 27, 51, 50, 15, 74, 51, 57, 7, 74, 51, 33, 15, 0, 1, 54, 129, 36, 1, 54, 33, 41, 4, 54, 50, 31, 4, 51, 55, 31, 23, 53, 33, 7, 8, 54, 50, 15, 72, 35, 57, 15, 10, 51, 50, 15, 91, 49, 57, 15, 13, 64, 129, 47, 13, 35, 33, 52, 70, 51, 50, 7, 16, 51, 33, 15, 16, 51, 50, 7, 17, 54, 57, 7, 17, 54, 98, 7, 27, 51, 50, 15, 27, 54, 57, 7, 74, 54, 33, 15, 0, 1, 64, 50, 22, 67, 51, 57, 22, 4, 54, 33, 22, 8, 54, 50, 22, 8, 53, 9, 52, 8, 54, 50, 7, 10, 54, 9, 31, 10, 51, 55, 15, 13, 64, 50, 15, 16, 64, 57, 15, 17, 54, 33, 15, 17, 64, 50, 15, 17, 51, 9, 15, 27, 49, 30, 15, 27, 51, 50, 15, 0, 1, 54, 30, 52, 67, 54, 26, 52, 4, 54, 6, 31, 4, 51, 30, 15, 8, 54, 26, 7, 8, 54, 6, 7, 10, 54, 30, 15, 10, 54, 50, 15, 10, 51, 6, 15, 13, 35, 

[0, 1, 2, 69, 23, 54, 55, 15, 72, 49, 132, 19, 72, 64, 128, 19, 91, 49, 132, 19, 70, 49, 128, 15, 78, 53, 100, 7, 78, 54, 55, 7, 90, 51, 132, 47, 90, 51, 128, 47, 74, 54, 100, 66, 0, 67, 51, 30, 7, 23, 45, 128, 52, 72, 49, 100, 15, 91, 51, 132, 22, 91, 54, 128, 7, 70, 49, 100, 7, 78, 45, 132, 22, 78, 64, 128, 15, 90, 51, 178, 68, 74, 53, 100, 7, 74, 54, 30, 19, 0, 67, 51, 122, 7, 67, 35, 122, 15, 67, 54, 100, 7, 23, 49, 149, 52, 72, 51, 30, 7, 91, 51, 50, 7, 70, 54, 122, 22, 70, 45, 100, 15, 78, 51, 30, 15, 78, 64, 129, 59, 90, 51, 122, 15, 74, 54, 55, 52, 0, 67, 35, 122, 15, 67, 51, 55, 31, 23, 54, 128, 15, 72, 35, 122, 52, 91, 49, 218, 66, 70, 51, 30, 66, 78, 54, 111, 52, 78, 54, 149, 19, 90, 54, 128, 52, 74, 51, 111, 15, 74, 51, 30, 52, 0, 67, 53, 128, 15, 23, 35, 132, 19, 23, 49, 100, 31, 72, 51, 30, 52, 91, 53, 128, 19, 70, 53, 100, 7, 78, 54, 30, 7, 90, 51, 48, 31, 74, 54, 132, 15, 0, 1, 43, 176, 67, 123, 128, 7, 23, 35, 100, 15, 23, 53, 100, 15, 23, 54, 30, 19, 23, 45, 128, 15, 

Epoch: 1402 | Time: 5m 41s
	Train Loss: 0.162 | Train PPL:   1.176
	 Val. Loss: 6.944 |  Val. PPL: 1036.534
Epoch: 1403 | Time: 5m 41s
	Train Loss: 0.162 | Train PPL:   1.176
	 Val. Loss: 6.862 |  Val. PPL: 954.948
Epoch: 1404 | Time: 5m 41s
	Train Loss: 0.162 | Train PPL:   1.175
	 Val. Loss: 6.874 |  Val. PPL: 967.106
Epoch: 1405 | Time: 5m 41s
	Train Loss: 0.161 | Train PPL:   1.175
	 Val. Loss: 6.950 |  Val. PPL: 1042.824
Epoch: 1406 | Time: 5m 41s
	Train Loss: 0.162 | Train PPL:   1.175
	 Val. Loss: 6.924 |  Val. PPL: 1016.575
Epoch: 1407 | Time: 5m 41s
	Train Loss: 0.161 | Train PPL:   1.175
	 Val. Loss: 6.925 |  Val. PPL: 1017.450
Epoch: 1408 | Time: 5m 41s
	Train Loss: 0.161 | Train PPL:   1.175
	 Val. Loss: 6.896 |  Val. PPL: 988.456
Epoch: 1409 | Time: 5m 41s
	Train Loss: 0.161 | Train PPL:   1.175
	 Val. Loss: 6.963 |  Val. PPL: 1056.620
Epoch: 1410 | Time: 5m 41s
	Train Loss: 0.161 | Train PPL:   1.174
	 Val. Loss: 6.981 |  Val. PPL: 1075.776
Epoch: 1411 | Time: 5m 41s
	Tra

[0, 1, 2, 162, 1, 49, 55, 31, 67, 54, 33, 31, 4, 51, 50, 15, 23, 64, 57, 31, 72, 54, 96, 15, 10, 32, 55, 41, 10, 64, 50, 15, 91, 38, 57, 7, 13, 38, 57, 7, 70, 54, 33, 15, 16, 51, 128, 36, 17, 54, 50, 7, 27, 64, 57, 15, 27, 51, 55, 31, 74, 54, 48, 7, 0, 1, 64, 6, 7, 4, 54, 6, 15, 4, 51, 48, 22, 8, 54, 6, 15, 8, 53, 55, 15, 8, 54, 129, 47, 8, 51, 33, 15, 8, 54, 50, 7, 10, 38, 55, 31, 10, 54, 50, 52, 13, 64, 33, 15, 16, 64, 50, 15, 16, 64, 57, 15, 17, 51, 21, 7, 27, 51, 118, 7, 27, 51, 33, 31, 74, 51, 50, 7, 0, 1, 51, 50, 15, 4, 51, 129, 21, 7, 23, 35, 55, 15, 8, 54, 33, 7, 8, 64, 50, 7, 10, 35, 57, 31, 13, 32, 21, 7, 13, 51, 33, 7, 16, 51, 55, 15, 16, 64, 6, 7, 78, 51, 55, 15, 17, 35, 33, 52, 17, 53, 50, 52, 17, 51, 57, 52, 27, 51, 33, 52, 27, 32, 57, 15, 74, 54, 21, 7, 0, 1, 49, 33, 15, 67, 51, 50, 15, 4, 51, 6, 15, 4, 51, 57, 15, 8, 51, 92, 19, 10, 51, 33, 19, 10, 54, 62, 7, 13, 38, 57, 7, 16, 38, 21, 7, 17, 51, 128, 47, 27, 51, 48, 15, 27, 32, 6, 15, 0, 1, 54, 48, 15, 67, 51, 55, 7, 4

[0, 1, 2, 147, 67, 49, 100, 15, 67, 51, 48, 15, 23, 54, 132, 15, 23, 54, 100, 41, 72, 54, 48, 7, 72, 53, 100, 7, 91, 51, 100, 22, 72, 51, 132, 42, 72, 51, 100, 52, 91, 54, 48, 15, 91, 54, 128, 31, 70, 53, 100, 31, 78, 51, 48, 15, 90, 51, 128, 19, 90, 54, 100, 52, 74, 53, 48, 47, 74, 51, 58, 15, 0, 67, 51, 100, 15, 67, 49, 55, 15, 23, 54, 48, 15, 72, 64, 50, 15, 72, 51, 129, 19, 72, 54, 55, 52, 91, 51, 48, 52, 70, 53, 55, 7, 78, 51, 33, 7, 78, 64, 128, 7, 90, 51, 48, 15, 90, 51, 132, 47, 74, 54, 100, 19, 0, 67, 51, 30, 15, 67, 49, 48, 15, 23, 54, 100, 7, 23, 51, 100, 15, 72, 32, 30, 15, 72, 51, 48, 7, 91, 51, 100, 7, 70, 53, 30, 15, 78, 53, 48, 52, 78, 51, 129, 19, 90, 51, 55, 15, 90, 51, 33, 19, 74, 51, 50, 52, 0, 67, 51, 55, 22, 67, 54, 33, 22, 23, 53, 50, 19, 72, 51, 55, 52, 72, 54, 185, 103, 72, 51, 111, 31, 91, 51, 55, 7, 91, 54, 48, 52, 70, 51, 128, 19, 78, 32, 111, 7, 78, 54, 55, 7, 90, 54, 48, 7, 90, 51, 55, 15, 74, 51, 48, 15, 0, 67, 51, 178, 19, 67, 32, 100, 19, 67, 32, 30, 7,

Epoch: 1454 | Time: 5m 42s
	Train Loss: 0.153 | Train PPL:   1.165
	 Val. Loss: 7.029 |  Val. PPL: 1129.092
Epoch: 1455 | Time: 5m 43s
	Train Loss: 0.153 | Train PPL:   1.165
	 Val. Loss: 6.987 |  Val. PPL: 1082.700
Epoch: 1456 | Time: 5m 42s
	Train Loss: 0.153 | Train PPL:   1.165
	 Val. Loss: 7.014 |  Val. PPL: 1111.782
Epoch: 1457 | Time: 5m 42s
	Train Loss: 0.153 | Train PPL:   1.165
	 Val. Loss: 7.076 |  Val. PPL: 1183.510
Epoch: 1458 | Time: 5m 42s
	Train Loss: 0.152 | Train PPL:   1.165
	 Val. Loss: 7.063 |  Val. PPL: 1168.507
Epoch: 1459 | Time: 5m 43s
	Train Loss: 0.152 | Train PPL:   1.165
	 Val. Loss: 7.019 |  Val. PPL: 1118.188
Epoch: 1460 | Time: 5m 42s
	Train Loss: 0.152 | Train PPL:   1.164
	 Val. Loss: 7.020 |  Val. PPL: 1118.238
=> Saving checkpoint
Epoch: 1461 | Time: 5m 43s
	Train Loss: 0.152 | Train PPL:   1.164
	 Val. Loss: 7.008 |  Val. PPL: 1105.920
=> Saving checkpoint
Epoch: 1462 | Time: 5m 42s
	Train Loss: 0.152 | Train PPL:   1.164
	 Val. Loss: 7.045 |  Val. 

[0, 1, 2, 162, 1, 49, 55, 15, 1, 54, 48, 31, 1, 54, 57, 15, 4, 54, 50, 15, 4, 54, 57, 15, 8, 32, 55, 41, 8, 51, 50, 15, 8, 51, 57, 7, 8, 51, 128, 31, 72, 54, 48, 31, 10, 32, 55, 7, 91, 32, 129, 36, 13, 64, 50, 15, 13, 64, 55, 31, 70, 53, 33, 7, 16, 51, 50, 7, 16, 64, 57, 15, 27, 51, 55, 15, 27, 51, 50, 41, 27, 51, 33, 15, 74, 35, 50, 15, 0, 1, 54, 33, 7, 4, 54, 50, 7, 4, 51, 55, 7, 23, 51, 128, 36, 8, 54, 55, 15, 10, 64, 48, 7, 91, 54, 6, 15, 13, 38, 57, 15, 16, 51, 128, 42, 17, 51, 48, 15, 17, 35, 6, 7, 27, 51, 48, 31, 74, 51, 6, 7, 0, 1, 54, 55, 15, 1, 54, 48, 22, 4, 53, 48, 15, 4, 35, 6, 15, 8, 54, 180, 66, 8, 54, 33, 15, 8, 54, 6, 7, 10, 45, 55, 15, 10, 54, 33, 15, 13, 54, 6, 15, 16, 35, 33, 7, 78, 51, 6, 15, 17, 32, 55, 15, 17, 49, 33, 31, 27, 51, 6, 7, 74, 53, 33, 31, 0, 1, 49, 55, 7, 1, 54, 50, 15, 1, 54, 57, 7, 4, 54, 129, 103, 8, 45, 33, 7, 8, 54, 50, 31, 8, 54, 50, 31, 10, 54, 33, 22, 10, 54, 50, 22, 13, 35, 57, 22, 13, 51, 33, 52, 16, 51, 57, 15, 17, 51, 50, 15, 17, 51, 55, 

[0, 1, 2, 147, 67, 49, 100, 15, 67, 51, 185, 19, 23, 54, 128, 7, 23, 49, 132, 19, 72, 54, 128, 52, 72, 53, 100, 7, 91, 54, 55, 22, 72, 51, 128, 52, 91, 51, 100, 52, 70, 53, 128, 15, 78, 54, 129, 15, 90, 32, 185, 59, 90, 64, 128, 19, 90, 51, 111, 41, 74, 54, 55, 15, 0, 67, 51, 128, 52, 23, 53, 111, 19, 72, 51, 55, 19, 91, 54, 128, 7, 70, 53, 111, 7, 78, 49, 55, 19, 90, 51, 128, 7, 74, 54, 185, 47, 74, 53, 111, 52, 0, 67, 35, 111, 52, 23, 51, 55, 7, 72, 49, 178, 19, 72, 64, 122, 19, 91, 45, 100, 19, 70, 53, 149, 7, 78, 54, 122, 52, 90, 51, 100, 52, 74, 54, 30, 52, 0, 67, 51, 100, 52, 23, 51, 128, 31, 23, 54, 185, 66, 72, 35, 128, 19, 91, 35, 111, 41, 70, 51, 55, 41, 78, 51, 128, 52, 78, 54, 111, 19, 90, 51, 129, 7, 74, 51, 128, 19, 74, 51, 100, 52, 0, 67, 53, 128, 15, 67, 35, 55, 15, 23, 54, 128, 19, 23, 32, 111, 15, 23, 53, 55, 52, 72, 53, 128, 52, 91, 54, 185, 19, 70, 51, 111, 52, 70, 49, 129, 31, 78, 54, 128, 7, 78, 64, 111, 7, 90, 51, 55, 15, 74, 63, 129, 52, 0, 67, 51, 111, 7, 23, 3

Epoch: 1501 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.128 |  Val. PPL: 1246.526
=> Saving checkpoint
Epoch: 1502 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.109 |  Val. PPL: 1222.672
Epoch: 1503 | Time: 5m 41s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.126 |  Val. PPL: 1243.759
Epoch: 1504 | Time: 5m 41s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.140 |  Val. PPL: 1261.172
Epoch: 1505 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.084 |  Val. PPL: 1192.679
Epoch: 1506 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.077 |  Val. PPL: 1184.746
Epoch: 1507 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.126 |  Val. PPL: 1243.624
Epoch: 1508 | Time: 5m 42s
	Train Loss: 0.145 | Train PPL:   1.156
	 Val. Loss: 7.120 |  Val. PPL: 1236.384
Epoch: 1509 | Time: 5m 42s
	Train Loss: 0.144 | Train PPL:   1.155
	 Val. Loss: 7.148 |  Val. PPL: 1271.214
Epoch: 

[0, 1, 2, 158, 1, 38, 30, 15, 1, 54, 48, 31, 4, 54, 6, 22, 23, 51, 50, 31, 8, 54, 57, 15, 8, 32, 149, 47, 8, 51, 30, 15, 8, 51, 26, 31, 10, 51, 6, 7, 10, 64, 12, 31, 13, 64, 6, 31, 13, 32, 12, 7, 13, 51, 30, 15, 70, 51, 48, 31, 16, 51, 50, 15, 78, 51, 30, 15, 17, 38, 9, 15, 27, 51, 30, 15, 27, 51, 6, 15, 74, 54, 48, 15, 0, 1, 54, 129, 36, 1, 54, 33, 66, 1, 64, 50, 31, 4, 54, 55, 7, 4, 54, 50, 7, 8, 54, 33, 15, 8, 54, 50, 31, 10, 51, 55, 15, 10, 51, 57, 15, 91, 49, 33, 22, 13, 51, 50, 15, 70, 51, 149, 47, 17, 51, 33, 31, 17, 51, 6, 31, 17, 54, 48, 7, 17, 51, 30, 31, 27, 51, 6, 15, 0, 1, 64, 129, 103, 1, 54, 33, 31, 1, 54, 50, 7, 1, 51, 6, 7, 4, 53, 33, 22, 8, 54, 50, 7, 8, 53, 9, 31, 10, 35, 6, 7, 10, 54, 33, 7, 13, 54, 55, 15, 13, 51, 129, 34, 13, 64, 50, 7, 16, 51, 55, 15, 17, 64, 50, 15, 17, 51, 57, 52, 27, 54, 55, 15, 0, 1, 51, 128, 36, 1, 54, 55, 52, 1, 51, 48, 52, 4, 54, 55, 41, 8, 51, 48, 19, 10, 54, 62, 52, 13, 35, 55, 31, 13, 51, 48, 52, 16, 51, 6, 31, 17, 51, 55, 7, 17, 51, 50

[0, 1, 2, 147, 67, 49, 100, 15, 67, 51, 185, 19, 23, 54, 128, 7, 23, 49, 129, 41, 72, 54, 128, 31, 72, 53, 55, 7, 91, 51, 55, 22, 72, 51, 129, 52, 91, 51, 128, 52, 70, 53, 129, 52, 78, 54, 128, 15, 90, 32, 55, 31, 90, 51, 132, 19, 74, 51, 128, 19, 0, 67, 51, 132, 19, 67, 64, 100, 7, 23, 54, 128, 15, 72, 64, 30, 15, 72, 51, 178, 137, 72, 53, 122, 19, 91, 64, 100, 19, 91, 54, 30, 41, 70, 35, 122, 7, 78, 54, 100, 52, 90, 54, 30, 7, 74, 51, 149, 102, 0, 67, 51, 122, 15, 23, 54, 122, 7, 23, 49, 100, 52, 23, 51, 30, 15, 72, 35, 122, 15, 91, 54, 100, 52, 91, 54, 30, 52, 78, 51, 48, 31, 78, 54, 227, 66, 78, 53, 55, 7, 78, 54, 46, 19, 90, 54, 122, 31, 74, 51, 55, 52, 74, 51, 129, 52, 0, 67, 53, 122, 15, 23, 54, 55, 15, 23, 51, 122, 15, 23, 51, 46, 52, 72, 54, 55, 52, 91, 54, 33, 19, 78, 54, 122, 52, 78, 54, 55, 7, 90, 51, 48, 7, 74, 51, 185, 19, 0, 67, 51, 128, 15, 23, 51, 55, 52, 23, 51, 128, 31, 72, 51, 132, 19, 72, 64, 100, 7, 91, 51, 30, 52, 70, 53, 128, 22, 78, 51, 178, 19, 78, 35, 122, 19

Epoch: 1552 | Time: 5m 41s
	Train Loss: 0.138 | Train PPL:   1.148
	 Val. Loss: 7.257 |  Val. PPL: 1417.824
Epoch: 1553 | Time: 5m 41s
	Train Loss: 0.138 | Train PPL:   1.147
	 Val. Loss: 7.210 |  Val. PPL: 1353.430
Epoch: 1554 | Time: 5m 42s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.197 |  Val. PPL: 1335.835
Epoch: 1555 | Time: 5m 41s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.191 |  Val. PPL: 1327.931
Epoch: 1556 | Time: 5m 41s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.198 |  Val. PPL: 1336.266
Epoch: 1557 | Time: 5m 41s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.264 |  Val. PPL: 1428.120
Epoch: 1558 | Time: 5m 42s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.232 |  Val. PPL: 1382.929
Epoch: 1559 | Time: 5m 42s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.219 |  Val. PPL: 1365.639
Epoch: 1560 | Time: 5m 42s
	Train Loss: 0.137 | Train PPL:   1.147
	 Val. Loss: 7.242 |  Val. PPL: 1397.338
=> Saving checkpoint
Epoch: 

[0, 1, 2, 162, 1, 49, 55, 15, 1, 53, 33, 31, 1, 54, 50, 31, 4, 54, 57, 31, 4, 54, 50, 15, 8, 51, 55, 7, 8, 64, 50, 15, 8, 51, 33, 15, 8, 54, 50, 7, 72, 51, 55, 15, 10, 32, 128, 7, 91, 32, 129, 36, 13, 54, 33, 15, 13, 64, 50, 31, 70, 53, 55, 7, 16, 64, 33, 31, 16, 64, 50, 31, 27, 51, 55, 15, 27, 51, 50, 41, 27, 51, 33, 7, 74, 35, 50, 31, 0, 1, 49, 55, 15, 1, 64, 55, 15, 1, 51, 33, 7, 4, 54, 50, 7, 4, 51, 55, 15, 23, 54, 50, 7, 8, 54, 33, 15, 10, 51, 50, 15, 91, 35, 149, 47, 13, 35, 30, 52, 70, 51, 48, 7, 16, 51, 6, 15, 16, 51, 30, 15, 17, 54, 48, 7, 17, 54, 6, 31, 27, 54, 48, 15, 0, 1, 64, 129, 103, 1, 54, 33, 31, 4, 51, 50, 52, 8, 54, 9, 7, 10, 45, 30, 7, 0, 1, 54, 50, 31, 4, 51, 55, 22, 8, 54, 6, 7, 10, 49, 33, 19, 10, 51, 50, 7, 13, 64, 57, 52, 0, 1, 43, 176, 1, 64, 128, 36, 1, 64, 48, 52, 4, 53, 6, 15, 8, 63, 55, 22, 8, 54, 48, 22, 8, 54, 6, 15, 10, 54, 62, 52, 13, 54, 55, 15, 13, 38, 21, 15, 13, 51, 82, 31, 16, 51, 55, 15, 17, 51, 33, 15, 17, 51, 50, 15, 17, 51, 6, 15, 27, 64, 92, 

[0, 1, 2, 130, 67, 49, 128, 15, 67, 51, 100, 15, 23, 51, 128, 7, 23, 54, 185, 19, 72, 54, 128, 15, 72, 51, 100, 7, 91, 51, 55, 22, 72, 51, 178, 47, 91, 51, 100, 31, 70, 54, 128, 15, 78, 51, 100, 15, 90, 51, 30, 52, 90, 51, 132, 19, 74, 51, 128, 52, 74, 54, 100, 15, 0, 67, 51, 178, 59, 67, 35, 122, 19, 23, 51, 100, 19, 72, 53, 122, 7, 91, 53, 100, 7, 70, 54, 30, 19, 78, 51, 122, 7, 90, 54, 100, 52, 74, 51, 30, 52, 0, 67, 51, 218, 41, 23, 51, 111, 7, 72, 64, 30, 7, 91, 64, 111, 15, 70, 49, 180, 19, 78, 64, 128, 19, 78, 64, 111, 52, 90, 51, 55, 52, 74, 51, 48, 52, 74, 51, 111, 15, 0, 67, 53, 55, 15, 23, 54, 178, 66, 23, 54, 122, 19, 72, 64, 100, 19, 72, 49, 30, 7, 91, 54, 122, 7, 70, 53, 149, 19, 78, 54, 132, 19, 78, 54, 128, 19, 78, 35, 100, 52, 90, 54, 30, 52, 90, 51, 227, 19, 74, 51, 122, 52, 0, 67, 49, 178, 19, 23, 53, 122, 19, 72, 53, 55, 52, 72, 51, 129, 7, 91, 54, 122, 52, 91, 54, 55, 52, 70, 51, 129, 7, 78, 54, 122, 52, 78, 35, 122, 15, 90, 35, 55, 52, 74, 51, 185, 19, 74, 54, 128

Epoch: 1601 | Time: 5m 43s
	Train Loss: 0.131 | Train PPL:   1.140
	 Val. Loss: 7.237 |  Val. PPL: 1390.114
=> Saving checkpoint
Epoch: 1602 | Time: 5m 42s
	Train Loss: 0.131 | Train PPL:   1.139
	 Val. Loss: 7.303 |  Val. PPL: 1484.883
Epoch: 1603 | Time: 5m 42s
	Train Loss: 0.130 | Train PPL:   1.139
	 Val. Loss: 7.260 |  Val. PPL: 1422.152
Epoch: 1604 | Time: 5m 42s
	Train Loss: 0.131 | Train PPL:   1.139
	 Val. Loss: 7.279 |  Val. PPL: 1449.164
Epoch: 1605 | Time: 5m 42s
	Train Loss: 0.131 | Train PPL:   1.139
	 Val. Loss: 7.269 |  Val. PPL: 1435.346
Epoch: 1606 | Time: 5m 42s
	Train Loss: 0.130 | Train PPL:   1.139
	 Val. Loss: 7.274 |  Val. PPL: 1441.696
Epoch: 1607 | Time: 5m 42s
	Train Loss: 0.130 | Train PPL:   1.138
	 Val. Loss: 7.262 |  Val. PPL: 1425.721
Epoch: 1608 | Time: 5m 42s
	Train Loss: 0.130 | Train PPL:   1.138
	 Val. Loss: 7.260 |  Val. PPL: 1421.728
Epoch: 1609 | Time: 5m 42s
	Train Loss: 0.130 | Train PPL:   1.138
	 Val. Loss: 7.230 |  Val. PPL: 1380.497
Epoch: 

[0, 1, 2, 162, 1, 49, 55, 31, 1, 53, 33, 31, 1, 54, 50, 31, 4, 64, 57, 31, 4, 54, 50, 15, 8, 51, 55, 47, 8, 64, 6, 15, 8, 51, 33, 31, 8, 54, 50, 7, 72, 51, 55, 15, 10, 51, 128, 7, 91, 32, 129, 36, 13, 54, 33, 15, 13, 64, 50, 31, 70, 53, 55, 7, 16, 51, 33, 7, 16, 64, 50, 15, 78, 64, 55, 15, 27, 51, 50, 41, 27, 51, 33, 7, 74, 35, 50, 7, 0, 1, 49, 55, 15, 1, 64, 55, 15, 1, 54, 33, 7, 1, 64, 50, 7, 4, 51, 55, 15, 23, 54, 50, 7, 8, 54, 33, 15, 8, 54, 57, 15, 10, 54, 129, 36, 10, 35, 55, 52, 13, 64, 33, 7, 13, 53, 50, 31, 16, 51, 55, 7, 17, 54, 33, 7, 17, 54, 50, 31, 27, 51, 57, 15, 0, 1, 64, 129, 103, 1, 35, 33, 31, 1, 54, 50, 52, 1, 51, 57, 31, 4, 53, 33, 7, 8, 49, 55, 41, 8, 53, 50, 15, 10, 35, 57, 7, 10, 54, 33, 15, 13, 64, 55, 15, 13, 51, 180, 34, 13, 64, 55, 31, 16, 32, 33, 31, 16, 51, 128, 28, 16, 49, 62, 31, 17, 35, 55, 31, 17, 51, 48, 31, 27, 35, 6, 7, 0, 1, 54, 180, 125, 1, 45, 55, 41, 4, 53, 33, 7, 8, 49, 6, 52, 8, 53, 75, 31, 8, 49, 111, 19, 10, 71, 55, 19, 10, 35, 33, 19, 13, 54

[0, 1, 2, 130, 67, 49, 128, 15, 67, 51, 100, 15, 23, 51, 129, 15, 23, 49, 185, 47, 72, 54, 128, 52, 72, 51, 55, 52, 91, 51, 100, 22, 70, 49, 128, 15, 70, 51, 55, 15, 78, 54, 128, 22, 78, 54, 111, 15, 90, 32, 55, 7, 90, 64, 48, 7, 74, 51, 178, 59, 0, 67, 51, 122, 15, 67, 64, 100, 7, 23, 53, 149, 7, 8, 54, 100, 15, 72, 53, 30, 15, 72, 54, 50, 15, 91, 64, 30, 15, 70, 53, 122, 22, 70, 53, 100, 22, 78, 54, 30, 22, 90, 64, 48, 22, 74, 51, 178, 66, 0, 67, 54, 100, 15, 67, 51, 122, 7, 23, 49, 100, 52, 23, 51, 100, 15, 72, 51, 30, 15, 72, 51, 100, 15, 91, 54, 30, 15, 70, 53, 50, 15, 78, 54, 218, 66, 78, 53, 111, 7, 78, 54, 30, 19, 90, 54, 48, 41, 74, 51, 111, 7, 74, 49, 149, 52, 0, 67, 54, 128, 15, 23, 54, 132, 19, 23, 51, 100, 7, 23, 51, 48, 52, 72, 54, 100, 52, 91, 54, 30, 19, 91, 51, 48, 19, 70, 35, 100, 15, 78, 51, 129, 31, 78, 53, 178, 19, 78, 51, 122, 7, 90, 51, 100, 52, 74, 51, 30, 52, 0, 67, 51, 129, 19, 67, 51, 122, 15, 67, 49, 55, 15, 23, 53, 46, 19, 72, 51, 122, 15, 91, 54, 100, 7, 7

Epoch: 1652 | Time: 5m 41s
	Train Loss: 0.124 | Train PPL:   1.132
	 Val. Loss: 7.368 |  Val. PPL: 1585.054
Epoch: 1653 | Time: 5m 42s
	Train Loss: 0.124 | Train PPL:   1.132
	 Val. Loss: 7.348 |  Val. PPL: 1552.718
Epoch: 1654 | Time: 5m 42s
	Train Loss: 0.124 | Train PPL:   1.132
	 Val. Loss: 7.360 |  Val. PPL: 1572.330
Epoch: 1655 | Time: 5m 42s
	Train Loss: 0.124 | Train PPL:   1.132
	 Val. Loss: 7.336 |  Val. PPL: 1533.879
Epoch: 1656 | Time: 5m 43s
	Train Loss: 0.124 | Train PPL:   1.132
	 Val. Loss: 7.334 |  Val. PPL: 1531.370
Epoch: 1657 | Time: 5m 42s
	Train Loss: 0.123 | Train PPL:   1.131
	 Val. Loss: 7.359 |  Val. PPL: 1570.448
Epoch: 1658 | Time: 5m 42s
	Train Loss: 0.123 | Train PPL:   1.131
	 Val. Loss: 7.391 |  Val. PPL: 1621.943
Epoch: 1659 | Time: 5m 42s
	Train Loss: 0.124 | Train PPL:   1.131
	 Val. Loss: 7.368 |  Val. PPL: 1584.065
Epoch: 1660 | Time: 5m 42s
	Train Loss: 0.123 | Train PPL:   1.131
	 Val. Loss: 7.383 |  Val. PPL: 1609.021
=> Saving checkpoint
Epoch: 

In [18]:
# checkpoint = {'model_state_dict': model.state_dict(),
#                   'optimizer_state_dict': optimizer.state_dict(),
#                   'valid_loss': valid_loss}
# save_checkpoint(destination_folder + checkpoint,N_EPOCHS)

In [None]:
output = open(folder + "/train_loss_log.pkl", 'wb')
pickle.dump(train_loss_log, output)
output.close()

output = open(folder + "/valid_loss_log.pkl", 'wb')
pickle.dump(valid_loss_log, output)
output.close()

In [19]:
best_model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device,
).to(device)
optimizer = optim.Adam(best_model.parameters(), lr=0.001)

In [None]:
state = torch.load(destination_folder + '/1000_checkpoint.pt', map_location=device)
load_checkpoint(state, model, optimizer)

In [None]:
test_loss = evaluate(model, test_iter, criterion)
print(math.exp(test_loss))

In [None]:
generated_outputs = folder +  "/generated_samples_1000epochs"
Path(generated_outputs+"/main").mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/piano").mkdir(parents=True, exist_ok=True)
Path(generated_outputs+"/piano_predict").mkdir(parents=True, exist_ok=True)

In [None]:

df_intro = pd.read_csv(source_folder + '/test_torchtext.csv')
test_main = df_intro['main'].values
test_piano = df_intro['piano'].values
test_data=[]
for i in range(len(val_main)):
    temp_dict = {}
    temp_dict['main'] = test_main[i]
    temp_dict['piano'] = test_piano[i]
    test_data.append(temp_dict)
print(len(test_piano))

In [None]:
for i in range(0,len(test_piano)):
#     if len(test_intro) > 1200:
#         continue
    main = test_main[i]
    piano = test_piano[i]
    list_main = [int(x) for x in main.split(' ')]
    list_piano = [int(x) for x in piano.split(' ')]
    
    translated_sentence = translate_sentence(model, main, main_field, piano_field, device, max_length=3000)
        #print(translated_sentence)
    translated_sentence = [int(x) for x in translated_sentence if x != '<pad>' and x != '<sos>' and x != '<eos>' and x != '<unk>']
    print(translated_sentence)
    utils.write_midi(list_main, word2event, generated_outputs + "/main/" + "/main" + str(i)  + ".mid")
    utils.write_midi(list_piano, word2event, generated_outputs  + "/piano/" + "/piano" + str(i)  + ".mid")
    utils.write_midi(translated_sentence, word2event, generated_outputs + "/piano_predict/" + "/piano_predict" + str(i)  + ".mid")
    print(i)
#     if i == 10:
#         break
        


In [None]:
import mido
for i in range(11):
    piano = mido.MidiFile(generated_outputs + "/piano/" + '/piano' + str(i) + '.mid')
    main = mido.MidiFile(generated_outputs + "/main/" +'/main' + str(i) + '.mid')
    predict = mido.MidiFile(generated_outputs + "/piano_predict/" +'/piano_predict' + str(i) + '.mid')

    piano.tracks[1].name = "piano"
    main.tracks[1].name = "main"
    predict.tracks[1].name = "piano_predict"
    merged_mid = mido.MidiFile()
    merged_mid.ticks_per_beat = main.ticks_per_beat
    merged_mid.tracks = piano.tracks + main.tracks 
    merged_mid.save(generated_outputs + '/merged' + str(i) + '.mid')
    
    merged_mid = mido.MidiFile()
    merged_mid.ticks_per_beat = main.ticks_per_beat
    merged_mid.tracks = predict.tracks + main.tracks 
    merged_mid.save(generated_outputs + '/merged_predict' + str(i) + '.mid')

In [26]:
# dissimilar_interpolation
for i in range(0,len(test_intro)):
#     if len(test_intro) > 1200:
#         continue
    intro = test_intro[i]
    #solo = test_solo[i]
    if i + 3 < (len(test_intro)):
        outro = test_outro[i+3]
    else:
        outro = test_outro[i]
    #print(intro)
    #print(outro)
    list_intro = [int(x) for x in intro.split(' ')]
    #list_solo = [int(x) for x in solo.split(' ')]
    list_outro = [int(x) for x in outro.split(' ')]
    #print(list_sentence)
    translated_sentence = translate_sentence(model, intro, outro, intro_field, outro_field, solo_field, device, max_length=1200)
    #print(translated_sentence)
    translated_sentence = [int(x) for x in translated_sentence if x != '<pad>' and x != '<sos>' and x != '<eos>' and x != '<unk>']
    print(translated_sentence)
    utils.write_midi(list_intro, word2event, dissimilar_interpolation + "/intro/" + "/intro" + str(i)  + ".mid")
    #utils.write_midi(list_solo, word2event, generated_outputs  + "/solo/" + "/solo" + str(i)  + ".mid")
    utils.write_midi(list_outro, word2event, dissimilar_interpolation + "/outro/" + "/outro" + str(i)  + ".mid")
    utils.write_midi(translated_sentence, word2event, dissimilar_interpolation + "/predict/" + "/predict" + str(i)  + ".mid")
    print(i)
#     if i == 10:
#         break
        


NameError: name 'test_intro' is not defined

In [None]:
import mido
for i in range(len(test_intro)):
    intro = mido.MidiFile(dissimilar_interpolation + "/intro/" + '/intro' + str(i) + '.mid')
    outro = mido.MidiFile(dissimilar_interpolation + "/outro/" +'/outro' + str(i) + '.mid')
    predict = mido.MidiFile(dissimilar_interpolation + "/predict/" +'/predict' + str(i) + '.mid')
    total_intro_time = 0
    total_solo_time = 0
    total_predict_time = 0
    for msg in intro.tracks[1]:
        if msg.type == "note_on":
            total_intro_time += msg.time
    for msg in predict.tracks[1]:
        if msg.type == "note_on":
            total_predict_time += msg.time
            
    original_outro_time = 0 + outro.tracks[1][1].time
    
    print(original_outro_time + total_predict_time + total_intro_time)
    predict.tracks[1][1].time += total_intro_time
    outro.tracks[1][1].time = original_outro_time + total_predict_time + total_intro_time
    print(outro.tracks[1][1].time)
    merged_mid = mido.MidiFile()
    merged_mid.ticks_per_beat = intro.ticks_per_beat
    merged_mid.tracks = intro.tracks + predict.tracks + outro.tracks
    merged_mid.save(dissimilar_interpolation + '/merged_predict' + str(i) + '.mid')

In [None]:
class BeamSearchNode(object):
    def __init__(self, prev_node, wid, logp, length):
        self.prev_node = prev_node
        self.wid = wid
        self.logp = logp
        self.length = length

    def eval(self):
        return self.logp / float(self.length - 1 + 1e-6)
# }}}
import copy
from heapq import heappush, heappop

In [None]:
def translate_sentence_beam(model, sentence, german, english, device, max_length=1200,beam_width=2,max_dec_steps=25000):
    
    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    tokens = [token.lower() for token in sentence.split(' ')]
    # print(tokens)

    # sys.exit()
    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    eos_token = english.vocab.stoi["<eos>"]
    sos_token = english.vocab.stoi["<sos>"]
    
    # Go through each german token and convert to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    outputs = [english.vocab.stoi["<sos>"]]
    
    n_best_list = []
    
     
    #trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)

    #first token as input
    trg_tensor = torch.LongTensor(outputs).to(device)
    
    end_nodes = []

    #starting node
    node = BeamSearchNode(prev_node=None, wid=trg_tensor, logp=0, length=1)

    nodes = []

    heappush(nodes, (-node.eval(), id(node), node))
    n_dec_steps = 0

    while True:
        # Give up when decoding takes too long
        if n_dec_steps > max_dec_steps:
            break
        
        # Fetch the best node
        #print([n[2].wid for n in nodes])
        score, _, n = heappop(nodes)
        decoder_input = n.wid
        
        if n.wid.item() == eos_token and n.prev_node is not None:
            end_nodes.append((score, id(n), n))
            # If we reached maximum # of sentences required
            if len(end_nodes) >= beam_width:
                break
            else:
                continue
   
        sequence = [n.wid.item()]
        a = n
        while a.prev_node is not None:
            a = a.prev_node
            sequence.append(a.wid.item())
        sequence = sequence[::-1] # reverse
        
        #print(sequence)
        
        with torch.no_grad():
            output = model(sentence_tensor, torch.LongTensor(sequence).unsqueeze(1).to(device))
        
        # Get top-k from this decoded result
        topk_log_prob, topk_indexes = torch.topk(output, beam_width)
        #print(topk_indexes)
        #print(topk_log_prob)
        # Then, register new top-k nodes
        for new_k in range(beam_width):
            decoded_t = topk_indexes[0][0][new_k].view(1) # (1)
            logp = topk_log_prob[0][0][new_k].item() # float log probability val

            node = BeamSearchNode(prev_node=n,
                                  wid=decoded_t,
                                  logp=n.logp+logp,
                                  length=n.length+1)
            heappush(nodes, (-node.eval(), id(node), node))
        n_dec_steps += beam_width
        #print(n_dec_steps)
    # if there are no end_nodes, retrieve best nodes (they are probably truncated)
    if len(end_nodes) == 0:
        end_nodes = [heappop(nodes) for _ in range(beam_width)]

    # Construct sequences from end_nodes
    n_best_seq_list = []
    for score, _id, n in sorted(end_nodes, key=lambda x: x[0]):
        sequence = [n.wid.item()]
        # back trace from end node
        while n.prev_node is not None:
            n = n.prev_node
            sequence.append(n.wid.item())
        sequence = sequence[::-1] # reverse

        n_best_seq_list.append(sequence)


    # return n_best_seq_list

    translated_sentence = [english.vocab.itos[idx] for idx in n_best_seq_list[0]]

    # remove start token
    return translated_sentence


In [None]:
def save_vocab(vocab, path):
    output = open(path, 'wb')
    pickle.dump(vocab, output)
    output.close()

In [None]:
save_vocab(intro_field.vocab, vocab + '/intro_vocab.pkl')
save_vocab(solo_field.vocab, vocab + '/solo_vocab.pkl')
save_vocab(outro_field.vocab, vocab + '/outro_vocab.pkl')

In [None]:
def bleu_translate_sentence(model, sentence, german, english, device, max_length=1200):

    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    #tokens = [token.lower() for token in sentence.split(' ')]
    # print(tokens)

    # sys.exit()
    # Add <SOS> and <EOS> in beginning and end respectively
    #tokens.insert(0, german.init_token)
    #tokens.append(german.eos_token)

    # Go through each german token and convert to an index
    #text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(sentence).unsqueeze(1).to(device)

    outputs = [english.vocab.stoi["<sos>"]]
    
    for i in range(max_length):
        trg_tensor = torch.LongTensor(outputs).unsqueeze(1).to(device)

        with torch.no_grad():
            output = model(sentence_tensor, trg_tensor)

        best_guess = output.argmax(2)[-1, :].item()
        outputs.append(best_guess)

        if best_guess == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence


In [None]:
from torchtext.data.metrics import bleu_score

def bleu(data, model, german, english, device):
    targets = []
    outputs = []
    print(len(data))
    for example in data:
        #print( vars(example))
        src = vars(example)["intro"]
        trg = vars(example)["solo"]
        
        src = [int(x) for x in src]
        trg = [int(x) for x in trg]
        
        if len(trg) > 1200 or len(src) > 1200:
            continue
        
        prediction = bleu_translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append(trg)
        outputs.append(prediction)

    return bleu_score(outputs, targets)

In [None]:
# running on entire test data takes a while
score = bleu(test[1:10], model, intro_field, solo_field, device)
print(f"Bleu score {score * 100:.2f}")

In [None]:
# torch.backends.cudnn.enabled = False

In [None]:
train_loss_list, valid_loss_list, global_steps_list = load_metrics(destination_folder + '/metrics.pt')
plt.plot(global_steps_list, train_loss_list, label='Train')
plt.plot(global_steps_list, valid_loss_list, label='Valid')
plt.xlabel('Global Steps')
plt.ylabel('Loss')
plt.legend()
plt.show() 

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns