# CS 287 - HW 4

In [1]:
!pip install -q torch torchtext opt_einsum git+https://github.com/harvardnlp/namedtensor

In [8]:
import random
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torchtext
from torchtext.vocab import Vectors, GloVe
from namedtensor import ntorch, NamedTensor
from namedtensor.text import NamedField

In [3]:
# Load the SNLI dataset, build the vocabularies and create the batch iterators.
# TEXT carries token sequences with a single named dimension ('seqlen');
# LABEL is a non-sequential field with no named dimensions.
TEXT = NamedField(names=('seqlen',)) # Our input $x$
LABEL = NamedField(sequential=False, names=()) # Our labels $y$
train, val, test = torchtext.datasets.SNLI.splits(TEXT, LABEL)
print('len(train)', len(train))
# Both vocabularies are built from the training split only.
TEXT.build_vocab(train)
LABEL.build_vocab(train)
print('len(TEXT.vocab)', len(TEXT.vocab))
print('len(LABEL.vocab)', len(LABEL.vocab))
# Batches of 16 examples are created on the CUDA device, so a GPU is required.
# NOTE(review): repeat=False presumably makes each iterator stop after one pass — confirm
# against the torchtext version in use.
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, val, test), batch_size=16, device=torch.device("cuda"), repeat=False)

len(train) 549367
len(TEXT.vocab) 62998
len(LABEL.vocab) 4


In [4]:
# build the vocabulary with word embeddings
# out-of-vocabulary words are mapped to one of 100 random embeddings, each initialized to mean 0, stdev 1 (Sec 5.1)
# NOTE(review): the mapping is done with random.choice rather than a hash of the word, and no
# seed is set in this cell, so the assignment is presumably not reproducible across runs — confirm.
unk_vectors = [torch.randn(300) for _ in range(100)]
TEXT.vocab.load_vectors(vectors='glove.6B.300d', unk_init=lambda x:random.choice(unk_vectors))
vectors = TEXT.vocab.vectors
# NOTE(review): a row with zero norm would turn into NaNs in this division — confirm none exist.
vectors = vectors / vectors.norm(dim=1, keepdim=True) # normalized to have l_2 norm of 1
# Wrap the matrix as a NamedTensor with dimensions ('word', 'embedding') and store it back on the vocab.
vectors = NamedTensor(vectors, ('word', 'embedding'))
TEXT.vocab.vectors = vectors
print("word embeddings shape:", TEXT.vocab.vectors.shape)

word embeddings shape: OrderedDict([('word', 62998), ('embedding', 300)])


In [7]:
# here's an example of a training example
# Pull a single batch from the training iterator and print the named shapes of its
# premise, hypothesis and label tensors. `batch` is reused by the step-by-step cells below.
batch = next(iter(train_iter))
print("Size of premise batch:", batch.premise.shape)
print("Size of hypothesis batch:", batch.hypothesis.shape)
print("Size of label batch:", batch.label.shape)

Size of premise batch: OrderedDict([('seqlen', 26), ('batch', 16)])
Size of hypothesis batch: OrderedDict([('seqlen', 22), ('batch', 16)])
Size of label batch: OrderedDict([('batch', 16)])


## Vanilla Decomposable Attention Model

In [90]:
# dimensions
# input_size and embed_size are read from the named shape of the embedding matrix.
# hidden_size2 is twice hidden_size1: it is the width obtained when two
# hidden_size1-wide vectors are concatenated along the feature axis.
# output_size is the size of the label vocabulary.
input_size = TEXT.vocab.vectors.shape['word']
embed_size = TEXT.vocab.vectors.shape['embedding']
hidden_size1 = 200
hidden_size2 = hidden_size1 * 2
output_size = len(LABEL.vocab)
print('DIMENSIONS -- input: %d, embed: %d, hidden1: %d, hidden2: %d, output: %d'%(input_size, embed_size, hidden_size1, hidden_size2, output_size))

DIMENSIONS -- input: 62998, embed: 300, hidden1: 200, hidden2: 400, output: 4


In [91]:
# pre-trained embeddings
# Unwrap the NamedTensor to its underlying tensor and move it to the GPU;
# the trailing expression displays its shape.
weights = TEXT.vocab.vectors.values.cuda()
weights.shape

torch.Size([62998, 300])

In [92]:
class EmbedProject(torch.nn.Module):
    """Frozen pre-trained embedding lookup followed by a learned linear projection."""

    def __init__(self, weights, embed_size, project_size):
        """Build the lookup table and the projection layer.

        weights: 2-D tensor of pre-trained embeddings (input_size x embed_size).
        embed_size: width of one embedding vector, i.e. the projection's input width.
        project_size: output width of the projection.
        """
        super().__init__()
        # freeze=True keeps the pre-trained table out of gradient updates.
        self.embed = nn.Embedding.from_pretrained(weights, freeze=True)
        self.linear = nn.Linear(in_features=embed_size, out_features=project_size)
        # Only the projection weight is re-initialised; the bias keeps nn.Linear's default.
        nn.init.normal_(self.linear.weight, mean=0, std=0.01)

    def forward(self, inputs):
        """Look up the embeddings of the token indices in `inputs` and project them."""
        return self.linear(self.embed(inputs))

In [93]:
# Instantiate the embedding + projection module on the GPU; the bare name displays its repr.
EP1 = EmbedProject(weights, embed_size, hidden_size1).cuda()
EP1

EmbedProject(
  (embed): Embedding(62998, 300)
  (linear): Linear(in_features=300, out_features=200, bias=True)
)

In [94]:
# Unwrap the named tensors and swap their first two axes so that the batch dimension
# comes first (the example batch is laid out as seqlen x batch).
sent1 = batch.premise.values.transpose(0,1)
sent2 = batch.hypothesis.values.transpose(0,1)
sent1.shape, sent2.shape

(torch.Size([16, 26]), torch.Size([16, 22]))

In [95]:
# Embed and project both sentences with the same module, so the weights are shared.
proj1 = EP1(sent1)
proj2 = EP1(sent2)
proj1.shape, proj2.shape

(torch.Size([16, 26, 200]), torch.Size([16, 22, 200]))

In [96]:
class FeedForwardF(torch.nn.Module):
    """Two-layer feed-forward network: dropout before each linear layer, ReLU after each."""

    def __init__(self, input_size, hidden_size, output_size, dropout=0.2):
        """Create the layers and draw every parameter from a normal with std 0.01.

        input_size: width of the incoming feature vectors.
        hidden_size: width of the intermediate layer.
        output_size: width of the returned feature vectors.
        dropout: drop probability shared by both dropout applications.
        """
        super().__init__()
        self.d = nn.Dropout(p=dropout)
        self.m = nn.ReLU()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)
        # Weights and biases alike are re-initialised here.
        for tensor in self.parameters():
            nn.init.normal_(tensor, mean=0, std=0.01)

    def forward(self, inputs):
        """Apply dropout -> linear1 -> ReLU -> dropout -> linear2 -> ReLU."""
        hidden = self.linear1(self.d(inputs))
        hidden = self.m(hidden)
        output = self.linear2(self.d(hidden))
        return self.m(output)

In [97]:
# Instantiate the attend-step network (hidden_size1 in, hidden_size1 out) on the GPU and display it.
F1 = FeedForwardF(hidden_size1, hidden_size1, hidden_size1).cuda()
F1

FeedForwardF(
  (d): Dropout(p=0.2)
  (m): ReLU()
  (linear1): Linear(in_features=200, out_features=200, bias=True)
  (linear2): Linear(in_features=200, out_features=200, bias=True)
)

In [98]:
# Apply F1 to the projected tokens of each sentence.
f1 = F1(proj1)
f2 = F1(proj2)
f1.shape, f2.shape

(torch.Size([16, 26, 200]), torch.Size([16, 22, 200]))

In [99]:
# Unnormalised attention scores: batched dot product of every premise token with every
# hypothesis token. score2 is the same matrix with the two token axes swapped.
score1 = torch.bmm(f1, f2.transpose(1,2))
score2 = score1.transpose(1,2)
score1.shape, score2.shape

(torch.Size([16, 26, 22]), torch.Size([16, 22, 26]))

In [100]:
# Normalise over the last axis: each row of prob1 is a distribution over hypothesis
# tokens, each row of prob2 a distribution over premise tokens.
prob1 = F.softmax(score1, dim=2)
prob2 = F.softmax(score2, dim=2)
prob1.shape, prob2.shape

(torch.Size([16, 26, 22]), torch.Size([16, 22, 26]))

In [101]:
# Attention-weighted sums: proj1_soft holds, for each hypothesis token, a weighted mix of
# premise projections; proj2_soft holds, for each premise token, a weighted mix of
# hypothesis projections.
proj1_soft = torch.bmm(prob2, proj1)
proj2_soft = torch.bmm(prob1, proj2)
proj1_soft.shape, proj2_soft.shape

(torch.Size([16, 22, 200]), torch.Size([16, 26, 200]))

In [102]:
# Pair every token's own projection with its aligned counterpart by concatenating
# along the feature axis (dim=2).
proj1_combined = torch.cat((proj1, proj2_soft), dim=2)
proj2_combined = torch.cat((proj2, proj1_soft), dim=2)
proj1_combined.shape, proj2_combined.shape

(torch.Size([16, 26, 400]), torch.Size([16, 22, 400]))

In [103]:
class FeedForwardG(torch.nn.Module):
    """Two-layer feed-forward network used on the concatenated token/alignment vectors.

    Dropout is applied before each linear layer and a ReLU after each.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout=0.2):
        """Create the layers; all weights and biases are drawn from a normal with std 0.01."""
        super().__init__()
        self.d = nn.Dropout(p=dropout)
        self.m = nn.ReLU()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        for weight_or_bias in self.parameters():
            nn.init.normal_(weight_or_bias, mean=0, std=0.01)

    def forward(self, inputs):
        """Return ReLU(linear2(dropout(ReLU(linear1(dropout(inputs))))))."""
        first = self.m(self.linear1(self.d(inputs)))
        second = self.linear2(self.d(first))
        return self.m(second)

In [104]:
# Instantiate the compare-step network (hidden_size2 in, hidden_size1 out) on the GPU and display it.
G1 = FeedForwardG(hidden_size2, hidden_size1, hidden_size1).cuda()
G1

FeedForwardG(
  (d): Dropout(p=0.2)
  (m): ReLU()
  (linear1): Linear(in_features=400, out_features=200, bias=True)
  (linear2): Linear(in_features=200, out_features=200, bias=True)
)

In [105]:
# Compare step: apply G1 to every combined token vector of each sentence.
g1 = G1(proj1_combined)
g2 = G1(proj2_combined)
g1.shape, g2.shape

(torch.Size([16, 26, 200]), torch.Size([16, 22, 200]))

In [106]:
# Aggregate step: sum over the token axis (dim=1), leaving one vector per example.
g1_sum = g1.sum(dim=1)
g2_sum = g2.sum(dim=1)
g1_sum.shape, g2_sum.shape

(torch.Size([16, 200]), torch.Size([16, 200]))

In [107]:
# Concatenate the two per-sentence summaries along the feature axis.
g_all = torch.cat((g1_sum, g2_sum), dim=1)
g_all.shape

torch.Size([16, 400])

In [108]:
class FeedForwardH(torch.nn.Module):
    """Classifier head: two dropout+linear+ReLU layers followed by a plain linear layer."""

    def __init__(self, input_size, hidden_size, output_size, dropout=0.2):
        """Create the three linear layers; every parameter is drawn from a normal with std 0.01.

        input_size: width of the incoming feature vectors.
        hidden_size: width of both hidden layers.
        output_size: number of values emitted per example.
        dropout: drop probability used before linear1 and linear2.
        """
        super().__init__()
        self.d = nn.Dropout(p=dropout)
        self.m = nn.ReLU()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)
        for tensor in self.parameters():
            nn.init.normal_(tensor, mean=0, std=0.01)

    def forward(self, inputs):
        """Return the un-normalised scores; no dropout or ReLU surrounds the last layer."""
        hidden = inputs
        for layer in (self.linear1, self.linear2):
            hidden = self.m(layer(self.d(hidden)))
        return self.linear3(hidden)

In [109]:
# Instantiate the classifier head (hidden_size2 in, output_size out) on the GPU and display it.
H1 = FeedForwardH(hidden_size2, hidden_size1, output_size).cuda()
H1

FeedForwardH(
  (d): Dropout(p=0.2)
  (m): ReLU()
  (linear1): Linear(in_features=400, out_features=200, bias=True)
  (linear2): Linear(in_features=200, out_features=200, bias=True)
  (linear3): Linear(in_features=200, out_features=4, bias=True)
)

In [110]:
# Classifier scores for the batch: one row per example, one column per label.
h_all = H1(g_all)
h_all.shape

torch.Size([16, 4])

In [112]:
# Gold labels of the example batch, unwrapped from the NamedTensor.
target = batch.label.values
target.shape

torch.Size([16])

In [113]:
# Cross-entropy between the classifier scores of the example batch and its gold labels.
# The scores are `h_all`, produced by the H1 cell; the name `out_all` used here before is
# not defined anywhere in the notebook, so the cell raised a NameError on a fresh kernel.
criterion = nn.CrossEntropyLoss()
loss = criterion(h_all, target)
loss

tensor(1.3871, device='cuda:0', grad_fn=<NllLossBackward>)

In [121]:
# Gather the parameters of all four modules into one flat list for the optimizer,
# printing the running total after each module has been added.
parameters = [param for param in EP1.parameters()] # embed, lnr, bias
print(len(parameters))
parameters.extend([param for param in F1.parameters()]) # lnr1, bias1, lnr2, bias2
print(len(parameters))
parameters.extend([param for param in G1.parameters()]) # lnr1, bias1, lnr2, bias2
print(len(parameters))
parameters.extend([param for param in H1.parameters()]) # lnr1, bias1, lnr2, bias2, lnr3, bias3
print(len(parameters))

3
7
11
17


In [122]:
# Adagrad over the collected parameters: learning rate 0.05, accumulators starting at 0.1.
optimizer = torch.optim.Adagrad(parameters, lr=0.05, initial_accumulator_value=0.1)

In [136]:
# Snapshot the first parameter of EP1 (the embedding table) before the optimisation step.
# A detached clone is needed: binding the Parameter object itself would only alias the
# live weights, and a later comparison against them could never report a change.
emb1 = next(iter(EP1.parameters())).detach().clone()
# Print the parameter of H1 at index 5 (the bias of linear3) so it can be compared
# with its value after the step.
for i, param in enumerate(H1.parameters()):
    if i == 5:
        print(param)

Parameter containing:
tensor([-0.1111, -0.0486, -0.0384,  0.1044], device='cuda:0',
       requires_grad=True)


In [137]:
# One manual training step on the example batch: forward pass, loss, backward pass and
# a single optimizer update.

# Put every module in training mode (enables dropout) and clear stale gradients.
EP1.train()
F1.train()
G1.train()
H1.train()
optimizer.zero_grad()

# Batch-first index tensors and gold labels.
sent1 = batch.premise.values.transpose(0,1)
sent2 = batch.hypothesis.values.transpose(0,1)
target = batch.label.values

# Embed + project both sentences.
proj1 = EP1(sent1)
proj2 = EP1(sent2)
# Attend: score every premise/hypothesis token pair and normalise in both directions.
f1 = F1(proj1)
f2 = F1(proj2)
score1 = torch.bmm(f1, f2.transpose(1,2))
score2 = score1.transpose(1,2)
prob1 = F.softmax(score1, dim=2)
prob2 = F.softmax(score2, dim=2)
# Soft alignments: weighted mixes of the other sentence's projections.
proj1_soft = torch.bmm(prob2, proj1)
proj2_soft = torch.bmm(prob1, proj2)
# Compare: each token's projection concatenated with its aligned counterpart, fed to G1.
proj1_combined = torch.cat((proj1, proj2_soft), dim=2)
proj2_combined = torch.cat((proj2, proj1_soft), dim=2)
g1 = G1(proj1_combined)
g2 = G1(proj2_combined)
# Aggregate: sum over tokens, concatenate both summaries, classify with H1.
g1_sum = g1.sum(dim=1)
g2_sum = g2.sum(dim=1)
g_all = torch.cat((g1_sum, g2_sum), dim=1)
h_all = H1(g_all)

# Loss, gradients and parameter update.
loss = criterion(h_all, target)
loss.backward()
optimizer.step()

In [138]:
# After the optimizer step: bind the first parameter of EP1 (the embedding table) to
# `emb2` and print the parameter of H1 at index 5 (the bias of linear3) again.
# NOTE: `emb2` refers to the Parameter object itself; no copy is made here.
for param in EP1.parameters():
    emb2 = param
    break
i = 0
for param in H1.parameters():
    if i == 5:
        print(param)
    i += 1

Parameter containing:
tensor([-0.1295, -0.0567, -0.0467,  0.1243], device='cuda:0',
       requires_grad=True)


In [142]:
# Number of entries that differ between emb1 and emb2; 0 means no difference was found.
torch.sum(emb1 != emb2)

tensor(0, device='cuda:0')

In [156]:
# Fraction of examples in the batch whose highest-scoring label equals the gold label.
acc = torch.sum(torch.argmax(h_all, dim=1) == target).item() / target.shape[0]
acc

0.625

In [155]:
# Display the tensor currently bound to `loss`.
loss

tensor(1.3147, device='cuda:0', grad_fn=<NllLossBackward>)

## Train

In [176]:
def get_output(sent1, sent2, EP1, F1, G1, H1, pad_idx=None):
    """Run the full forward pass for a batch of sentence pairs.

    Args:
        sent1: integer tensor of token indices, batch x len1.
        sent2: integer tensor of token indices, batch x len2.
        EP1: module mapping token indices to projected embeddings.
        F1: module applied to the projections before the attention scores are computed.
        G1: module applied to each token's [projection; aligned counterpart] vector.
        H1: module mapping the concatenated sentence summaries to the output scores.
        pad_idx: optional index of the padding token. When given, padded positions
            receive no attention weight and contribute nothing to the sentence sums,
            so the result does not depend on how much padding a batch carries.
            With the default (None) nothing is masked.

    Returns:
        Whatever H1 returns for the batch (one row of scores per example).
    """
    proj1 = EP1(sent1)
    proj2 = EP1(sent2)
    f1 = F1(proj1)
    f2 = F1(proj2)

    # score1: batch x len1 x len2; score2 is the same matrix seen from sent2's side.
    score1 = torch.bmm(f1, f2.transpose(1, 2))
    score2 = score1.transpose(1, 2)

    if pad_idx is not None:
        # A large negative finite value (rather than -inf) drives the softmax weight of
        # padded columns to zero without producing NaNs if a row were fully padded.
        masked_score = -1e9
        score1 = score1.masked_fill((sent2 == pad_idx).unsqueeze(1), masked_score)
        score2 = score2.masked_fill((sent1 == pad_idx).unsqueeze(1), masked_score)

    prob1 = F.softmax(score1, dim=2)
    prob2 = F.softmax(score2, dim=2)

    # Soft alignments: weighted mixes of the other sentence's projections.
    proj1_soft = torch.bmm(prob2, proj1)
    proj2_soft = torch.bmm(prob1, proj2)

    g1 = G1(torch.cat((proj1, proj2_soft), dim=2))
    g2 = G1(torch.cat((proj2, proj1_soft), dim=2))

    if pad_idx is not None:
        # Zero the rows that belong to padding so they do not leak into the sums.
        g1 = g1 * (sent1 != pad_idx).unsqueeze(2).to(g1.dtype)
        g2 = g2 * (sent2 != pad_idx).unsqueeze(2).to(g2.dtype)

    g_all = torch.cat((g1.sum(dim=1), g2.sum(dim=1)), dim=1)
    return H1(g_all)

In [177]:
def training_loop(e, train_iter, EP1, F1, G1, H1, criterion, optimizer):
    """Run one pass over `train_iter`, updating the modules after every batch.

    Args:
        e: epoch number, used only in the progress message.
        train_iter: iterable of batches exposing `premise`, `hypothesis` and `label`.
        EP1, F1, G1, H1: the four modules handed to `get_output`.
        criterion: loss function called as criterion(output, target).
        optimizer: optimizer holding the parameters to update.

    Every 1000th batch (starting with batch 0) the loss and accuracy of that single
    batch are printed.
    """
    for module in (EP1, F1, G1, H1):
        module.train()

    for step, batch in enumerate(train_iter):
        optimizer.zero_grad()
        # Index tensors are transposed so that the batch axis comes first.
        premise = batch.premise.values.transpose(0, 1)
        hypothesis = batch.hypothesis.values.transpose(0, 1)
        target = batch.label.values

        output = get_output(premise, hypothesis, EP1, F1, G1, H1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if step % 1000 != 0:
            continue
        hits = torch.sum(torch.argmax(output, dim=1) == target).item()
        acc = hits / target.shape[0]
        print('Epoch: {0}, Batch: {1}, Train NLL: {2:0.4f}, Train Acc:{3:0.4f}'.format(e, step, loss.cpu().detach(), acc))

In [178]:
def validation_loop(e, val_iter, EP1, F1, G1, H1, criterion):
    """Evaluate the modules on `val_iter` and print mean loss and accuracy.

    Args:
        e: epoch number, used only in the printed message.
        val_iter: iterable of batches exposing `premise`, `hypothesis` and `label`.
        EP1, F1, G1, H1: the four modules handed to `get_output`.
        criterion: loss function called as criterion(output, target); its value is
            weighted by the batch size when accumulated.

    Returns:
        The accumulated loss (sum over batches of loss * batch size), not the mean.
    """
    for module in (EP1, F1, G1, H1):
        module.eval()

    total_loss = 0
    total_sent = 0
    total_correct = 0

    # Evaluation needs no gradients; disabling autograd avoids building a graph
    # for every validation batch.
    with torch.no_grad():
        for batch in val_iter:
            sent1 = batch.premise.values.transpose(0,1)
            sent2 = batch.hypothesis.values.transpose(0,1)
            target = batch.label.values
            output = get_output(sent1, sent2, EP1, F1, G1, H1)

            sent = sent1.shape[0]
            total_loss += criterion(output, target).item() * sent
            total_sent += sent
            total_correct += torch.sum(torch.argmax(output, dim=1) == target).item()

    # An empty iterator would otherwise raise ZeroDivisionError; report NaN instead.
    if total_sent:
        mean_loss = total_loss / total_sent
        accuracy = total_correct / total_sent
    else:
        mean_loss = accuracy = float('nan')

    print('Epoch: {0}, Val NLL: {1:0.4f}, Val Acc: {2:0.4f}'.format(e, mean_loss, accuracy))
    return total_loss

In [179]:
# Train fresh modules for 100 epochs, checkpointing whenever the validation loss improves.
best_loss = 1e8
EP1 = EmbedProject(weights, embed_size, hidden_size1).cuda()
F1 = FeedForwardF(hidden_size1, hidden_size1, hidden_size1).cuda()
G1 = FeedForwardG(hidden_size2, hidden_size1, hidden_size1).cuda()
H1 = FeedForwardH(hidden_size2, hidden_size1, output_size).cuda()

# Only trainable parameters go to the optimizer. The embedding table is frozen
# (requires_grad is False), so passing it along would just make Adagrad allocate an
# accumulator as large as the whole table for a tensor it never updates.
parameters = [
    param
    for module in (EP1, F1, G1, H1)
    for param in module.parameters()
    if param.requires_grad
]

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(parameters, lr=0.05, initial_accumulator_value=0.1)

for e in range(100):
    training_loop(e, train_iter, EP1, F1, G1, H1, criterion, optimizer)
    # validation_loop returns the summed validation loss; since the validation set is
    # the same every epoch, comparing the sums is equivalent to comparing the means.
    loss = validation_loop(e, val_iter, EP1, F1, G1, H1, criterion)
    if loss < best_loss:
        # New best: persist the weights of every module.
        torch.save(EP1.state_dict(),'best_EP1.pt')
        torch.save(F1.state_dict(),'best_F1.pt')
        torch.save(G1.state_dict(),'best_G1.pt')
        torch.save(H1.state_dict(),'best_H1.pt')
        best_loss = loss
        print('WROTE MODEL')

Epoch: 0, Batch: 0, Train NLL: 1.3873, Train Acc:0.0000
Epoch: 0, Batch: 1000, Train NLL: 1.1520, Train Acc:0.1250
Epoch: 0, Batch: 2000, Train NLL: 1.0946, Train Acc:0.3750
Epoch: 0, Batch: 3000, Train NLL: 1.1047, Train Acc:0.2500
Epoch: 0, Batch: 4000, Train NLL: 1.1011, Train Acc:0.1875
Epoch: 0, Batch: 5000, Train NLL: 1.0955, Train Acc:0.3750
Epoch: 0, Batch: 6000, Train NLL: 1.0917, Train Acc:0.3125
Epoch: 0, Batch: 7000, Train NLL: 1.1013, Train Acc:0.2500
Epoch: 0, Batch: 8000, Train NLL: 1.0966, Train Acc:0.4375
Epoch: 0, Batch: 9000, Train NLL: 1.0991, Train Acc:0.3750
Epoch: 0, Batch: 10000, Train NLL: 1.0853, Train Acc:0.4375
Epoch: 0, Batch: 11000, Train NLL: 1.0906, Train Acc:0.5625
Epoch: 0, Batch: 12000, Train NLL: 1.0966, Train Acc:0.3125
Epoch: 0, Batch: 13000, Train NLL: 1.1013, Train Acc:0.2500
Epoch: 0, Batch: 14000, Train NLL: 1.0933, Train Acc:0.4375
Epoch: 0, Batch: 15000, Train NLL: 1.0978, Train Acc:0.3750
Epoch: 0, Batch: 16000, Train NLL: 1.0977, Train Acc:

Epoch: 3, Batch: 30000, Train NLL: 0.8155, Train Acc:0.6250
Epoch: 3, Batch: 31000, Train NLL: 0.9052, Train Acc:0.6250
Epoch: 3, Batch: 32000, Train NLL: 0.7352, Train Acc:0.6250
Epoch: 3, Batch: 33000, Train NLL: 0.8067, Train Acc:0.6250
Epoch: 3, Batch: 34000, Train NLL: 0.6664, Train Acc:0.7500
Epoch: 3, Val NLL: 0.7658, Val Acc: 0.6622
WROTE MODEL
Epoch: 4, Batch: 0, Train NLL: 0.6772, Train Acc:0.7500
Epoch: 4, Batch: 1000, Train NLL: 1.1631, Train Acc:0.3125
Epoch: 4, Batch: 2000, Train NLL: 0.7690, Train Acc:0.6875
Epoch: 4, Batch: 3000, Train NLL: 0.6274, Train Acc:0.7500
Epoch: 4, Batch: 4000, Train NLL: 0.7589, Train Acc:0.7500
Epoch: 4, Batch: 5000, Train NLL: 0.8741, Train Acc:0.6875
Epoch: 4, Batch: 6000, Train NLL: 0.8634, Train Acc:0.8125
Epoch: 4, Batch: 7000, Train NLL: 0.5980, Train Acc:0.8125
Epoch: 4, Batch: 8000, Train NLL: 0.6585, Train Acc:0.8750
Epoch: 4, Batch: 9000, Train NLL: 0.8687, Train Acc:0.6250
Epoch: 4, Batch: 10000, Train NLL: 0.7466, Train Acc:0.687

Epoch: 7, Batch: 24000, Train NLL: 0.6300, Train Acc:0.7500
Epoch: 7, Batch: 25000, Train NLL: 0.7236, Train Acc:0.8125
Epoch: 7, Batch: 26000, Train NLL: 0.8028, Train Acc:0.5000
Epoch: 7, Batch: 27000, Train NLL: 0.6996, Train Acc:0.7500
Epoch: 7, Batch: 28000, Train NLL: 0.6306, Train Acc:0.8125
Epoch: 7, Batch: 29000, Train NLL: 0.9126, Train Acc:0.6250
Epoch: 7, Batch: 30000, Train NLL: 0.6708, Train Acc:0.6875
Epoch: 7, Batch: 31000, Train NLL: 0.5780, Train Acc:0.8125
Epoch: 7, Batch: 32000, Train NLL: 0.8668, Train Acc:0.7500
Epoch: 7, Batch: 33000, Train NLL: 0.6438, Train Acc:0.8125
Epoch: 7, Batch: 34000, Train NLL: 0.4982, Train Acc:0.8750
Epoch: 7, Val NLL: 0.6520, Val Acc: 0.7239
WROTE MODEL
Epoch: 8, Batch: 0, Train NLL: 0.8189, Train Acc:0.6250
Epoch: 8, Batch: 1000, Train NLL: 0.7028, Train Acc:0.8125
Epoch: 8, Batch: 2000, Train NLL: 0.7617, Train Acc:0.6250
Epoch: 8, Batch: 3000, Train NLL: 0.5058, Train Acc:0.8750
Epoch: 8, Batch: 4000, Train NLL: 0.6435, Train Acc:

Epoch: 11, Batch: 17000, Train NLL: 0.6211, Train Acc:0.6875
Epoch: 11, Batch: 18000, Train NLL: 0.5524, Train Acc:0.8125
Epoch: 11, Batch: 19000, Train NLL: 0.2261, Train Acc:0.9375
Epoch: 11, Batch: 20000, Train NLL: 0.8784, Train Acc:0.6250
Epoch: 11, Batch: 21000, Train NLL: 0.6537, Train Acc:0.6875
Epoch: 11, Batch: 22000, Train NLL: 0.5534, Train Acc:0.7500
Epoch: 11, Batch: 23000, Train NLL: 0.7764, Train Acc:0.5625
Epoch: 11, Batch: 24000, Train NLL: 0.7684, Train Acc:0.7500
Epoch: 11, Batch: 25000, Train NLL: 0.6691, Train Acc:0.8125
Epoch: 11, Batch: 26000, Train NLL: 0.8817, Train Acc:0.7500
Epoch: 11, Batch: 27000, Train NLL: 0.4092, Train Acc:0.8125
Epoch: 11, Batch: 28000, Train NLL: 0.6220, Train Acc:0.8125
Epoch: 11, Batch: 29000, Train NLL: 0.7412, Train Acc:0.6875
Epoch: 11, Batch: 30000, Train NLL: 0.7141, Train Acc:0.7500
Epoch: 11, Batch: 31000, Train NLL: 0.6921, Train Acc:0.8125
Epoch: 11, Batch: 32000, Train NLL: 0.5122, Train Acc:0.8125
Epoch: 11, Batch: 33000,

Epoch: 15, Batch: 9000, Train NLL: 1.1243, Train Acc:0.5625
Epoch: 15, Batch: 10000, Train NLL: 0.7642, Train Acc:0.6875
Epoch: 15, Batch: 11000, Train NLL: 0.7607, Train Acc:0.7500
Epoch: 15, Batch: 12000, Train NLL: 0.6065, Train Acc:0.7500
Epoch: 15, Batch: 13000, Train NLL: 0.5519, Train Acc:0.6875
Epoch: 15, Batch: 14000, Train NLL: 0.5612, Train Acc:0.7500
Epoch: 15, Batch: 15000, Train NLL: 0.4699, Train Acc:0.8750
Epoch: 15, Batch: 16000, Train NLL: 0.6888, Train Acc:0.7500
Epoch: 15, Batch: 17000, Train NLL: 0.7189, Train Acc:0.6875
Epoch: 15, Batch: 18000, Train NLL: 0.4217, Train Acc:0.8125
Epoch: 15, Batch: 19000, Train NLL: 0.3263, Train Acc:0.9375
Epoch: 15, Batch: 20000, Train NLL: 0.6567, Train Acc:0.7500
Epoch: 15, Batch: 21000, Train NLL: 0.7054, Train Acc:0.6250
Epoch: 15, Batch: 22000, Train NLL: 0.7563, Train Acc:0.6875
Epoch: 15, Batch: 23000, Train NLL: 0.6045, Train Acc:0.6875
Epoch: 15, Batch: 24000, Train NLL: 0.6711, Train Acc:0.5625
Epoch: 15, Batch: 25000, 

Epoch: 19, Batch: 1000, Train NLL: 0.6507, Train Acc:0.7500
Epoch: 19, Batch: 2000, Train NLL: 0.6789, Train Acc:0.5625
Epoch: 19, Batch: 3000, Train NLL: 0.4985, Train Acc:0.8125
Epoch: 19, Batch: 4000, Train NLL: 0.8272, Train Acc:0.6875
Epoch: 19, Batch: 5000, Train NLL: 0.8801, Train Acc:0.6875
Epoch: 19, Batch: 6000, Train NLL: 0.3999, Train Acc:0.8125
Epoch: 19, Batch: 7000, Train NLL: 0.7600, Train Acc:0.6250
Epoch: 19, Batch: 8000, Train NLL: 0.4906, Train Acc:0.8125
Epoch: 19, Batch: 9000, Train NLL: 0.5009, Train Acc:0.8125
Epoch: 19, Batch: 10000, Train NLL: 0.6599, Train Acc:0.6875
Epoch: 19, Batch: 11000, Train NLL: 0.5320, Train Acc:0.8125
Epoch: 19, Batch: 12000, Train NLL: 0.8680, Train Acc:0.5625
Epoch: 19, Batch: 13000, Train NLL: 0.5603, Train Acc:0.7500
Epoch: 19, Batch: 14000, Train NLL: 0.3729, Train Acc:0.8750
Epoch: 19, Batch: 15000, Train NLL: 0.8230, Train Acc:0.5625
Epoch: 19, Batch: 16000, Train NLL: 0.4943, Train Acc:0.7500
Epoch: 19, Batch: 17000, Train NL

Epoch: 22, Batch: 29000, Train NLL: 0.5138, Train Acc:0.7500
Epoch: 22, Batch: 30000, Train NLL: 0.5052, Train Acc:0.8750
Epoch: 22, Batch: 31000, Train NLL: 0.6078, Train Acc:0.8125
Epoch: 22, Batch: 32000, Train NLL: 0.3653, Train Acc:0.8750
Epoch: 22, Batch: 33000, Train NLL: 0.5560, Train Acc:0.7500
Epoch: 22, Batch: 34000, Train NLL: 0.5746, Train Acc:0.8125
Epoch: 22, Val NLL: 0.5847, Val Acc: 0.7618
WROTE MODEL
Epoch: 23, Batch: 0, Train NLL: 0.4413, Train Acc:0.7500
Epoch: 23, Batch: 1000, Train NLL: 0.7404, Train Acc:0.6250
Epoch: 23, Batch: 2000, Train NLL: 0.7099, Train Acc:0.6250
Epoch: 23, Batch: 3000, Train NLL: 0.5353, Train Acc:0.7500
Epoch: 23, Batch: 4000, Train NLL: 0.7033, Train Acc:0.6250
Epoch: 23, Batch: 5000, Train NLL: 0.3341, Train Acc:0.9375
Epoch: 23, Batch: 6000, Train NLL: 0.5568, Train Acc:0.8125
Epoch: 23, Batch: 7000, Train NLL: 0.4564, Train Acc:0.8125
Epoch: 23, Batch: 8000, Train NLL: 0.4960, Train Acc:0.7500
Epoch: 23, Batch: 9000, Train NLL: 0.7518

Epoch: 26, Batch: 21000, Train NLL: 0.3917, Train Acc:0.8750
Epoch: 26, Batch: 22000, Train NLL: 0.6910, Train Acc:0.7500
Epoch: 26, Batch: 23000, Train NLL: 0.6221, Train Acc:0.7500
Epoch: 26, Batch: 24000, Train NLL: 0.7829, Train Acc:0.6250
Epoch: 26, Batch: 25000, Train NLL: 0.4272, Train Acc:0.8750
Epoch: 26, Batch: 26000, Train NLL: 0.5013, Train Acc:0.8125
Epoch: 26, Batch: 27000, Train NLL: 0.8522, Train Acc:0.6250
Epoch: 26, Batch: 28000, Train NLL: 0.3609, Train Acc:0.8125
Epoch: 26, Batch: 29000, Train NLL: 0.4394, Train Acc:0.7500
Epoch: 26, Batch: 30000, Train NLL: 0.5271, Train Acc:0.7500
Epoch: 26, Batch: 31000, Train NLL: 0.4842, Train Acc:0.8125
Epoch: 26, Batch: 32000, Train NLL: 0.4137, Train Acc:0.7500
Epoch: 26, Batch: 33000, Train NLL: 0.6246, Train Acc:0.7500
Epoch: 26, Batch: 34000, Train NLL: 0.6720, Train Acc:0.7500
Epoch: 26, Val NLL: 0.5789, Val Acc: 0.7610
Epoch: 27, Batch: 0, Train NLL: 0.8201, Train Acc:0.6250
Epoch: 27, Batch: 1000, Train NLL: 0.7646, Tr

Epoch: 30, Batch: 14000, Train NLL: 0.8464, Train Acc:0.4375
Epoch: 30, Batch: 15000, Train NLL: 0.7288, Train Acc:0.6250
Epoch: 30, Batch: 16000, Train NLL: 0.6558, Train Acc:0.7500
Epoch: 30, Batch: 17000, Train NLL: 0.2875, Train Acc:0.9375
Epoch: 30, Batch: 18000, Train NLL: 0.5003, Train Acc:0.6875
Epoch: 30, Batch: 19000, Train NLL: 0.4755, Train Acc:0.8125
Epoch: 30, Batch: 20000, Train NLL: 0.4992, Train Acc:0.8750
Epoch: 30, Batch: 21000, Train NLL: 0.5599, Train Acc:0.8125
Epoch: 30, Batch: 22000, Train NLL: 0.8044, Train Acc:0.5625
Epoch: 30, Batch: 23000, Train NLL: 0.5320, Train Acc:0.8125
Epoch: 30, Batch: 24000, Train NLL: 0.4407, Train Acc:0.8750
Epoch: 30, Batch: 25000, Train NLL: 0.5636, Train Acc:0.8750
Epoch: 30, Batch: 26000, Train NLL: 0.5664, Train Acc:0.6250
Epoch: 30, Batch: 27000, Train NLL: 0.2653, Train Acc:0.9375
Epoch: 30, Batch: 28000, Train NLL: 0.7480, Train Acc:0.6250
Epoch: 30, Batch: 29000, Train NLL: 0.6200, Train Acc:0.7500
Epoch: 30, Batch: 30000,

Epoch: 34, Batch: 6000, Train NLL: 0.6678, Train Acc:0.8125
Epoch: 34, Batch: 7000, Train NLL: 0.3345, Train Acc:1.0000
Epoch: 34, Batch: 8000, Train NLL: 0.9411, Train Acc:0.6875
Epoch: 34, Batch: 9000, Train NLL: 0.8264, Train Acc:0.6875
Epoch: 34, Batch: 10000, Train NLL: 0.2932, Train Acc:0.9375
Epoch: 34, Batch: 11000, Train NLL: 0.3326, Train Acc:0.8750
Epoch: 34, Batch: 12000, Train NLL: 0.6850, Train Acc:0.6250
Epoch: 34, Batch: 13000, Train NLL: 0.5951, Train Acc:0.8750
Epoch: 34, Batch: 14000, Train NLL: 0.8288, Train Acc:0.5625
Epoch: 34, Batch: 15000, Train NLL: 0.3263, Train Acc:0.8750
Epoch: 34, Batch: 16000, Train NLL: 0.3775, Train Acc:0.9375
Epoch: 34, Batch: 17000, Train NLL: 1.0750, Train Acc:0.5625
Epoch: 34, Batch: 18000, Train NLL: 0.7776, Train Acc:0.6250
Epoch: 34, Batch: 19000, Train NLL: 0.3217, Train Acc:0.9375
Epoch: 34, Batch: 20000, Train NLL: 0.8527, Train Acc:0.6250
Epoch: 34, Batch: 21000, Train NLL: 0.3850, Train Acc:0.8750
Epoch: 34, Batch: 22000, Tra

Epoch: 37, Batch: 34000, Train NLL: 0.3167, Train Acc:0.8750
Epoch: 37, Val NLL: 0.5707, Val Acc: 0.7693
Epoch: 38, Batch: 0, Train NLL: 0.8569, Train Acc:0.6875
Epoch: 38, Batch: 1000, Train NLL: 0.4909, Train Acc:0.8125
Epoch: 38, Batch: 2000, Train NLL: 0.8484, Train Acc:0.5625
Epoch: 38, Batch: 3000, Train NLL: 0.7943, Train Acc:0.7500
Epoch: 38, Batch: 4000, Train NLL: 0.2264, Train Acc:1.0000
Epoch: 38, Batch: 5000, Train NLL: 0.6723, Train Acc:0.6875
Epoch: 38, Batch: 6000, Train NLL: 0.6613, Train Acc:0.6875
Epoch: 38, Batch: 7000, Train NLL: 0.6958, Train Acc:0.6250
Epoch: 38, Batch: 8000, Train NLL: 0.7028, Train Acc:0.7500
Epoch: 38, Batch: 9000, Train NLL: 0.5427, Train Acc:0.8125
Epoch: 38, Batch: 10000, Train NLL: 0.3367, Train Acc:0.8750
Epoch: 38, Batch: 11000, Train NLL: 0.5575, Train Acc:0.8125
Epoch: 38, Batch: 12000, Train NLL: 0.7623, Train Acc:0.6250
Epoch: 38, Batch: 13000, Train NLL: 0.7210, Train Acc:0.7500
Epoch: 38, Batch: 14000, Train NLL: 0.6362, Train Acc:

Epoch: 41, Batch: 26000, Train NLL: 0.3319, Train Acc:0.8750
Epoch: 41, Batch: 27000, Train NLL: 0.8221, Train Acc:0.5625
Epoch: 41, Batch: 28000, Train NLL: 0.5781, Train Acc:0.8125
Epoch: 41, Batch: 29000, Train NLL: 0.6666, Train Acc:0.7500
Epoch: 41, Batch: 30000, Train NLL: 1.0381, Train Acc:0.3125
Epoch: 41, Batch: 31000, Train NLL: 0.5163, Train Acc:0.8125
Epoch: 41, Batch: 32000, Train NLL: 0.6757, Train Acc:0.6250
Epoch: 41, Batch: 33000, Train NLL: 0.8353, Train Acc:0.6875
Epoch: 41, Batch: 34000, Train NLL: 0.7520, Train Acc:0.6250
Epoch: 41, Val NLL: 0.5662, Val Acc: 0.7669
Epoch: 42, Batch: 0, Train NLL: 0.3919, Train Acc:0.8125
Epoch: 42, Batch: 1000, Train NLL: 0.4763, Train Acc:0.7500
Epoch: 42, Batch: 2000, Train NLL: 0.5422, Train Acc:0.8125
Epoch: 42, Batch: 3000, Train NLL: 0.7127, Train Acc:0.6250
Epoch: 42, Batch: 4000, Train NLL: 0.7157, Train Acc:0.7500
Epoch: 42, Batch: 5000, Train NLL: 0.4746, Train Acc:0.8750
Epoch: 42, Batch: 6000, Train NLL: 0.5769, Train A

Epoch: 45, Batch: 19000, Train NLL: 0.6983, Train Acc:0.7500
Epoch: 45, Batch: 20000, Train NLL: 0.5640, Train Acc:0.6875
Epoch: 45, Batch: 21000, Train NLL: 0.4824, Train Acc:0.8125
Epoch: 45, Batch: 22000, Train NLL: 0.9155, Train Acc:0.6875
Epoch: 45, Batch: 23000, Train NLL: 0.6889, Train Acc:0.7500
Epoch: 45, Batch: 24000, Train NLL: 0.4281, Train Acc:0.9375
Epoch: 45, Batch: 25000, Train NLL: 0.5153, Train Acc:0.6875
Epoch: 45, Batch: 26000, Train NLL: 0.6352, Train Acc:0.8125
Epoch: 45, Batch: 27000, Train NLL: 0.4300, Train Acc:0.8125
Epoch: 45, Batch: 28000, Train NLL: 0.4883, Train Acc:0.8125
Epoch: 45, Batch: 29000, Train NLL: 0.8247, Train Acc:0.6250
Epoch: 45, Batch: 30000, Train NLL: 0.4034, Train Acc:0.8750
Epoch: 45, Batch: 31000, Train NLL: 0.5638, Train Acc:0.7500
Epoch: 45, Batch: 32000, Train NLL: 0.3663, Train Acc:0.8750
Epoch: 45, Batch: 33000, Train NLL: 0.8009, Train Acc:0.7500
Epoch: 45, Batch: 34000, Train NLL: 0.5013, Train Acc:0.8125
Epoch: 45, Val NLL: 0.56

Epoch: 49, Batch: 12000, Train NLL: 0.4474, Train Acc:0.8750
Epoch: 49, Batch: 13000, Train NLL: 0.4514, Train Acc:0.7500
Epoch: 49, Batch: 14000, Train NLL: 0.5262, Train Acc:0.6875
Epoch: 49, Batch: 15000, Train NLL: 0.7944, Train Acc:0.6250
Epoch: 49, Batch: 16000, Train NLL: 0.4611, Train Acc:0.8750
Epoch: 49, Batch: 17000, Train NLL: 0.4565, Train Acc:0.8750
Epoch: 49, Batch: 18000, Train NLL: 0.5496, Train Acc:0.7500
Epoch: 49, Batch: 19000, Train NLL: 0.3640, Train Acc:0.8750
Epoch: 49, Batch: 20000, Train NLL: 0.4485, Train Acc:0.8750
Epoch: 49, Batch: 21000, Train NLL: 0.4625, Train Acc:0.8125
Epoch: 49, Batch: 22000, Train NLL: 0.7581, Train Acc:0.6250
Epoch: 49, Batch: 23000, Train NLL: 0.3590, Train Acc:0.8750
Epoch: 49, Batch: 24000, Train NLL: 0.6043, Train Acc:0.7500
Epoch: 49, Batch: 25000, Train NLL: 0.3693, Train Acc:0.8125
Epoch: 49, Batch: 26000, Train NLL: 0.4278, Train Acc:0.8125
Epoch: 49, Batch: 27000, Train NLL: 0.4447, Train Acc:0.8750
Epoch: 49, Batch: 28000,

Epoch: 53, Batch: 4000, Train NLL: 0.7279, Train Acc:0.6250
Epoch: 53, Batch: 5000, Train NLL: 0.2533, Train Acc:0.9375
Epoch: 53, Batch: 6000, Train NLL: 0.6037, Train Acc:0.8125
Epoch: 53, Batch: 7000, Train NLL: 0.6413, Train Acc:0.6875
Epoch: 53, Batch: 8000, Train NLL: 0.3905, Train Acc:0.8125
Epoch: 53, Batch: 9000, Train NLL: 0.8982, Train Acc:0.5625
Epoch: 53, Batch: 10000, Train NLL: 0.6860, Train Acc:0.7500
Epoch: 53, Batch: 11000, Train NLL: 0.4167, Train Acc:0.7500
Epoch: 53, Batch: 12000, Train NLL: 0.8260, Train Acc:0.6250
Epoch: 53, Batch: 13000, Train NLL: 0.9572, Train Acc:0.5000
Epoch: 53, Batch: 14000, Train NLL: 0.4318, Train Acc:0.9375
Epoch: 53, Batch: 15000, Train NLL: 0.8346, Train Acc:0.5625
Epoch: 53, Batch: 16000, Train NLL: 0.5988, Train Acc:0.7500
Epoch: 53, Batch: 17000, Train NLL: 0.5242, Train Acc:0.7500
Epoch: 53, Batch: 18000, Train NLL: 0.8701, Train Acc:0.5625
Epoch: 53, Batch: 19000, Train NLL: 0.5096, Train Acc:0.8125
Epoch: 53, Batch: 20000, Train

Epoch: 57, Batch: 15000, Train NLL: 0.4765, Train Acc:0.7500
Epoch: 57, Batch: 16000, Train NLL: 0.4386, Train Acc:0.8125
Epoch: 57, Batch: 17000, Train NLL: 0.6109, Train Acc:0.8125
Epoch: 57, Batch: 18000, Train NLL: 0.5839, Train Acc:0.6250
Epoch: 57, Batch: 19000, Train NLL: 0.4895, Train Acc:0.8125
Epoch: 57, Batch: 20000, Train NLL: 0.7520, Train Acc:0.6875
Epoch: 57, Batch: 21000, Train NLL: 0.7307, Train Acc:0.7500
Epoch: 57, Batch: 22000, Train NLL: 0.5338, Train Acc:0.8750
Epoch: 57, Batch: 23000, Train NLL: 0.2459, Train Acc:0.9375
Epoch: 57, Batch: 24000, Train NLL: 1.1243, Train Acc:0.5625
Epoch: 57, Batch: 25000, Train NLL: 0.4219, Train Acc:0.8750
Epoch: 57, Batch: 26000, Train NLL: 0.5466, Train Acc:0.8750
Epoch: 57, Batch: 27000, Train NLL: 0.6762, Train Acc:0.8125
Epoch: 57, Batch: 28000, Train NLL: 0.6639, Train Acc:0.7500
Epoch: 57, Batch: 29000, Train NLL: 0.6288, Train Acc:0.6875
Epoch: 57, Batch: 30000, Train NLL: 0.5147, Train Acc:0.9375
Epoch: 57, Batch: 31000,

Epoch: 61, Batch: 8000, Train NLL: 0.9098, Train Acc:0.6250
Epoch: 61, Batch: 9000, Train NLL: 0.5710, Train Acc:0.7500
Epoch: 61, Batch: 10000, Train NLL: 0.7857, Train Acc:0.6875
Epoch: 61, Batch: 11000, Train NLL: 0.4635, Train Acc:0.8125
Epoch: 61, Batch: 12000, Train NLL: 0.6129, Train Acc:0.7500
Epoch: 61, Batch: 13000, Train NLL: 0.7871, Train Acc:0.6875
Epoch: 61, Batch: 14000, Train NLL: 0.5363, Train Acc:0.6875
Epoch: 61, Batch: 15000, Train NLL: 0.7864, Train Acc:0.6250
Epoch: 61, Batch: 16000, Train NLL: 0.5741, Train Acc:0.8125
Epoch: 61, Batch: 17000, Train NLL: 0.6963, Train Acc:0.7500
Epoch: 61, Batch: 18000, Train NLL: 0.5142, Train Acc:0.7500
Epoch: 61, Batch: 19000, Train NLL: 0.7437, Train Acc:0.6875
Epoch: 61, Batch: 20000, Train NLL: 0.6306, Train Acc:0.8125
Epoch: 61, Batch: 21000, Train NLL: 0.5236, Train Acc:0.6875
Epoch: 61, Batch: 22000, Train NLL: 0.7479, Train Acc:0.6875
Epoch: 61, Batch: 23000, Train NLL: 0.6207, Train Acc:0.6875
Epoch: 61, Batch: 24000, T

WROTE MODEL
Epoch: 65, Batch: 0, Train NLL: 0.7156, Train Acc:0.7500
Epoch: 65, Batch: 1000, Train NLL: 0.4973, Train Acc:0.8125
Epoch: 65, Batch: 2000, Train NLL: 0.7916, Train Acc:0.7500
Epoch: 65, Batch: 3000, Train NLL: 0.2395, Train Acc:0.9375
Epoch: 65, Batch: 4000, Train NLL: 0.7562, Train Acc:0.6875
Epoch: 65, Batch: 5000, Train NLL: 0.5735, Train Acc:0.8125
Epoch: 65, Batch: 6000, Train NLL: 0.6173, Train Acc:0.6875
Epoch: 65, Batch: 7000, Train NLL: 0.4980, Train Acc:0.6875
Epoch: 65, Batch: 8000, Train NLL: 0.5949, Train Acc:0.6875
Epoch: 65, Batch: 9000, Train NLL: 0.6281, Train Acc:0.6875
Epoch: 65, Batch: 10000, Train NLL: 0.5088, Train Acc:0.8125
Epoch: 65, Batch: 11000, Train NLL: 0.3535, Train Acc:0.9375
Epoch: 65, Batch: 12000, Train NLL: 0.4020, Train Acc:0.8750
Epoch: 65, Batch: 13000, Train NLL: 0.4156, Train Acc:0.7500
Epoch: 65, Batch: 14000, Train NLL: 0.6092, Train Acc:0.6875
Epoch: 65, Batch: 15000, Train NLL: 0.3757, Train Acc:0.8750
Epoch: 65, Batch: 16000, 

Epoch: 68, Batch: 28000, Train NLL: 0.6671, Train Acc:0.6875
Epoch: 68, Batch: 29000, Train NLL: 0.4872, Train Acc:0.8125
Epoch: 68, Batch: 30000, Train NLL: 1.0328, Train Acc:0.5000
Epoch: 68, Batch: 31000, Train NLL: 0.5861, Train Acc:0.7500
Epoch: 68, Batch: 32000, Train NLL: 0.3558, Train Acc:0.8750
Epoch: 68, Batch: 33000, Train NLL: 0.4613, Train Acc:0.8125
Epoch: 68, Batch: 34000, Train NLL: 0.6579, Train Acc:0.6250
Epoch: 68, Val NLL: 0.5553, Val Acc: 0.7754
Epoch: 69, Batch: 0, Train NLL: 0.2264, Train Acc:0.9375
Epoch: 69, Batch: 1000, Train NLL: 0.6254, Train Acc:0.7500
Epoch: 69, Batch: 2000, Train NLL: 0.7473, Train Acc:0.6250
Epoch: 69, Batch: 3000, Train NLL: 0.7227, Train Acc:0.6875
Epoch: 69, Batch: 4000, Train NLL: 0.3421, Train Acc:0.8750
Epoch: 69, Batch: 5000, Train NLL: 0.8807, Train Acc:0.7500
Epoch: 69, Batch: 6000, Train NLL: 0.5767, Train Acc:0.6875
Epoch: 69, Batch: 7000, Train NLL: 0.4177, Train Acc:0.8750
Epoch: 69, Batch: 8000, Train NLL: 0.6384, Train Acc

Epoch: 72, Batch: 20000, Train NLL: 0.2978, Train Acc:0.9375
Epoch: 72, Batch: 21000, Train NLL: 0.5669, Train Acc:0.8125
Epoch: 72, Batch: 22000, Train NLL: 0.3557, Train Acc:0.9375
Epoch: 72, Batch: 23000, Train NLL: 0.6756, Train Acc:0.7500
Epoch: 72, Batch: 24000, Train NLL: 0.4709, Train Acc:0.8125
Epoch: 72, Batch: 25000, Train NLL: 0.8021, Train Acc:0.7500
Epoch: 72, Batch: 26000, Train NLL: 0.5863, Train Acc:0.8125
Epoch: 72, Batch: 27000, Train NLL: 0.3724, Train Acc:0.8125
Epoch: 72, Batch: 28000, Train NLL: 0.7960, Train Acc:0.6875
Epoch: 72, Batch: 29000, Train NLL: 0.3859, Train Acc:0.8125
Epoch: 72, Batch: 30000, Train NLL: 0.3445, Train Acc:0.8750
Epoch: 72, Batch: 31000, Train NLL: 0.5536, Train Acc:0.8125
Epoch: 72, Batch: 32000, Train NLL: 0.6971, Train Acc:0.6875
Epoch: 72, Batch: 33000, Train NLL: 0.5123, Train Acc:0.8125
Epoch: 72, Batch: 34000, Train NLL: 0.8503, Train Acc:0.6250
Epoch: 72, Val NLL: 0.5549, Val Acc: 0.7741
Epoch: 73, Batch: 0, Train NLL: 0.4849, T

Epoch: 76, Batch: 13000, Train NLL: 0.6484, Train Acc:0.6875
Epoch: 76, Batch: 14000, Train NLL: 0.1935, Train Acc:0.9375
Epoch: 76, Batch: 15000, Train NLL: 0.7863, Train Acc:0.7500
Epoch: 76, Batch: 16000, Train NLL: 0.5853, Train Acc:0.6875
Epoch: 76, Batch: 17000, Train NLL: 0.6887, Train Acc:0.6875
Epoch: 76, Batch: 18000, Train NLL: 0.5541, Train Acc:0.7500
Epoch: 76, Batch: 19000, Train NLL: 0.5070, Train Acc:0.8750
Epoch: 76, Batch: 20000, Train NLL: 0.4188, Train Acc:0.8125
Epoch: 76, Batch: 21000, Train NLL: 0.5727, Train Acc:0.7500
Epoch: 76, Batch: 22000, Train NLL: 0.4292, Train Acc:0.8750
Epoch: 76, Batch: 23000, Train NLL: 0.5211, Train Acc:0.8125
Epoch: 76, Batch: 24000, Train NLL: 0.7726, Train Acc:0.6250
Epoch: 76, Batch: 25000, Train NLL: 0.5302, Train Acc:0.8125
Epoch: 76, Batch: 26000, Train NLL: 1.0294, Train Acc:0.5625
Epoch: 76, Batch: 27000, Train NLL: 0.4932, Train Acc:0.8125
Epoch: 76, Batch: 28000, Train NLL: 0.3832, Train Acc:0.8750
Epoch: 76, Batch: 29000,

Epoch: 80, Batch: 5000, Train NLL: 0.5830, Train Acc:0.7500
Epoch: 80, Batch: 6000, Train NLL: 0.4532, Train Acc:0.8125
Epoch: 80, Batch: 7000, Train NLL: 0.8692, Train Acc:0.6875
Epoch: 80, Batch: 8000, Train NLL: 0.8077, Train Acc:0.6250
Epoch: 80, Batch: 9000, Train NLL: 0.5547, Train Acc:0.8125
Epoch: 80, Batch: 10000, Train NLL: 0.3727, Train Acc:0.8125
Epoch: 80, Batch: 11000, Train NLL: 0.4981, Train Acc:0.8750
Epoch: 80, Batch: 12000, Train NLL: 0.7076, Train Acc:0.7500
Epoch: 80, Batch: 13000, Train NLL: 0.4165, Train Acc:0.8125
Epoch: 80, Batch: 14000, Train NLL: 0.6075, Train Acc:0.6875
Epoch: 80, Batch: 15000, Train NLL: 0.7773, Train Acc:0.6250
Epoch: 80, Batch: 16000, Train NLL: 0.2822, Train Acc:0.9375
Epoch: 80, Batch: 17000, Train NLL: 0.9096, Train Acc:0.5000
Epoch: 80, Batch: 18000, Train NLL: 0.6731, Train Acc:0.6250
Epoch: 80, Batch: 19000, Train NLL: 0.4375, Train Acc:0.8125
Epoch: 80, Batch: 20000, Train NLL: 0.7919, Train Acc:0.6250
Epoch: 80, Batch: 21000, Trai

Epoch: 83, Batch: 33000, Train NLL: 0.6204, Train Acc:0.7500
Epoch: 83, Batch: 34000, Train NLL: 0.3941, Train Acc:0.8125
Epoch: 83, Val NLL: 0.5550, Val Acc: 0.7763
Epoch: 84, Batch: 0, Train NLL: 0.4648, Train Acc:0.8125
Epoch: 84, Batch: 1000, Train NLL: 0.5551, Train Acc:0.8750
Epoch: 84, Batch: 2000, Train NLL: 0.3884, Train Acc:0.9375
Epoch: 84, Batch: 3000, Train NLL: 0.5414, Train Acc:0.6875
Epoch: 84, Batch: 4000, Train NLL: 0.5252, Train Acc:0.6875
Epoch: 84, Batch: 5000, Train NLL: 0.3922, Train Acc:0.8125
Epoch: 84, Batch: 6000, Train NLL: 0.3881, Train Acc:0.8125
Epoch: 84, Batch: 7000, Train NLL: 0.2398, Train Acc:1.0000
Epoch: 84, Batch: 8000, Train NLL: 0.7346, Train Acc:0.5625
Epoch: 84, Batch: 9000, Train NLL: 0.5290, Train Acc:0.7500
Epoch: 84, Batch: 10000, Train NLL: 0.5855, Train Acc:0.6875
Epoch: 84, Batch: 11000, Train NLL: 0.5526, Train Acc:0.7500
Epoch: 84, Batch: 12000, Train NLL: 0.5702, Train Acc:0.6875
Epoch: 84, Batch: 13000, Train NLL: 0.4715, Train Acc:

Epoch: 87, Batch: 26000, Train NLL: 0.5117, Train Acc:0.6875
Epoch: 87, Batch: 27000, Train NLL: 0.6392, Train Acc:0.6875
Epoch: 87, Batch: 28000, Train NLL: 0.5166, Train Acc:0.7500
Epoch: 87, Batch: 29000, Train NLL: 0.3407, Train Acc:0.8750
Epoch: 87, Batch: 30000, Train NLL: 1.2425, Train Acc:0.5000
Epoch: 87, Batch: 31000, Train NLL: 0.3813, Train Acc:0.8750
Epoch: 87, Batch: 32000, Train NLL: 0.2047, Train Acc:1.0000
Epoch: 87, Batch: 33000, Train NLL: 0.9162, Train Acc:0.7500
Epoch: 87, Batch: 34000, Train NLL: 0.6153, Train Acc:0.6875
Epoch: 87, Val NLL: 0.5529, Val Acc: 0.7803
Epoch: 88, Batch: 0, Train NLL: 0.4018, Train Acc:0.8750
Epoch: 88, Batch: 1000, Train NLL: 0.4187, Train Acc:0.7500
Epoch: 88, Batch: 2000, Train NLL: 0.8669, Train Acc:0.6875
Epoch: 88, Batch: 3000, Train NLL: 0.3013, Train Acc:0.9375
Epoch: 88, Batch: 4000, Train NLL: 0.7728, Train Acc:0.6875
Epoch: 88, Batch: 5000, Train NLL: 0.3026, Train Acc:0.8750
Epoch: 88, Batch: 6000, Train NLL: 0.3668, Train A

Epoch: 91, Batch: 19000, Train NLL: 0.2453, Train Acc:1.0000
Epoch: 91, Batch: 20000, Train NLL: 0.5417, Train Acc:0.8125
Epoch: 91, Batch: 21000, Train NLL: 0.8092, Train Acc:0.5000
Epoch: 91, Batch: 22000, Train NLL: 0.2318, Train Acc:0.9375
Epoch: 91, Batch: 23000, Train NLL: 0.5061, Train Acc:0.7500
Epoch: 91, Batch: 24000, Train NLL: 0.4825, Train Acc:0.8750
Epoch: 91, Batch: 25000, Train NLL: 1.0874, Train Acc:0.4375
Epoch: 91, Batch: 26000, Train NLL: 0.6120, Train Acc:0.8125
Epoch: 91, Batch: 27000, Train NLL: 0.4085, Train Acc:0.8750
Epoch: 91, Batch: 28000, Train NLL: 0.5594, Train Acc:0.7500
Epoch: 91, Batch: 29000, Train NLL: 0.4134, Train Acc:0.9375
Epoch: 91, Batch: 30000, Train NLL: 0.7657, Train Acc:0.6875
Epoch: 91, Batch: 31000, Train NLL: 0.6578, Train Acc:0.6250
Epoch: 91, Batch: 32000, Train NLL: 0.3225, Train Acc:0.8125
Epoch: 91, Batch: 33000, Train NLL: 0.5939, Train Acc:0.6250
Epoch: 91, Batch: 34000, Train NLL: 0.5060, Train Acc:0.8125
Epoch: 91, Val NLL: 0.55

Epoch: 95, Batch: 12000, Train NLL: 0.3699, Train Acc:0.8750
Epoch: 95, Batch: 13000, Train NLL: 0.6241, Train Acc:0.6250
Epoch: 95, Batch: 14000, Train NLL: 0.9034, Train Acc:0.5000
Epoch: 95, Batch: 15000, Train NLL: 0.3958, Train Acc:0.8750
Epoch: 95, Batch: 16000, Train NLL: 0.6778, Train Acc:0.8125
Epoch: 95, Batch: 17000, Train NLL: 0.8643, Train Acc:0.7500
Epoch: 95, Batch: 18000, Train NLL: 0.3175, Train Acc:0.9375
Epoch: 95, Batch: 19000, Train NLL: 0.6646, Train Acc:0.6875
Epoch: 95, Batch: 20000, Train NLL: 0.4624, Train Acc:0.8125
Epoch: 95, Batch: 21000, Train NLL: 0.5190, Train Acc:0.8125
Epoch: 95, Batch: 22000, Train NLL: 0.3988, Train Acc:1.0000
Epoch: 95, Batch: 23000, Train NLL: 0.3942, Train Acc:0.8125
Epoch: 95, Batch: 24000, Train NLL: 0.8781, Train Acc:0.6875
Epoch: 95, Batch: 25000, Train NLL: 0.5942, Train Acc:0.7500
Epoch: 95, Batch: 26000, Train NLL: 0.2710, Train Acc:0.9375
Epoch: 95, Batch: 27000, Train NLL: 0.6994, Train Acc:0.7500
Epoch: 95, Batch: 28000,

Epoch: 99, Batch: 4000, Train NLL: 0.5700, Train Acc:0.8750
Epoch: 99, Batch: 5000, Train NLL: 0.4108, Train Acc:0.8125
Epoch: 99, Batch: 6000, Train NLL: 0.6308, Train Acc:0.7500
Epoch: 99, Batch: 7000, Train NLL: 0.5517, Train Acc:0.7500
Epoch: 99, Batch: 8000, Train NLL: 0.4466, Train Acc:0.7500
Epoch: 99, Batch: 9000, Train NLL: 0.6360, Train Acc:0.7500
Epoch: 99, Batch: 10000, Train NLL: 0.3876, Train Acc:0.8125
Epoch: 99, Batch: 11000, Train NLL: 0.3072, Train Acc:0.8125
Epoch: 99, Batch: 12000, Train NLL: 0.6020, Train Acc:0.6875
Epoch: 99, Batch: 13000, Train NLL: 0.7161, Train Acc:0.6250
Epoch: 99, Batch: 14000, Train NLL: 0.6801, Train Acc:0.7500
Epoch: 99, Batch: 15000, Train NLL: 0.4146, Train Acc:0.8750
Epoch: 99, Batch: 16000, Train NLL: 0.4787, Train Acc:0.7500
Epoch: 99, Batch: 17000, Train NLL: 0.4243, Train Acc:0.8125
Epoch: 99, Batch: 18000, Train NLL: 1.1804, Train Acc:0.5000
Epoch: 99, Batch: 19000, Train NLL: 0.5028, Train Acc:0.7500
Epoch: 99, Batch: 20000, Train

In [None]:
# Disabled cell: rebuilds an EmbedProject module on the GPU and restores its
# parameters from the state dict stored in 'best_EP1.pt'.
# NOTE(review): presumably 'best_EP1.pt' is the checkpoint saved during the
# training run — confirm before re-enabling.
# To use, uncomment all three lines together; `EmbedProject`, `weights`,
# `embed_size` and `hidden_size1` are not defined in this cell and must
# already exist in the kernel.
#EP1 = EmbedProject(weights, embed_size, hidden_size1).cuda()
#state_dict = torch.load('best_EP1.pt')
#EP1.load_state_dict(state_dict)