In [2]:
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Dirichlet
from torch import LongTensor
import torch
import torch.nn.functional as F
import sys
import re
import math
from collections import Counter
from tqdm import tqdm
from itertools import islice
import numpy as np
from torch.autograd import Variable
import pandas as pd

from torchtext import data
from torchtext import datasets
import torchtext
from torchtext.vocab import Vectors
from torchtext.data.iterator import BPTTIterator, Iterator, BucketIterator
from torchtext.data import Batch, Dataset, Field
from torch.utils.data import DataLoader
from namedtensor import ntorch
from namedtensor.text import NamedField
import os

# Utility functions

In [230]:
def get_batch(batch):
    data = batch[0]
    X,y = data[:,:-1],data[:,1:]
    return X,y

def make_predictions_for_kaggle_from_ensemble(models):
    sentences = []
    for i, l in enumerate(open("/home/amaro/cs287/hw2/input.txt"), 1):
        sentences.append(re.split(' ', l))
    tokenized = []
    for s in sentences:
        t_s = []
        for w in s:
            if w != '___\n':
                t_s.append(TEXT.vocab.stoi[w])
        tokenized.append(t_s)
    
    predictions = []
    for i in tokenized:
        X = torch.tensor(i,dtype=torch.long,device='cuda')
        prob = [m(torch.unsqueeze(X,0)) for m in models]
        pred = torch.squeeze(torch.mean(torch.stack(prob,dim=3),dim=3))[-1,:]
        tokens = torch.argsort(pred,descending=True)[:20]
        l_  = [TEXT.vocab.itos[j] for j in tokens]
        predictions.append(' '.join(l_))
    
    out = pd.DataFrame(index=range(1,len(predictions)+1))
    out.index.names = ['id']
    out['word'] = predictions
    out.to_csv('/home/amaro/cs287/hw2/predictions_ensemble_TCN.txt',sep=',')

def make_predictions_for_kaggle(model):
    sentences = []
    for i, l in enumerate(open("/home/amaro/cs287/hw2/input.txt"), 1):
        sentences.append(re.split(' ', l))
    tokenized = []
    for s in sentences:
        t_s = []
        for w in s:
            if w != '___\n':
                t_s.append(TEXT.vocab.stoi[w])
        tokenized.append(t_s)
    
    predictions = []
    for i in tokenized:
        X = torch.tensor(i,dtype=torch.long,device='cuda')
        pred = torch.squeeze(model(torch.unsqueeze(X,0)))[-1,:]
        tokens = torch.argsort(pred,descending=True)[:20]
        l_  = [TEXT.vocab.itos[j] for j in tokens]
        predictions.append(' '.join(l_))
    
    out = pd.DataFrame(index=range(1,len(predictions)+1))
    out.index.names = ['id']
    out['word'] = predictions
    out.to_csv('/home/amaro/cs287/hw2/predictions_2_TCN.txt',sep=',')

# Data generation

In [4]:
TEXT = Field()
train, val, test = torchtext.datasets.LanguageModelingDataset.splits(
    path=".", 
    train="train.txt", validation="valid.txt", test="valid.txt", text_field=TEXT)

# Data distributed with the assignment
TEXT.build_vocab(train)
print('len(TEXT.vocab)', len(TEXT.vocab))

len(TEXT.vocab) 10001


In [28]:
seqlen = 80
train_iter, val_iter, test_iter = torchtext.data.BPTTIterator.splits(
    (train, val, test), batch_size=24, device='cuda', bptt_len=seqlen, repeat=False)

def make_dataloader(train,shuffle=True):
    train_loader = BPTTIterator(train,16,80,device='cuda',repeat=False)
    text = []
    for i in train_loader:
        text.append(i.text)
    training_data = torch.cat(text[:-1],dim=1)
    train_data = torch.utils.data.TensorDataset(training_data.transpose(1,0))
    t_loader=DataLoader(train_data,batch_size=16,shuffle=shuffle)
    return t_loader
train_iter = make_dataloader(train,shuffle=True)
val_iter = make_dataloader(val,shuffle=False)

In [6]:
it = iter(train_iter)
batch = next(it) 

In [155]:
# Build the vocabulary with word embeddings
url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
TEXT.vocab.load_vectors(vectors=Vectors('wiki.simple.vec', url=url)) 
word2vec = TEXT.vocab.vectors


In [156]:
filters = word2vec.shape[1]
embed_size = word2vec.shape[1]
n_words = word2vec.shape[0]

# Model code

In [238]:
LSM = nn.LogSoftmax(dim=2)
from torch.nn.utils import weight_norm
### adapted from https://github.com/locuslab/TCN/ ###
class Chomp1d(torch.nn.Module):
    '''Ensure causal convolutions by removing right most items'''
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size
    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()
    
    
class TC_block(torch.nn.Module):
    def __init__(self, n_in, n_out, kernel, stride, dilation, padding, dropout=0.2):
        super(TC_block, self).__init__()
        self.conv1 = weight_norm(torch.nn.Conv1d(n_in, n_out, kernel,stride=stride,
                                             padding=padding, dilation=dilation))
      
        self.conv2 = weight_norm(nn.Conv1d(n_out, n_out, kernel,stride=stride,
                                           padding=padding, dilation=dilation))
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
    
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
    
        self.chomp1 = Chomp1d(padding)
        self.chomp2 = Chomp1d(padding)
    
        self.block = torch.nn.Sequential(self.conv1,self.chomp1,self.relu1,self.dropout1,
                                          self.conv2,self.chomp2,self.relu2,self.dropout2)
        self.relu = nn.ReLU()
    
        if n_in != n_out:
            self.conv_re = nn.Conv1d(n_in,n_out,kernel_size=1,stride=1,padding=0)
    def forward(self, x):
        out = self.block(x)
        # skip connection
        if x.shape[1]!=out.shape[1]:
            x = self.conv_re(x)
        return self.relu(out + x)
      
      
      
class TCN(torch.nn.Module):
    def __init__(self, n_layers, n_filters, kernel=2, dropout=0.2, embedding_size = 1000, n_words = 10001,
                tied=True,embedding = None):  
        super(TCN, self).__init__()
        blocks = []
        self.embedding_size = embedding_size
        self.n_words = n_words
        if embedding is None:
            self.embedding = nn.Embedding(self.n_words,self.embedding_size)
            self.init_embedding = True
        else:
            self.embedding = nn.Embedding(self.n_words,self.embedding_size)
            self.embedding.data = embedding
            self.init_embedding = False
        self.n_filters = [self.embedding_size] + n_filters
        
                
        for i in range(1,n_layers):
            dilation = 2 ** i
            n_in = self.n_filters[i-1]
            n_out = self.n_filters[i]
            blocks.append(TC_block(n_in, n_out, kernel, stride=1, dilation=dilation, padding=(kernel-1) * dilation, dropout=dropout))
            
        self.network = nn.Sequential(*blocks)
        self.receptive_field = 1 + 2*(kernel-1)*(2 ** n_layers-1) + 1
        self.output_layer = nn.Linear(n_filters[-1], n_words)
        self.relu = nn.ReLU()
        if tied:
            self.output_layer.weight = self.embedding.weight
        self.drop = nn.Dropout(0.25)
        self.init_weights()
    def init_weights(self):
        if self.init_embedding:
            self.embedding.weight.data.normal_(0, 0.01)
        self.output_layer.bias.data.fill_(0)
        self.output_layer.weight.data.normal_(0, 0.01)

    def forward(self, x):
        embed = self.drop(self.embedding(x))
        hook = self.network(embed.transpose(1,2)).transpose(1,2)
        return self.output_layer(hook)

# Training loop

In [234]:
def train_loop(e=0):
    model.train()
    batch_idx = 0
    for batch in train_iter:
        X,y = get_batch(batch)
        prob = model(X)
        # skip some chars for loss
        skip = int(X.shape[1]/2)
        target = y[:, skip:].contiguous()
        output = prob[:, skip:,:].contiguous().transpose(1,2)
        loss = criterion(output, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.35)
        optimizer.step()
        batch_idx +=1
        if np.mod(batch_idx,100) == 0:
            batch_size = X.shape[0]
            ppl = np.exp(loss.cpu().detach() / (batch_size * (seqlen-skip))) # update
            acc = torch.sum(torch.argmax(prob.cpu().detach(),dim=2) == y.cpu().detach()).float() / torch.FloatTensor([batch_size*(seqlen-skip)])
            print('Epoch: %d, Batch: %d, loss: %.4f , Train PPL: %.4f, Train Acc: %.4f' % (e, batch_idx, loss.cpu().detach(), ppl, acc))



# Validation loop

In [233]:
def validation_loop(e=0):
    model.eval()
    batch_idx = 0
    ppl = []
    acc = 0
    total_loss = 0
    for batch in val_iter:
        X,y = get_batch(batch)
        prob = model(X)
        # skip some chars for loss
        skip = int(X.shape[1]/2)
        target = y[:, skip:].contiguous()
        output = prob[:, skip:,:].contiguous().transpose(1,2)
        loss = criterion(output, target).cpu().detach()
        total_loss += loss
        batch_idx +=1
        batch_size = X.shape[0]
        ppl.append(np.exp(loss / (batch_size * (seqlen-skip)))) # update
        acc += torch.sum(torch.argmax(prob.cpu().detach(),dim=2) == y.cpu().detach()).float() / torch.FloatTensor([batch_size*(seqlen-skip)])
    print('Validation --- Epoch: %d, total loss: %.4f , PPL: %.4f, Acc: %.4f' % (e, total_loss.cpu().detach(), np.mean(ppl), acc/batch_idx))
    return total_loss

# Experiments

In [51]:
model = TCN(5, [600,600,600,600], kernel=2, dropout=0.5, embedding_size = 600, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)


best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_5_layers.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Epoch: 0, Batch: 100, loss: 5.2793 , Train PPL: 1.0081, Train Acc: 0.3354
Epoch: 0, Batch: 200, loss: 5.4507 , Train PPL: 1.0083, Train Acc: 0.3034
Epoch: 0, Batch: 300, loss: 5.4952 , Train PPL: 1.0084, Train Acc: 0.2927
Epoch: 0, Batch: 400, loss: 5.3613 , Train PPL: 1.0082, Train Acc: 0.3338
Epoch: 0, Batch: 500, loss: 5.5610 , Train PPL: 1.0085, Train Acc: 0.3186
Epoch: 0, Batch: 600, loss: 5.5469 , Train PPL: 1.0085, Train Acc: 0.3308
Epoch: 0, Batch: 700, loss: 5.4303 , Train PPL: 1.0083, Train Acc: 0.3155
Validation --- Epoch: 0, total loss: 310.3686 , PPL: 1.2884, Acc: 0.3404
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 5.0877 , Train PPL: 1.0078, Train Acc: 0.3186
Epoch: 1, Batch: 200, loss: 5.3588 , Train PPL: 1.0082, Train Acc: 0.3308
Epoch: 1, Batch: 300, loss: 5.4441 , Train PPL: 1.0083, Train Acc: 0.3018
Epoch: 1, Batch: 400, loss: 5.1657 , Train PPL: 1.0079, Train Acc: 0.2988
Epoch: 1, Batch: 500, loss: 5.1097 , Train PPL: 1.0078, Train Acc: 0.3796
Epoch: 1, Batch: 600, loss: 5.2231 , Train PPL: 1.0080, Train Acc: 0.3232
Epoch: 1, Batch: 700, loss: 5.3021 , Train PPL: 1.0081, Train Acc: 0.3506
Validation --- Epoch: 1, total loss: 308.7462 , PPL: 1.2865, Acc: 0.3243
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.0947 , Train PPL: 1.0078, Train Acc: 0.3582
Epoch: 2, Batch: 200, loss: 5.4302 , Train PPL: 1.0083, Train Acc: 0.2957
Epoch: 2, Batch: 300, loss: 5.2537 , Train PPL: 1.0080, Train Acc: 0.3384
Epoch: 2, Batch: 400, loss: 5.4700 , Train PPL: 1.0084, Train Acc: 0.3095
Epoch: 2, Batch: 500, loss: 5.3653 , Train PPL: 1.0082, Train Acc: 0.2546
Epoch: 2, Batch: 600

Epoch: 14, Batch: 500, loss: 4.9416 , Train PPL: 1.0076, Train Acc: 0.2881
Epoch: 14, Batch: 600, loss: 4.7736 , Train PPL: 1.0073, Train Acc: 0.3643
Epoch: 14, Batch: 700, loss: 4.3876 , Train PPL: 1.0067, Train Acc: 0.3963
Validation --- Epoch: 14, total loss: 301.5753 , PPL: 1.2798, Acc: 0.3118
lr = 4
Epoch: 15, Batch: 100, loss: 4.6768 , Train PPL: 1.0072, Train Acc: 0.3689
Epoch: 15, Batch: 200, loss: 4.7835 , Train PPL: 1.0073, Train Acc: 0.3537
Epoch: 15, Batch: 300, loss: 4.7732 , Train PPL: 1.0073, Train Acc: 0.3506
Epoch: 15, Batch: 400, loss: 4.8680 , Train PPL: 1.0074, Train Acc: 0.3552
Epoch: 15, Batch: 500, loss: 4.7886 , Train PPL: 1.0073, Train Acc: 0.3491
Epoch: 15, Batch: 600, loss: 4.9769 , Train PPL: 1.0076, Train Acc: 0.3369
Epoch: 15, Batch: 700, loss: 4.6148 , Train PPL: 1.0071, Train Acc: 0.3567
Validation --- Epoch: 15, total loss: 301.1738 , PPL: 1.2793, Acc: 0.3346
lr = 4
Epoch: 16, Batch: 100, loss: 4.5137 , Train PPL: 1.0069, Train Acc: 0.3933
Epoch: 16, Ba

Epoch: 28, Batch: 100, loss: 4.3900 , Train PPL: 1.0067, Train Acc: 0.3552
Epoch: 28, Batch: 200, loss: 4.1190 , Train PPL: 1.0063, Train Acc: 0.4375
Epoch: 28, Batch: 300, loss: 4.4324 , Train PPL: 1.0068, Train Acc: 0.3659
Epoch: 28, Batch: 400, loss: 4.6545 , Train PPL: 1.0071, Train Acc: 0.3643
Epoch: 28, Batch: 500, loss: 4.3366 , Train PPL: 1.0066, Train Acc: 0.3674
Epoch: 28, Batch: 600, loss: 4.5471 , Train PPL: 1.0070, Train Acc: 0.4253
Epoch: 28, Batch: 700, loss: 4.3044 , Train PPL: 1.0066, Train Acc: 0.3902
Validation --- Epoch: 28, total loss: 297.1898 , PPL: 1.2755, Acc: 0.3811
lr = 2.0
Epoch: 29, Batch: 100, loss: 3.9129 , Train PPL: 1.0060, Train Acc: 0.4543
Epoch: 29, Batch: 200, loss: 4.2277 , Train PPL: 1.0065, Train Acc: 0.4207
Epoch: 29, Batch: 300, loss: 4.5261 , Train PPL: 1.0069, Train Acc: 0.3476
Epoch: 29, Batch: 400, loss: 4.1366 , Train PPL: 1.0063, Train Acc: 0.4451
Epoch: 29, Batch: 500, loss: 4.4972 , Train PPL: 1.0069, Train Acc: 0.3476
Epoch: 29, Batch:

Epoch: 41, Batch: 500, loss: 3.9291 , Train PPL: 1.0060, Train Acc: 0.4634
Epoch: 41, Batch: 600, loss: 4.1889 , Train PPL: 1.0064, Train Acc: 0.4482
Epoch: 41, Batch: 700, loss: 4.2821 , Train PPL: 1.0065, Train Acc: 0.3918
Validation --- Epoch: 41, total loss: 296.8812 , PPL: 1.2753, Acc: 0.3951
lr = 0.5
Epoch: 42, Batch: 100, loss: 3.7156 , Train PPL: 1.0057, Train Acc: 0.5015
Epoch: 42, Batch: 200, loss: 4.0132 , Train PPL: 1.0061, Train Acc: 0.4573
Epoch: 42, Batch: 300, loss: 4.2099 , Train PPL: 1.0064, Train Acc: 0.4619
Epoch: 42, Batch: 400, loss: 3.9605 , Train PPL: 1.0061, Train Acc: 0.4619
Epoch: 42, Batch: 500, loss: 4.1345 , Train PPL: 1.0063, Train Acc: 0.4299
Epoch: 42, Batch: 600, loss: 4.3663 , Train PPL: 1.0067, Train Acc: 0.3918
Epoch: 42, Batch: 700, loss: 4.1077 , Train PPL: 1.0063, Train Acc: 0.4314
Validation --- Epoch: 42, total loss: 297.2900 , PPL: 1.2758, Acc: 0.3958
lr = 0.5
Epoch: 43, Batch: 100, loss: 4.1686 , Train PPL: 1.0064, Train Acc: 0.4009
Epoch: 43

Epoch: 55, Batch: 100, loss: 4.2196 , Train PPL: 1.0065, Train Acc: 0.4329
Epoch: 55, Batch: 200, loss: 3.9239 , Train PPL: 1.0060, Train Acc: 0.4909
Epoch: 55, Batch: 300, loss: 3.8913 , Train PPL: 1.0059, Train Acc: 0.4756
Epoch: 55, Batch: 400, loss: 4.1402 , Train PPL: 1.0063, Train Acc: 0.3933
Epoch: 55, Batch: 500, loss: 4.2246 , Train PPL: 1.0065, Train Acc: 0.3948
Epoch: 55, Batch: 600, loss: 4.3056 , Train PPL: 1.0066, Train Acc: 0.3857
Epoch: 55, Batch: 700, loss: 4.3487 , Train PPL: 1.0067, Train Acc: 0.3887
Validation --- Epoch: 55, total loss: 297.2353 , PPL: 1.2757, Acc: 0.4004
lr = 0.125
Epoch: 56, Batch: 100, loss: 3.9319 , Train PPL: 1.0060, Train Acc: 0.4863
Epoch: 56, Batch: 200, loss: 4.3392 , Train PPL: 1.0066, Train Acc: 0.3582
Epoch: 56, Batch: 300, loss: 4.2180 , Train PPL: 1.0065, Train Acc: 0.4451
Epoch: 56, Batch: 400, loss: 3.9325 , Train PPL: 1.0060, Train Acc: 0.4405
Epoch: 56, Batch: 500, loss: 4.1257 , Train PPL: 1.0063, Train Acc: 0.4405
Epoch: 56, Batc

Epoch: 68, Batch: 500, loss: 4.0369 , Train PPL: 1.0062, Train Acc: 0.4238
Epoch: 68, Batch: 600, loss: 4.1295 , Train PPL: 1.0063, Train Acc: 0.4497
Epoch: 68, Batch: 700, loss: 4.1792 , Train PPL: 1.0064, Train Acc: 0.3979
Validation --- Epoch: 68, total loss: 297.5560 , PPL: 1.2761, Acc: 0.4004
lr = 0.03125
Epoch: 69, Batch: 100, loss: 3.9388 , Train PPL: 1.0060, Train Acc: 0.4771
Epoch: 69, Batch: 200, loss: 3.9959 , Train PPL: 1.0061, Train Acc: 0.4466
Epoch: 69, Batch: 300, loss: 4.0616 , Train PPL: 1.0062, Train Acc: 0.4024
Epoch: 69, Batch: 400, loss: 3.8252 , Train PPL: 1.0058, Train Acc: 0.4878
Epoch: 69, Batch: 500, loss: 3.9534 , Train PPL: 1.0060, Train Acc: 0.4543
Epoch: 69, Batch: 600, loss: 3.6842 , Train PPL: 1.0056, Train Acc: 0.5152
Epoch: 69, Batch: 700, loss: 3.9165 , Train PPL: 1.0060, Train Acc: 0.4710
Validation --- Epoch: 69, total loss: 297.5078 , PPL: 1.2760, Acc: 0.4009
lr = 0.015625
Epoch: 70, Batch: 100, loss: 3.9344 , Train PPL: 1.0060, Train Acc: 0.4451


Validation --- Epoch: 81, total loss: 297.5801 , PPL: 1.2761, Acc: 0.4004
lr = 0.00390625
Epoch: 82, Batch: 100, loss: 4.4771 , Train PPL: 1.0068, Train Acc: 0.3460
Epoch: 82, Batch: 200, loss: 3.8507 , Train PPL: 1.0059, Train Acc: 0.4710
Epoch: 82, Batch: 300, loss: 4.3121 , Train PPL: 1.0066, Train Acc: 0.3994
Epoch: 82, Batch: 400, loss: 4.0952 , Train PPL: 1.0063, Train Acc: 0.4634
Epoch: 82, Batch: 500, loss: 4.0444 , Train PPL: 1.0062, Train Acc: 0.4482
Epoch: 82, Batch: 600, loss: 3.9428 , Train PPL: 1.0060, Train Acc: 0.4909
Epoch: 82, Batch: 700, loss: 4.0840 , Train PPL: 1.0062, Train Acc: 0.4177
Validation --- Epoch: 82, total loss: 297.5741 , PPL: 1.2761, Acc: 0.4004
lr = 0.00390625
Epoch: 83, Batch: 100, loss: 4.1454 , Train PPL: 1.0063, Train Acc: 0.4162
Epoch: 83, Batch: 200, loss: 4.1977 , Train PPL: 1.0064, Train Acc: 0.4451
Epoch: 83, Batch: 300, loss: 3.9777 , Train PPL: 1.0061, Train Acc: 0.4085
Epoch: 83, Batch: 400, loss: 3.6449 , Train PPL: 1.0056, Train Acc: 0.

Epoch: 95, Batch: 300, loss: 4.0985 , Train PPL: 1.0063, Train Acc: 0.4253
Epoch: 95, Batch: 400, loss: 4.3239 , Train PPL: 1.0066, Train Acc: 0.3521
Epoch: 95, Batch: 500, loss: 3.7890 , Train PPL: 1.0058, Train Acc: 0.4680
Epoch: 95, Batch: 600, loss: 4.0826 , Train PPL: 1.0062, Train Acc: 0.4360
Epoch: 95, Batch: 700, loss: 4.2292 , Train PPL: 1.0065, Train Acc: 0.3811
Validation --- Epoch: 95, total loss: 297.5898 , PPL: 1.2761, Acc: 0.4003
lr = 0.0009765625
Epoch: 96, Batch: 100, loss: 4.2725 , Train PPL: 1.0065, Train Acc: 0.4055
Epoch: 96, Batch: 200, loss: 4.3587 , Train PPL: 1.0067, Train Acc: 0.3613
Epoch: 96, Batch: 300, loss: 4.2635 , Train PPL: 1.0065, Train Acc: 0.3918
Epoch: 96, Batch: 400, loss: 4.0100 , Train PPL: 1.0061, Train Acc: 0.4482
Epoch: 96, Batch: 500, loss: 4.0943 , Train PPL: 1.0063, Train Acc: 0.4466
Epoch: 96, Batch: 600, loss: 4.0524 , Train PPL: 1.0062, Train Acc: 0.4405
Epoch: 96, Batch: 700, loss: 4.2891 , Train PPL: 1.0066, Train Acc: 0.3643
Validati

In [52]:
model = TCN(4, [600,600,600], kernel=3, dropout=0.5, embedding_size = 600, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_4_layers_k3.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 12497801 parameters
Receptive field of network is 62
Epoch: 0, Batch: 100, loss: 6.6985 , Train PPL: 1.0103, Train Acc: 0.1570
Epoch: 0, Batch: 200, loss: 6.2922 , Train PPL: 1.0096, Train Acc: 0.1601
Epoch: 0, Batch: 300, loss: 6.3089 , Train PPL: 1.0097, Train Acc: 0.1829
Epoch: 0, Batch: 400, loss: 6.2825 , Train PPL: 1.0096, Train Acc: 0.2180
Epoch: 0, Batch: 500, loss: 5.9894 , Train PPL: 1.0092, Train Acc: 0.2226
Epoch: 0, Batch: 600, loss: 5.4370 , Train PPL: 1.0083, Train Acc: 0.3003
Epoch: 0, Batch: 700, loss: 5.8265 , Train PPL: 1.0089, Train Acc: 0.2866
Validation --- Epoch: 0, total loss: 332.0558 , PPL: 1.3110, Acc: 0.2905
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 6.1590 , Train PPL: 1.0094, Train Acc: 0.2424
Epoch: 1, Batch: 200, loss: 5.8487 , Train PPL: 1.0090, Train Acc: 0.2271
Epoch: 1, Batch: 300, loss: 5.8331 , Train PPL: 1.0089, Train Acc: 0.2591
Epoch: 1, Batch: 400, loss: 5.5738 , Train PPL: 1.0085, Train Acc: 0.2759
Epoch: 1, Batch: 500, loss: 5.5675 , Train PPL: 1.0085, Train Acc: 0.3430
Epoch: 1, Batch: 600, loss: 6.0445 , Train PPL: 1.0093, Train Acc: 0.2348
Epoch: 1, Batch: 700, loss: 5.8154 , Train PPL: 1.0089, Train Acc: 0.2424
Validation --- Epoch: 1, total loss: 320.2897 , PPL: 1.2984, Acc: 0.3085
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.4596 , Train PPL: 1.0084, Train Acc: 0.2942
Epoch: 2, Batch: 200, loss: 5.8151 , Train PPL: 1.0089, Train Acc: 0.2591
Epoch: 2, Batch: 300, loss: 5.8196 , Train PPL: 1.0089, Train Acc: 0.2607
Epoch: 2, Batch: 400, loss: 5.7728 , Train PPL: 1.0088, Train Acc: 0.2729
Epoch: 2, Batch: 500, loss: 5.5933 , Train PPL: 1.0086, Train Acc: 0.2835
Epoch: 2, Batch: 600

Epoch: 14, Batch: 500, loss: 4.9122 , Train PPL: 1.0075, Train Acc: 0.3354
Epoch: 14, Batch: 600, loss: 4.7655 , Train PPL: 1.0073, Train Acc: 0.3674
Epoch: 14, Batch: 700, loss: 4.8263 , Train PPL: 1.0074, Train Acc: 0.3643
Validation --- Epoch: 14, total loss: 303.7874 , PPL: 1.2823, Acc: 0.3128
lr = 4
Epoch: 15, Batch: 100, loss: 4.8351 , Train PPL: 1.0074, Train Acc: 0.3079
Epoch: 15, Batch: 200, loss: 4.7545 , Train PPL: 1.0073, Train Acc: 0.3933
Epoch: 15, Batch: 300, loss: 5.0739 , Train PPL: 1.0078, Train Acc: 0.3460
Epoch: 15, Batch: 400, loss: 5.0020 , Train PPL: 1.0077, Train Acc: 0.3003
Epoch: 15, Batch: 500, loss: 4.8689 , Train PPL: 1.0074, Train Acc: 0.3186
Epoch: 15, Batch: 600, loss: 5.0323 , Train PPL: 1.0077, Train Acc: 0.2790
Epoch: 15, Batch: 700, loss: 4.6444 , Train PPL: 1.0071, Train Acc: 0.4146
Validation --- Epoch: 15, total loss: 304.2900 , PPL: 1.2829, Acc: 0.3095
lr = 4
Epoch: 16, Batch: 100, loss: 4.5593 , Train PPL: 1.0070, Train Acc: 0.4284
Epoch: 16, Ba

Epoch: 28, Batch: 100, loss: 4.3584 , Train PPL: 1.0067, Train Acc: 0.3857
Epoch: 28, Batch: 200, loss: 4.4723 , Train PPL: 1.0068, Train Acc: 0.3521
Epoch: 28, Batch: 300, loss: 4.4491 , Train PPL: 1.0068, Train Acc: 0.3582
Epoch: 28, Batch: 400, loss: 4.7258 , Train PPL: 1.0072, Train Acc: 0.3552
Epoch: 28, Batch: 500, loss: 4.7691 , Train PPL: 1.0073, Train Acc: 0.3247
Epoch: 28, Batch: 600, loss: 4.5074 , Train PPL: 1.0069, Train Acc: 0.3933
Epoch: 28, Batch: 700, loss: 4.7583 , Train PPL: 1.0073, Train Acc: 0.4101
Validation --- Epoch: 28, total loss: 301.6432 , PPL: 1.2804, Acc: 0.3406
lr = 4
Epoch: 29, Batch: 100, loss: 4.6565 , Train PPL: 1.0071, Train Acc: 0.3323
Epoch: 29, Batch: 200, loss: 4.3978 , Train PPL: 1.0067, Train Acc: 0.3735
Epoch: 29, Batch: 300, loss: 4.5735 , Train PPL: 1.0070, Train Acc: 0.3216
Epoch: 29, Batch: 400, loss: 4.6734 , Train PPL: 1.0071, Train Acc: 0.3247
Epoch: 29, Batch: 500, loss: 4.5150 , Train PPL: 1.0069, Train Acc: 0.4375
Epoch: 29, Batch: 6

Epoch: 41, Batch: 500, loss: 4.3623 , Train PPL: 1.0067, Train Acc: 0.4238
Epoch: 41, Batch: 600, loss: 4.3940 , Train PPL: 1.0067, Train Acc: 0.3689
Epoch: 41, Batch: 700, loss: 4.6392 , Train PPL: 1.0071, Train Acc: 0.3338
Validation --- Epoch: 41, total loss: 296.4175 , PPL: 1.2752, Acc: 0.3921
lr = 1.0
Epoch: 42, Batch: 100, loss: 4.3330 , Train PPL: 1.0066, Train Acc: 0.3780
Epoch: 42, Batch: 200, loss: 4.2500 , Train PPL: 1.0065, Train Acc: 0.4040
Epoch: 42, Batch: 300, loss: 3.5239 , Train PPL: 1.0054, Train Acc: 0.5503
Epoch: 42, Batch: 400, loss: 4.0119 , Train PPL: 1.0061, Train Acc: 0.4924
Epoch: 42, Batch: 500, loss: 4.3130 , Train PPL: 1.0066, Train Acc: 0.3857
Epoch: 42, Batch: 600, loss: 4.4436 , Train PPL: 1.0068, Train Acc: 0.3445
Epoch: 42, Batch: 700, loss: 4.0561 , Train PPL: 1.0062, Train Acc: 0.4192
Validation --- Epoch: 42, total loss: 296.4901 , PPL: 1.2751, Acc: 0.3884
lr = 1.0
Epoch: 43, Batch: 100, loss: 4.3157 , Train PPL: 1.0066, Train Acc: 0.3354
Epoch: 43

Epoch: 55, Batch: 100, loss: 4.1387 , Train PPL: 1.0063, Train Acc: 0.4726
Epoch: 55, Batch: 200, loss: 3.7647 , Train PPL: 1.0058, Train Acc: 0.4527
Epoch: 55, Batch: 300, loss: 4.1869 , Train PPL: 1.0064, Train Acc: 0.3659
Epoch: 55, Batch: 400, loss: 4.1730 , Train PPL: 1.0064, Train Acc: 0.4268
Epoch: 55, Batch: 500, loss: 4.3303 , Train PPL: 1.0066, Train Acc: 0.3918
Epoch: 55, Batch: 600, loss: 4.0653 , Train PPL: 1.0062, Train Acc: 0.4284
Epoch: 55, Batch: 700, loss: 4.3785 , Train PPL: 1.0067, Train Acc: 0.3460
Validation --- Epoch: 55, total loss: 297.3725 , PPL: 1.2763, Acc: 0.3994
lr = 0.25
Epoch: 56, Batch: 100, loss: 4.2284 , Train PPL: 1.0065, Train Acc: 0.3796
Epoch: 56, Batch: 200, loss: 4.0217 , Train PPL: 1.0061, Train Acc: 0.3857
Epoch: 56, Batch: 300, loss: 4.3579 , Train PPL: 1.0067, Train Acc: 0.3674
Epoch: 56, Batch: 400, loss: 3.9402 , Train PPL: 1.0060, Train Acc: 0.4238
Epoch: 56, Batch: 500, loss: 4.1254 , Train PPL: 1.0063, Train Acc: 0.4238
Epoch: 56, Batch

Epoch: 68, Batch: 500, loss: 4.3305 , Train PPL: 1.0066, Train Acc: 0.3643
Epoch: 68, Batch: 600, loss: 4.0233 , Train PPL: 1.0062, Train Acc: 0.4177
Epoch: 68, Batch: 700, loss: 3.7433 , Train PPL: 1.0057, Train Acc: 0.5396
Validation --- Epoch: 68, total loss: 297.6108 , PPL: 1.2765, Acc: 0.4002
lr = 0.0625
Epoch: 69, Batch: 100, loss: 3.9697 , Train PPL: 1.0061, Train Acc: 0.4421
Epoch: 69, Batch: 200, loss: 4.2754 , Train PPL: 1.0065, Train Acc: 0.3902
Epoch: 69, Batch: 300, loss: 3.9463 , Train PPL: 1.0060, Train Acc: 0.4284
Epoch: 69, Batch: 400, loss: 4.0461 , Train PPL: 1.0062, Train Acc: 0.3902
Epoch: 69, Batch: 500, loss: 4.4146 , Train PPL: 1.0068, Train Acc: 0.3963
Epoch: 69, Batch: 600, loss: 4.1797 , Train PPL: 1.0064, Train Acc: 0.3659
Epoch: 69, Batch: 700, loss: 3.8789 , Train PPL: 1.0059, Train Acc: 0.4787
Validation --- Epoch: 69, total loss: 297.6034 , PPL: 1.2766, Acc: 0.4010
lr = 0.0625
Epoch: 70, Batch: 100, loss: 4.2762 , Train PPL: 1.0065, Train Acc: 0.3735
Epo

Epoch: 82, Batch: 100, loss: 4.2764 , Train PPL: 1.0065, Train Acc: 0.3552
Epoch: 82, Batch: 200, loss: 3.8451 , Train PPL: 1.0059, Train Acc: 0.4451
Epoch: 82, Batch: 300, loss: 4.0798 , Train PPL: 1.0062, Train Acc: 0.3659
Epoch: 82, Batch: 400, loss: 3.9888 , Train PPL: 1.0061, Train Acc: 0.4726
Epoch: 82, Batch: 500, loss: 4.3177 , Train PPL: 1.0066, Train Acc: 0.4040
Epoch: 82, Batch: 600, loss: 3.9089 , Train PPL: 1.0060, Train Acc: 0.4329
Epoch: 82, Batch: 700, loss: 3.9852 , Train PPL: 1.0061, Train Acc: 0.4604
Validation --- Epoch: 82, total loss: 297.7132 , PPL: 1.2767, Acc: 0.4013
lr = 0.0078125
Epoch: 83, Batch: 100, loss: 4.0771 , Train PPL: 1.0062, Train Acc: 0.4162
Epoch: 83, Batch: 200, loss: 4.1362 , Train PPL: 1.0063, Train Acc: 0.3857
Epoch: 83, Batch: 300, loss: 3.9782 , Train PPL: 1.0061, Train Acc: 0.4482
Epoch: 83, Batch: 400, loss: 4.3317 , Train PPL: 1.0066, Train Acc: 0.3491
Epoch: 83, Batch: 500, loss: 4.1671 , Train PPL: 1.0064, Train Acc: 0.3872
Epoch: 83, 

Epoch: 95, Batch: 400, loss: 4.0616 , Train PPL: 1.0062, Train Acc: 0.4085
Epoch: 95, Batch: 500, loss: 3.8787 , Train PPL: 1.0059, Train Acc: 0.4131
Epoch: 95, Batch: 600, loss: 3.9578 , Train PPL: 1.0061, Train Acc: 0.4207
Epoch: 95, Batch: 700, loss: 4.0128 , Train PPL: 1.0061, Train Acc: 0.4497
Validation --- Epoch: 95, total loss: 297.7440 , PPL: 1.2767, Acc: 0.4005
lr = 0.001953125
Epoch: 96, Batch: 100, loss: 4.3132 , Train PPL: 1.0066, Train Acc: 0.4131
Epoch: 96, Batch: 200, loss: 4.0490 , Train PPL: 1.0062, Train Acc: 0.4146
Epoch: 96, Batch: 300, loss: 3.9636 , Train PPL: 1.0061, Train Acc: 0.4588
Epoch: 96, Batch: 400, loss: 4.1525 , Train PPL: 1.0064, Train Acc: 0.3979
Epoch: 96, Batch: 500, loss: 4.2393 , Train PPL: 1.0065, Train Acc: 0.3720
Epoch: 96, Batch: 600, loss: 4.1694 , Train PPL: 1.0064, Train Acc: 0.3765
Epoch: 96, Batch: 700, loss: 4.2927 , Train PPL: 1.0066, Train Acc: 0.3857
Validation --- Epoch: 96, total loss: 297.7403 , PPL: 1.2767, Acc: 0.4004
lr = 0.001

In [53]:
model = TCN(4, [300,300,300], kernel=3, dropout=0.5, embedding_size = 300, n_words = n_words,tied=True,embedding=word2vec)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_4_layers_k3_word2vec.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 4633901 parameters
Receptive field of network is 62
Epoch: 0, Batch: 100, loss: 6.6709 , Train PPL: 1.0102, Train Acc: 0.1524
Epoch: 0, Batch: 200, loss: 6.2315 , Train PPL: 1.0095, Train Acc: 0.1936
Epoch: 0, Batch: 300, loss: 6.3242 , Train PPL: 1.0097, Train Acc: 0.1966
Epoch: 0, Batch: 400, loss: 6.3010 , Train PPL: 1.0097, Train Acc: 0.2454
Epoch: 0, Batch: 500, loss: 6.3201 , Train PPL: 1.0097, Train Acc: 0.2149
Epoch: 0, Batch: 600, loss: 5.9883 , Train PPL: 1.0092, Train Acc: 0.2287
Epoch: 0, Batch: 700, loss: 5.9589 , Train PPL: 1.0091, Train Acc: 0.2439
Validation --- Epoch: 0, total loss: 334.4513 , PPL: 1.3136, Acc: 0.2757
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 6.0809 , Train PPL: 1.0093, Train Acc: 0.2622
Epoch: 1, Batch: 200, loss: 6.0547 , Train PPL: 1.0093, Train Acc: 0.2591
Epoch: 1, Batch: 300, loss: 5.8774 , Train PPL: 1.0090, Train Acc: 0.2942
Epoch: 1, Batch: 400, loss: 5.9999 , Train PPL: 1.0092, Train Acc: 0.2180
Epoch: 1, Batch: 500, loss: 5.5050 , Train PPL: 1.0084, Train Acc: 0.3277
Epoch: 1, Batch: 600, loss: 5.3919 , Train PPL: 1.0083, Train Acc: 0.2927
Epoch: 1, Batch: 700, loss: 5.9243 , Train PPL: 1.0091, Train Acc: 0.2713
Validation --- Epoch: 1, total loss: 324.7977 , PPL: 1.3034, Acc: 0.2864
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.5558 , Train PPL: 1.0085, Train Acc: 0.2591
Epoch: 2, Batch: 200, loss: 5.9567 , Train PPL: 1.0091, Train Acc: 0.2637
Epoch: 2, Batch: 300, loss: 5.7342 , Train PPL: 1.0088, Train Acc: 0.2515
Epoch: 2, Batch: 400, loss: 5.0695 , Train PPL: 1.0078, Train Acc: 0.3887
Epoch: 2, Batch: 500, loss: 5.5945 , Train PPL: 1.0086, Train Acc: 0.3034
Epoch: 2, Batch: 600

Epoch: 14, Batch: 600, loss: 5.0966 , Train PPL: 1.0078, Train Acc: 0.3034
Epoch: 14, Batch: 700, loss: 5.3374 , Train PPL: 1.0082, Train Acc: 0.3110
Validation --- Epoch: 14, total loss: 315.6962 , PPL: 1.2945, Acc: 0.2422
lr = 4
Epoch: 15, Batch: 100, loss: 4.8201 , Train PPL: 1.0074, Train Acc: 0.3872
Epoch: 15, Batch: 200, loss: 5.0907 , Train PPL: 1.0078, Train Acc: 0.3155
Epoch: 15, Batch: 300, loss: 4.9318 , Train PPL: 1.0075, Train Acc: 0.3735
Epoch: 15, Batch: 400, loss: 5.0245 , Train PPL: 1.0077, Train Acc: 0.2957
Epoch: 15, Batch: 500, loss: 5.0694 , Train PPL: 1.0078, Train Acc: 0.3399
Epoch: 15, Batch: 600, loss: 4.9473 , Train PPL: 1.0076, Train Acc: 0.3506
Epoch: 15, Batch: 700, loss: 4.9373 , Train PPL: 1.0076, Train Acc: 0.3277
Validation --- Epoch: 15, total loss: 312.8839 , PPL: 1.2917, Acc: 0.2695
lr = 2.0
Epoch: 16, Batch: 100, loss: 4.9912 , Train PPL: 1.0076, Train Acc: 0.3140
Epoch: 16, Batch: 200, loss: 4.8764 , Train PPL: 1.0075, Train Acc: 0.3506
Epoch: 16, 

Epoch: 28, Batch: 200, loss: 4.9484 , Train PPL: 1.0076, Train Acc: 0.3902
Epoch: 28, Batch: 300, loss: 4.8479 , Train PPL: 1.0074, Train Acc: 0.3216
Epoch: 28, Batch: 400, loss: 4.6737 , Train PPL: 1.0071, Train Acc: 0.3811
Epoch: 28, Batch: 500, loss: 4.8912 , Train PPL: 1.0075, Train Acc: 0.3140
Epoch: 28, Batch: 600, loss: 4.8870 , Train PPL: 1.0075, Train Acc: 0.3369
Epoch: 28, Batch: 700, loss: 4.4344 , Train PPL: 1.0068, Train Acc: 0.3887
Validation --- Epoch: 28, total loss: 305.1396 , PPL: 1.2836, Acc: 0.3147
lr = 2.0
Epoch: 29, Batch: 100, loss: 4.6747 , Train PPL: 1.0072, Train Acc: 0.3979
Epoch: 29, Batch: 200, loss: 4.7686 , Train PPL: 1.0073, Train Acc: 0.3430
Epoch: 29, Batch: 300, loss: 4.8959 , Train PPL: 1.0075, Train Acc: 0.3308
Epoch: 29, Batch: 400, loss: 4.8978 , Train PPL: 1.0075, Train Acc: 0.3689
Epoch: 29, Batch: 500, loss: 4.7492 , Train PPL: 1.0073, Train Acc: 0.3537
Epoch: 29, Batch: 600, loss: 4.6855 , Train PPL: 1.0072, Train Acc: 0.3537
Epoch: 29, Batch:

Epoch: 41, Batch: 500, loss: 4.8474 , Train PPL: 1.0074, Train Acc: 0.3994
Epoch: 41, Batch: 600, loss: 4.5539 , Train PPL: 1.0070, Train Acc: 0.3689
Epoch: 41, Batch: 700, loss: 4.5167 , Train PPL: 1.0069, Train Acc: 0.4253
Validation --- Epoch: 41, total loss: 300.5365 , PPL: 1.2788, Acc: 0.3643
lr = 1.0
wrote model
Epoch: 42, Batch: 100, loss: 4.8114 , Train PPL: 1.0074, Train Acc: 0.3003
Epoch: 42, Batch: 200, loss: 4.6496 , Train PPL: 1.0071, Train Acc: 0.3811
Epoch: 42, Batch: 300, loss: 4.7148 , Train PPL: 1.0072, Train Acc: 0.3613
Epoch: 42, Batch: 400, loss: 4.4732 , Train PPL: 1.0068, Train Acc: 0.4177
Epoch: 42, Batch: 500, loss: 4.4821 , Train PPL: 1.0069, Train Acc: 0.4345
Epoch: 42, Batch: 600, loss: 4.3064 , Train PPL: 1.0066, Train Acc: 0.4024
Epoch: 42, Batch: 700, loss: 4.8553 , Train PPL: 1.0074, Train Acc: 0.3430
Validation --- Epoch: 42, total loss: 300.8099 , PPL: 1.2792, Acc: 0.3675
lr = 1.0
Epoch: 43, Batch: 100, loss: 4.5383 , Train PPL: 1.0069, Train Acc: 0.37

Epoch: 55, Batch: 100, loss: 4.1787 , Train PPL: 1.0064, Train Acc: 0.4360
Epoch: 55, Batch: 200, loss: 4.4781 , Train PPL: 1.0068, Train Acc: 0.3659
Epoch: 55, Batch: 300, loss: 4.2238 , Train PPL: 1.0065, Train Acc: 0.4466
Epoch: 55, Batch: 400, loss: 4.4778 , Train PPL: 1.0068, Train Acc: 0.4101
Epoch: 55, Batch: 500, loss: 4.7884 , Train PPL: 1.0073, Train Acc: 0.3186
Epoch: 55, Batch: 600, loss: 4.4560 , Train PPL: 1.0068, Train Acc: 0.3780
Epoch: 55, Batch: 700, loss: 4.4922 , Train PPL: 1.0069, Train Acc: 0.3613
Validation --- Epoch: 55, total loss: 299.8455 , PPL: 1.2783, Acc: 0.3739
lr = 0.5
Epoch: 56, Batch: 100, loss: 4.6940 , Train PPL: 1.0072, Train Acc: 0.3476
Epoch: 56, Batch: 200, loss: 4.5090 , Train PPL: 1.0069, Train Acc: 0.3796
Epoch: 56, Batch: 300, loss: 4.1974 , Train PPL: 1.0064, Train Acc: 0.4253
Epoch: 56, Batch: 400, loss: 4.5920 , Train PPL: 1.0070, Train Acc: 0.3430
Epoch: 56, Batch: 500, loss: 4.6046 , Train PPL: 1.0070, Train Acc: 0.4070
Epoch: 56, Batch:

Epoch: 68, Batch: 500, loss: 4.7121 , Train PPL: 1.0072, Train Acc: 0.3674
Epoch: 68, Batch: 600, loss: 4.5101 , Train PPL: 1.0069, Train Acc: 0.3857
Epoch: 68, Batch: 700, loss: 4.7485 , Train PPL: 1.0073, Train Acc: 0.3323
Validation --- Epoch: 68, total loss: 299.7613 , PPL: 1.2782, Acc: 0.3790
lr = 0.25
Epoch: 69, Batch: 100, loss: 4.3615 , Train PPL: 1.0067, Train Acc: 0.3918
Epoch: 69, Batch: 200, loss: 4.5387 , Train PPL: 1.0069, Train Acc: 0.4268
Epoch: 69, Batch: 300, loss: 4.7952 , Train PPL: 1.0073, Train Acc: 0.3582
Epoch: 69, Batch: 400, loss: 4.5317 , Train PPL: 1.0069, Train Acc: 0.3720
Epoch: 69, Batch: 500, loss: 4.3588 , Train PPL: 1.0067, Train Acc: 0.3613
Epoch: 69, Batch: 600, loss: 4.6314 , Train PPL: 1.0071, Train Acc: 0.3613
Epoch: 69, Batch: 700, loss: 4.2725 , Train PPL: 1.0065, Train Acc: 0.4527
Validation --- Epoch: 69, total loss: 299.6820 , PPL: 1.2781, Acc: 0.3791
lr = 0.125
Epoch: 70, Batch: 100, loss: 4.1741 , Train PPL: 1.0064, Train Acc: 0.4741
Epoch:

Epoch: 82, Batch: 100, loss: 4.3462 , Train PPL: 1.0066, Train Acc: 0.4009
Epoch: 82, Batch: 200, loss: 4.6456 , Train PPL: 1.0071, Train Acc: 0.3780
Epoch: 82, Batch: 300, loss: 4.6252 , Train PPL: 1.0071, Train Acc: 0.3552
Epoch: 82, Batch: 400, loss: 4.4814 , Train PPL: 1.0069, Train Acc: 0.3750
Epoch: 82, Batch: 500, loss: 4.3755 , Train PPL: 1.0067, Train Acc: 0.3872
Epoch: 82, Batch: 600, loss: 4.4722 , Train PPL: 1.0068, Train Acc: 0.3796
Epoch: 82, Batch: 700, loss: 4.3463 , Train PPL: 1.0066, Train Acc: 0.4268
Validation --- Epoch: 82, total loss: 299.7017 , PPL: 1.2781, Acc: 0.3807
lr = 0.03125
Epoch: 83, Batch: 100, loss: 4.5346 , Train PPL: 1.0069, Train Acc: 0.4268
Epoch: 83, Batch: 200, loss: 4.4275 , Train PPL: 1.0068, Train Acc: 0.3628
Epoch: 83, Batch: 300, loss: 4.4590 , Train PPL: 1.0068, Train Acc: 0.4040
Epoch: 83, Batch: 400, loss: 4.1500 , Train PPL: 1.0063, Train Acc: 0.4893
Epoch: 83, Batch: 500, loss: 4.4660 , Train PPL: 1.0068, Train Acc: 0.3354
Epoch: 83, Ba

Epoch: 95, Batch: 500, loss: 4.4364 , Train PPL: 1.0068, Train Acc: 0.3979
Epoch: 95, Batch: 600, loss: 4.4400 , Train PPL: 1.0068, Train Acc: 0.3689
Epoch: 95, Batch: 700, loss: 4.5127 , Train PPL: 1.0069, Train Acc: 0.3780
Validation --- Epoch: 95, total loss: 299.6606 , PPL: 1.2781, Acc: 0.3813
lr = 0.0078125
Epoch: 96, Batch: 100, loss: 4.7801 , Train PPL: 1.0073, Train Acc: 0.3140
Epoch: 96, Batch: 200, loss: 4.4531 , Train PPL: 1.0068, Train Acc: 0.4070
Epoch: 96, Batch: 300, loss: 4.6568 , Train PPL: 1.0071, Train Acc: 0.3506
Epoch: 96, Batch: 400, loss: 4.5526 , Train PPL: 1.0070, Train Acc: 0.3476
Epoch: 96, Batch: 500, loss: 4.5860 , Train PPL: 1.0070, Train Acc: 0.3674
Epoch: 96, Batch: 600, loss: 4.2010 , Train PPL: 1.0064, Train Acc: 0.3643
Epoch: 96, Batch: 700, loss: 4.2710 , Train PPL: 1.0065, Train Acc: 0.4268
Validation --- Epoch: 96, total loss: 299.6624 , PPL: 1.2781, Acc: 0.3813
lr = 0.0078125
Epoch: 97, Batch: 100, loss: 4.6833 , Train PPL: 1.0072, Train Acc: 0.35

In [54]:
model = TCN(4, [600,600,600], kernel=3, dropout=0.45, embedding_size = 600, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_4_layers_k3_600_filters_2.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 12497801 parameters
Receptive field of network is 62
Epoch: 0, Batch: 100, loss: 6.5419 , Train PPL: 1.0100, Train Acc: 0.1540
Epoch: 0, Batch: 200, loss: 6.3844 , Train PPL: 1.0098, Train Acc: 0.1936
Epoch: 0, Batch: 300, loss: 6.2338 , Train PPL: 1.0095, Train Acc: 0.2073
Epoch: 0, Batch: 400, loss: 6.0998 , Train PPL: 1.0093, Train Acc: 0.2363
Epoch: 0, Batch: 500, loss: 6.2956 , Train PPL: 1.0096, Train Acc: 0.2210
Epoch: 0, Batch: 600, loss: 6.0092 , Train PPL: 1.0092, Train Acc: 0.2561
Epoch: 0, Batch: 700, loss: 6.1426 , Train PPL: 1.0094, Train Acc: 0.2378
Validation --- Epoch: 0, total loss: 330.8627 , PPL: 1.3098, Acc: 0.2785
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 5.9273 , Train PPL: 1.0091, Train Acc: 0.2378
Epoch: 1, Batch: 200, loss: 5.8736 , Train PPL: 1.0090, Train Acc: 0.2622
Epoch: 1, Batch: 300, loss: 5.9605 , Train PPL: 1.0091, Train Acc: 0.2378
Epoch: 1, Batch: 400, loss: 5.9158 , Train PPL: 1.0091, Train Acc: 0.2515
Epoch: 1, Batch: 500, loss: 5.9485 , Train PPL: 1.0091, Train Acc: 0.3003
Epoch: 1, Batch: 600, loss: 5.3175 , Train PPL: 1.0081, Train Acc: 0.3643
Epoch: 1, Batch: 700, loss: 5.7331 , Train PPL: 1.0088, Train Acc: 0.2866
Validation --- Epoch: 1, total loss: 319.0126 , PPL: 1.2970, Acc: 0.3228
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.6604 , Train PPL: 1.0087, Train Acc: 0.2820
Epoch: 2, Batch: 200, loss: 5.8650 , Train PPL: 1.0090, Train Acc: 0.2957
Epoch: 2, Batch: 300, loss: 5.6612 , Train PPL: 1.0087, Train Acc: 0.2866
Epoch: 2, Batch: 400, loss: 5.0549 , Train PPL: 1.0077, Train Acc: 0.3354
Epoch: 2, Batch: 500, loss: 5.9892 , Train PPL: 1.0092, Train Acc: 0.2652
Epoch: 2, Batch: 600

Epoch: 14, Batch: 500, loss: 4.8003 , Train PPL: 1.0073, Train Acc: 0.3902
Epoch: 14, Batch: 600, loss: 4.6251 , Train PPL: 1.0071, Train Acc: 0.3247
Epoch: 14, Batch: 700, loss: 4.7220 , Train PPL: 1.0072, Train Acc: 0.3476
Validation --- Epoch: 14, total loss: 300.3836 , PPL: 1.2787, Acc: 0.3494
lr = 4
wrote model
Epoch: 15, Batch: 100, loss: 4.3844 , Train PPL: 1.0067, Train Acc: 0.3872
Epoch: 15, Batch: 200, loss: 4.9037 , Train PPL: 1.0075, Train Acc: 0.3476
Epoch: 15, Batch: 300, loss: 4.4512 , Train PPL: 1.0068, Train Acc: 0.4345
Epoch: 15, Batch: 400, loss: 4.3315 , Train PPL: 1.0066, Train Acc: 0.3826
Epoch: 15, Batch: 500, loss: 4.9630 , Train PPL: 1.0076, Train Acc: 0.3338
Epoch: 15, Batch: 600, loss: 5.0294 , Train PPL: 1.0077, Train Acc: 0.3232
Epoch: 15, Batch: 700, loss: 4.6638 , Train PPL: 1.0071, Train Acc: 0.3735
Validation --- Epoch: 15, total loss: 299.2794 , PPL: 1.2775, Acc: 0.3477
lr = 4
wrote model
Epoch: 16, Batch: 100, loss: 4.1521 , Train PPL: 1.0063, Train A

Epoch: 28, Batch: 100, loss: 4.3928 , Train PPL: 1.0067, Train Acc: 0.3979
Epoch: 28, Batch: 200, loss: 3.8187 , Train PPL: 1.0058, Train Acc: 0.4832
Epoch: 28, Batch: 300, loss: 4.5811 , Train PPL: 1.0070, Train Acc: 0.3430
Epoch: 28, Batch: 400, loss: 4.4839 , Train PPL: 1.0069, Train Acc: 0.3262
Epoch: 28, Batch: 500, loss: 4.4560 , Train PPL: 1.0068, Train Acc: 0.3567
Epoch: 28, Batch: 600, loss: 4.3144 , Train PPL: 1.0066, Train Acc: 0.4116
Epoch: 28, Batch: 700, loss: 4.4816 , Train PPL: 1.0069, Train Acc: 0.3369
Validation --- Epoch: 28, total loss: 296.0933 , PPL: 1.2748, Acc: 0.3880
lr = 2.0
Epoch: 29, Batch: 100, loss: 4.1156 , Train PPL: 1.0063, Train Acc: 0.4665
Epoch: 29, Batch: 200, loss: 4.0788 , Train PPL: 1.0062, Train Acc: 0.4070
Epoch: 29, Batch: 300, loss: 4.5404 , Train PPL: 1.0069, Train Acc: 0.3780
Epoch: 29, Batch: 400, loss: 4.3861 , Train PPL: 1.0067, Train Acc: 0.3598
Epoch: 29, Batch: 500, loss: 4.3156 , Train PPL: 1.0066, Train Acc: 0.3948
Epoch: 29, Batch:

Epoch: 41, Batch: 500, loss: 4.1016 , Train PPL: 1.0063, Train Acc: 0.4360
Epoch: 41, Batch: 600, loss: 3.9855 , Train PPL: 1.0061, Train Acc: 0.4558
Epoch: 41, Batch: 700, loss: 3.7811 , Train PPL: 1.0058, Train Acc: 0.4893
Validation --- Epoch: 41, total loss: 296.9861 , PPL: 1.2759, Acc: 0.3988
lr = 0.5
Epoch: 42, Batch: 100, loss: 4.1104 , Train PPL: 1.0063, Train Acc: 0.3765
Epoch: 42, Batch: 200, loss: 4.0471 , Train PPL: 1.0062, Train Acc: 0.4527
Epoch: 42, Batch: 300, loss: 3.9806 , Train PPL: 1.0061, Train Acc: 0.4909
Epoch: 42, Batch: 400, loss: 4.1736 , Train PPL: 1.0064, Train Acc: 0.4024
Epoch: 42, Batch: 500, loss: 3.9789 , Train PPL: 1.0061, Train Acc: 0.3979
Epoch: 42, Batch: 600, loss: 3.9221 , Train PPL: 1.0060, Train Acc: 0.4787
Epoch: 42, Batch: 700, loss: 4.0533 , Train PPL: 1.0062, Train Acc: 0.4405
Validation --- Epoch: 42, total loss: 296.9651 , PPL: 1.2759, Acc: 0.3999
lr = 0.25
Epoch: 43, Batch: 100, loss: 4.3854 , Train PPL: 1.0067, Train Acc: 0.3552
Epoch: 4

Epoch: 55, Batch: 100, loss: 3.8615 , Train PPL: 1.0059, Train Acc: 0.4451
Epoch: 55, Batch: 200, loss: 4.0147 , Train PPL: 1.0061, Train Acc: 0.4238
Epoch: 55, Batch: 300, loss: 4.1914 , Train PPL: 1.0064, Train Acc: 0.3841
Epoch: 55, Batch: 400, loss: 4.0870 , Train PPL: 1.0062, Train Acc: 0.3963
Epoch: 55, Batch: 500, loss: 3.9647 , Train PPL: 1.0061, Train Acc: 0.3933
Epoch: 55, Batch: 600, loss: 4.0429 , Train PPL: 1.0062, Train Acc: 0.4558
Epoch: 55, Batch: 700, loss: 4.0272 , Train PPL: 1.0062, Train Acc: 0.4055
Validation --- Epoch: 55, total loss: 297.4913 , PPL: 1.2765, Acc: 0.4039
lr = 0.0625
Epoch: 56, Batch: 100, loss: 4.4091 , Train PPL: 1.0067, Train Acc: 0.3857
Epoch: 56, Batch: 200, loss: 4.2524 , Train PPL: 1.0065, Train Acc: 0.3659
Epoch: 56, Batch: 300, loss: 3.7303 , Train PPL: 1.0057, Train Acc: 0.4375
Epoch: 56, Batch: 400, loss: 3.9027 , Train PPL: 1.0060, Train Acc: 0.4436
Epoch: 56, Batch: 500, loss: 3.8428 , Train PPL: 1.0059, Train Acc: 0.4451
Epoch: 56, Bat

Epoch: 68, Batch: 500, loss: 4.1065 , Train PPL: 1.0063, Train Acc: 0.4466
Epoch: 68, Batch: 600, loss: 4.1738 , Train PPL: 1.0064, Train Acc: 0.4345
Epoch: 68, Batch: 700, loss: 4.2148 , Train PPL: 1.0064, Train Acc: 0.3902
Validation --- Epoch: 68, total loss: 297.7128 , PPL: 1.2767, Acc: 0.4044
lr = 0.015625
Epoch: 69, Batch: 100, loss: 4.0469 , Train PPL: 1.0062, Train Acc: 0.4146
Epoch: 69, Batch: 200, loss: 4.2358 , Train PPL: 1.0065, Train Acc: 0.3628
Epoch: 69, Batch: 300, loss: 3.9074 , Train PPL: 1.0060, Train Acc: 0.4314
Epoch: 69, Batch: 400, loss: 3.8072 , Train PPL: 1.0058, Train Acc: 0.4665
Epoch: 69, Batch: 500, loss: 4.1199 , Train PPL: 1.0063, Train Acc: 0.4375
Epoch: 69, Batch: 600, loss: 3.9615 , Train PPL: 1.0061, Train Acc: 0.4573
Epoch: 69, Batch: 700, loss: 4.0833 , Train PPL: 1.0062, Train Acc: 0.4466
Validation --- Epoch: 69, total loss: 297.7010 , PPL: 1.2767, Acc: 0.4043
lr = 0.015625
Epoch: 70, Batch: 100, loss: 3.9558 , Train PPL: 1.0060, Train Acc: 0.4314

Validation --- Epoch: 81, total loss: 297.7574 , PPL: 1.2768, Acc: 0.4043
lr = 0.00390625
Epoch: 82, Batch: 100, loss: 4.3383 , Train PPL: 1.0066, Train Acc: 0.3780
Epoch: 82, Batch: 200, loss: 4.0229 , Train PPL: 1.0062, Train Acc: 0.3979
Epoch: 82, Batch: 300, loss: 3.6791 , Train PPL: 1.0056, Train Acc: 0.5107
Epoch: 82, Batch: 400, loss: 4.0946 , Train PPL: 1.0063, Train Acc: 0.4085
Epoch: 82, Batch: 500, loss: 4.1215 , Train PPL: 1.0063, Train Acc: 0.3780
Epoch: 82, Batch: 600, loss: 4.1194 , Train PPL: 1.0063, Train Acc: 0.3552
Epoch: 82, Batch: 700, loss: 3.9657 , Train PPL: 1.0061, Train Acc: 0.4375
Validation --- Epoch: 82, total loss: 297.7550 , PPL: 1.2768, Acc: 0.4044
lr = 0.00390625
Epoch: 83, Batch: 100, loss: 4.3565 , Train PPL: 1.0067, Train Acc: 0.3857
Epoch: 83, Batch: 200, loss: 4.1539 , Train PPL: 1.0064, Train Acc: 0.3979
Epoch: 83, Batch: 300, loss: 3.5311 , Train PPL: 1.0054, Train Acc: 0.5274
Epoch: 83, Batch: 400, loss: 4.2498 , Train PPL: 1.0065, Train Acc: 0.

Epoch: 95, Batch: 300, loss: 4.1071 , Train PPL: 1.0063, Train Acc: 0.3491
Epoch: 95, Batch: 400, loss: 4.0860 , Train PPL: 1.0062, Train Acc: 0.4192
Epoch: 95, Batch: 500, loss: 4.0520 , Train PPL: 1.0062, Train Acc: 0.4421
Epoch: 95, Batch: 600, loss: 4.2026 , Train PPL: 1.0064, Train Acc: 0.3811
Epoch: 95, Batch: 700, loss: 4.2060 , Train PPL: 1.0064, Train Acc: 0.3704
Validation --- Epoch: 95, total loss: 297.7622 , PPL: 1.2768, Acc: 0.4044
lr = 0.0009765625
Epoch: 96, Batch: 100, loss: 4.3149 , Train PPL: 1.0066, Train Acc: 0.3841
Epoch: 96, Batch: 200, loss: 3.9771 , Train PPL: 1.0061, Train Acc: 0.4954
Epoch: 96, Batch: 300, loss: 3.9621 , Train PPL: 1.0061, Train Acc: 0.4238
Epoch: 96, Batch: 400, loss: 3.9832 , Train PPL: 1.0061, Train Acc: 0.4436
Epoch: 96, Batch: 500, loss: 4.3112 , Train PPL: 1.0066, Train Acc: 0.3765
Epoch: 96, Batch: 600, loss: 4.0550 , Train PPL: 1.0062, Train Acc: 0.4375
Epoch: 96, Batch: 700, loss: 4.0768 , Train PPL: 1.0062, Train Acc: 0.4451
Validati

In [55]:
model = TCN(4, [1200,1200,1200], kernel=3, dropout=0.45, embedding_size = 1200, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_4_layers_k3_1200_filters_2.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 37945601 parameters
Receptive field of network is 62
Epoch: 0, Batch: 100, loss: 6.6188 , Train PPL: 1.0101, Train Acc: 0.1280
Epoch: 0, Batch: 200, loss: 6.3271 , Train PPL: 1.0097, Train Acc: 0.1723
Epoch: 0, Batch: 300, loss: 6.0799 , Train PPL: 1.0093, Train Acc: 0.2424
Epoch: 0, Batch: 400, loss: 5.8677 , Train PPL: 1.0090, Train Acc: 0.2500
Epoch: 0, Batch: 500, loss: 6.1308 , Train PPL: 1.0094, Train Acc: 0.2485
Epoch: 0, Batch: 600, loss: 5.6975 , Train PPL: 1.0087, Train Acc: 0.2866
Epoch: 0, Batch: 700, loss: 5.9261 , Train PPL: 1.0091, Train Acc: 0.2820
Validation --- Epoch: 0, total loss: 328.5562 , PPL: 1.3073, Acc: 0.3034
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 5.5057 , Train PPL: 1.0084, Train Acc: 0.2896
Epoch: 1, Batch: 200, loss: 5.8249 , Train PPL: 1.0089, Train Acc: 0.2439
Epoch: 1, Batch: 300, loss: 6.0073 , Train PPL: 1.0092, Train Acc: 0.2530
Epoch: 1, Batch: 400, loss: 5.5218 , Train PPL: 1.0085, Train Acc: 0.2652
Epoch: 1, Batch: 500, loss: 5.7056 , Train PPL: 1.0087, Train Acc: 0.3003
Epoch: 1, Batch: 600, loss: 5.6646 , Train PPL: 1.0087, Train Acc: 0.2515
Epoch: 1, Batch: 700, loss: 5.5139 , Train PPL: 1.0084, Train Acc: 0.2957
Validation --- Epoch: 1, total loss: 317.9680 , PPL: 1.2961, Acc: 0.3198
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.6628 , Train PPL: 1.0087, Train Acc: 0.2896
Epoch: 2, Batch: 200, loss: 5.2338 , Train PPL: 1.0080, Train Acc: 0.3369
Epoch: 2, Batch: 300, loss: 5.1300 , Train PPL: 1.0079, Train Acc: 0.3506
Epoch: 2, Batch: 400, loss: 5.4361 , Train PPL: 1.0083, Train Acc: 0.2973
Epoch: 2, Batch: 500, loss: 5.4712 , Train PPL: 1.0084, Train Acc: 0.3415
Epoch: 2, Batch: 600

Epoch: 14, Batch: 500, loss: 4.3117 , Train PPL: 1.0066, Train Acc: 0.4451
Epoch: 14, Batch: 600, loss: 4.6746 , Train PPL: 1.0072, Train Acc: 0.3399
Epoch: 14, Batch: 700, loss: 4.7082 , Train PPL: 1.0072, Train Acc: 0.3582
Validation --- Epoch: 14, total loss: 295.2891 , PPL: 1.2735, Acc: 0.3834
lr = 4
Epoch: 15, Batch: 100, loss: 3.8884 , Train PPL: 1.0059, Train Acc: 0.4726
Epoch: 15, Batch: 200, loss: 4.1853 , Train PPL: 1.0064, Train Acc: 0.4436
Epoch: 15, Batch: 300, loss: 4.6337 , Train PPL: 1.0071, Train Acc: 0.3643
Epoch: 15, Batch: 400, loss: 4.5683 , Train PPL: 1.0070, Train Acc: 0.3674
Epoch: 15, Batch: 500, loss: 4.8975 , Train PPL: 1.0075, Train Acc: 0.2973
Epoch: 15, Batch: 600, loss: 4.9057 , Train PPL: 1.0075, Train Acc: 0.3064
Epoch: 15, Batch: 700, loss: 4.7426 , Train PPL: 1.0073, Train Acc: 0.3780
Validation --- Epoch: 15, total loss: 296.3316 , PPL: 1.2749, Acc: 0.3774
lr = 4
Epoch: 16, Batch: 100, loss: 4.2540 , Train PPL: 1.0065, Train Acc: 0.4009
Epoch: 16, Ba

Epoch: 28, Batch: 100, loss: 3.9145 , Train PPL: 1.0060, Train Acc: 0.4253
Epoch: 28, Batch: 200, loss: 3.8448 , Train PPL: 1.0059, Train Acc: 0.4649
Epoch: 28, Batch: 300, loss: 3.8934 , Train PPL: 1.0060, Train Acc: 0.4284
Epoch: 28, Batch: 400, loss: 3.9923 , Train PPL: 1.0061, Train Acc: 0.3979
Epoch: 28, Batch: 500, loss: 3.5158 , Train PPL: 1.0054, Train Acc: 0.5655
Epoch: 28, Batch: 600, loss: 3.3992 , Train PPL: 1.0052, Train Acc: 0.5091
Epoch: 28, Batch: 700, loss: 3.8541 , Train PPL: 1.0059, Train Acc: 0.4085
Validation --- Epoch: 28, total loss: 299.7286 , PPL: 1.2789, Acc: 0.4059
lr = 1.0
Epoch: 29, Batch: 100, loss: 3.2703 , Train PPL: 1.0050, Train Acc: 0.5503
Epoch: 29, Batch: 200, loss: 3.4933 , Train PPL: 1.0053, Train Acc: 0.4421
Epoch: 29, Batch: 300, loss: 3.9117 , Train PPL: 1.0060, Train Acc: 0.4146
Epoch: 29, Batch: 400, loss: 3.5925 , Train PPL: 1.0055, Train Acc: 0.5076
Epoch: 29, Batch: 500, loss: 3.9342 , Train PPL: 1.0060, Train Acc: 0.3994
Epoch: 29, Batch:

Epoch: 41, Batch: 500, loss: 3.5210 , Train PPL: 1.0054, Train Acc: 0.4482
Epoch: 41, Batch: 600, loss: 3.6976 , Train PPL: 1.0057, Train Acc: 0.4345
Epoch: 41, Batch: 700, loss: 3.8223 , Train PPL: 1.0058, Train Acc: 0.4131
Validation --- Epoch: 41, total loss: 303.6558 , PPL: 1.2833, Acc: 0.4083
lr = 0.25
Epoch: 42, Batch: 100, loss: 3.4541 , Train PPL: 1.0053, Train Acc: 0.4741
Epoch: 42, Batch: 200, loss: 3.6088 , Train PPL: 1.0055, Train Acc: 0.4466
Epoch: 42, Batch: 300, loss: 3.6988 , Train PPL: 1.0057, Train Acc: 0.4299
Epoch: 42, Batch: 400, loss: 3.7122 , Train PPL: 1.0057, Train Acc: 0.4040
Epoch: 42, Batch: 500, loss: 3.6581 , Train PPL: 1.0056, Train Acc: 0.4619
Epoch: 42, Batch: 600, loss: 3.7213 , Train PPL: 1.0057, Train Acc: 0.4954
Epoch: 42, Batch: 700, loss: 3.8601 , Train PPL: 1.0059, Train Acc: 0.3948
Validation --- Epoch: 42, total loss: 303.8425 , PPL: 1.2835, Acc: 0.4088
lr = 0.25
Epoch: 43, Batch: 100, loss: 3.7242 , Train PPL: 1.0057, Train Acc: 0.4421
Epoch: 

Epoch: 55, Batch: 100, loss: 3.3802 , Train PPL: 1.0052, Train Acc: 0.5290
Epoch: 55, Batch: 200, loss: 3.5500 , Train PPL: 1.0054, Train Acc: 0.4284
Epoch: 55, Batch: 300, loss: 3.6853 , Train PPL: 1.0056, Train Acc: 0.4741
Epoch: 55, Batch: 400, loss: 3.5997 , Train PPL: 1.0055, Train Acc: 0.4024
Epoch: 55, Batch: 500, loss: 3.6116 , Train PPL: 1.0055, Train Acc: 0.4573
Epoch: 55, Batch: 600, loss: 3.4481 , Train PPL: 1.0053, Train Acc: 0.5152
Epoch: 55, Batch: 700, loss: 3.6963 , Train PPL: 1.0057, Train Acc: 0.4512
Validation --- Epoch: 55, total loss: 304.6096 , PPL: 1.2844, Acc: 0.4094
lr = 0.03125
Epoch: 56, Batch: 100, loss: 3.4541 , Train PPL: 1.0053, Train Acc: 0.5335
Epoch: 56, Batch: 200, loss: 3.5377 , Train PPL: 1.0054, Train Acc: 0.5152
Epoch: 56, Batch: 300, loss: 3.6184 , Train PPL: 1.0055, Train Acc: 0.4314
Epoch: 56, Batch: 400, loss: 3.4039 , Train PPL: 1.0052, Train Acc: 0.5335
Epoch: 56, Batch: 500, loss: 3.8714 , Train PPL: 1.0059, Train Acc: 0.3979
Epoch: 56, Ba

Epoch: 68, Batch: 500, loss: 3.6659 , Train PPL: 1.0056, Train Acc: 0.4024
Epoch: 68, Batch: 600, loss: 3.4049 , Train PPL: 1.0052, Train Acc: 0.4878
Epoch: 68, Batch: 700, loss: 3.3603 , Train PPL: 1.0051, Train Acc: 0.5564
Validation --- Epoch: 68, total loss: 304.8464 , PPL: 1.2846, Acc: 0.4086
lr = 0.0078125
Epoch: 69, Batch: 100, loss: 3.3471 , Train PPL: 1.0051, Train Acc: 0.5640
Epoch: 69, Batch: 200, loss: 3.8510 , Train PPL: 1.0059, Train Acc: 0.4101
Epoch: 69, Batch: 300, loss: 3.6821 , Train PPL: 1.0056, Train Acc: 0.4314
Epoch: 69, Batch: 400, loss: 3.8262 , Train PPL: 1.0058, Train Acc: 0.3841
Epoch: 69, Batch: 500, loss: 3.5242 , Train PPL: 1.0054, Train Acc: 0.4497
Epoch: 69, Batch: 600, loss: 3.2034 , Train PPL: 1.0049, Train Acc: 0.5610
Epoch: 69, Batch: 700, loss: 3.8129 , Train PPL: 1.0058, Train Acc: 0.3841
Validation --- Epoch: 69, total loss: 304.8475 , PPL: 1.2846, Acc: 0.4086
lr = 0.0078125
Epoch: 70, Batch: 100, loss: 3.5061 , Train PPL: 1.0054, Train Acc: 0.47

Validation --- Epoch: 81, total loss: 304.8944 , PPL: 1.2847, Acc: 0.4087
lr = 0.001953125
Epoch: 82, Batch: 100, loss: 3.7093 , Train PPL: 1.0057, Train Acc: 0.4665
Epoch: 82, Batch: 200, loss: 3.6441 , Train PPL: 1.0056, Train Acc: 0.4741
Epoch: 82, Batch: 300, loss: 3.5269 , Train PPL: 1.0054, Train Acc: 0.4482
Epoch: 82, Batch: 400, loss: 3.4404 , Train PPL: 1.0053, Train Acc: 0.5152
Epoch: 82, Batch: 500, loss: 3.9217 , Train PPL: 1.0060, Train Acc: 0.4345
Epoch: 82, Batch: 600, loss: 3.6755 , Train PPL: 1.0056, Train Acc: 0.4558
Epoch: 82, Batch: 700, loss: 3.3789 , Train PPL: 1.0052, Train Acc: 0.5061
Validation --- Epoch: 82, total loss: 304.8973 , PPL: 1.2847, Acc: 0.4087
lr = 0.001953125
Epoch: 83, Batch: 100, loss: 3.4672 , Train PPL: 1.0053, Train Acc: 0.4787
Epoch: 83, Batch: 200, loss: 3.6949 , Train PPL: 1.0056, Train Acc: 0.4497
Epoch: 83, Batch: 300, loss: 3.4228 , Train PPL: 1.0052, Train Acc: 0.5061
Epoch: 83, Batch: 400, loss: 3.5663 , Train PPL: 1.0055, Train Acc: 

Epoch: 95, Batch: 300, loss: 3.3859 , Train PPL: 1.0052, Train Acc: 0.5046
Epoch: 95, Batch: 400, loss: 3.5669 , Train PPL: 1.0055, Train Acc: 0.4634
Epoch: 95, Batch: 500, loss: 3.4019 , Train PPL: 1.0052, Train Acc: 0.5198
Epoch: 95, Batch: 600, loss: 3.6852 , Train PPL: 1.0056, Train Acc: 0.3857
Epoch: 95, Batch: 700, loss: 3.4447 , Train PPL: 1.0053, Train Acc: 0.4893
Validation --- Epoch: 95, total loss: 304.9020 , PPL: 1.2847, Acc: 0.4088
lr = 0.00048828125
Epoch: 96, Batch: 100, loss: 3.7878 , Train PPL: 1.0058, Train Acc: 0.4238
Epoch: 96, Batch: 200, loss: 3.3712 , Train PPL: 1.0052, Train Acc: 0.5290
Epoch: 96, Batch: 300, loss: 3.6095 , Train PPL: 1.0055, Train Acc: 0.4314
Epoch: 96, Batch: 400, loss: 3.5345 , Train PPL: 1.0054, Train Acc: 0.4512
Epoch: 96, Batch: 500, loss: 3.6079 , Train PPL: 1.0055, Train Acc: 0.4527
Epoch: 96, Batch: 600, loss: 3.4366 , Train PPL: 1.0053, Train Acc: 0.4756
Epoch: 96, Batch: 700, loss: 3.4851 , Train PPL: 1.0053, Train Acc: 0.4482
Validat

In [56]:
model = TCN(3, [2400,2400], kernel=3, dropout=0.45, embedding_size = 2400, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_3_layers_k3_2400_filters_2.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 93151601 parameters
Receptive field of network is 30
Epoch: 0, Batch: 100, loss: 6.4636 , Train PPL: 1.0099, Train Acc: 0.1753
Epoch: 0, Batch: 200, loss: 6.5999 , Train PPL: 1.0101, Train Acc: 0.1601
Epoch: 0, Batch: 300, loss: 6.2316 , Train PPL: 1.0095, Train Acc: 0.2088
Epoch: 0, Batch: 400, loss: 5.8660 , Train PPL: 1.0090, Train Acc: 0.2637
Epoch: 0, Batch: 500, loss: 6.1481 , Train PPL: 1.0094, Train Acc: 0.2439
Epoch: 0, Batch: 600, loss: 5.8979 , Train PPL: 1.0090, Train Acc: 0.2927
Epoch: 0, Batch: 700, loss: 5.4474 , Train PPL: 1.0083, Train Acc: 0.2973
Validation --- Epoch: 0, total loss: 328.2361 , PPL: 1.3071, Acc: 0.2930
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 6.0499 , Train PPL: 1.0093, Train Acc: 0.2713
Epoch: 1, Batch: 200, loss: 5.7102 , Train PPL: 1.0087, Train Acc: 0.2927
Epoch: 1, Batch: 300, loss: 5.7158 , Train PPL: 1.0088, Train Acc: 0.2881
Epoch: 1, Batch: 400, loss: 5.8043 , Train PPL: 1.0089, Train Acc: 0.2759
Epoch: 1, Batch: 500, loss: 5.5635 , Train PPL: 1.0085, Train Acc: 0.2851
Epoch: 1, Batch: 600, loss: 5.5518 , Train PPL: 1.0085, Train Acc: 0.3369
Epoch: 1, Batch: 700, loss: 5.5929 , Train PPL: 1.0086, Train Acc: 0.3110
Validation --- Epoch: 1, total loss: 314.9261 , PPL: 1.2929, Acc: 0.3331
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 5.6616 , Train PPL: 1.0087, Train Acc: 0.2988
Epoch: 2, Batch: 200, loss: 5.3535 , Train PPL: 1.0082, Train Acc: 0.3811
Epoch: 2, Batch: 300, loss: 5.5667 , Train PPL: 1.0085, Train Acc: 0.3110
Epoch: 2, Batch: 400, loss: 5.7474 , Train PPL: 1.0088, Train Acc: 0.2805
Epoch: 2, Batch: 500, loss: 5.5693 , Train PPL: 1.0085, Train Acc: 0.3308
Epoch: 2, Batch: 600

Epoch: 14, Batch: 500, loss: 4.4728 , Train PPL: 1.0068, Train Acc: 0.3384
Epoch: 14, Batch: 600, loss: 4.4490 , Train PPL: 1.0068, Train Acc: 0.3171
Epoch: 14, Batch: 700, loss: 4.7832 , Train PPL: 1.0073, Train Acc: 0.3186
Validation --- Epoch: 14, total loss: 298.4163 , PPL: 1.2771, Acc: 0.3917
lr = 4
Epoch: 15, Batch: 100, loss: 3.9747 , Train PPL: 1.0061, Train Acc: 0.4329
Epoch: 15, Batch: 200, loss: 3.8776 , Train PPL: 1.0059, Train Acc: 0.4207
Epoch: 15, Batch: 300, loss: 3.9523 , Train PPL: 1.0060, Train Acc: 0.4070
Epoch: 15, Batch: 400, loss: 4.1202 , Train PPL: 1.0063, Train Acc: 0.3841
Epoch: 15, Batch: 500, loss: 4.1186 , Train PPL: 1.0063, Train Acc: 0.4405
Epoch: 15, Batch: 600, loss: 4.3947 , Train PPL: 1.0067, Train Acc: 0.3552
Epoch: 15, Batch: 700, loss: 3.9131 , Train PPL: 1.0060, Train Acc: 0.4390
Validation --- Epoch: 15, total loss: 299.5661 , PPL: 1.2785, Acc: 0.3787
lr = 2.0
Epoch: 16, Batch: 100, loss: 3.6768 , Train PPL: 1.0056, Train Acc: 0.4695
Epoch: 16, 

Epoch: 28, Batch: 100, loss: 3.4143 , Train PPL: 1.0052, Train Acc: 0.4436
Epoch: 28, Batch: 200, loss: 3.1178 , Train PPL: 1.0048, Train Acc: 0.5534
Epoch: 28, Batch: 300, loss: 3.3090 , Train PPL: 1.0051, Train Acc: 0.4345
Epoch: 28, Batch: 400, loss: 3.1607 , Train PPL: 1.0048, Train Acc: 0.4771
Epoch: 28, Batch: 500, loss: 3.4021 , Train PPL: 1.0052, Train Acc: 0.4405
Epoch: 28, Batch: 600, loss: 3.6909 , Train PPL: 1.0056, Train Acc: 0.3796
Epoch: 28, Batch: 700, loss: 3.5678 , Train PPL: 1.0055, Train Acc: 0.4177
Validation --- Epoch: 28, total loss: 314.3120 , PPL: 1.2949, Acc: 0.4010
lr = 0.5
Epoch: 29, Batch: 100, loss: 3.1667 , Train PPL: 1.0048, Train Acc: 0.5000
Epoch: 29, Batch: 200, loss: 2.9772 , Train PPL: 1.0045, Train Acc: 0.4848
Epoch: 29, Batch: 300, loss: 3.3247 , Train PPL: 1.0051, Train Acc: 0.4665
Epoch: 29, Batch: 400, loss: 3.2969 , Train PPL: 1.0050, Train Acc: 0.4360
Epoch: 29, Batch: 500, loss: 3.1048 , Train PPL: 1.0047, Train Acc: 0.5000
Epoch: 29, Batch:

Epoch: 41, Batch: 500, loss: 2.9259 , Train PPL: 1.0045, Train Acc: 0.5625
Epoch: 41, Batch: 600, loss: 3.3131 , Train PPL: 1.0051, Train Acc: 0.4329
Epoch: 41, Batch: 700, loss: 3.2811 , Train PPL: 1.0050, Train Acc: 0.4497
Validation --- Epoch: 41, total loss: 319.3345 , PPL: 1.3004, Acc: 0.4017
lr = 0.125
Epoch: 42, Batch: 100, loss: 2.9616 , Train PPL: 1.0045, Train Acc: 0.5335
Epoch: 42, Batch: 200, loss: 2.7721 , Train PPL: 1.0042, Train Acc: 0.5732
Epoch: 42, Batch: 300, loss: 2.9854 , Train PPL: 1.0046, Train Acc: 0.5412
Epoch: 42, Batch: 400, loss: 3.2397 , Train PPL: 1.0050, Train Acc: 0.4604
Epoch: 42, Batch: 500, loss: 3.2985 , Train PPL: 1.0050, Train Acc: 0.4497
Epoch: 42, Batch: 600, loss: 3.2070 , Train PPL: 1.0049, Train Acc: 0.5046
Epoch: 42, Batch: 700, loss: 3.0748 , Train PPL: 1.0047, Train Acc: 0.5137
Validation --- Epoch: 42, total loss: 319.4344 , PPL: 1.3005, Acc: 0.4013
lr = 0.125
Epoch: 43, Batch: 100, loss: 3.1521 , Train PPL: 1.0048, Train Acc: 0.4878
Epoch

Epoch: 55, Batch: 100, loss: 3.0350 , Train PPL: 1.0046, Train Acc: 0.4970
Epoch: 55, Batch: 200, loss: 2.9461 , Train PPL: 1.0045, Train Acc: 0.5320
Epoch: 55, Batch: 300, loss: 3.0697 , Train PPL: 1.0047, Train Acc: 0.4924
Epoch: 55, Batch: 400, loss: 3.0480 , Train PPL: 1.0047, Train Acc: 0.5305
Epoch: 55, Batch: 500, loss: 3.1348 , Train PPL: 1.0048, Train Acc: 0.4527
Epoch: 55, Batch: 600, loss: 2.9566 , Train PPL: 1.0045, Train Acc: 0.5274
Epoch: 55, Batch: 700, loss: 2.9976 , Train PPL: 1.0046, Train Acc: 0.5015
Validation --- Epoch: 55, total loss: 320.7146 , PPL: 1.3020, Acc: 0.4012
lr = 0.03125
Epoch: 56, Batch: 100, loss: 3.1057 , Train PPL: 1.0047, Train Acc: 0.4939
Epoch: 56, Batch: 200, loss: 3.2583 , Train PPL: 1.0050, Train Acc: 0.5183
Epoch: 56, Batch: 300, loss: 3.0577 , Train PPL: 1.0047, Train Acc: 0.5442
Epoch: 56, Batch: 400, loss: 2.8961 , Train PPL: 1.0044, Train Acc: 0.5244
Epoch: 56, Batch: 500, loss: 3.1420 , Train PPL: 1.0048, Train Acc: 0.5229
Epoch: 56, Ba

Epoch: 68, Batch: 400, loss: 3.0297 , Train PPL: 1.0046, Train Acc: 0.4878
Epoch: 68, Batch: 500, loss: 3.2832 , Train PPL: 1.0050, Train Acc: 0.4390
Epoch: 68, Batch: 600, loss: 2.6709 , Train PPL: 1.0041, Train Acc: 0.6128
Epoch: 68, Batch: 700, loss: 3.1151 , Train PPL: 1.0048, Train Acc: 0.5091
Validation --- Epoch: 68, total loss: 320.9619 , PPL: 1.3023, Acc: 0.4009
lr = 0.0078125
Epoch: 69, Batch: 100, loss: 3.1044 , Train PPL: 1.0047, Train Acc: 0.5122
Epoch: 69, Batch: 200, loss: 2.9397 , Train PPL: 1.0045, Train Acc: 0.5381
Epoch: 69, Batch: 300, loss: 3.3394 , Train PPL: 1.0051, Train Acc: 0.4101
Epoch: 69, Batch: 400, loss: 3.1484 , Train PPL: 1.0048, Train Acc: 0.5320
Epoch: 69, Batch: 500, loss: 2.8130 , Train PPL: 1.0043, Train Acc: 0.5457
Epoch: 69, Batch: 600, loss: 3.0787 , Train PPL: 1.0047, Train Acc: 0.4588
Epoch: 69, Batch: 700, loss: 3.1702 , Train PPL: 1.0048, Train Acc: 0.4680
Validation --- Epoch: 69, total loss: 320.9609 , PPL: 1.3023, Acc: 0.4006
lr = 0.00390

Epoch: 81, Batch: 700, loss: 3.2402 , Train PPL: 1.0050, Train Acc: 0.4756
Validation --- Epoch: 81, total loss: 320.9893 , PPL: 1.3023, Acc: 0.4008
lr = 0.0009765625
Epoch: 82, Batch: 100, loss: 2.9307 , Train PPL: 1.0045, Train Acc: 0.5046
Epoch: 82, Batch: 200, loss: 3.1143 , Train PPL: 1.0048, Train Acc: 0.5290
Epoch: 82, Batch: 300, loss: 3.1432 , Train PPL: 1.0048, Train Acc: 0.4421
Epoch: 82, Batch: 400, loss: 2.6829 , Train PPL: 1.0041, Train Acc: 0.6189
Epoch: 82, Batch: 500, loss: 3.1873 , Train PPL: 1.0049, Train Acc: 0.4741
Epoch: 82, Batch: 600, loss: 3.0647 , Train PPL: 1.0047, Train Acc: 0.4802
Epoch: 82, Batch: 700, loss: 3.0204 , Train PPL: 1.0046, Train Acc: 0.5137
Validation --- Epoch: 82, total loss: 320.9922 , PPL: 1.3023, Acc: 0.4009
lr = 0.0009765625
Epoch: 83, Batch: 100, loss: 2.9823 , Train PPL: 1.0046, Train Acc: 0.5091
Epoch: 83, Batch: 200, loss: 3.0884 , Train PPL: 1.0047, Train Acc: 0.4939
Epoch: 83, Batch: 300, loss: 3.1238 , Train PPL: 1.0048, Train Acc

Epoch: 95, Batch: 100, loss: 3.1320 , Train PPL: 1.0048, Train Acc: 0.4863
Epoch: 95, Batch: 200, loss: 2.5634 , Train PPL: 1.0039, Train Acc: 0.6860
Epoch: 95, Batch: 300, loss: 2.9254 , Train PPL: 1.0045, Train Acc: 0.5396
Epoch: 95, Batch: 400, loss: 3.1410 , Train PPL: 1.0048, Train Acc: 0.5122
Epoch: 95, Batch: 500, loss: 2.5596 , Train PPL: 1.0039, Train Acc: 0.6707
Epoch: 95, Batch: 600, loss: 3.2438 , Train PPL: 1.0050, Train Acc: 0.4893
Epoch: 95, Batch: 700, loss: 3.0958 , Train PPL: 1.0047, Train Acc: 0.4558
Validation --- Epoch: 95, total loss: 321.0082 , PPL: 1.3023, Acc: 0.4009
lr = 0.000244140625
Epoch: 96, Batch: 100, loss: 3.2197 , Train PPL: 1.0049, Train Acc: 0.4375
Epoch: 96, Batch: 200, loss: 3.1035 , Train PPL: 1.0047, Train Acc: 0.5244
Epoch: 96, Batch: 300, loss: 2.8640 , Train PPL: 1.0044, Train Acc: 0.5244
Epoch: 96, Batch: 400, loss: 2.7068 , Train PPL: 1.0041, Train Acc: 0.6296
Epoch: 96, Batch: 500, loss: 3.1603 , Train PPL: 1.0048, Train Acc: 0.4893
Epoch:

In [237]:
model = TCN(5, [800,800,800,800], kernel=2, dropout=0.45, embedding_size = 800, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=3,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_5_layers_k2_800_filters_1.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 18263601 parameters
Receptive field of network is 64
Epoch: 0, Batch: 100, loss: 4419.4844 , Train PPL: 843.0439, Train Acc: 0.1220
Epoch: 0, Batch: 200, loss: 4249.6191 , Train PPL: 650.7190, Train Acc: 0.1204
Epoch: 0, Batch: 300, loss: 4265.4570 , Train PPL: 666.6206, Train Acc: 0.1418
Epoch: 0, Batch: 400, loss: 4035.4622 , Train PPL: 469.4768, Train Acc: 0.2226
Epoch: 0, Batch: 500, loss: 3761.5793 , Train PPL: 309.2391, Train Acc: 0.1997
Epoch: 0, Batch: 600, loss: 4059.3159 , Train PPL: 486.8624, Train Acc: 0.1829
Epoch: 0, Batch: 700, loss: 3935.5732 , Train PPL: 403.1665, Train Acc: 0.2134
Validation --- Epoch: 0, total loss: 219868.5312 , PPL: 364.3120, Acc: 0.2145
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 3790.4998 , Train PPL: 323.1772, Train Acc: 0.2332
Epoch: 1, Batch: 200, loss: 3739.6423 , Train PPL: 299.0689, Train Acc: 0.2424
Epoch: 1, Batch: 300, loss: 3611.4888 , Train PPL: 245.9967, Train Acc: 0.2973
Epoch: 1, Batch: 400, loss: 3780.1719 , Train PPL: 318.1290, Train Acc: 0.2561
Epoch: 1, Batch: 500, loss: 3595.8835 , Train PPL: 240.2139, Train Acc: 0.3064
Epoch: 1, Batch: 600, loss: 3591.4871 , Train PPL: 238.6094, Train Acc: 0.2896
Epoch: 1, Batch: 700, loss: 3566.2009 , Train PPL: 229.5869, Train Acc: 0.3095
Validation --- Epoch: 1, total loss: 207214.1875 , PPL: 259.6427, Acc: 0.3159
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 3694.0825 , Train PPL: 279.0032, Train Acc: 0.2942
Epoch: 2, Batch: 200, loss: 3721.0845 , Train PPL: 290.7270, Train Acc: 0.2759
Epoch: 2, Batch: 300, loss: 3708.8442 , Train PPL: 285.3528, Train Acc: 0.3338
Epoch: 2, Batch: 400, loss: 3678.4421 , Train PPL: 272.4299, Train Acc: 0.2942
Epoch: 2, Batch: 500, loss: 3628.8

Epoch: 13, Batch: 600, loss: 2995.9189 , Train PPL: 96.2501, Train Acc: 0.4024
Epoch: 13, Batch: 700, loss: 3053.4644 , Train PPL: 105.0747, Train Acc: 0.3933
Validation --- Epoch: 13, total loss: 188537.0469 , PPL: 159.6843, Acc: 0.3852
lr = 4
wrote model
Epoch: 14, Batch: 100, loss: 2988.4265 , Train PPL: 95.1570, Train Acc: 0.4131
Epoch: 14, Batch: 200, loss: 3045.3308 , Train PPL: 103.7799, Train Acc: 0.3750
Epoch: 14, Batch: 300, loss: 3252.9695 , Train PPL: 142.4221, Train Acc: 0.3354
Epoch: 14, Batch: 400, loss: 3052.5176 , Train PPL: 104.9231, Train Acc: 0.3735
Epoch: 14, Batch: 500, loss: 3059.5903 , Train PPL: 106.0605, Train Acc: 0.3552
Epoch: 14, Batch: 600, loss: 2695.0078 , Train PPL: 60.8398, Train Acc: 0.5168
Epoch: 14, Batch: 700, loss: 3157.0166 , Train PPL: 123.0419, Train Acc: 0.3720
Validation --- Epoch: 14, total loss: 188597.7656 , PPL: 159.7622, Acc: 0.3828
lr = 4
Epoch: 15, Batch: 100, loss: 3081.4651 , Train PPL: 109.6568, Train Acc: 0.3689
Epoch: 15, Batch: 2

Epoch: 26, Batch: 400, loss: 2724.0710 , Train PPL: 63.5958, Train Acc: 0.4985
Epoch: 26, Batch: 500, loss: 2641.0051 , Train PPL: 56.0320, Train Acc: 0.4192
Epoch: 26, Batch: 600, loss: 2830.8938 , Train PPL: 74.8426, Train Acc: 0.3750
Epoch: 26, Batch: 700, loss: 2869.4814 , Train PPL: 79.3771, Train Acc: 0.3841
Validation --- Epoch: 26, total loss: 187616.0156 , PPL: 156.9619, Acc: 0.4060
lr = 2.0
Epoch: 27, Batch: 100, loss: 2531.3167 , Train PPL: 47.4044, Train Acc: 0.4360
Epoch: 27, Batch: 200, loss: 2560.3345 , Train PPL: 49.5483, Train Acc: 0.4604
Epoch: 27, Batch: 300, loss: 2843.5845 , Train PPL: 76.3046, Train Acc: 0.3735
Epoch: 27, Batch: 400, loss: 2529.7615 , Train PPL: 47.2921, Train Acc: 0.4604
Epoch: 27, Batch: 500, loss: 2727.6643 , Train PPL: 63.9451, Train Acc: 0.4131
Epoch: 27, Batch: 600, loss: 2568.9380 , Train PPL: 50.2025, Train Acc: 0.4588
Epoch: 27, Batch: 700, loss: 2863.6379 , Train PPL: 78.6731, Train Acc: 0.3872
Validation --- Epoch: 27, total loss: 18798

KeyboardInterrupt: 

In [240]:
model = TCN(4, [300,300,300], kernel=3, dropout=0.5, embedding_size = 300, n_words = n_words,tied=True,embedding=word2vec)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_4_layers_k3_word2vec2.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 4633901 parameters
Receptive field of network is 62
Epoch: 0, Batch: 100, loss: 4461.0923 , Train PPL: 898.2474, Train Acc: 0.1067
Epoch: 0, Batch: 200, loss: 4086.4915 , Train PPL: 507.4549, Train Acc: 0.1570
Epoch: 0, Batch: 300, loss: 4237.2412 , Train PPL: 638.5557, Train Acc: 0.1479
Epoch: 0, Batch: 400, loss: 4115.2642 , Train PPL: 530.2076, Train Acc: 0.1723
Epoch: 0, Batch: 500, loss: 4034.4351 , Train PPL: 468.7425, Train Acc: 0.1814
Epoch: 0, Batch: 600, loss: 4090.6443 , Train PPL: 510.6775, Train Acc: 0.2165
Epoch: 0, Batch: 700, loss: 3893.9153 , Train PPL: 378.3601, Train Acc: 0.1966
Validation --- Epoch: 0, total loss: 224179.4688 , PPL: 408.1991, Acc: 0.2064
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 3836.0598 , Train PPL: 346.4200, Train Acc: 0.2348
Epoch: 1, Batch: 200, loss: 3854.4668 , Train PPL: 356.2780, Train Acc: 0.2530
Epoch: 1, Batch: 300, loss: 3693.9321 , Train PPL: 278.9393, Train Acc: 0.2500
Epoch: 1, Batch: 400, loss: 3780.3418 , Train PPL: 318.2115, Train Acc: 0.2348
Epoch: 1, Batch: 500, loss: 3718.5388 , Train PPL: 289.6010, Train Acc: 0.2820
Epoch: 1, Batch: 600, loss: 3890.6375 , Train PPL: 376.4742, Train Acc: 0.2058
Epoch: 1, Batch: 700, loss: 3811.1089 , Train PPL: 333.4914, Train Acc: 0.2713
Validation --- Epoch: 1, total loss: 211806.6562 , PPL: 293.5097, Acc: 0.2591
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 3879.3967 , Train PPL: 370.0782, Train Acc: 0.2271
Epoch: 2, Batch: 200, loss: 3864.2261 , Train PPL: 361.6181, Train Acc: 0.2683
Epoch: 2, Batch: 300, loss: 3796.9519 , Train PPL: 326.3715, Train Acc: 0.2759
Epoch: 2, Batch: 400, loss: 3658.0876 , Train PPL: 264.1067, Train Acc: 0.2652
Epoch: 2, Batch: 500, loss: 3688.3

Epoch: 13, Batch: 600, loss: 3310.7744 , Train PPL: 155.5414, Train Acc: 0.3186
Epoch: 13, Batch: 700, loss: 3248.7510 , Train PPL: 141.5092, Train Acc: 0.3613
Validation --- Epoch: 13, total loss: 193105.9844 , PPL: 179.1382, Acc: 0.3564
lr = 4
Epoch: 14, Batch: 100, loss: 3365.7588 , Train PPL: 169.1405, Train Acc: 0.3582
Epoch: 14, Batch: 200, loss: 3381.1064 , Train PPL: 173.1444, Train Acc: 0.3338
Epoch: 14, Batch: 300, loss: 3557.1633 , Train PPL: 226.4456, Train Acc: 0.2881
Epoch: 14, Batch: 400, loss: 3347.9709 , Train PPL: 164.6158, Train Acc: 0.3598
Epoch: 14, Batch: 500, loss: 3200.5295 , Train PPL: 131.4802, Train Acc: 0.4009
Epoch: 14, Batch: 600, loss: 3451.4070 , Train PPL: 192.7301, Train Acc: 0.2851
Epoch: 14, Batch: 700, loss: 3377.6763 , Train PPL: 172.2413, Train Acc: 0.3445
Validation --- Epoch: 14, total loss: 191680.3750 , PPL: 172.7063, Acc: 0.3673
lr = 4
wrote model
Epoch: 15, Batch: 100, loss: 3401.0010 , Train PPL: 178.4757, Train Acc: 0.3384
Epoch: 15, Batch

Epoch: 26, Batch: 300, loss: 3048.4851 , Train PPL: 104.2801, Train Acc: 0.3720
Epoch: 26, Batch: 400, loss: 3177.4333 , Train PPL: 126.9316, Train Acc: 0.3765
Epoch: 26, Batch: 500, loss: 3290.4629 , Train PPL: 150.7993, Train Acc: 0.3323
Epoch: 26, Batch: 600, loss: 3225.8179 , Train PPL: 136.6476, Train Acc: 0.3430
Epoch: 26, Batch: 700, loss: 3048.5591 , Train PPL: 104.2919, Train Acc: 0.3628
Validation --- Epoch: 26, total loss: 188585.1406 , PPL: 159.2173, Acc: 0.3762
lr = 4
wrote model
Epoch: 27, Batch: 100, loss: 3094.0969 , Train PPL: 111.7888, Train Acc: 0.3857
Epoch: 27, Batch: 200, loss: 3209.3210 , Train PPL: 133.2541, Train Acc: 0.3506
Epoch: 27, Batch: 300, loss: 3324.9995 , Train PPL: 158.9511, Train Acc: 0.3247
Epoch: 27, Batch: 400, loss: 3125.7617 , Train PPL: 117.3171, Train Acc: 0.3293
Epoch: 27, Batch: 500, loss: 3335.7366 , Train PPL: 161.5742, Train Acc: 0.3171
Epoch: 27, Batch: 600, loss: 3280.9246 , Train PPL: 148.6225, Train Acc: 0.3537
Epoch: 27, Batch: 700,

Validation --- Epoch: 38, total loss: 187938.6875 , PPL: 156.9186, Acc: 0.3768
lr = 4
Epoch: 39, Batch: 100, loss: 2965.6387 , Train PPL: 91.9082, Train Acc: 0.4238
Epoch: 39, Batch: 200, loss: 2951.9639 , Train PPL: 90.0122, Train Acc: 0.3537
Epoch: 39, Batch: 300, loss: 3151.8035 , Train PPL: 122.0680, Train Acc: 0.3308
Epoch: 39, Batch: 400, loss: 2840.8645 , Train PPL: 75.9888, Train Acc: 0.4238
Epoch: 39, Batch: 500, loss: 3066.6145 , Train PPL: 107.2023, Train Acc: 0.3613
Epoch: 39, Batch: 600, loss: 3164.7925 , Train PPL: 124.5091, Train Acc: 0.3125
Epoch: 39, Batch: 700, loss: 3039.6597 , Train PPL: 102.8866, Train Acc: 0.4070
Validation --- Epoch: 39, total loss: 187941.4844 , PPL: 157.1770, Acc: 0.3770
lr = 4
Epoch: 40, Batch: 100, loss: 3220.4998 , Train PPL: 135.5443, Train Acc: 0.3186
Epoch: 40, Batch: 200, loss: 3231.2261 , Train PPL: 137.7788, Train Acc: 0.2896
Epoch: 40, Batch: 300, loss: 3156.7534 , Train PPL: 122.9926, Train Acc: 0.3201
Epoch: 40, Batch: 400, loss: 29

Epoch: 51, Batch: 500, loss: 3116.3850 , Train PPL: 115.6522, Train Acc: 0.3354
Epoch: 51, Batch: 600, loss: 2902.3120 , Train PPL: 83.4507, Train Acc: 0.4116
Epoch: 51, Batch: 700, loss: 3118.7886 , Train PPL: 116.0767, Train Acc: 0.3720
Validation --- Epoch: 51, total loss: 185891.4062 , PPL: 148.9198, Acc: 0.3928
lr = 2.0
wrote model
Epoch: 52, Batch: 100, loss: 2862.0383 , Train PPL: 78.4815, Train Acc: 0.4238
Epoch: 52, Batch: 200, loss: 3088.3704 , Train PPL: 110.8171, Train Acc: 0.3841
Epoch: 52, Batch: 300, loss: 2855.5371 , Train PPL: 77.7076, Train Acc: 0.4390
Epoch: 52, Batch: 400, loss: 2963.5693 , Train PPL: 91.6188, Train Acc: 0.3841
Epoch: 52, Batch: 500, loss: 3086.4370 , Train PPL: 110.4911, Train Acc: 0.3415
Epoch: 52, Batch: 600, loss: 2761.7742 , Train PPL: 67.3580, Train Acc: 0.3979
Epoch: 52, Batch: 700, loss: 3069.9504 , Train PPL: 107.7488, Train Acc: 0.3537
Validation --- Epoch: 52, total loss: 185871.2188 , PPL: 148.7552, Acc: 0.3920
lr = 2.0
wrote model
Epoch

Epoch: 64, Batch: 300, loss: 2915.1150 , Train PPL: 85.0954, Train Acc: 0.3735
Epoch: 64, Batch: 400, loss: 2858.2717 , Train PPL: 78.0322, Train Acc: 0.3918
Epoch: 64, Batch: 500, loss: 3089.4714 , Train PPL: 111.0033, Train Acc: 0.3582
Epoch: 64, Batch: 600, loss: 2876.5271 , Train PPL: 80.2342, Train Acc: 0.4421
Epoch: 64, Batch: 700, loss: 2741.1584 , Train PPL: 65.2741, Train Acc: 0.4848
Validation --- Epoch: 64, total loss: 185719.1875 , PPL: 148.4956, Acc: 0.3958
lr = 1.0
wrote model
Epoch: 65, Batch: 100, loss: 2759.4048 , Train PPL: 67.1152, Train Acc: 0.4101
Epoch: 65, Batch: 200, loss: 3020.0657 , Train PPL: 99.8590, Train Acc: 0.3780
Epoch: 65, Batch: 300, loss: 2956.3022 , Train PPL: 90.6094, Train Acc: 0.3933
Epoch: 65, Batch: 400, loss: 3050.2869 , Train PPL: 104.5669, Train Acc: 0.3415
Epoch: 65, Batch: 500, loss: 2871.0701 , Train PPL: 79.5695, Train Acc: 0.3918
Epoch: 65, Batch: 600, loss: 3005.1355 , Train PPL: 97.6119, Train Acc: 0.3567
Epoch: 65, Batch: 700, loss: 

Epoch: 77, Batch: 100, loss: 2987.9290 , Train PPL: 95.0848, Train Acc: 0.3399
Epoch: 77, Batch: 200, loss: 2858.0259 , Train PPL: 78.0030, Train Acc: 0.3704
Epoch: 77, Batch: 300, loss: 2713.0852 , Train PPL: 62.5397, Train Acc: 0.4116
Epoch: 77, Batch: 400, loss: 2881.4680 , Train PPL: 80.8408, Train Acc: 0.3857
Epoch: 77, Batch: 500, loss: 2924.7361 , Train PPL: 86.3526, Train Acc: 0.3841
Epoch: 77, Batch: 600, loss: 2885.3259 , Train PPL: 81.3176, Train Acc: 0.3857
Epoch: 77, Batch: 700, loss: 3004.3293 , Train PPL: 97.4920, Train Acc: 0.3201
Validation --- Epoch: 77, total loss: 185720.8281 , PPL: 148.6611, Acc: 0.3984
lr = 0.5
Epoch: 78, Batch: 100, loss: 2919.7539 , Train PPL: 85.6993, Train Acc: 0.3780
Epoch: 78, Batch: 200, loss: 2702.3442 , Train PPL: 61.5240, Train Acc: 0.4726
Epoch: 78, Batch: 300, loss: 2907.2820 , Train PPL: 84.0854, Train Acc: 0.3902
Epoch: 78, Batch: 400, loss: 2775.6787 , Train PPL: 68.8009, Train Acc: 0.4512
Epoch: 78, Batch: 500, loss: 2835.5330 , Tr

Validation --- Epoch: 89, total loss: 185774.0625 , PPL: 148.9189, Acc: 0.4003
lr = 0.125
Epoch: 90, Batch: 100, loss: 2845.3801 , Train PPL: 76.5137, Train Acc: 0.3918
Epoch: 90, Batch: 200, loss: 2750.4065 , Train PPL: 66.2008, Train Acc: 0.4695
Epoch: 90, Batch: 300, loss: 2859.6965 , Train PPL: 78.2019, Train Acc: 0.3659
Epoch: 90, Batch: 400, loss: 2556.7190 , Train PPL: 49.2760, Train Acc: 0.4817
Epoch: 90, Batch: 500, loss: 2881.4463 , Train PPL: 80.8381, Train Acc: 0.3933
Epoch: 90, Batch: 600, loss: 2837.2773 , Train PPL: 75.5744, Train Acc: 0.3841
Epoch: 90, Batch: 700, loss: 2873.9700 , Train PPL: 79.9221, Train Acc: 0.3857
Validation --- Epoch: 90, total loss: 185741.6562 , PPL: 148.7882, Acc: 0.4006
lr = 0.125
Epoch: 91, Batch: 100, loss: 2595.6006 , Train PPL: 52.2849, Train Acc: 0.4832
Epoch: 91, Batch: 200, loss: 2795.4751 , Train PPL: 70.9088, Train Acc: 0.4040
Epoch: 91, Batch: 300, loss: 2899.3589 , Train PPL: 83.0759, Train Acc: 0.4024
Epoch: 91, Batch: 400, loss: 2

In [None]:
model = TCN(3, [2400,2400], kernel=3, dropout=0.5, embedding_size = 2400, n_words = n_words,tied=True)
model.cuda()
model_parameters = filter(lambda x: x.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(model.receptive_field))
criterion = torch.nn.CrossEntropyLoss(reduction='sum')

optimizer = torch.optim.SGD(model.parameters(), lr=4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=5,min_lr=1e-6,factor=0.5)




best_vloss = 1e8
for e in range(100):
    train_loop(e)
    validation_loss = validation_loop(e)
    scheduler.step(validation_loss)
    print('lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
    if validation_loss < best_vloss:
                with open("model_3_layers_k3_2400.pt", 'wb') as f:
                    print('wrote model')
                    torch.save(model, f)
                best_vloss = validation_loss

Network with 93151601 parameters
Receptive field of network is 30
Epoch: 0, Batch: 100, loss: 4604.0986 , Train PPL: 1117.0461, Train Acc: 0.0991
Epoch: 0, Batch: 200, loss: 4198.4863 , Train PPL: 601.9243, Train Acc: 0.1174
Epoch: 0, Batch: 300, loss: 3981.2520 , Train PPL: 432.2403, Train Acc: 0.1707
Epoch: 0, Batch: 400, loss: 3997.0073 , Train PPL: 442.7472, Train Acc: 0.2210
Epoch: 0, Batch: 500, loss: 4164.5488 , Train PPL: 571.5762, Train Acc: 0.1616
Epoch: 0, Batch: 600, loss: 3640.7839 , Train PPL: 257.2312, Train Acc: 0.2134
Epoch: 0, Batch: 700, loss: 4065.6482 , Train PPL: 491.5847, Train Acc: 0.2271
Validation --- Epoch: 0, total loss: 218778.5000 , PPL: 353.6627, Acc: 0.2563
lr = 4
wrote model


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Epoch: 1, Batch: 100, loss: 3923.4895 , Train PPL: 395.8079, Train Acc: 0.1570
Epoch: 1, Batch: 200, loss: 3828.5520 , Train PPL: 342.4780, Train Acc: 0.2378
Epoch: 1, Batch: 300, loss: 3724.7554 , Train PPL: 292.3585, Train Acc: 0.2988
Epoch: 1, Batch: 400, loss: 3885.8931 , Train PPL: 373.7613, Train Acc: 0.2561
Epoch: 1, Batch: 500, loss: 3825.0310 , Train PPL: 340.6447, Train Acc: 0.2561
Epoch: 1, Batch: 600, loss: 3814.1641 , Train PPL: 335.0482, Train Acc: 0.2713
Epoch: 1, Batch: 700, loss: 3827.1775 , Train PPL: 341.7612, Train Acc: 0.3049
Validation --- Epoch: 1, total loss: 208751.8906 , PPL: 271.0483, Acc: 0.2888
lr = 4
wrote model
Epoch: 2, Batch: 100, loss: 3656.9932 , Train PPL: 263.6664, Train Acc: 0.2927
Epoch: 2, Batch: 200, loss: 3628.8262 , Train PPL: 252.5849, Train Acc: 0.2790
Epoch: 2, Batch: 300, loss: 3458.2314 , Train PPL: 194.7456, Train Acc: 0.3476
Epoch: 2, Batch: 400, loss: 3678.3176 , Train PPL: 272.3782, Train Acc: 0.3323
Epoch: 2, Batch: 500, loss: 3649.2

## Ensemble validation loop

In [None]:
def test_ensemble(models,e=0):
    [m.eval() for m in models]
    
    batch_idx = 0
    ppl = []
    acc = 0
    total_loss = 0
    for batch in val_iter:
        X,y = get_batch(batch)
        prob = [m(X) for m in models]
        prob = torch.mean(torch.stack(prob,dim=3),dim=3)
        # skip some chars for loss
        skip = int(X.shape[1]/2)
        target = y[:, skip:].contiguous()
        output = prob[:, skip:,:].contiguous().transpose(1,2)
        loss = criterion(output, target).cpu().detach()
        total_loss += loss
        batch_idx +=1
        batch_size = X.shape[0]
        ppl.append(np.exp(loss / (batch_size * (seqlen-skip)))) # update
        acc += torch.sum(torch.argmax(prob.cpu().detach(),dim=2) == y.cpu().detach()).float() / torch.FloatTensor([batch_size*(seqlen-skip)])
    print('Validation --- Epoch: %d, total loss: %.4f , PPL: %.4f, Acc: %.4f' % (e, total_loss.cpu().detach(), np.mean(ppl), acc/batch_idx))
    return total_loss

In [None]:
models_store = ['model_4_layers_k3_word2vec2.pt','model_4_layers_k3_600_tied_3.pt','model_5_layers_k2_600_filters_1.pt']
models = []
for path in models_store:
    models.append(torch.load(path))
[m.cuda() for m in models]
test_ensemble(models,0)

make_predictions_for_kaggle_from_ensemble(models)