In [160]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Dirichlet

import re
import math
from collections import Counter
from tqdm import tqdm
from itertools import islice
import numpy as np


import torchtext
from torchtext.vocab import Vectors
from torchtext.data.iterator import BPTTIterator, Iterator
from torchtext.data import Batch, Dataset, Field
from torch.utils.data import DataLoader
from namedtensor import ntorch
from namedtensor.text import NamedField

In [194]:
def make_dataloader(train,shuffle=True):
    train_loader = BPTTIterator(train,50,32,device='cuda',repeat=False)
    text = []
    for i in train_loader:
        text.append(i.text)
    training_data = torch.cat(text[:-1],dim=1)
    train_data = torch.utils.data.TensorDataset(training_data.transpose(1,0))
    t_loader=DataLoader(train_data,batch_size=50,shuffle=shuffle)
    return t_loader

In [195]:
# Our input $x$
TEXT = Field()
# Data distributed with the assignment
train, val, test = torchtext.datasets.LanguageModelingDataset.splits(
    path=".", 
    train="train.txt", validation="valid.txt", test="valid.txt",text_field=TEXT)
TEXT.build_vocab(train)
print('len(TEXT.vocab)', len(TEXT.vocab))

train_loader = make_dataloader(train)
valid_loader = make_dataloader(val,False)
            

len(TEXT.vocab) 10001


In [198]:
### make sure its shuffling
for i in train_loader:
    print(i[0][0])
    break
for i in train_loader:
    print(i[0][0])
    break
    

tensor([ 716,   10, 4469,    5, 5325, 7368,    7,  957, 3906,    8,    2, 1552,
        8633, 4818,   98, 1484, 1254,    3,    2, 8311,   43,  403,    7,    0,
        2859, 4040,    8,    2,  358,   19,    7,  362], device='cuda:0')
tensor([1139,   14,   64,  846,    8,    0, 5873,    3,    2, 5941,  629, 1136,
         763,  379, 5581, 6470, 8560,  314,    4,    4,    3,    2, 2318,    5,
         581, 6482,  354,    6, 6860,  184,  329,   53], device='cuda:0')


In [11]:
LSM = nn.LogSoftmax(dim=1)
from torch.nn.utils import weight_norm
### adapted from https://github.com/locuslab/TCN/ ###
class Chomp1d(torch.nn.Module):
    '''Ensure causal convolutions by removing right most items'''
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size
    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()
    
    
class TC_block(torch.nn.Module):
    def __init__(self, n_in, n_out, kernel, stride, dilation, padding, dropout=0.5):
        super(TC_block, self).__init__()
        self.conv1 = weight_norm(torch.nn.Conv1d(n_in, n_out, kernel,stride=stride,
                                             padding=padding, dilation=dilation))
      
        self.conv2 = weight_norm(nn.Conv1d(n_out, n_out, kernel,stride=stride,
                                           padding=padding, dilation=dilation))
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
    
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
    
        self.chomp1 = Chomp1d(padding)
        self.chomp2 = Chomp1d(padding)
    
        self.block = torch.nn.Sequential(self.conv1,self.chomp1,self.relu1,self.dropout1,
                                          self.conv2,self.chomp2,self.relu2,self.dropout2)
        self.relu = nn.ReLU()
    
        if n_in != n_out:
            self.conv_re = nn.Conv1d(n_in,n_out,kernel_size=1,stride=1,padding=0)
    def forward(self, x):
        out = self.block(x)
        # skip connection
        if x.shape[1]!=out.shape[1]:
            x = self.conv_re(x)
        return self.relu(out + x)
      
      
      
class TCN(torch.nn.Module):
    def __init__(self, n_layers, n_filters, kernel=2, dropout=0.2, embedding_size = 600, n_words = 10001):  
        super(TCN, self).__init__()
        blocks = []
        self.embedding_size = embedding_size
        self.n_words = n_words
        self.embedding = nn.Embedding(self.n_words,self.embedding_size)
    
        self.n_filters = [self.embedding_size] + n_filters
    
        for i in range(1,n_layers):
            dilation = 2 ** i
            n_in = self.n_filters[i-1]
            n_out = self.n_filters[i]
            blocks.append(TC_block(n_in, n_out, kernel, stride=1, dilation=dilation, padding=(kernel-1) * dilation, dropout=0.2))
            
        self.network = nn.Sequential(*blocks)
        self.receptive_field = 1 + 2*(kernel-1)*(2 ** n_layers-1) + 1
        self.output_layer = nn.Linear(n_filters[-1], n_words)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        embed = self.embedding(x)
        hook = self.network(embed.transpose(1,2))
        return self.output_layer(hook.transpose(1,2)).transpose(1,2)

In [12]:
tc_net = TCN(3,[600,600,600])
model_parameters = filter(lambda p: p.requires_grad, tc_net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('Network with {} parameters'.format(params))
print('Receptive field of network is {}'.format(tc_net.receptive_field))

Network with 14896001 parameters
Receptive field of network is 16


In [44]:
tc_net.cuda()
optimizer = torch.optim.Adam(tc_net.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",patience=4,min_lr=1e-6)

def training_loop(net,train_iter,optimizer,criterion,scheduler,e=0):
    net.train()
    for i,b in enumerate(train_iter):
        optimizer.zero_grad()
        data = torch.transpose(b.text.values, dim0=0, dim1=1)
        X = data[:,:-1]
        y = data[:,1:]
        prob = net(X)
        loss = criterion(prob,y)
        loss.backward()
        optimizer.step()
        if np.mod(i,500)==0:
            batch_size = X.shape[0]
            ppl = np.exp(loss.cpu().detach() / (batch_size * 31)) # update
            acc = torch.sum(torch.argmax(prob.cpu().detach(),dim=1) == y.cpu().detach()).float() / torch.FloatTensor([batch_size*31])
            print('Epoch: %d, Batch: %d, loss: %.4f , Train PPL: %.4f, Train Acc: %.4f' % (e, i, loss.cpu().detach(), ppl, acc))
            
def validation_loop(net,val_iter,criterion,scheduler,e=0):
    net.eval()
    acc_val = []
    ppl_val = []
    print('Running validation')
    for i,b in enumerate(val_iter):
        data = torch.transpose(b.text.values, dim0=0, dim1=1)
        X = data[:,:-1]
        y = data[:,1:]
        prob = net(X)
        #h0 = tuple(h.detach() for h in hidden)
        #h0 = [h.data for h in hidden]
        batch_size = X.shape[0]
        loss = criterion(prob,y)
        ppl_val.append(torch.exp(loss.cpu().detach() / (batch_size * 31))) # update
        acc_val.append(torch.sum(torch.argmax(prob.cpu().detach(),dim=1) == y.cpu().detach()).float() / torch.FloatTensor([batch_size*31]))
    scheduler.step(torch.mean(torch.stack(ppl_val)))
    print('Epoch: %d, Val PPL: %.4f, Val Acc: %.4f' % (e,torch.mean(torch.stack(ppl_val)), torch.mean(torch.stack(acc_val))))
    return ppl_val,acc_val
         

In [45]:
for e in range(1):
    training_loop(tc_net,train_iter,optimizer,criterion,scheduler,e)
    ppl_val,acc_val = validation_loop(tc_net,val_iter,criterion,scheduler,e)

Epoch: 0, Batch: 0, loss: 1315.7861 , Train PPL: 69.7189, Train Acc: 0.2581
Epoch: 0, Batch: 500, loss: 1551.8590 , Train PPL: 149.3058, Train Acc: 0.1194
Epoch: 0, Batch: 1000, loss: 1920.3643 , Train PPL: 490.1559, Train Acc: 0.1613
Epoch: 0, Batch: 1500, loss: 2353.6643 , Train PPL: 1983.1969, Train Acc: 0.1774
Epoch: 0, Batch: 2000, loss: 2449.4138 , Train PPL: 2700.8850, Train Acc: 0.1323
Epoch: 0, Batch: 2500, loss: 1842.5327 , Train PPL: 381.3258, Train Acc: 0.1613
Running validation
Epoch: 0, Val PPL: 33647610560512.0000, Val Acc: 0.1773


In [44]:
# load test set

import pandas as pd
    
def write_predictions(net,output_file):
    sentences = []
    for i, l in enumerate(open("input.txt"), 1):
        sentences.append(re.split(' ', l))
    # make predictions
    predictions = []

    for i in range(len(sentences)):
        s = torch.tensor([TEXT.vocab.stoi[j] for j in sentences[i]]).cuda()
        prob, hidden = net(torch.unsqueeze(s, 0))
        top_idx = torch.squeeze(torch.argsort(prob[:,-1,:], descending=True))[:20]
        l_  = [TEXT.vocab.itos[j] for j in top_idx]
        predictions.append(' '.join(l_))
      
    if i % 100 == 0:
        print(i, '/', len(sentences),end="\r",flush=True)
        
    out = pd.DataFrame(index=range(len(predictions)))
    out.index.names = ['id']
    out['word'] = predictions
    out.to_csv('predictions_model3.txt',sep=',')

In [54]:
train_loss = 0
train_words = 0

for b in iter(train_iter):
  data = torch.transpose(b.text.values, dim0=0, dim1=1)
  X = data[:,:-1]
  y = data[:,1:]
  prob = tc_net(X)
  train_loss += criterion(prob, y).detach()
  train_words += X.shape[0] * X.shape[1]
  
train_ppl = torch.exp(train_loss / train_words)
print(train_ppl)

tensor(283.3504, device='cuda:0')


In [97]:
train

<torchtext.datasets.language_modeling.LanguageModelingDataset at 0x7f393a406e48>

In [96]:
for i in dataloader:
    print(i.text.values)
    break

TypeError: Traceback (most recent call last):
  File "/opt/anaconda/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/anaconda/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 234, in default_collate
    raise TypeError((error_msg.format(type(batch[0]))))
TypeError: batch must contain tensors, numbers, dicts or lists; found <class 'torchtext.data.example.Example'>


IndexError: list index out of range