In [1]:
import sys
sys.path.append('..')

import torch
import numpy as np
import hashlib
import torch.nn as nn
import nltk
import torch.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups

from hashembed.embedding import HashEmbedding
from evaluate.data_loader import *
from evaluate.helpers import *

In [30]:
# ---- experiment configuration ----

# Reproducibility
seed = 3

# Vocabulary / input shape
max_words = 100000          # passed to AgNews as maxWord and to the model as its vocabulary size
size_phrase = 150           # passed to AgNews as maxLength
isMasking = True            # default for ModelSimple's isMasking (index 0 treated as padding)

# Embedding layer
use_hash_embeddings = True
embedding_size = 20
num_buckets = 10000         # read by ModelSimple when it builds a HashEmbedding
num_hash_functions = 2

# Model / optimisation
hidden = 50                 # default for ModelSimple's `hidden` argument
max_epochs = 2
batchSize = 32              # default in keras

# Seed both global RNGs; the last expression is left bare so the cell still echoes the Generator.
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x110a2f2b0>

In [31]:
# Build the train and test splits with identical vocabulary size, sequence
# length and hashing settings; only the `train` flag differs.
# NOTE(review): AgNews is presumably provided by the star import from
# evaluate.data_loader — confirm.
trainAg = AgNews(
    maxWord=max_words,
    maxLength=size_phrase,
    train=True,
    hash_function='sha1',
)
testAg = AgNews(
    maxWord=max_words,
    maxLength=size_phrase,
    train=False,
    hash_function='sha1',
)

In [32]:
# Wrap both datasets in mini-batch iterators of `batchSize` samples, in dataset order.
# NOTE(review): the training loader is not shuffled (shuffle=False) — confirm this
# is intentional, e.g. to keep runs comparable.
trainIter = DataLoader(
    dataset=trainAg,
    batch_size=batchSize,
    shuffle=False,
)
testIter = DataLoader(
    dataset=testAg,
    batch_size=batchSize,
    shuffle=False,
)

In [33]:
# Number of target classes, taken from the training dataset's `classes` attribute;
# passed to ModelSimple as the output width of its final linear layer.
num_classes = len(trainAg.classes)

In [34]:
def ReduceSum(x):
    """Sum ``x`` along dimension 1 and return the result.

    The summed dimension is removed, so an input with ``k`` dimensions
    yields an output with ``k - 1`` dimensions.
    """
    return x.sum(dim=1)

def param_from_np(array, requires_grad=True, astype=torch.FloatTensor):
    """Wrap a numpy array as a ``torch.nn.Parameter``.

    Args:
        array: numpy array holding the initial values.
        requires_grad: whether the resulting parameter tracks gradients.
        astype: tensor type the data is cast to before wrapping
            (``torch.FloatTensor`` by default).

    Returns:
        A ``torch.nn.Parameter`` holding the values of ``array`` cast to ``astype``.
    """
    tensor = torch.from_numpy(array)
    tensor = tensor.type(astype)
    return torch.nn.Parameter(tensor, requires_grad=requires_grad)

In [35]:
class ModelSimple(nn.Module):
    """Embed the input, sum the embeddings along dimension 1, then apply one linear layer.

    Args:
        max_words: first argument given to the embedding layer (vocabulary size).
        embedding_size: second argument given to the embedding layer.
        num_classes: output width of the final linear layer.
        hidden: accepted for interface compatibility; not used by this class.
        seed: seeds the global numpy and torch RNGs at construction time and is
            forwarded to ``HashEmbedding`` when ``isHash`` is true.
        isMasking: when true, ``padding_idx`` is 0 for ``nn.Embedding`` and
            ``mask_zero=True`` is passed to ``HashEmbedding``.
        isHash: use ``HashEmbedding`` instead of ``nn.Embedding``.
        **kwargs: forwarded to ``HashEmbedding`` only; ignored when ``isHash`` is false.

    Notes:
        * The hash variant reads the module-level ``num_buckets`` when the model is built.
        * The defaults of ``hidden`` and ``isMasking`` are taken from the module-level
          names at the moment this class statement runs.
    """

    def __init__(self,max_words,embedding_size,num_classes,hidden=hidden,seed=3,isMasking=isMasking,isHash=False,**kwargs):
        # Seed before any layer is created so that layer construction and the
        # explicit re-initialisation below are repeatable.
        np.random.seed(seed)
        torch.manual_seed(seed)
        super().__init__()

        self.isHash = isHash
        self.padding_idx = 0 if isMasking else None

        if not self.isHash:
            self.embedding = nn.Embedding(
                max_words,
                embedding_size,
                padding_idx=self.padding_idx,
            )
            self.output_dim = embedding_size
        else:
            self.embedding = HashEmbedding(
                max_words,
                embedding_size,
                mask_zero=isMasking,
                num_buckets=num_buckets,
                seed=seed,
                **kwargs
            )
            # The hash embedding reports its own output width.
            self.output_dim = self.embedding.output_dim

        self.reduce = ReduceSum
        self.fc1 = nn.Linear(self.output_dim, num_classes)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weights from N(0, 0.05) draws and zero the classifier bias.

        The embedding table is only re-drawn for the plain ``nn.Embedding`` variant;
        a ``HashEmbedding`` keeps whatever initialisation it gave itself.
        """
        if not self.isHash:
            table = np.random.normal(scale=0.05, size=self.embedding.weight.shape)
            # Replacing the weight discards the zero row nn.Embedding set up for
            # padding_idx, so row 0 has to be zeroed again by hand.
            if self.padding_idx is not None:
                table[0, :] = 0
            self.embedding.weight = param_from_np(table)

        fc_weight = np.random.normal(scale=0.05, size=self.fc1.weight.shape)
        fc_bias = np.zeros(self.fc1.bias.shape)
        self.fc1.weight = param_from_np(fc_weight)
        self.fc1.bias = param_from_np(fc_bias)

    def forward(self,x):
        """Return the linear layer's output for ``x``.

        NOTE(review): ``x`` is presumably a (batch, sequence) tensor of token
        indices, which would give (batch, num_classes) scores — confirm against the data loader.
        """
        embedded = self.embedding(x)
        pooled = self.reduce(embedded)
        return self.fc1(pooled)

In [28]:
%%time 

np.random.seed(seed)
torch.manual_seed(seed)

model = ModelSimple(max_words,embedding_size,num_classes,isHash=True,append_weight=False)
criterion=nn.CrossEntropyLoss() # Cross Entropy also computes softmax  !!!
optimizer=torch.optim.Adam(model.parameters())

i=0
print('Num parameters in model: {}'.format(sum([np.prod(p.shape) for p in model.parameters()])))
print("Train on {} samples, validate on {} samples".format(len(trainIter.dataset),len(testIter.dataset)))
for epoch in range(max_epochs):
    for x,y in trainIter:
        x = Variable(x)
        y = Variable(y).squeeze(1)

        optimizer.zero_grad() # Reset gradients
        outputs = model(x)
        loss = criterion(outputs,y)
        loss.backward()
        optimizer.step()
        
        i += 1
        if i % 500 == 0:
            total, correct = 0,0
            for xTest,yTest in testIter:
                yTest = yTest.squeeze(1)
                xTest = Variable(xTest)
                outputs = model(xTest)
                _,predicted=torch.max(outputs.data,1)
                correct += (predicted.cpu() == yTest.cpu()).sum()
                total += yTest.size(0)
            accuracy = correct / total
            print("Iter: {}. Loss: {}. Acc: {}.".format(i,loss.data[0],accuracy))
        
print([p.shape for p in model.parameters()])

Num parameters in model: 400084
Train on 120000 samples, validate on 7600 samples
Iter: 500. Loss: 0.4013097584247589. Acc: 0.855.
Iter: 1000. Loss: 0.2846349775791168. Acc: 0.8814473684210526.
Iter: 1500. Loss: 0.1797334849834442. Acc: 0.8885526315789474.
Iter: 2000. Loss: 0.7115187644958496. Acc: 0.8918421052631579.
Iter: 2500. Loss: 0.3064766228199005. Acc: 0.9025.
Iter: 3000. Loss: 0.23164613544940948. Acc: 0.9069736842105263.
Iter: 3500. Loss: 0.18615978956222534. Acc: 0.9101315789473684.
Iter: 4000. Loss: 0.3013129234313965. Acc: 0.9132894736842105.
Iter: 4500. Loss: 0.2002478837966919. Acc: 0.9126315789473685.
Iter: 5000. Loss: 0.20568497478961945. Acc: 0.9139473684210526.
Iter: 5500. Loss: 0.1013067290186882. Acc: 0.9097368421052632.
Iter: 6000. Loss: 0.27220240235328674. Acc: 0.9084210526315789.
Iter: 6500. Loss: 0.30281955003738403. Acc: 0.9107894736842105.
Iter: 7000. Loss: 0.18671970069408417. Acc: 0.91.
Iter: 7500. Loss: 0.06923487782478333. Acc: 0.9123684210526316.
[torch

In [36]:
%%time 

np.random.seed(seed)
torch.manual_seed(seed)

model = ModelSimple(max_words,embedding_size,num_classes,isHash=False,append_weight=False)
criterion=nn.CrossEntropyLoss() # Cross Entropy also computes softmax  !!!
optimizer=torch.optim.Adam(model.parameters())

i=0
print('Num parameters in model: {}'.format(sum([np.prod(p.shape) for p in model.parameters()])))
print("Train on {} samples, validate on {} samples".format(len(trainIter.dataset),len(testIter.dataset)))
for epoch in range(max_epochs):
    for x,y in trainIter:
        x = Variable(x)
        y = Variable(y).squeeze(1)

        optimizer.zero_grad() # Reset gradients
        outputs = model(x)
        loss = criterion(outputs,y)
        loss.backward()
        optimizer.step()
        
        i += 1
        if i % 500 == 0:
            total, correct = 0,0
            for xTest,yTest in testIter:
                yTest = yTest.squeeze(1)
                xTest = Variable(xTest)
                outputs = model(xTest)
                _,predicted=torch.max(outputs.data,1)
                correct += (predicted.cpu() == yTest.cpu()).sum()
                total += yTest.size(0)
            accuracy = correct / total
            print("Iter: {}. Loss: {}. Acc: {}.".format(i,loss.data[0],accuracy))
        
print([p.shape for p in model.parameters()])

Num parameters in model: 400084
Train on 120000 samples, validate on 7600 samples
Iter: 500. Loss: 0.3609367907047272. Acc: 0.8542105263157894.
Iter: 1000. Loss: 0.30413705110549927. Acc: 0.8809210526315789.
Iter: 1500. Loss: 0.19680339097976685. Acc: 0.8894736842105263.
Iter: 2000. Loss: 0.7089694738388062. Acc: 0.8928947368421053.
Iter: 2500. Loss: 0.2150319367647171. Acc: 0.8988157894736842.
Iter: 3000. Loss: 0.2716756761074066. Acc: 0.9076315789473685.
Iter: 3500. Loss: 0.21767686307430267. Acc: 0.9073684210526316.
Iter: 4000. Loss: 0.25736936926841736. Acc: 0.9072368421052631.
Iter: 4500. Loss: 0.21957768499851227. Acc: 0.9090789473684211.
Iter: 5000. Loss: 0.19243374466896057. Acc: 0.9042105263157895.
Iter: 5500. Loss: 0.06525357067584991. Acc: 0.9036842105263158.
Iter: 6000. Loss: 0.33062615990638733. Acc: 0.9003947368421052.
Iter: 6500. Loss: 0.20704275369644165. Acc: 0.9069736842105263.
Iter: 7000. Loss: 0.2019444853067398. Acc: 0.9044736842105263.
Iter: 7500. Loss: 0.07641279