In [4]:
require 'nn'
require 'hdf5'

### LOADING THE DATA AND CONVERTING IT TO LOGREG FW:

In [5]:
myFile = hdf5.open('PTB.hdf5','r')

In [6]:
data = myFile:all()
myFile:close()

In [7]:
data

{
  train_output : LongTensor - size: 912666
  train_input_cap_windows : LongTensor - size: 912666x5
  test_input_word_windows : LongTensor - size: 129696x5
  valid_output : LongTensor - size: 131808
  valid_input_cap_windows : LongTensor - size: 131808x5
  nwords : IntTensor - size: 1
  test_input_cap_windows : LongTensor - size: 129696x5
  train_input_word_windows : LongTensor - size: 912666x5
  nclasses : IntTensor - size: 1
  word_embeddings : DoubleTensor - size: 400002x50
  valid_input_word_windows : LongTensor - size: 131808x5
}


In [8]:
train_input_word_windows = data['train_input_word_windows']
train_output = data['train_output']

In [9]:
train = train_input_word_windows:clone()

In [10]:
train_input_cap_windows = data['train_input_cap_windows']
train_cap = train_input_cap_windows:clone()
for j = 1, 5 do
    train:narrow(2,j,1):add((j-1)*100002)
end
for j = 1, 5 do
    train_cap:narrow(2,j,1):add((j-1)*4)
end

### TRAINING:

In [12]:
linreg = nn.Sequential()

In [13]:
par = nn.ParallelTable()

In [14]:
par:add(nn.LookupTable(5*data['nwords'][1],data['nclasses'][1])) -- first child
par:add(nn.LookupTable(5*4,data['nclasses'][1])) 

In [15]:
linreg_wc = nn.Sequential()
linreg_wc:add(par)
linreg_wc:add(nn.CAddTable())
linreg_wc:add(nn.Sum(2))
linreg_wc:add(nn.Add(45))
linreg_wc:add(nn.LogSoftMax())

In [20]:
-- Sanity check:
linreg_wc:forward({train:narrow(1,5, 10),train_cap:narrow(1,5, 10)})

Columns 1 to 8
 -6.0055  -4.9296  -8.7408  -2.8092 -10.7994  -8.2520  -7.7948  -6.6132
 -7.7342  -3.4312  -7.1430  -5.7468  -7.7104  -7.7696 -11.3573  -5.9405
-13.8382  -5.4248 -11.5133  -4.5599 -10.2738  -4.1189 -10.6515  -8.4949
-13.3956  -0.1216 -13.5374  -8.2611  -5.6333  -4.8360 -14.7254  -9.1043
 -8.6703  -3.8077 -11.0128  -6.3796  -9.4844  -4.4842 -10.9414 -10.0560
 -9.0265  -5.9098 -11.6009 -10.0960  -5.7907  -4.2499 -15.0949  -7.6290
 -6.3499  -0.4661  -6.2313  -3.3386 -10.1042  -3.7913 -10.3408 -10.5862
-15.0915  -5.8688 -13.8706  -6.0483  -7.4689  -7.3352  -8.8640 -13.4190
 -8.2211  -6.3168 -11.6299  -8.0336 -10.3626  -9.8184  -8.0784 -10.2295
-12.9501 -10.7326 -10.4127  -6.1787  -8.7891  -8.6312 -11.1573  -9.0545

Columns 9 to 16
 -8.3375  -3.5441  -6.6724 -12.0109  -6.4086  -2.4705 -11.8305  -3.0104
-12.6474  -0.5909 -12.2291  -3.7573  -6.0375 -11.3018  -9.1724  -7.3335
-13.9362  -0.1635 -12.1281  -8.6869  -4.3848  -9.9905 -10.2757  -7.1345
-12.6207  -3.8384 -11.3386  -6.5

809  -8.6053
-11.7348 -13.6495  -5.9996 -12.5039 -13.4178
[torch.DoubleTensor of size 10x45]



In [None]:
-- linreg_w:add(nn.LookupTable(5*data['nwords'][1],data['nclasses'][1]))
-- linreg_w:add(nn.Sum(2))
-- linreg_w:add(nn.Add(data['nclasses'][1]))
-- linreg_w:add(nn.LogSoftMax())

In [16]:
criterion = nn.ClassNLLCriterion()

In [18]:
 train:size(1)/60/500

30.4222	


In [22]:
eta = 0.01
max_e = 20
input_w = torch.Tensor(60,5)
input_c = torch.Tensor(60,5)
output = torch.Tensor(60)
preds = torch.Tensor(60,45)
loss_tensor = torch.Tensor(15*max_e)

In [44]:
for i = 1,max_e do
    print("EPOCH: "..i)
    k = 1
    --28520
    for j = 1,15211 do
        linreg_wc:zeroGradParameters()
        
        input_w:copy(train:narrow(1, (j-1)*60+1, 60))
        input_c:copy(train_cap:narrow(1, (j-1)*60+1, 60))
        preds:copy(linreg_wc:forward({input_w,input_c}))
        
        output:copy(train_output:narrow(1,(j-1)*60+1, 60))
        
        loss = criterion:forward(preds, output)
        
        if j % 1000 == 0 then
            print(j)
            loss_tensor[k] = loss
            k = k + 1
        end
        
        dLdpreds = criterion:backward(preds, output)
        
        linreg_wc:backward({input_w,input_c}, dLdpreds)
        
        linreg_wc:updateParameters(eta)
        
    end
    
end

EPOCH: 1	


1000	


2000	


3000	


4000	


5000	


6000	


7000	


8000	


9000	


10000	


11000	


12000	


13000	


14000	


15000	


EPOCH: 2	


1000	


2000	


3000	


4000	


5000	


6000	


7000	


8000	


9000	


10000	


11000	


12000	


13000	


14000	


15000	


EPOCH: 3	


1000	


2000	


3000	


4000	


5000	


6000	


7000	


8000	


9000	


10000	


11000	


12000	


13000	


14000	


15000	


EPOCH: 4	


1000	


2000	


3000	


4000	


5000	


6000	


7000	


8000	


9000	


10000	


11000	


12000	


13000	


14000	


15000	


EPOCH: 5	


1000	


2000	


3000	


4000	


5000	


6000	


7000	


8000	


9000	


10000	


11000	


12000	


13000	


14000	


15000	


### TRAINING ACCU: 

In [27]:
-- train_acc = torch.zeros(4)

In [54]:
preds_train = linreg_wc:forward({train,train_cap})

In [55]:
m,a = preds_train:max(2)

In [56]:
acc = 0
for i = 1, train:size(1) do
    if a[i][1] == train_output[i] then
        acc = acc + 1
    end
end
print("Accuracy on train is: "..acc/train:size(1))

Accuracy on train is: 0.70046216249975	


In [57]:
train_acc[4] = acc/train:size(1)

### VALIDATION:

In [33]:
-- val_acc = torch.zeros(4)

In [29]:
val_word = data['valid_input_word_windows']:clone()
val_cap = data['valid_input_cap_windows']:clone()

for j = 1, 5 do
    val_word:narrow(2,j,1):add((j-1)*100002)
end

for j = 1, 5 do
    val_cap:narrow(2,j,1):add((j-1)*4)
end

In [58]:
pred_val = linreg_wc:forward({val_word,val_cap})

In [59]:
m_v,a_v = pred_val:max(2)

In [60]:
acc_v = 0
for i = 1, data['valid_output']:size(1) do
    if a_v[i][1] == data['valid_output'][i] then
        acc_v = acc_v + 1
    end
end
print("Accuracy on validation is: "..acc_v/val_word:size(1))

Accuracy on validation is: 0.70328052925467	


In [62]:
val_acc[4] = acc_v/val_word:size(1)

## Neural Network:

In [49]:
hp1 = 50

In [78]:
nn1 = nn.Sequential()

In [79]:
par2 = nn.ParallelTable()
par2:add(nn.LookupTable(data['nwords'][1],hp1)) -- first child
par2:add(nn.LookupTable(4,hp1)) 

In [80]:
nn1:add(par2)
nn1:add(nn.CAddTable())
nn1:add(nn.Sum(2))
nn1:add(nn.Add(hp1))
nn1:add(nn.HardTanh())
nn1:add(nn.Linear(hp1,45))

In [75]:
criterion2 = nn.ClassNLLCriterion()

In [76]:
timer = torch.Timer()

eta = 0.01
max_e = 1
hp1 = 50
batchsize = 100
input_w = torch.Tensor(100,5)
input_c = torch.Tensor(100,5)
output = torch.Tensor(100)

loss_tensor = torch.Tensor(18*max_e)

k = 1

for i = 1,max_e do
    print("EPOCH: "..i)
    
    for j = 1,torch.floor(train:size(1)/batchsize) do
        nn1:zeroGradParameters()
        
        input_w = train_input_word_windows:narrow(1, (j-1)*batchsize+1, batchsize)
        input_c = train_input_cap_windows:narrow(1, (j-1)*batchsize+1, batchsize)
        preds = nn1:forward({input_w,input_c})
        
        output = train_output:narrow(1,(j-1)*batchsize+1, batchsize)
        
        loss = criterion2:forward(preds, output)
        
        if j % 500 == 0 then
            loss_tensor[k] = loss
            k = k + 1
        end
        
        dLdpreds = criterion2:backward(preds, output)
        
        nn1:backward({input_w,input_c}, dLdpreds)
        
        nn1:updateParameters(eta)
        
    end
    
end

print('Time elapsed for 1 epoch: ' .. timer:time().real .. ' seconds')

EPOCH: 1	


Time elapsed for 1 epoch: 104.36425614357 seconds	


In [77]:
loss_tensor

 -10.4673
 -20.1917
 -40.5038
 -55.5550
 -67.3850
 -94.7285
-116.4675
-132.2956
-146.8417
-142.9090
-188.5284
-211.0211
-257.5870
-231.1713
-217.2197
-257.8606
-309.2840
-330.2755
[torch.DoubleTensor of size 18]

