In [1]:
require 'nn'
require 'hdf5'

local Squeeze, parent = torch.class('nn.Squeeze', 'nn.Module')

function Squeeze:updateOutput(input)
    self.size = input:size()
    self.output = input:squeeze()
    return self.output
end

function Squeeze:updateGradInput(input, gradOutput)
  self.gradInput = gradOutput:view(self.size)
  return self.gradInput  
end

myFile = hdf5.open('PTB.hdf5','r')
data = myFile:all()
myFile:close()

In [34]:
train_input_word_windows = data['train_input_word_windows']
train_input_cap_windows = data['train_input_cap_windows']

train = train_input_word_windows:clone()
train_cap = train_input_cap_windows:clone()
train_output = data['train_output']

In [5]:
dim_hidden = 50
dim_hidden2 = 50
--Define the module
neuralnet = nn.Sequential()

--Include the lookup tables
neuralnet:add(nn.LookupTable(data['nwords'][1],dim_hidden))
neuralnet:add(nn.View(1,-1,5*50))
neuralnet:add(nn.Squeeze()) -- this layer is to go from a 1xAxB tensor to AxB dimensional tensor (https://groups.google.com/forum/#!topic/torch7/u4OEc0GB74k)
neuralnet:add(nn.Linear(5*dim_hidden,dim_hidden2))
neuralnet:add(nn.HardTanh())
neuralnet:add(nn.Linear(dim_hidden2, data['nclasses'][1]))
neuralnet:add(nn.LogSoftMax())

In [6]:
dataset={};
for i=1,train:size(1) do 
  dataset[i] = {train[i]:view(1,5), train_output[i]}
end
function dataset:size() return train:size(1) end -- 100 examples

In [7]:
criterion = nn.ClassNLLCriterion()

In [14]:
timer = torch.Timer()

trainer = nn.StochasticGradient(neuralnet, criterion)
trainer.learningRate = 0.001
trainer.maxIteration = 10
trainer:train(dataset)

print('Time elapsed: ' .. timer:time().real .. ' seconds',"\n")


# StochasticGradient: training	


# current error = 0.25574613654007	


# current error = 0.2501719415205	


# current error = 0.24487123168396	


# current error = 0.23992389741266	


# current error = 0.23531232038182	


# current error = 0.23095860899926	


# current error = 0.22691767798042	


# current error = 0.22318743010994	


# current error = 0.21957943355692	


# current error = 0.21625220273859	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 0.21625220273859	
Time elapsed: 1561.8227100372 seconds	
	


In [15]:

val_word = data['valid_input_word_windows']:clone()
val_output = data['valid_output']:clone()

pred_train = neuralnet:forward(train)
max,argmax_train = pred_train:max(2)
acc_train = 0
for i = 1, train_output:size(1) do
    if argmax_train[i][1] == train_output[i] then
        acc_train = acc_train + 1
    end
end
score_train = acc_train/train_output:size(1)
print('Validation Score on Train is '..score_train)

Validation Score on Train is 0.93374246438456	


In [16]:
pred_val = neuralnet:forward(val_word)
max,argmax_val = pred_val:max(2)
acc_val = 0
for i = 1, data['valid_output']:size(1) do
    if argmax_val[i][1] == data['valid_output'][i] then
        acc_val = acc_val + 1
    end
end
score_val = acc_val/data['valid_output']:size(1)
print('Validation Score on Validation is '..score_val)

Validation Score on Validation is 0.91404163631949	


## Adding the cap:

In [35]:
train_new = train_input_word_windows:clone()
train_cap_new = train_cap:clone()
train_cap_new:add(100002)

In [36]:
train_new = torch.cat(train_new,train_cap_new,2)

In [37]:
dim_hidden = 50
dim_hidden2 = 50
--Define the module
neuralnet_wc = nn.Sequential()

--Include the lookup tables
neuralnet_wc:add(nn.LookupTable(data['nwords'][1]+4,dim_hidden))
neuralnet_wc:add(nn.View(1,-1,10*dim_hidden))
neuralnet_wc:add(nn.Squeeze()) -- this layer is to go from a 1xAxB tensor to AxB dimensional tensor (https://groups.google.com/forum/#!topic/torch7/u4OEc0GB74k)
neuralnet_wc:add(nn.Linear(10*dim_hidden,dim_hidden2))
neuralnet_wc:add(nn.HardTanh())
neuralnet_wc:add(nn.Linear(dim_hidden2, data['nclasses'][1]))
neuralnet_wc:add(nn.LogSoftMax())

In [38]:
dataset2={};
for i=1,train:size(1) do 
  dataset2[i] = {train_new[i]:view(1,10), train_output[i]}
end
function dataset2:size() return train:size(1) end -- 100 examples

In [39]:
trainer2 = nn.StochasticGradient(neuralnet_wc, criterion)
trainer2.learningRate = 0.01
trainer2.maxIteration = 3
trainer2:train(dataset2)

# StochasticGradient: training	


...asdrizard/torch/install/share/lua/5.1/nn/LookupTable.lua:56: index out of range at /Users/nicolasdrizard/torch/pkg/torch/lib/TH/generic/THTensorMath.c:141
stack traceback:
	[C]: in function 'index'
	...asdrizard/torch/install/share/lua/5.1/nn/LookupTable.lua:56: in function 'updateOutput'
	...lasdrizard/torch/install/share/lua/5.1/nn/Sequential.lua:44: in function 'forward'
	...rd/torch/install/share/lua/5.1/nn/StochasticGradient.lua:35: in function 'train'
	[string "trainer2 = nn.StochasticGradient(neuralnet_wc..."]:4: in main chunk
	[C]: in function 'xpcall'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:179: in function <...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:143>
	...colasdrizard/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0102c0fbb0: 

In [29]:
pred_train2 = neuralnet_wc:forward(train_new)
max,argmax_train2 = pred_train2:max(2)
acc_train2 = 0
for i = 1, train_output:size(1) do
    if argmax_train2[i][1] == train_output[i] then
        acc_train2 = acc_train2 + 1
    end
end
score_train2 = acc_train2/train_output:size(1)
print('Validation Score on Train is '..score_train2)


Validation Score on Train is 0.90422454654824	


In [30]:
valid_new = data['valid_input_word_windows']:clone()
valid_cap_new = data['valid_input_cap_windows']:clone()
valid_cap_new:add(100002)
valid_new = torch.cat(valid_new,valid_cap_new,2)

In [32]:
pred_val2 = neuralnet_wc:forward(valid_new)
max,argmax_val2 = pred_val2:max(2)
acc_val2 = 0
for i = 1, data['valid_output']:size(1) do
    if argmax_val2[i][1] == data['valid_output'][i] then
        acc_val2 = acc_val2 + 1
    end
end
score_val2 = acc_val2/data['valid_output']:size(1)
print('Validation Score on Train is '..score_val2)

Validation Score on Train is 0.89842801650886	


# Adding the embeddings

In [4]:
train_new = data['train_input_word_windows']
train_cap_new = data['train_input_cap_windows']
train_output = data['train_output']
train_cap_new:add(100002)
train_new = torch.cat(train_new,train_cap_new,2)

In [7]:
dim_hidden = 50
dim_hidden2 = 50
--Define the module
neuralnet_wc = nn.Sequential()

par = nn.LookupTable(data['nwords'][1]+4,dim_hidden)
word_embeddings = data['word_embeddings']
par.weight:narrow(1, 1, data['nwords'][1]):copy(word_embeddings:narrow(1, 1, data['nwords'][1]))
print(par.weight[1][1], word_embeddings[1][1])

--Include the lookup tables
neuralnet_wc:add(par)
neuralnet_wc:add(nn.View(1,-1,10*dim_hidden))
neuralnet_wc:add(nn.Squeeze()) -- this layer is to go from a 1xAxB tensor to AxB dimensional tensor (https://groups.google.com/forum/#!topic/torch7/u4OEc0GB74k)
neuralnet_wc:add(nn.Linear(10*dim_hidden,dim_hidden2))
neuralnet_wc:add(nn.HardTanh())
neuralnet_wc:add(nn.Linear(dim_hidden2, data['nclasses'][1]))
neuralnet_wc:add(nn.LogSoftMax())

criterion = nn.ClassNLLCriterion()

In [12]:
dataset2={};
for i=1,train_new:size(1) do 
  dataset2[i] = {train_new[i]:view(1,10), train_output[i]}
end
function dataset2:size() return train_new:size(1) end -- 100 examples

In [16]:
dataset2[1][1]

      1       1    5032       2       4  100004  100004  100005  100004  100003
[torch.LongTensor of size 1x10]



In [18]:
timer = torch.Timer()

trainer2 = nn.StochasticGradient(neuralnet_wc, criterion)
trainer2.learningRate = 0.01
trainer2.maxIteration = 3
trainer2:train(dataset2)

print('Time elapsed: ' .. timer:time().real .. ' seconds',"\n")


# StochasticGradient: training	


# current error = 0.29927499225139	


# current error = 0.16488427987802	


# current error = 0.13787830303627	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 0.13787830303627	
Time elapsed: 527.83199000359 seconds	
	


In [24]:
-- Evaluation
valid = data['valid_input_word_windows']
valid_cap = data['valid_input_cap_windows']
valid_new = torch.cat(valid,torch.add(valid_cap,100002) ,2)

In [25]:
-- On train set

pred_train2 = neuralnet_wc:forward(train_new)
max,argmax_train2 = pred_train2:max(2)
acc_train2 = 0
for i = 1, train_output:size(1) do
    if argmax_train2[i][1] == train_output[i] then
        acc_train2 = acc_train2 + 1
    end
end
score_train2 = acc_train2/train_output:size(1)
print('Validation Score on Train is '..score_train2)


-- On validation set
pred_val2 = neuralnet_wc:forward(valid_new)
max,argmax_val2 = pred_val2:max(2)
acc_val2 = 0
for i = 1, data['valid_output']:size(1) do
    if argmax_val2[i][1] == data['valid_output'][i] then
        acc_val2 = acc_val2 + 1
    end
end
score_val2 = acc_val2/data['valid_output']:size(1)
print('Validation Score on Train is '..score_val2)

Validation Score on Train is 0.95776987419275	


Validation Score on Train is 0.95278738771546	


# Hyperparameter tunning script

Run all the following cells until the one called SCRIPT TO TUNE. This one is the longest to run, there are 2 loops: one over the parameters to test (here over dim_hidden) and one over the number of iterations (5 iterations of 4 epoch), the latter is to log more information about loss and accuracy and attest that the model converges.
In between each iteration of the outer loop the script rewrites in the log files in case of failure.

Then the last cell is to load the saved files. The goal is to use the two log of accuracy_train and accuracy_valid to select the best model and then retrieve its test prediction to submit them and take the lead!!
We will have enough log to have some nice plots too.

In [2]:
-- Helper

function compute_accuracy(pred, true_)
    max,argmax = pred:max(2)
    acc = 0
    for i = 1, true_:size(1) do
        if argmax[i][1] == true_[i] then
            acc = acc + 1
        end
    end
    score = acc/true_:size(1)
    
    return score
end

-- If words_embeddings is nil, weight are initialized randomly by torch
function build_nn(nwords, ncap, nclasses, dim_hidden, dim_hidden2, word_embeddings)
    --Define the module
    neuralnet_wc = nn.Sequential()

    par = nn.LookupTable(nwords + ncap,dim_hidden)
    
    -- Adding the embeddings
    if word_embeddings then
        par.weight:narrow(1, 1, nwords):copy(word_embeddings:narrow(1, 1, nwords))
    end
    neuralnet_wc:add(par)

    neuralnet_wc:add(nn.View(1,-1,10*dim_hidden))
    neuralnet_wc:add(nn.Squeeze()) -- this layer is to go from a 1xAxB tensor to AxB dimensional tensor (https://groups.google.com/forum/#!topic/torch7/u4OEc0GB74k)
    neuralnet_wc:add(nn.Linear(10*dim_hidden,dim_hidden2))
    neuralnet_wc:add(nn.HardTanh())
    neuralnet_wc:add(nn.Linear(dim_hidden2, nclasses))
    neuralnet_wc:add(nn.LogSoftMax())
    
    return neuralnet_wc
end

In [3]:
-- Loading the data

-- dimension
nwords = data['nwords'][1]
ncap = 4
nclasses = data['nclasses'][1]

-- Training data
train_output = data['train_output']
train_new = torch.cat(data['train_input_word_windows'],
    torch.add(data['train_input_cap_windows'], 100002),2)

-- Validation data
valid_output = data['valid_output']
valid_new = torch.cat(data['valid_input_word_windows'],
    torch.add(data['valid_input_cap_windows'],100002) ,2)

-- Test data
test_new = torch.cat(data['test_input_word_windows'],
    torch.add(data['test_input_cap_windows'],100002) ,2)

-- Formating the dataset to train
dataset={};
for i=1,train_new:size(1) do 
  dataset[i] = {train_new[i]:view(1,10), train_output[i]}
end
function dataset:size() return train_new:size(1) end -- 100 examples

In [18]:
-- Hyperparameter grid choice

-- Place to change parameter if needed
dim_hidden2_list = torch.DoubleTensor({40, 50, 60, 80, 100, 120})
num_parameters = dim_hidden2_list:size(1)

In [19]:
-- Metric storage

-- Initialization
loss_train = torch.zeros(num_parameters, 5) -- Store every 4 epochs
training_accuracy = torch.zeros(num_parameters, 5) -- Store every 4 epochs
valid_accuracy = torch.zeros(num_parameters, 5) -- Store every 4 epochs
training_time = torch.zeros(num_parameters)
test_pred = torch.zeros(test_new:size(1),45)


In [20]:
-- Script

-- REquired value because of the embeddings
dim_hidden = 50

for d in dim_hidden2_list:size() do
--for d=1,1 do
    dim_hidden2 = dim_hidden2_list[d]

    --Define the model
    neuralnet_wc = build_nn(nwords, ncap, nclasses, dim_hidden, dim_hidden2, data['word_embeddings'])
    criterion = nn.ClassNLLCriterion()
    
    -- Training
    timer = torch.Timer()
    trainer = nn.StochasticGradient(neuralnet_wc, criterion)
    trainer.learningRate = 0.01
    trainer.maxIteration = 4
    
    for j=1, 5 do
        trainer:train(dataset)
        
        -- Pred on train
        pred_val = neuralnet_wc:forward(valid_new)
        valid_accuracy[d][j] = compute_accuracy(pred_val, valid_output)
        
        pred_train = neuralnet_wc:forward(train_new)
        training_accuracy[d][j] = compute_accuracy(pred_train, train_output)
        loss_train[d][j] = criterion:forward(pred_train, train_output)
    end

    training_time[d] = timer:time().real
    test_pred = neuralnet_wc:forward(test_new)
    
    -- Saving (rewriting the files with update at each iteration)
    filename = 'log_hyper_embedding.f5'
    myFile = hdf5.open(filename, 'w')
    myFile:write('valid_accuracy', valid_accuracy)
    myFile:write('training_accuracy', training_accuracy)
    myFile:write('training_time', training_time)
    myFile:write('loss_train', loss_train)
    myFile:close()
    
    -- Saving the pred for each hyperparameter
    filename = 'test_pred_embedding_h_'.. d .. '.f5'
    myFile = hdf5.open(filename, 'w')
    myFile:write('test_pred', test_pred)
    myFile:close()    
    
end

# StochasticGradient: training	


# current error = 0.67797367306208	


# current error = 0.4675418220396	


# current error = 0.39370319093712	


# current error = 0.34702673766335	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 0.34702673766335	


[string "-- Script..."]:30: attempt to index a nil value
stack traceback:
	[string "-- Script..."]:30: in main chunk
	[C]: in function 'xpcall'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:179: in function <...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:143>
	...colasdrizard/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x010f039bb0: 

In [8]:
--- Script to read the saved files

myFile = hdf5.open('log_hyper_embedding.f5','r')
hyperparameter_log = myFile:all()
myFile:close()

d = 1
myFile = hdf5.open('test_pred_embedding_h_'.. d .. '.f5','r')
test_pred_1 = myFile:all()
myFile:close()