In [1]:
require 'nn'
require 'hdf5'

### LOADING THE DATA AND CONVERTING IT TO LOGREG FW:

In [2]:
myFile = hdf5.open('PTB.hdf5','r')

In [3]:
data = myFile:all()
myFile:close()

In [4]:
data

{
  train_output : LongTensor - size: 912666
  train_input_cap_windows : LongTensor - size: 912666x5
  test_input_word_windows : LongTensor - size: 129696x5
  valid_output : LongTensor - size: 131808
  valid_input_cap_windows : LongTensor - size: 131808x5
  nwords : IntTensor - size: 1
  test_input_cap_windows : LongTensor - size: 129696x5
  train_input_word_windows : LongTensor - size: 912666x5
  nclasses : IntTensor - size: 1
  word_embeddings : DoubleTensor - size: 400002x50
  valid_input_word_windows : LongTensor - size: 131808x5
}


In [5]:
train_input_word_windows = data['train_input_word_windows']
train_input_cap_windows = data['train_input_cap_windows']

train = train_input_word_windows:clone()
train_cap = train_input_cap_windows:clone()
train_output = data['train_output']

In [6]:
train_cap = train_input_cap_windows:clone()
for j = 1, 5 do
    train:narrow(2,j,1):add((j-1)*100002)
end
for j = 1, 5 do
    train_cap:narrow(2,j,1):add((j-1)*4)
end

In [7]:
train_cap[1]

  2
  6
 11
 14
 17
[torch.LongTensor of size 5]



In [8]:
data['nwords'][1]

100002	


In [9]:
data['nclasses'][1]

45	


### TRAINING:

In [74]:

par = nn.ParallelTable()

par:add(nn.LookupTable(5*data['nwords'][1],data['nclasses'][1])) -- first child
par:add(nn.LookupTable(5*4,data['nclasses'][1])) 

-- Logistic regression pipeline
--Define the module
linreg_wc = nn.Sequential()
--Include the lookup tables
linreg_wc:add(par)
--Sum the lookup table per entry
linreg_wc:add(nn.CAddTable())
linreg_wc:add(nn.Sum(2))
-- Add the bias b
linreg_wc:add(nn.Add(45))
linreg_wc:add(nn.LogSoftMax())

-- Defining the criterion
criterion = nn.ClassNLLCriterion()

In [72]:
-- Sanity check:
preds = linreg_wc:forward({train:narrow(1,5, 10),train_cap:narrow(1,5, 10)})

In [73]:
preds:exp():sum(2)

 0.9997
 0.9996
 0.9998
 0.9997
 0.9997
 0.9997
 0.9998
 0.9997
 0.9997
 0.9998
[torch.DoubleTensor of size 10x1]



In [18]:
-- SGD on log reg
eta = 0.01
batch_size = 100
input_w = torch.Tensor(batch_size,5)
input_c = torch.Tensor(batch_size,5)
targets = torch.Tensor(batch_size)

for ep = 1,1 do
    timer = torch.Timer()
    for j = 1,train:size(1) - train:size(1) % batch_size, batch_size do
        linreg_wc:zeroGradParameters()
        input_w = train:narrow(1, j, batch_size)
        input_c = train_cap:narrow(1, j, batch_size)
        targets = train_output:narrow(1,j, batch_size)  
        -- Forward pass
        preds = linreg_wc:forward({input_w,input_c})
        loss = criterion:forward(preds, targets)
        
        if j % 501 == 0 then
            print(loss)
        end
        -- Backward pass
        dLdpreds = criterion:backward(preds, targets)
        linreg_wc:backward({input_w,input_c}, dLdpreds)
        --- Updating parameters
        linreg_wc:updateParameters(eta) 
    end
    print('Time elapsed for epoch ' .. ep ..': ' .. timer:time().real .. ' seconds',"\n")

end

8.5759885310946	


6.7818050887484	


6.0842318276842	


5.1755214683275	


5.228723004582	


5.0174404883134	


5.1613892500926	


4.6790277431305	


4.0471138986201	


3.8357703021543	


3.817613050954	


4.3769468180343	


3.7315103678902	


3.7802154694901	


3.9288410165105	


3.6936996131465	


3.363783605287	


3.5276212550741	


3.113692175301	


[string "-- SGD ..."]:14: bad argument #3 to 'narrow' (out of range at /Users/nicolasdrizard/torch/pkg/torch/lib/TH/generic/THTensor.c:351)
stack traceback:
	[C]: in function 'narrow'
	[string "-- SGD ..."]:14: in main chunk
	[C]: in function 'xpcall'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:179: in function <...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:143>
	...colasdrizard/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...asdrizard/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...colasdrizard/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x010b30fbb0: 

In [38]:
-- Log leg with only word feature
--Define the module
linreg_w = nn.Sequential()
--Include the lookup tables
linreg_w:add(nn.LookupTable(5*data['nwords'][1],data['nclasses'][1]))
linreg_w:add(nn.Sum(2))
-- Add the bias b
linreg_w:add(nn.Add(45))
linreg_w:add(nn.LogSoftMax())

-- Defining the criterion
criterion = nn.ClassNLLCriterion()

In [62]:
-- Sanity check:
preds = linreg_w:forward(train[1]:view(1,5))

In [61]:
-- Building the dataset
dataset={};
for i=1,train:size(1) do 
  dataset[i] = {train[i]:view(1,5), train_output[i]}
end
function dataset:size() return train:size(1) end -- 100 examples


In [63]:
dataset[dataset:size()]

{
  1 : LongTensor - size: 1x5
  2 : 11
}


In [64]:
-- Training using StochasticGradient
timer = torch.Timer()

trainer = nn.StochasticGradient(linreg_w, criterion)
trainer.learningRate = 0.01
trainer:train(dataset)

print('Time elapsed to train ' .. timer:time().real .. ' seconds',"\n")


# StochasticGradient: training	


# current error = 1.3794849209672	


# current error = 0.76052419663021	


# current error = 0.61227595531347	


# current error = 0.52964826351044	


# current error = 0.47433221174542	


# current error = 0.4337290418635	


# current error = 0.40218224908772	


# current error = 0.37669916538154	


# current error = 0.35551716059799	


# current error = 0.33752287632374	


# current error = 0.32197378525984	


# current error = 0.30835092001758	


# current error = 0.2962778218484	


# current error = 0.28547382819134	


# current error = 0.27572522468948	


# current error = 0.26686623912229	


# current error = 0.25876604690913	


# current error = 0.25131973564641	


# current error = 0.24444194696155	


# current error = 0.23806232528917	


# current error = 0.23212223993658	


# current error = 0.22657236499832	


# current error = 0.22137086189557	


# current error = 0.21648197102891	


# current error = 0.2118749270897	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 0.2118749270897	
Time elapsed to train 5713.0099458694 seconds	
	


In [None]:
-- Computing the accuracy score

val_word = data['valid_input_word_windows']:clone()
for j = 1, 5 do
    val_word:narrow(2,j,1):add((j-1)*100002)
end

train_word = train_input_word_windows:clone()
for j = 1, 5 do
    train_word:narrow(2,j,1):add((j-1)*100002)
end

val_output = data['valid_output']
pred_validate = torch.Tensor(val_output:size(1), 45)    
pred_train = torch.Tensor(train_output:size(1), 45)    

pred_validate = linreg_w:forward(val_word)
pred_train = linreg_w:forward(train_word)

max,argmax_val = pred_validate:max(2)
max,argmax_train = pred_train:max(2)

acc_validate = 0
for i = 1, val_output:size(1) do
    if argmax_val[i][1] == val_output[i] then
        acc_validate = acc_validate + 1
    end
end

acc_train = 0
for i = 1, train_output:size(1) do
    if argmax_train[i][1] == train_output[i] then
        acc_train = acc_train + 1
    end
end
score_val = acc_validate/val_output:size(1)
score_train = acc_train/train_output:size(1)
print('Validation Score on Train is '..score_train)
print('Validation Score on Validation is '..score_val)

In [78]:
--saving the model
torch.save('model/log_reg_words_sgdmodule', linreg_w)




In [10]:
--loading the model
linreg_w2 = torch.load('model/log_reg_words_sgdmodule')

In [11]:
-- Prediction on test with submission
test_word = data['test_input_word_windows']:clone()
for j = 1, 5 do
    test_word:narrow(2,j,1):add((j-1)*100002)
end  

pred_val = linreg_w2:forward(test_word)
max,argmax = pred_val:max(2)
pred_test = argmax:view(argmax:size(1))

myFile = hdf5.open('submission/pred_test_lgrw2.h5', 'w')
myFile:write('Testpred', pred_test)
myFile:close()

In [12]:
test_word:size()

 129696
      5
[torch.LongStorage of size 2]



# Neural Network

In [13]:
-- We consider the built feature here
train = train_input_word_windows:clone()
train_cap = train_input_cap_windows:clone()

In [14]:
dim_hidden = 50
--Define the module
neuralnet = nn.Sequential()

--Include the lookup tables
par = nn.ParallelTable()
par:add(nn.LookupTable(data['nwords'][1],dim_hidden)) -- first child
par:add(nn.LookupTable(4,dim_hidden)) 
neuralnet:add(par)
neuralnet:add(nn.JoinTable(2))
neuralnet:add(nn.Sum(2))
neuralnet:add(nn.Add(dim_hidden))

neuralnet:add(nn.HardTanh())

neuralnet:add(nn.Linear(dim_hidden, data['nclasses'][1]))

neuralnet:add(nn.LogSoftMax())

In [16]:
-- Test
-- Batch with 10 element
neuralnet:forward({train:narrow(1,5, 10),train_cap:narrow(1,5, 10)}):sum(2)

-180.6292
-179.4826
-177.2327
-178.4308
-177.1351
-176.5015
-176.3860
-179.1067
-178.3210
-177.3904
[torch.DoubleTensor of size 10x1]



In [70]:
-- SGD on neuralnet
eta = 0.01
batch_size = 100
input_w = torch.Tensor(batch_size,5)
input_c = torch.Tensor(batch_size,5)
targets = torch.Tensor(batch_size)

-- For the Accuracy on the validation set
val_word = data['valid_input_word_windows']:clone()
val_cap = data['valid_input_cap_windows']:clone()

val_output = data['valid_output']
pred_val = torch.Tensor(val_output:size(1), 45)

criterion = nn.ClassNLLCriterion()

for ep = 1,5 do
    timer = torch.Timer()
    -- To be sure to consider only batch of the same size
    for j = 1,train:size(1) - train:size(1) % batch_size, batch_size do
        neuralnet:zeroGradParameters()
        input_w = train:narrow(1, j, batch_size)
        input_c = train_cap:narrow(1, j, batch_size)
        targets = train_output:narrow(1,j, batch_size)  
        -- Forward pass
        preds = neuralnet:forward({input_w,input_c})
        loss = criterion:forward(preds, targets)
        
        if j % 501 == 0 then
            print(loss)
        end
        -- Backward pass
        dLdpreds = criterion:backward(preds, targets)
        neuralnet:backward({input_w,input_c}, dLdpreds)
        --- Updating parameters
        neuralnet:updateParameters(eta) 
    end
    print('Time elapsed for epoch ' .. ep ..': ' .. timer:time().real .. ' seconds',"\n")
    -- Evaluating the accuracy on the validation set
    pred_val = neuralnet:forward({val_word, val_cap})
    max,argmax = pred_val:max(2)

    acc = 0
    for i = 1, val_output:size(1) do
        if argmax[i][1] == val_output[i] then
            acc = acc + 1
        end
    end
    score = acc/val_output:size(1)
    print('Validation Score is '..score)

end

3.8898707998767	


2.8758456393912	


3.2050669027685	


2.6489577068804	


2.7498094002453	


2.6825000064236	


2.824666045155	


2.8477612160101	


2.5235147998269	


2.4288681396417	


2.0892679645625	


2.3904251976955	


2.3317229213979	


2.279531009562	




2.2933153697501	


2.4274080248097	


2.3171917513797	


2.5087745416034	


2.6062318466591	


Time elapsed for epoch 1: 133.19005703926 seconds	
	


Validation Score is 0.27003672007769	


2.5026424602521	


2.4207418009544	


2.9260936587252	


2.3913462837156	


2.5835910207376	


2.5235448919277	


2.5935075353371	


2.6714172192295	


2.3902954409781	


2.3054344427427	


1.9977375704705	


2.2906179356041	


2.173673214273	


2.1916890646751	


2.2449466234562	


2.3427394296164	


2.2413542832905	


2.4428794219126	


2.5008728137761	


Time elapsed for epoch 2: 135.79912805557 seconds	
	


Validation Score is 0.27895878854091	


2.4426967223651	


2.2807296625277	


2.8485047489928	


2.3209873936175	


2.5085282929789	


2.4711013919259	


2.511446725365	


2.6047166009476	


2.3006343727676	


2.237735474894	


1.9444604691873	


2.2483355673351	


2.1229075250767	


2.16289014247	


2.2093800835072	


2.3294113295806	


2.1884962024668	


2.4100095277122	


2.4478439951989	


Time elapsed for epoch 3: 137.39781308174 seconds	
	


Validation Score is 0.28297978878369	


2.4137918961129	


2.221003558904	


2.800579300496	


2.2728064140365	


2.4554092595734	


2.435090223665	


2.4375065388558	


2.5698927281729	


2.266079469031	


2.2246176394618	


1.9169782289405	


2.2189973124291	


2.089657313294	


2.1506508859938	


2.176215433173	


2.2970164518705	


2.1496671113838	


2.3967529970368	


2.4053921924961	


Time elapsed for epoch 4: 128.47289800644 seconds	
	


Validation Score is 0.28690216071862	


2.3846665795851	


2.1726820256113	


2.7494483592059	


2.2367228155743	


2.4231323891795	


2.4146110312395	


2.3846809001856	


2.5437790932748	


2.248394048677	


2.2121803073311	


1.9011909215113	


2.203171600584	


2.0703786126542	


2.1415262288633	


2.1514558319843	


2.2566647599768	


2.1380294547215	


2.3833049599876	


2.3712172783666	


Time elapsed for epoch 5: 135.55886483192 seconds	
	


Validation Score is 0.28950443068706	


In [6]:
-- Use of the stochasticgradient module
-- Only words feature first

-- Building the dataset
dataset={};
for i=1,train:size(1) do 
  dataset[i] = {train[i]:view(1,5), train_output[i]}
end
function dataset:size() return train:size(1) end -- 100 examples

In [11]:
--Check
print(dataset[1])
print(dataset[1][1])

{
  1 : LongTensor - size: 1x5
  2 : 1
}
    1     1  5032     2     4
[torch.LongTensor of size 1x5]



In [29]:
dim_hidden = 50
--Define the module
neuralnet_w = nn.Sequential()

--Include the lookup tables
neuralnet_w:add(nn.LookupTable(data['nwords'][1],dim_hidden))
neuralnet_w:add(nn.Sum(2))
neuralnet_w:add(nn.Add(dim_hidden))

neuralnet_w:add(nn.HardTanh())

neuralnet_w:add(nn.Linear(dim_hidden, data['nclasses'][1]))

--neuralnet_w:add(nn.LogSoftMax())

-- Defining the criterion
criterion = nn.ClassNLLCriterion()

In [30]:
-- Test the module
neuralnet_w:forward(dataset[1][1])

Columns 1 to 10
-0.4656 -0.2358 -0.2387  0.7646  1.4196  0.1271 -0.5708  0.5629 -0.1456 -0.1834

Columns 11 to 20
 0.1106 -0.1164 -0.1584 -0.4165  0.9461 -1.3635 -0.5184 -0.4090 -0.8857 -0.8798

Columns 21 to 30
 0.6350 -0.0262 -0.6443  0.2792 -0.2837 -1.0278 -0.0088 -0.1556  1.0539  0.8953

Columns 31 to 40
 0.3359  0.4260 -0.1281  0.2132  0.4996  1.5266  0.3289 -0.3932  0.3782 -0.9084

Columns 41 to 45
 0.1558  0.1407  0.2678  0.3221  0.0584
[torch.DoubleTensor of size 1x45]



In [23]:
-- Training using StochasticGradient
timer = torch.Timer()

trainer = nn.StochasticGradient(neuralnet_w, criterion)
trainer.learningRate = 0.01
trainer.maxIteration = 5
trainer:train(dataset)

print('Time elapsed to train ' .. timer:time().real .. ' seconds',"\n")


# StochasticGradient: training	


# current error = 2.0096873995551	


# current error = 1.9225598150036	


# current error = 1.8669479596766	


# current error = 1.8257884102976	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 1.8257884102976	
Time elapsed to train 802.30167603493 seconds	
	


In [33]:
-- Training using StochasticGradient
timer = torch.Timer()

trainer = nn.StochasticGradient(neuralnet_w, criterion)
trainer.learningRate = 0.01
trainer.maxIteration = 5
trainer:train(dataset)

print('Time elapsed to train ' .. timer:time().real .. ' seconds',"\n")


# StochasticGradient: training	


# current error = -172126.08958512	


# current error = -203435.45737267	


# current error = -234744.82516024	


# current error = -266054.19294777	


# current error = -297363.56073544	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = -297363.56073544	
Time elapsed to train 1143.8174819946 seconds	
	


In [32]:
-- Computing the accuracy score

val_word = data['valid_input_word_windows']
val_output = data['valid_output']

pred_validate = torch.Tensor(val_output:size(1), 45)    
pred_train = torch.Tensor(train_output:size(1), 45)    

pred_validate = neuralnet_w:forward(val_word)
pred_train = neuralnet_w:forward(train)

max,argmax_val = pred_validate:max(2)
max,argmax_train = pred_train:max(2)

acc_validate = 0
for i = 1, val_output:size(1) do
    if argmax_val[i][1] == val_output[i] then
        acc_validate = acc_validate + 1
    end
end

acc_train = 0
for i = 1, train_output:size(1) do
    if argmax_train[i][1] == train_output[i] then
        acc_train = acc_train + 1
    end
end
score_val = acc_validate/val_output:size(1)
score_train = acc_train/train_output:size(1)
print('Validation Score on Train is '..score_train)
print('Validation Score on Validation is '..score_val)

Validation Score on Train is 0.13979374711011	
Validation Score on Validation is 0.14067431415392	


In [27]:
train_output:narrow(1, 1, 20)

  1
  1
  2
  3
  4
  5
  2
  6
  7
  8
  9
 10
  8
  5
  9
  1
  3
 11
  1
  1
[torch.LongTensor of size 20]



In [28]:
argmax_train:narrow(1, 1, 20)

  1
  2
  2
  3
  3
  4
  4
  8
  8
  8
  9
  9
  8
  8
  8
  1
  1
 11
  1
  1
[torch.LongTensor of size 20x1]



# Neural Network with pretrained embeddings