In [1]:
require 'nn'
require 'hdf5'
require 'optim'


In [3]:
-- Load every dataset stored in the 6-gram HDF5 file into the Lua table `data`
-- (keyed by dataset name), then release the file handle.
myFile = hdf5.open('../../6-grams.hdf5','r')
data = myFile:all()
myFile:close()
-- List the loaded tensors with their types and sizes.
print(data)

{
  test : LongTensor - size: 3761x55
  nwords : LongTensor - size: 1
  train_1000_nocounts : LongTensor - size: 696825x7
  train_nocounts : DoubleTensor - size: 887522x6
  train_1000 : DoubleTensor - size: 887522x6
  train : LongTensor - size: 772670x7
  valid : LongTensor - size: 3370x55
  valid_txt : DoubleTensor - size: 70391x6
  valid_output : LongTensor - size: 3370x50
}


In [4]:
-- Split the loaded tensors column-wise (dimension 2) into model inputs and targets.
-- All results are views on `data`; nothing is copied.
local context_len = 5              -- number of context columns fed to the model
local ngram_len = context_len + 1  -- context columns followed by the target column

-- Training 6-grams: columns 1..5 are the context, column 6 is the target.
train = data.train_nocounts:narrow(2, 1, ngram_len)
train_input = train:narrow(2, 1, context_len)
train_output = train:narrow(2, ngram_len, 1)

-- valid.txt 6-grams, laid out like the training set.
valid_txt = data.valid_txt:narrow(2, 1, ngram_len)
valid_txt_input = valid_txt:narrow(2, 1, context_len)
valid_txt_output = valid_txt:narrow(2, ngram_len, 1)

-- Kaggle validation rows: columns 1..50 are taken as the words to score and
-- columns 51..55 as the context; valid_output is used as loaded.
valid_topredict = data.valid:narrow(2, 1, 50)
valid_input = data.valid:narrow(2, 51, context_len)
valid_output = data.valid_output

In [4]:
-- Model: feed-forward neural language model whose output layer sees both the
-- tanh hidden layer and the raw concatenated embeddings (skip connection).
-- Every name below is a global shared with the later notebook cells.
nwords = 10001   -- vocabulary size (presumably matches data['nwords'] -- TODO confirm)
N = 5            -- number of context words
dwin = N         -- window width, in words
hid1 = 30        -- embedding dimension per word
hid2 = 100       -- number of tanh hidden units

-- To store the whole model
nnlm = nn.Sequential()

-- Embedding layer: look up each context word, then flatten the window into
-- one vector of hid1*dwin features per example
LT = nn.Sequential()
LT_ = nn.LookupTable(nwords,hid1)
LT:add(LT_)
LT:add(nn.View(-1, hid1*dwin))

nnlm:add(LT)

-- Two parallel branches applied to the same embedding vector
concat = nn.ConcatTable()

-- Branch 1: linear layer followed by tanh (hid2 features)
lin_tanh = nn.Sequential()
lin_tanh:add(nn.Linear(hid1*dwin,hid2))
lin_tanh:add(nn.Tanh())

-- Branch 2: the embeddings unchanged (hid1*dwin features)
id = nn.Identity()

concat:add(lin_tanh)
concat:add(id)

-- Join both branches along dimension 2, giving hid1*dwin + hid2 features,
-- then map them to log-probabilities over the vocabulary
nnlm:add(concat)
nnlm:add(nn.JoinTable(2))
nnlm:add(nn.Linear(hid1*dwin + hid2, nwords))
nnlm:add(nn.LogSoftMax())

-- Loss: negative log-likelihood on the log-probabilities
criterion = nn.ClassNLLCriterion()


-- Training hyper-parameters. nEpochs is not read by the training cells, which
-- hard-code their own epoch ranges.
nEpochs = 1
batchSize = 32
eta = 0.01
av_L = 0

-- Preallocated mini-batch buffers, reused by every training step
inputs_batch = torch.DoubleTensor(batchSize,dwin)
targets_batch = torch.DoubleTensor(batchSize)
outputs = torch.DoubleTensor(batchSize, nwords)
dL_do = torch.DoubleTensor(batchSize, nwords)

-- One row per Kaggle validation example, one column per scored word (50)
kag_pred_valid = torch.Tensor(valid_input:size(1),50)
norm_mat = torch.Tensor(valid_input:size(1),50)

In [5]:
-- Per the nn documentation, getParameters() returns flattened views of all
-- weights and of all gradients. Neither variable is used by the training
-- cells, which update the model through nnlm:updateParameters instead.
parameters, gradParameters = nnlm:getParameters()

In [7]:
-- Train nnlm with mini-batch SGD for epoch 1. After the epoch, print the
-- wall-clock time, the training perplexity, the perplexity on valid.txt and the
-- perplexity over the 50 scored words of each Kaggle validation row.
for i = 1, 1 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 1: 817.29979610443	
Average Perplexity on train: 1038.2426267938	


/Users/virgileaudi/torch/install/share/lua/5.1/nn/THNN.lua:1091: multi-target not supported at /tmp/luarocks_nn-scm-1-9848/nn/lib/THNN/generic/ClassNLLCriterion.c:18
stack traceback:
	[C]: in function 'v'
	/Users/virgileaudi/torch/install/share/lua/5.1/nn/THNN.lua:1091: in function 'ClassNLLCriterion_updateOutput'
	...udi/torch/install/share/lua/5.1/nn/ClassNLLCriterion.lua:41: in function 'forward'
	[string "for i = 1, 1 do..."]:41: in main chunk
	[C]: in function 'xpcall'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:179: in function <.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:143>
	.../virgileaudi/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0102352b50: 

In [16]:
-- Sanity check: NLL of nnlm over valid.txt (targets squeezed to a 1-D vector).
print (criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze()))

6.4636394432108	70391	


In [28]:
-- Inspect the output shape produced for a single validation context.
nnlm:forward(valid_input[1]):size()

     1
 10001
[torch.LongStorage of size 2]



In [30]:
-- Score the Kaggle validation set with nnlm and print the resulting perplexity.
local n_rows = valid_input:size(1)

-- Probabilities of the 50 scored words of every row (exp of the log-softmax output)
kag_pred_valid:zero()
for row = 1, n_rows do
    local log_probs = nnlm:forward(valid_input[row])
    local scored = log_probs:index(2, valid_topredict[row])
    kag_pred_valid[row]:copy(scored):exp()
end

-- Divide every row by its own sum so the 50 probabilities add up to 1
norm_mat:zero()
local row_sums = kag_pred_valid:sum(2)
norm_mat:copy(torch.expandAs(row_sums, kag_pred_valid))
kag_pred_valid:cdiv(norm_mat)

-- Accumulate the log-probability of the column where valid_output is largest
CE = 0
for row = 1, n_rows do
    mm, aa = valid_output[row]:max(1)
    local picked = kag_pred_valid[row][aa[1]]
    CE = CE + math.log(picked)
end

val_res = math.exp(-CE / kag_pred_valid:size(1))
print('Perplexity on valid: '..val_res)



Perplexity on valid: 8.0082783571626	


In [31]:
-- Train nnlm with mini-batch SGD for one more epoch (still labelled 1). After
-- the epoch, print the wall-clock time, the training perplexity, the perplexity
-- on valid.txt and the perplexity over the 50 scored words of each Kaggle
-- validation row.
for i = 1, 1 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 1: 837.54529905319	
Average Perplexity on train: 601.86348771938	


Perplexity on valid.txt: 544.09100456299	


Perplexity on valid: 7.5411670975744	


In [32]:
-- Train nnlm with mini-batch SGD for epochs 3 to 8. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row.
for i = 3, 8 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 3: 825.9562330246	
Average Perplexity on train: 527.4359496501	


Perplexity on valid.txt: 490.41028578536	


Perplexity on valid: 7.3124630553358	


Epoch 4: 810.81244516373	
Average Perplexity on train: 478.24240750295	


Perplexity on valid.txt: 451.28884307244	


Perplexity on valid: 7.1345689871114	


Epoch 5: 809.59715104103	
Average Perplexity on train: 441.10636934143	


Perplexity on valid.txt: 421.14663777603	


Perplexity on valid: 6.9774349905299	


Epoch 6: 815.37426805496	
Average Perplexity on train: 411.90963927563	


Perplexity on valid.txt: 397.32171347555	


Perplexity on valid: 6.8419065078654	


Epoch 7: 810.07148694992	
Average Perplexity on train: 388.22452792996	


Perplexity on valid.txt: 377.90359626936	


Perplexity on valid: 6.7242994567635	


Epoch 8: 843.48118114471	
Average Perplexity on train: 368.57023037956	


Perplexity on valid.txt: 361.76307273096	


Perplexity on valid: 6.6223244922863	


In [33]:
-- Train nnlm with mini-batch SGD for epochs 9 to 15. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row.
for i = 9, 15 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 9: 827.41940593719	
Average Perplexity on train: 351.92530292078	


Perplexity on valid.txt: 348.08247912807	


Perplexity on valid: 6.5329550483487	


Epoch 10: 818.73921298981	
Average Perplexity on train: 337.5367575987	


Perplexity on valid.txt: 336.26468217226	


Perplexity on valid: 6.4530105725527	


Epoch 11: 810.9077231884	
Average Perplexity on train: 324.88169664599	


Perplexity on valid.txt: 325.9017569163	


Perplexity on valid: 6.3802435933612	


Epoch 12: 818.34377193451	
Average Perplexity on train: 313.59659021479	


Perplexity on valid.txt: 316.70651340805	


Perplexity on valid: 6.3133236716505	


Epoch 13: 875.60341501236	
Average Perplexity on train: 303.4203653605	


Perplexity on valid.txt: 308.46612556472	


Perplexity on valid: 6.2514416608928	


Epoch 14: 931.39901995659	
Average Perplexity on train: 294.15795710215	


Perplexity on valid.txt: 301.02181851019	


Perplexity on valid: 6.1939794227997	


Epoch 15: 947.89197707176	
Average Perplexity on train: 285.66596548785	


Perplexity on valid.txt: 294.25169224865	


Perplexity on valid: 6.1404268036459	


In [34]:
-- Train nnlm with mini-batch SGD for epochs 16 to 20, using a smaller
-- hard-coded step size. After every epoch, print the wall-clock time, the
-- training perplexity, the perplexity on valid.txt and the perplexity over the
-- 50 scored words of each Kaggle validation row.
for i = 16, 20 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step. The step size is hard-coded to 0.001 for
        -- these epochs; the global eta is left untouched.
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(0.001)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 16: 884.64695596695	
Average Perplexity on train: 289.63091355636	


Perplexity on valid.txt: 298.53214613822	


Perplexity on valid: 6.161870172494	


Epoch 17: 862.36055397987	
Average Perplexity on train: 287.61173679513	


Perplexity on valid.txt: 298.16840792915	


Perplexity on valid: 6.1582829766217	


Epoch 18: 913.85455417633	
Average Perplexity on train: 286.44803734131	


Perplexity on valid.txt: 297.64453359062	


Perplexity on valid: 6.1549117249554	


Epoch 19: 907.04614782333	
Average Perplexity on train: 285.45577036762	


Perplexity on valid.txt: 297.05836750921	


Perplexity on valid: 6.1512939860661	


Epoch 20: 945.45608305931	
Average Perplexity on train: 284.5339143652	


Perplexity on valid.txt: 296.44259460417	


Perplexity on valid: 6.1474060630745	


In [35]:
-- Train nnlm with mini-batch SGD for epochs 21 to 22. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row.
for i = 21, 22 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 21: 956.9749341011	
Average Perplexity on train: 274.63307799968	


Perplexity on valid.txt: 285.71248287443	


Perplexity on valid: 6.0745420849267	


Epoch 22: 884.85721302032	
Average Perplexity on train: 267.31538454884	


Perplexity on valid.txt: 279.97206629824	


Perplexity on valid: 6.026810149315	


In [36]:
-- Train nnlm with mini-batch SGD for epochs 23 to 25. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row.
for i = 23, 25 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 23: 925.0414981842	


Average Perplexity on train: 260.75603062799	


Perplexity on valid.txt: 274.84122276941	


Perplexity on valid: 5.9832071111076	


Epoch 24: 861.51717591286	
Average Perplexity on train: 254.65781156113	


Perplexity on valid.txt: 270.11753172733	


Perplexity on valid: 5.942581446567	


Epoch 25: 886.49924206734	
Average Perplexity on train: 248.95055580556	


Perplexity on valid.txt: 265.73155316471	


Perplexity on valid: 5.9045085155161	


In [37]:
-- Train nnlm with mini-batch SGD for epochs 26 to 27. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row.
for i = 26, 27 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion:backward(batch_outputs, batch_targets))
        nnlm:backward(batch_inputs, batch_grads)
        nnlm:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid:zero()
    for ii = 1, n_valid do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    print('Perplexity on valid: '..val_res)
end

Epoch 26: 856.12632894516	
Average Perplexity on train: 243.58987799773	


Perplexity on valid.txt: 261.63750741407	


Perplexity on valid: 5.8686917859439	


Epoch 27: 915.89268708229	
Average Perplexity on train: 238.54260990519	


Perplexity on valid.txt: 257.80547119188	


Perplexity on valid: 5.834951373464	


In [39]:
-- Serialize the trained nnlm model to disk.
torch.save('../../../nnlm',nnlm)




### Results on test:

In [40]:
-- Build the normalized predictions of nnlm for the test set. Test rows are
-- split like the validation rows: columns 1..50 are the words to score and
-- columns 51..55 the context.
local test_rows = data.test
test_topredict = test_rows:narrow(2, 1, 50)
test_input = test_rows:narrow(2, 51, 5)

local n_test = test_input:size(1)
kag_pred_test = torch.Tensor(n_test, 50)
norm_mat = torch.Tensor(n_test, 50)

-- Probabilities of the 50 scored words of every row (exp of the log-softmax output)
kag_pred_test:zero()
for row = 1, n_test do
    local log_probs = nnlm:forward(test_input[row])
    local scored = log_probs:index(2, test_topredict[row])
    kag_pred_test[row]:copy(scored):exp()
end

-- Divide every row by its own sum so the 50 probabilities add up to 1
norm_mat:zero()
local row_sums = kag_pred_test:sum(2)
norm_mat:copy(torch.expandAs(row_sums, kag_pred_test))
kag_pred_test:cdiv(norm_mat)

In [43]:
-- Check the shape of the test prediction matrix.
kag_pred_test:size()

 3761
   50
[torch.LongStorage of size 2]



In [44]:
-- Store the normalized predictions of nnlm in an HDF5 file: the test matrix
-- under 'test' and the validation matrix under 'valid'.
filename = 'bengio_1.f5'
myFile = hdf5.open(filename, 'w')
myFile:write('test', kag_pred_test)
myFile:write('valid',kag_pred_valid)
myFile:close()

### Bengio (no skip):

In [5]:
-- Model: feed-forward neural language model without the skip connection; the
-- output layer only sees the tanh hidden layer.
-- Every name below is a global shared with the later notebook cells.
nwords = 10001   -- vocabulary size (presumably matches data['nwords'] -- TODO confirm)
N = 5            -- number of context words
dwin = N         -- window width, in words
hid1 = 30        -- embedding dimension per word
hid2 = 100       -- number of tanh hidden units

-- To store the whole model
nnlm2 = nn.Sequential()

-- Embedding layer: look up each context word, then flatten the window into
-- one vector of hid1*dwin features per example
LT2 = nn.Sequential()
LT2_ = nn.LookupTable(nwords,hid1)
LT2:add(LT2_)
LT2:add(nn.View(-1, hid1*dwin))

nnlm2:add(LT2)

-- Hidden layer: linear map followed by tanh (hid2 features)
lin_tanh2 = nn.Sequential()
lin_tanh2:add(nn.Linear(hid1*dwin,hid2))
lin_tanh2:add(nn.Tanh())

-- Map the hidden layer to log-probabilities over the vocabulary
nnlm2:add(lin_tanh2)
nnlm2:add(nn.Linear(hid2, nwords))
nnlm2:add(nn.LogSoftMax())

-- Loss: negative log-likelihood on the log-probabilities
criterion2 = nn.ClassNLLCriterion()

-- Training hyper-parameters
batchSize = 32
eta = 0.01
av_L = 0

-- Preallocated mini-batch buffers, reused by every training step
inputs_batch = torch.DoubleTensor(batchSize,dwin)
targets_batch = torch.DoubleTensor(batchSize)
outputs = torch.DoubleTensor(batchSize, nwords)
dL_do = torch.DoubleTensor(batchSize, nwords)

-- One row per Kaggle validation example, one column per scored word (50)
kag_pred_valid2 = torch.Tensor(valid_input:size(1),50)
norm_mat2 = torch.Tensor(valid_input:size(1),50)

-- Per-epoch perplexity history (25 slots, one per epoch index)
valtxtperp = torch.Tensor(25)
valkagperp = torch.Tensor(25)

In [6]:
-- Train nnlm2 with mini-batch SGD for epochs 1 to 25. After every epoch, print
-- the wall-clock time, the training perplexity, the perplexity on valid.txt and
-- the perplexity over the 50 scored words of each Kaggle validation row; both
-- validation perplexities are also recorded in valtxtperp[i] / valkagperp[i].
for i = 1, 25 do
    -- Time the epoch
    timer = torch.Timer()
    -- Sum of per-example negative log-likelihoods seen during this epoch
    av_L = 0

    -- Max-norm constraint on the embedding table, applied once per epoch
    LT2_.weight:renorm(2,1,1)

    local n_train = train_input:size(1)

    -- Mini-batch loop
    for t = 1, n_train, batchSize do
        -- Rows t..n_train are left, hence the +1: without it the final row is
        -- never trained on and a remainder of one row gives an empty batch.
        local current_batch_size = math.min(batchSize, n_train - t + 1)

        -- Views on the preallocated buffers, shortened when the last batch is not full
        local batch_inputs = inputs_batch:narrow(1,1,current_batch_size)
        local batch_targets = targets_batch:narrow(1,1,current_batch_size)
        local batch_outputs = outputs:narrow(1,1,current_batch_size)
        local batch_grads = dL_do:narrow(1,1,current_batch_size)

        batch_inputs:copy(train_input:narrow(1,t,current_batch_size))
        batch_targets:copy(train_output:narrow(1,t,current_batch_size))

        -- Reset gradients
        nnlm2:zeroGradParameters()

        -- Forward pass
        batch_outputs:copy(nnlm2:forward(batch_inputs))

        -- The criterion returns the batch mean (nn default sizeAverage), so it
        -- is weighted by the batch size to keep a short last batch from
        -- counting as much as a full one.
        local L = criterion2:forward(batch_outputs, batch_targets)
        av_L = av_L + L * current_batch_size

        -- Backward pass and SGD step
        batch_grads:copy(criterion2:backward(batch_outputs, batch_targets))
        nnlm2:backward(batch_inputs, batch_grads)
        nnlm2:updateParameters(eta)
    end

    print('Epoch '..i..': '..timer:time().real)
    -- Divide by the number of examples actually seen; floor(n/batchSize) would
    -- ignore the last partial batch.
    print('Average Perplexity on train: '.. math.exp(av_L/n_train))

    -- Perplexity on valid.txt (targets squeezed to a 1-D vector for the criterion)
    perp_txt = math.exp(criterion2:forward(nnlm2:forward(valid_txt_input),valid_txt_output:squeeze()))
    valtxtperp[i] = perp_txt
    print('Perplexity on valid.txt: '..perp_txt)

    -- Perplexity on the Kaggle validation set: keep the log-probabilities of
    -- the 50 scored words of each row and exponentiate them in place.
    local n_valid = valid_input:size(1)
    kag_pred_valid2:zero()
    for ii = 1, n_valid do
        kag_pred_valid2[ii]:copy(nnlm2:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end

    -- Renormalize each row so that its 50 probabilities sum to 1
    norm_mat2:copy(torch.expandAs(kag_pred_valid2:sum(2), kag_pred_valid2))
    kag_pred_valid2:cdiv(norm_mat2)

    -- Log-probability of the column holding the largest entry of valid_output
    -- (presumably a one-hot row marking the true word -- TODO confirm)
    CE = 0
    for iii = 1, n_valid do
        local _, argmax = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid2[iii][argmax[1]])
    end

    val_res = math.exp(-CE/n_valid)
    valkagperp[i] = val_res
    print('Perplexity on valid: '..val_res)
end

Epoch 1: 440.03609108925	
Average Perplexity on train: 1082.4594136257	


Perplexity on valid.txt: 680.00174311834	


Perplexity on valid: 8.0683115316289	


Epoch 2: 437.69223618507	
Average Perplexity on train: 625.75434648624	


Perplexity on valid.txt: 565.59387298635	


Perplexity on valid: 7.5508052997539	


Epoch 3: 437.58501505852	
Average Perplexity on train: 542.04730055156	


Perplexity on valid.txt: 504.82438770484	


Perplexity on valid: 7.2963433831007	


Epoch 4: 440.55102109909	
Average Perplexity on train: 488.70411608916	


Perplexity on valid.txt: 462.7072798064	


Perplexity on valid: 7.098015126089	


Epoch 5: 437.68313002586	
Average Perplexity on train: 450.07392975748	


Perplexity on valid.txt: 431.43937680042	


Perplexity on valid: 6.9357044990068	


Epoch 6: 438.48814606667	
Average Perplexity on train: 420.43136766947	


Perplexity on valid.txt: 407.2472402181	


Perplexity on valid: 6.8043197597726	


Epoch 7: 438.92952895164	
Average Perplexity on train: 396.75551814658	


Perplexity on valid.txt: 387.86149170982	


Perplexity on valid: 6.6919146102422	


Epoch 8: 438.47046113014	
Average Perplexity on train: 377.31399735107	


Perplexity on valid.txt: 371.85625736873	


Perplexity on valid: 6.5915150215024	


Epoch 9: 437.71296596527	
Average Perplexity on train: 360.89540150306	


Perplexity on valid.txt: 358.20662639867	


Perplexity on valid: 6.4997537264968	


Epoch 10: 439.26542687416	
Average Perplexity on train: 346.66001827333	


Perplexity on valid.txt: 346.28640781448	


Perplexity on valid: 6.4149865991385	


Epoch 11: 437.89451003075	
Average Perplexity on train: 334.06440532189	


Perplexity on valid.txt: 335.70382460091	


Perplexity on valid: 6.3367377316278	


Epoch 12: 439.25250911713	
Average Perplexity on train: 322.75947617039	


Perplexity on valid.txt: 326.21265528821	


Perplexity on valid: 6.2646586589572	


Epoch 13: 437.93637800217	
Average Perplexity on train: 312.51662767159	


Perplexity on valid.txt: 317.64534339779	


Perplexity on valid: 6.198286531227	


Epoch 14: 437.52219605446	
Average Perplexity on train: 303.17209168724	


Perplexity on valid.txt: 309.86898733408	


Perplexity on valid: 6.1371942499275	


Epoch 15: 437.57384490967	
Average Perplexity on train: 294.59906686244	


Perplexity on valid.txt: 302.77414250815	


Perplexity on valid: 6.0808821564847	


Epoch 16: 438.14680194855	
Average Perplexity on train: 286.6952607716	


Perplexity on valid.txt: 296.26926494731	


Perplexity on valid: 6.0288753184055	


Epoch 17: 437.56386184692	
Average Perplexity on train: 279.3772274862	


Perplexity on valid.txt: 290.28151079237	


Perplexity on valid: 5.9806027627422	


Epoch 18: 437.68017911911	
Average Perplexity on train: 272.57639873033	


Perplexity on valid.txt: 284.75322217638	


Perplexity on valid: 5.9355272990494	


Epoch 19: 438.39722704887	
Average Perplexity on train: 266.23748893407	


Perplexity on valid.txt: 279.63531129512	


Perplexity on valid: 5.8931416469176	


Epoch 20: 437.97945809364	
Average Perplexity on train: 260.31190936053	


Perplexity on valid.txt: 274.88321109219	


Perplexity on valid: 5.8531056498463	


Epoch 21: 437.68694591522	
Average Perplexity on train: 254.7575250141	


Perplexity on valid.txt: 270.45718040551	


Perplexity on valid: 5.8151450727278	


Epoch 22: 437.4172911644	
Average Perplexity on train: 249.53665455955	


Perplexity on valid.txt: 266.32196549948	


Perplexity on valid: 5.7791420384127	


Epoch 23: 437.95185995102	
Average Perplexity on train: 244.61664732927	


Perplexity on valid.txt: 262.44770892005	


Perplexity on valid: 5.7449874566149	


Epoch 24: 438.25385594368	
Average Perplexity on train: 239.96957797857	


Perplexity on valid.txt: 258.80916918393	


Perplexity on valid: 5.7126058674399	


Epoch 25: 437.70601701736	
Average Perplexity on train: 235.57116926444	


Perplexity on valid.txt: 255.38467631097	


Perplexity on valid: 5.6819592787889	


In [8]:
-- Build the normalized predictions of nnlm2 for the test set. Test rows are
-- split like the validation rows: columns 1..50 are the words to score and
-- columns 51..55 the context.
local test_rows = data.test
test_topredict = test_rows:narrow(2, 1, 50)
test_input = test_rows:narrow(2, 51, 5)

local n_test = test_input:size(1)
kag_pred_test2 = torch.Tensor(n_test, 50)
norm_mat2 = torch.Tensor(n_test, 50)

-- Probabilities of the 50 scored words of every row (exp of the log-softmax output)
kag_pred_test2:zero()
for row = 1, n_test do
    local log_probs = nnlm2:forward(test_input[row])
    local scored = log_probs:index(2, test_topredict[row])
    kag_pred_test2[row]:copy(scored):exp()
end

-- Divide every row by its own sum so the 50 probabilities add up to 1
local row_sums = kag_pred_test2:sum(2)
norm_mat2:copy(torch.expandAs(row_sums, kag_pred_test2))
kag_pred_test2:cdiv(norm_mat2)

In [9]:
-- Store the normalized predictions of nnlm2 in an HDF5 file: the test matrix
-- under 'test' and the validation matrix under 'valid'.
filename = 'bengio_2.f5'
myFile = hdf5.open(filename, 'w')
myFile:write('test', kag_pred_test2)
myFile:write('valid',kag_pred_valid2)
myFile:close()

In [10]:
-- Store the per-epoch perplexity histories of nnlm2 in an HDF5 file.
-- NOTE(review): the dataset named 'test' holds the valid.txt perplexities
-- (valtxtperp), not test-set results; 'valid' holds the Kaggle validation
-- perplexities (valkagperp). Readers of this file must use the same keys.
filename = 'bengio_2_perp.f5'
myFile = hdf5.open(filename, 'w')
myFile:write('test', valtxtperp)
myFile:write('valid',valkagperp)
myFile:close()

In [11]:
-- Serialize the trained nnlm2 model to disk.
torch.save('../../../nnlm2',nnlm2)


