In [1]:
require 'nn'
require 'hdf5'
require 'optim'


In [2]:
myFile = hdf5.open('6-grams.hdf5','r')
data = myFile:all()
myFile:close()
print(data)

{
  test : LongTensor - size: 3761x55
  nwords : LongTensor - size: 1
  train_1000_nocounts : LongTensor - size: 696825x7
  train_nocounts : DoubleTensor - size: 887522x6
  train_1000 : DoubleTensor - size: 887522x6
  train : LongTensor - size: 772670x7
  valid : LongTensor - size: 3370x55
  valid_txt : DoubleTensor - size: 70391x6
  valid_output : LongTensor - size: 3370x50
}


In [3]:
train = data['train_nocounts']:narrow(2,1,6)
train_input = train:narrow(2,1,5)
train_output = train:narrow(2,6,1)

valid_txt = data['valid_txt']:narrow(2,1,6)
valid_txt_input = valid_txt:narrow(2,1,5)
valid_txt_output = valid_txt:narrow(2,6,1)

valid_topredict = data['valid']:narrow(2,1,50)
valid_input = data['valid']:narrow(2,51,5)
valid_output = data['valid_output']

In [4]:
-- Model
nwords = 10001
N = 5
dwin = N
hid1 = 30
hid2 = 100

-- To store the whole model
nnlm = nn.Sequential()

-- Layer to embedd (and put the words along the window into one vector)
LT = nn.Sequential()
LT_ = nn.LookupTable(nwords,hid1)
LT:add(LT_)
LT:add(nn.View(-1, hid1*dwin))

nnlm:add(LT)

concat = nn.ConcatTable()

lin_tanh = nn.Sequential()
lin_tanh:add(nn.Linear(hid1*dwin,hid2))
lin_tanh:add(nn.Tanh())

id = nn.Identity()

concat:add(lin_tanh)
concat:add(id)

nnlm:add(concat)
nnlm:add(nn.JoinTable(2))
nnlm:add(nn.Linear(hid1*dwin + hid2, nwords))
nnlm:add(nn.LogSoftMax())

-- Loss
criterion = nn.ClassNLLCriterion()


nEpochs = 1
batchSize = 32
eta = 0.01
av_L = 0

inputs_batch = torch.DoubleTensor(batchSize,dwin)
targets_batch = torch.DoubleTensor(batchSize)
outputs = torch.DoubleTensor(batchSize, nwords)
dL_do = torch.DoubleTensor(batchSize, nwords)

kag_pred_valid = torch.Tensor(valid_input:size(1),50)
norm_mat = torch.Tensor(valid_input:size(1),50)

In [5]:
parameters, gradParameters = nnlm:getParameters()

In [7]:
for i = 1, 1 do
    -- timing the epoch
    timer = torch.Timer()
    av_L = 0
    
    -- max renorm
    LT_.weight:renorm(2,1,1)
    
    -- mini batch loop
    for t = 1, train_input:size(1), batchSize do
        -- Mini batch data
        current_batch_size = math.min(batchSize,train_input:size(1)-t)
        inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
        targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
        
        -- reset gradients
        nnlm:zeroGradParameters()
        --gradParameters:zero()

        -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
        outputs:narrow(1,1,current_batch_size):copy(nnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

        -- Average loss computation
        L = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
        av_L = av_L + L

        -- Backward pass
        dL_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size),
                targets_batch:narrow(1,1,current_batch_size)))
        nnlm:backward(inputs_batch:narrow(1,1,current_batch_size), dL_do:narrow(1,1,current_batch_size))
        nnlm:updateParameters(eta)
        
    end
        
    print('Epoch '..i..': '..timer:time().real)
    print('Average Perplexity on train: '.. math.exp(av_L/math.floor(train_input:size(1)/batchSize)))
    
    
    -- Evaluating perplexity on valiadation_txt:
    
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))
    
    -- Evaluatin perplexity on validation kaggle:
    kag_pred_valid:zero()

    for ii = 1, valid_input:size(1) do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end
    
    norm_mat:zero()
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)
    
    CE = 0
    for iii = 1, valid_input:size(1) do
        mm,aa = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][aa[1]])
    end
    
    val_res = math.exp(-CE/kag_pred_valid:size(1))
    print('Perplexity on valid: '..val_res)
    
    
    
end

Epoch 1: 817.29979610443	
Average Perplexity on train: 1038.2426267938	


/Users/virgileaudi/torch/install/share/lua/5.1/nn/THNN.lua:1091: multi-target not supported at /tmp/luarocks_nn-scm-1-9848/nn/lib/THNN/generic/ClassNLLCriterion.c:18
stack traceback:
	[C]: in function 'v'
	/Users/virgileaudi/torch/install/share/lua/5.1/nn/THNN.lua:1091: in function 'ClassNLLCriterion_updateOutput'
	...udi/torch/install/share/lua/5.1/nn/ClassNLLCriterion.lua:41: in function 'forward'
	[string "for i = 1, 1 do..."]:41: in main chunk
	[C]: in function 'xpcall'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:179: in function <.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:143>
	.../virgileaudi/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0102352b50: 

In [16]:
print (criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze()))

6.4636394432108	70391	


In [28]:
nnlm:forward(valid_input[1]):size()

     1
 10001
[torch.LongStorage of size 2]



In [30]:
kag_pred_valid:zero()
for ii = 1, valid_input:size(1) do
    kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
end

norm_mat:zero()
norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
kag_pred_valid:cdiv(norm_mat)

CE = 0
for iii = 1, valid_input:size(1) do
    mm,aa = valid_output[iii]:max(1)
    CE = CE + math.log(kag_pred_valid[iii][aa[1]])
end

val_res = math.exp(-CE/kag_pred_valid:size(1))
print('Perplexity on valid: '..val_res)



Perplexity on valid: 8.0082783571626	


### NCE:

In [31]:
for i = 1, 1 do
    -- timing the epoch
    timer = torch.Timer()
    av_L = 0
    
    -- max renorm
    LT_.weight:renorm(2,1,1)
    
    -- mini batch loop
    for t = 1, train_input:size(1), batchSize do
        -- Mini batch data
        current_batch_size = math.min(batchSize,train_input:size(1)-t)
        inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
        targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
        
        -- reset gradients
        nnlm:zeroGradParameters()
        --gradParameters:zero()

        -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
        outputs:narrow(1,1,current_batch_size):copy(nnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

        -- Average loss computation
        L = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
        av_L = av_L + L

        -- Backward pass
        dL_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size),
                targets_batch:narrow(1,1,current_batch_size)))
        nnlm:backward(inputs_batch:narrow(1,1,current_batch_size), dL_do:narrow(1,1,current_batch_size))
        nnlm:updateParameters(eta)
        
    end
        
    print('Epoch '..i..': '..timer:time().real)
    print('Average Perplexity on train: '.. math.exp(av_L/math.floor(train_input:size(1)/batchSize)))
    
    
    -- Evaluating perplexity on valiadation_txt:
    
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))
    
    -- Evaluatin perplexity on validation kaggle:
    kag_pred_valid:zero()

    for ii = 1, valid_input:size(1) do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end
    
    norm_mat:zero()
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)
    
    CE = 0
    for iii = 1, valid_input:size(1) do
        mm,aa = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][aa[1]])
    end
    
    val_res = math.exp(-CE/kag_pred_valid:size(1))
    print('Perplexity on valid: '..val_res)
    
end

Epoch 1: 837.54529905319	
Average Perplexity on train: 601.86348771938	


Perplexity on valid.txt: 544.09100456299	


Perplexity on valid: 7.5411670975744	


In [32]:
for i = 3, 8 do
    -- timing the epoch
    timer = torch.Timer()
    av_L = 0
    
    -- max renorm
    LT_.weight:renorm(2,1,1)
    
    -- mini batch loop
    for t = 1, train_input:size(1), batchSize do
        -- Mini batch data
        current_batch_size = math.min(batchSize,train_input:size(1)-t)
        inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
        targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
        
        -- reset gradients
        nnlm:zeroGradParameters()
        --gradParameters:zero()

        -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
        outputs:narrow(1,1,current_batch_size):copy(nnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

        -- Average loss computation
        L = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
        av_L = av_L + L

        -- Backward pass
        dL_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size),
                targets_batch:narrow(1,1,current_batch_size)))
        nnlm:backward(inputs_batch:narrow(1,1,current_batch_size), dL_do:narrow(1,1,current_batch_size))
        nnlm:updateParameters(eta)
        
    end
        
    print('Epoch '..i..': '..timer:time().real)
    print('Average Perplexity on train: '.. math.exp(av_L/math.floor(train_input:size(1)/batchSize)))
    
    
    -- Evaluating perplexity on valiadation_txt:
    
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))
    
    -- Evaluatin perplexity on validation kaggle:
    kag_pred_valid:zero()

    for ii = 1, valid_input:size(1) do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end
    
    norm_mat:zero()
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)
    
    CE = 0
    for iii = 1, valid_input:size(1) do
        mm,aa = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][aa[1]])
    end
    
    val_res = math.exp(-CE/kag_pred_valid:size(1))
    print('Perplexity on valid: '..val_res)
    
end

Epoch 3: 825.9562330246	
Average Perplexity on train: 527.4359496501	


Perplexity on valid.txt: 490.41028578536	


Perplexity on valid: 7.3124630553358	


Epoch 4: 810.81244516373	
Average Perplexity on train: 478.24240750295	


Perplexity on valid.txt: 451.28884307244	


Perplexity on valid: 7.1345689871114	


Epoch 5: 809.59715104103	
Average Perplexity on train: 441.10636934143	


Perplexity on valid.txt: 421.14663777603	


Perplexity on valid: 6.9774349905299	


Epoch 6: 815.37426805496	
Average Perplexity on train: 411.90963927563	


Perplexity on valid.txt: 397.32171347555	


Perplexity on valid: 6.8419065078654	


Epoch 7: 810.07148694992	
Average Perplexity on train: 388.22452792996	


Perplexity on valid.txt: 377.90359626936	


Perplexity on valid: 6.7242994567635	


Epoch 8: 843.48118114471	
Average Perplexity on train: 368.57023037956	


Perplexity on valid.txt: 361.76307273096	


Perplexity on valid: 6.6223244922863	


In [33]:
for i = 9, 15 do
    -- timing the epoch
    timer = torch.Timer()
    av_L = 0
    
    -- max renorm
    LT_.weight:renorm(2,1,1)
    
    -- mini batch loop
    for t = 1, train_input:size(1), batchSize do
        -- Mini batch data
        current_batch_size = math.min(batchSize,train_input:size(1)-t)
        inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
        targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
        
        -- reset gradients
        nnlm:zeroGradParameters()
        --gradParameters:zero()

        -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
        outputs:narrow(1,1,current_batch_size):copy(nnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

        -- Average loss computation
        L = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
        av_L = av_L + L

        -- Backward pass
        dL_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size),
                targets_batch:narrow(1,1,current_batch_size)))
        nnlm:backward(inputs_batch:narrow(1,1,current_batch_size), dL_do:narrow(1,1,current_batch_size))
        nnlm:updateParameters(eta)
        
    end
        
    print('Epoch '..i..': '..timer:time().real)
    print('Average Perplexity on train: '.. math.exp(av_L/math.floor(train_input:size(1)/batchSize)))
    
    
    -- Evaluating perplexity on valiadation_txt:
    
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))
    
    -- Evaluatin perplexity on validation kaggle:
    kag_pred_valid:zero()

    for ii = 1, valid_input:size(1) do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end
    
    norm_mat:zero()
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)
    
    CE = 0
    for iii = 1, valid_input:size(1) do
        mm,aa = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][aa[1]])
    end
    
    val_res = math.exp(-CE/kag_pred_valid:size(1))
    print('Perplexity on valid: '..val_res)
    
end

Epoch 9: 827.41940593719	
Average Perplexity on train: 351.92530292078	


Perplexity on valid.txt: 348.08247912807	


Perplexity on valid: 6.5329550483487	


Epoch 10: 818.73921298981	
Average Perplexity on train: 337.5367575987	


Perplexity on valid.txt: 336.26468217226	


Perplexity on valid: 6.4530105725527	


Epoch 11: 810.9077231884	
Average Perplexity on train: 324.88169664599	


Perplexity on valid.txt: 325.9017569163	


Perplexity on valid: 6.3802435933612	


Epoch 12: 818.34377193451	
Average Perplexity on train: 313.59659021479	


Perplexity on valid.txt: 316.70651340805	


Perplexity on valid: 6.3133236716505	


Epoch 13: 875.60341501236	
Average Perplexity on train: 303.4203653605	


Perplexity on valid.txt: 308.46612556472	


Perplexity on valid: 6.2514416608928	


Epoch 14: 931.39901995659	
Average Perplexity on train: 294.15795710215	


Perplexity on valid.txt: 301.02181851019	


Perplexity on valid: 6.1939794227997	


Epoch 15: 947.89197707176	
Average Perplexity on train: 285.66596548785	


Perplexity on valid.txt: 294.25169224865	


Perplexity on valid: 6.1404268036459	


In [None]:
for i = 16, 20 do
    -- timing the epoch
    timer = torch.Timer()
    av_L = 0
    
    -- max renorm
    LT_.weight:renorm(2,1,1)
    
    -- mini batch loop
    for t = 1, train_input:size(1), batchSize do
        -- Mini batch data
        current_batch_size = math.min(batchSize,train_input:size(1)-t)
        inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
        targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
        
        -- reset gradients
        nnlm:zeroGradParameters()
        --gradParameters:zero()

        -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
        outputs:narrow(1,1,current_batch_size):copy(nnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

        -- Average loss computation
        L = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
        av_L = av_L + L

        -- Backward pass
        dL_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size),
                targets_batch:narrow(1,1,current_batch_size)))
        nnlm:backward(inputs_batch:narrow(1,1,current_batch_size), dL_do:narrow(1,1,current_batch_size))
        nnlm:updateParameters(0.001)
        
    end
        
    print('Epoch '..i..': '..timer:time().real)
    print('Average Perplexity on train: '.. math.exp(av_L/math.floor(train_input:size(1)/batchSize)))
    
    
    -- Evaluating perplexity on valiadation_txt:
    
    print('Perplexity on valid.txt: '..math.exp(criterion:forward(nnlm:forward(valid_txt_input),valid_txt_output:squeeze())))
    
    -- Evaluatin perplexity on validation kaggle:
    kag_pred_valid:zero()

    for ii = 1, valid_input:size(1) do
        kag_pred_valid[ii]:copy(nnlm:forward(valid_input[ii]):index(2, valid_topredict[ii])):exp()
    end
    
    norm_mat:zero()
    norm_mat:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
    kag_pred_valid:cdiv(norm_mat)
    
    CE = 0
    for iii = 1, valid_input:size(1) do
        mm,aa = valid_output[iii]:max(1)
        CE = CE + math.log(kag_pred_valid[iii][aa[1]])
    end
    
    val_res = math.exp(-CE/kag_pred_valid:size(1))
    print('Perplexity on valid: '..val_res)
    
end