In [1]:
require 'nn';
require 'hdf5';

In [2]:
function build_model(dwin, nchar, nclass, hid1, hid2)
    -- Model with skip layer from Bengio, standards parameters
    -- should be:
    -- dwin = 5
    -- hid1 = 30
    -- hid2 = 100

    -- To store the whole model
    local dnnlm = nn.Sequential()

    -- Layer to embedd (and put the words along the window into one vector)
    local LT = nn.Sequential()
    local LT_ = nn.LookupTable(nchar,hid1)
    LT:add(LT_)
    LT:add(nn.View(-1, hid1*dwin))

    dnnlm:add(LT)

    local concat = nn.ConcatTable()

    local lin_tanh = nn.Sequential()
    lin_tanh:add(nn.Linear(hid1*dwin,hid2))
    lin_tanh:add(nn.Tanh())

    local id = nn.Identity()

    concat:add(lin_tanh)
    concat:add(id)

    dnnlm:add(concat)
    dnnlm:add(nn.JoinTable(2))
    dnnlm:add(nn.Linear(hid1*dwin + hid2, nclass))
    dnnlm:add(nn.LogSoftMax())

    -- Loss
    local criterion = nn.ClassNLLCriterion()

    return dnnlm, criterion
end


function train_model(train_input, train_output, dnnlm, criterion, dwin, nclass, eta, nEpochs, batchSize)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01

    -- To store the loss
    local av_L = 0

    -- Memory allocation
    local inputs_batch = torch.DoubleTensor(batchSize,dwin)
    local targets_batch = torch.DoubleTensor(batchSize)
    local outputs = torch.DoubleTensor(batchSize, nclass)
    local df_do = torch.DoubleTensor(batchSize, nclass)

    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        av_L = 0
        
        -- max renorm of the lookup table
        dnnlm:get(1):get(1).weight:renorm(2,1,1)
        
        -- mini batch loop
        for t = 1, train_input:size(1), batchSize do
            -- Mini batch data
            local current_batch_size = math.min(batchSize,train_input:size(1)-t)
            inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
            targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
            
            -- reset gradients
            dnnlm:zeroGradParameters()
            --gradParameters:zero()

            -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
            outputs:narrow(1,1,current_batch_size):copy(dnnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

            -- Average loss computation
            local f = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
            av_L = av_L +f

            -- Backward pass
            df_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size)))
            dnnlm:backward(inputs_batch:narrow(1,1,current_batch_size), df_do:narrow(1,1,current_batch_size))
            dnnlm:updateParameters(eta)
            
        end
            
        print('Epoch '..i..': '..timer:time().real)
        print('Average Loss: '..av_L/math.floor(train_input:size(1)/batchSize))
       
    end

end

In [3]:
myFile = hdf5.open('../data_preprocessed/6-grams.hdf5','r')
data = myFile:all()
myFile:close()

In [4]:
data

{
  input_data_test : LongTensor - size: 367519
  input_data_valid : LongTensor - size: 115720
  F_train : DoubleTensor - size: 65033x7
  input_data_train : LongTensor - size: 599909
  input_matrix_train : DoubleTensor - size: 599903x5
  input_data_valid_nospace : LongTensor - size: 95827
  output_matrix_train : DoubleTensor - size: 599903
}


In [5]:
Nwin = 5

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']

valid_space = data['input_data_valid_nospace']
valid = data['input_data_valid_nospace']:clone()

test = data['input_data_test']:clone()

In [6]:
torch.manualSeed(1)

nnlm1, crit = build_model(Nwin, 49, 2, 20, 16)

train_model(train_input, train_output, nnlm1, crit, Nwin, 2, 0.01, 20, 10)

Epoch 1: 30.32079577446	
Average Loss: 0.28291292916549	


Epoch 2: 30.03511595726	
Average Loss: 0.22909422380413	


Epoch 3: 28.832560062408	
Average Loss: 0.20544086393986	


Epoch 4: 28.815920829773	
Average Loss: 0.19439121899764	


Epoch 5: 30.704405069351	
Average Loss: 0.18717225893088	


Epoch 6: 30.742899179459	
Average Loss: 0.18127659217664	


Epoch 7: 31.944208860397	
Average Loss: 0.1765919324312	


Epoch 8: 34.879153966904	
Average Loss: 0.17284293586983	


Epoch 9: 32.785515069962	
Average Loss: 0.16978364828228	


Epoch 10: 32.413477182388	
Average Loss: 0.16708374429545	


Epoch 11: 28.789509057999	
Average Loss: 0.16467734778624	


Epoch 12: 29.227755069733	
Average Loss: 0.16264201066196	


Epoch 13: 32.339390993118	
Average Loss: 0.16092642410432	


Epoch 14: 35.106617927551	
Average Loss: 0.15947024681839	


Epoch 15: 33.359790086746	
Average Loss: 0.15822392705003	


Epoch 16: 35.18958902359	
Average Loss: 0.15711771041754	


Epoch 17: 30.188794851303	
Average Loss: 0.15611926878796	


Epoch 18: 29.59033203125	
Average Loss: 0.15523507092617	


Epoch 19: 36.355064153671	
Average Loss: 0.15446284926575	


Epoch 20: 30.361381053925	
Average Loss: 0.15377656872611	


### Greedy:

In [7]:
it = 1
i = 1
nextpred = torch.Tensor(2)

print('here')
while it<data['input_data_valid_nospace']:size(1)-(Nwin-1) do
    it = it + 1
    nextpred:copy(nnlm1:forward(valid:narrow(1,i,Nwin)));
    m, argm = nextpred:max(1)

    if argm[1] == 2 then
        i = i + 1
    elseif argm[1] == 1 then 
        valid_ = torch.LongTensor(valid:size(1)+1)
        valid_:narrow(1,1,i+(Nwin-1)):copy(valid:narrow(1,1,i+(Nwin-1)))
        valid_[i+Nwin] = 1
        valid_:narrow(1,i+(Nwin-1)+2,valid:size(1)-i-(Nwin-1)):copy(valid:narrow(1,i+(Nwin-1)+1,valid:size(1)-i-(Nwin-1)))
        valid = valid_
        i = i + 2
    end 
end

here	


In [9]:
num_sent = 0
for i = 5,valid_space:size(1) do
    if valid_space[i] == 2 then
        num_sent = num_sent + 1
    end
end
print(num_sent)

1002	


In [16]:
num_spaces = torch.DoubleTensor(num_sent,2)
row = 1
count_space = 0
for i=5,valid:size(1) do
    if valid[i] == 2 then
        num_spaces[{row, 1}] = row
        num_spaces[{row, 2}] = count_space
        count_space = 0
        row = row + 1
    elseif valid[i] == 1 then
        count_space = count_space + 1
    end
end

In [17]:
num_spaces[1]

 1
 7
[torch.DoubleTensor of size 2]



In [10]:
it = 1
i = 1
nextpred = torch.Tensor(2)

print('here')
while it<data['input_data_test']:size(1)-(Nwin-1) do
    it = it + 1
    nextpred:copy(nnlm1:forward(test:narrow(1,i,Nwin)));
    m, argm = nextpred:max(1)

    if argm[1] == 2 then
        i = i + 1
    elseif argm[1] == 1 then 
        test_ = torch.LongTensor(test:size(1)+1)
        test_:narrow(1,1,i+(Nwin-1)):copy(test:narrow(1,1,i+(Nwin-1)))
        test_[i+Nwin] = 1
        test_:narrow(1,i+(Nwin-1)+2,test:size(1)-i-(Nwin-1)):copy(test:narrow(1,i+(Nwin-1)+1,test:size(1)-i-(Nwin-1)))
        test = test_
        i = i + 2
    end 
end

here	


In [11]:
num_sent = 0
for i = 5,test:size(1) do
    if test[i] == 2 then
        num_sent = num_sent + 1
    end
end

print(num_sent)

3761	


In [12]:
num_spaces_test = torch.DoubleTensor(num_sent,2)
row = 1
count_space = 0
for i=5,test:size(1) do
    if test[i] == 2 then
        num_spaces_test[{row, 1}] = row
        num_spaces_test[{row, 2}] = count_space
        count_space = 0
        row = row + 1
    elseif test[i] == 1 then
        count_space = count_space + 1
    end
end

In [15]:
count_space:sum(1)

[string "local f = function() return count_space:sum(1..."]:1: attempt to index global 'count_space' (a number value)
stack traceback:
	[string "local f = function() return count_space:sum(1..."]:1: in function 'f'
	[string "local f = function() return count_space:sum(1..."]:1: in main chunk
	[C]: in function 'xpcall'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:179: in function <.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:143>
	.../virgileaudi/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x010977cb50: 

In [23]:
myFile = hdf5.open('../submission/pred_test_greedy_nn_5', 'w')
myFile:write('num_spaces', num_spaces_test)
myFile:close()

### Viturby:

In [24]:
function predict_viterbi(nnlm, gram_input)
    -- Backpointer
    local score
    local bp = torch.zeros(gram_input:size(1) + 1, 2)
    local context = torch.DoubleTensor(1)
    local y_hat = torch.DoubleTensor(2)
    local pi = torch.ones(gram_input:size(1) + 1, 2):mul(-9999)
    -- Initialization
    pi[{1,1}] = 0
    -- i is shifted
    for i=2,gram_input:size(1)+1 do
        for c_prev =1,2 do
            -- Precompute y_hat(c_prev)
            if c_prev == 1 then
                context[1] = c_prev
            else
                context[1] = gram_input[i-1]
            end
            -- Line where the model appears
            y_hat:copy(nnlm:forward(context))

            for c_current =1,2 do
                score = pi[{i-1, c_prev}] + math.log(y_hat[c_current])
                if score > pi[{i, c_current}] then
                    pi[{i, c_current}] = score
                    bp[{i, c_current}] = c_prev
                end
            end
        end
    end
    return pi, bp
end

In [25]:
-- Building the sequences from the backpointer
function build_sequences_from_bp(bp, gram_input)
    local predictions = torch.DoubleTensor(2*gram_input:size(1))
    -- Next position to fill in predictions (have to do it backward)
    local position = 2*gram_input:size(1)
    local col = 2
    -- Loop until the 3rd position (because 2nd is the first one, could be set by hand)
    for i=bp:size(1),3,-1 do
        -- coming from a space
        if bp[i][col] == 1 then
            predictions[position] = 1
            position = position - 1
            col = 1
        else
            col = 2
        end
        -- index i is shifted of 1 wrt local index in gram_input
        predictions[position] = gram_input[i-1]
        position = position - 1
    end
    -- Beginnning of gram_input set
    predictions[position] = gram_input[1]
    position = position - 1

    return predictions:narrow(1,position+1,predictions:size(1)-position)
end

In [26]:
N = 2

myFile = hdf5.open('../data_preprocessed/'..tostring(N)..'-grams.hdf5','r')
data = myFile:all()
F_train = data['F_train']
input_data_test = data['input_data_test']
input_data_valid = data['input_data_valid']
input_data_train = data['input_data_train']
input_data_valid_nospace = data['input_data_valid_nospace']
myFile:close()

-- Dynamic Programming version for bigram
gram_input = input_data_test

pi, bp = predict_viterbi(nnlm1, gram_input)
pred = build_sequences_from_bp(bp, gram_input)

/Users/virgileaudi/torch/install/share/lua/5.1/nn/View.lua:49: input view (1x20) and desired view (-1x100) do not match
stack traceback:
	[C]: in function 'error'
	/Users/virgileaudi/torch/install/share/lua/5.1/nn/View.lua:49: in function 'batchsize'
	/Users/virgileaudi/torch/install/share/lua/5.1/nn/View.lua:80: in function 'updateOutput'
	...irgileaudi/torch/install/share/lua/5.1/nn/Sequential.lua:44: in function 'updateOutput'
	...irgileaudi/torch/install/share/lua/5.1/nn/Sequential.lua:44: in function 'forward'
	[string "function predict_viterbi(nnlm, gram_input)..."]:20: in function 'predict_viterbi'
	[string "N = 2..."]:15: in main chunk
	[C]: in function 'xpcall'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:179: in function <.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:143>
	.../virgileaudi/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...rgileaudi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	.../virgileaudi/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x01029c3b50: 