In [1]:
require 'hdf5';
require 'nn';

In [50]:
function build_model(dwin, nchar, nclass, hid1, hid2)
    -- Model with skip layer from Bengio, standards parameters
    -- should be:
    -- dwin = 5
    -- hid1 = 30
    -- hid2 = 100

    -- To store the whole model
    local dnnlm = nn.Sequential()

    -- Layer to embedd (and put the words along the window into one vector)
    local LT = nn.Sequential()
    local LT_ = nn.LookupTable(nchar,hid1)
    LT:add(LT_)
    LT:add(nn.View(-1, hid1*dwin))

    dnnlm:add(LT)

    local concat = nn.ConcatTable()

    local lin_tanh = nn.Sequential()
    lin_tanh:add(nn.Linear(hid1*dwin,hid2))
    lin_tanh:add(nn.Tanh())

    local id = nn.Identity()

    concat:add(lin_tanh)
    concat:add(id)

    dnnlm:add(concat)
    dnnlm:add(nn.JoinTable(2))
    dnnlm:add(nn.Linear(hid1*dwin + hid2, nclass))
    dnnlm:add(nn.LogSoftMax())

    -- Loss
    local criterion = nn.ClassNLLCriterion()

    return dnnlm, criterion
end

function compute_perplexity(gram_input, nnlm, crit, N)
    local perp = 0
    local context = torch.zeros(N-1)
    local probability = torch.zeros(2)
    -- Do not predict for the last char
    --for i=1,gram_input:size(1)-N do
    local size=gram_input:size(1) - (N-1)
    for i=1,size do
        context:copy(gram_input:narrow(1,i,N-1))
        -- Line where the model appears
        probability:copy(nnlm:forward(context))
        if gram_input[i+(N-1)] == 1 then
            L = crit:forward(probability,1)
        else
            L = crit:forward(probability,2)
        end
        perp = perp + L
    end
    perp = math.exp(perp/size)
    return perp
end


function train_model(train_input, train_output, dnnlm, criterion, dwin, nclass, eta, nEpochs, batchSize)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01

    -- To store the loss
    local av_L = 0

    -- Memory allocation
    local inputs_batch = torch.DoubleTensor(batchSize,dwin)
    local targets_batch = torch.DoubleTensor(batchSize)
    local outputs = torch.DoubleTensor(batchSize, nclass)
    local df_do = torch.DoubleTensor(batchSize, nclass)
    
    local train_perplexity = torch.DoubleTensor(nEpochs)
    
    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        av_L = 0
        
        -- max renorm of the lookup table
        dnnlm:get(1):get(1).weight:renorm(2,1,1)
        
        -- mini batch loop
        for t = 1, train_input:size(1), batchSize do
            -- Mini batch data
            local current_batch_size = math.min(batchSize,train_input:size(1)-t)
            inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
            targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
            
            -- reset gradients
            dnnlm:zeroGradParameters()
            --gradParameters:zero()

            -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
            outputs:narrow(1,1,current_batch_size):copy(dnnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

            -- Average loss computation
            local f = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
            av_L = av_L +f

            -- Backward pass
            df_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size)))
            dnnlm:backward(inputs_batch:narrow(1,1,current_batch_size), df_do:narrow(1,1,current_batch_size))
            dnnlm:updateParameters(eta)
            
        end
            
        print('Epoch '..i..': '..timer:time().real)
        print('Average Loss: '..av_L/math.floor(train_input:size(1)/batchSize))
        
        train_perplexity[i] = math.exp(av_L/math.floor(train_input:size(1)/batchSize))
    end

    return train_perplexity

end

In [3]:
myFile = hdf5.open('../data_preprocessed/4-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 4

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()

input_data_test = data['input_data_test']:clone()

In [8]:
torch.manualSeed(1)
nnlm1, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_3, perp_valid_3 = train_model(train_input, train_output, nnlm1, crit, N-1, 2, 0.01, 15, 20, input_data_valid, N)

Epoch 1: 20.381507873535	


Average Loss: 0.2999279016966	


Epoch 2: 20.16481089592	
Average Loss: 0.26833430403236	


Epoch 3: 20.844784021378	
Average Loss: 0.24193435098863	


Epoch 4: 17.762979984283	
Average Loss: 0.22844307013896	


Epoch 5: 17.731731176376	
Average Loss: 0.21732259188103	


Epoch 6: 17.609931945801	
Average Loss: 0.20984310688726	


Epoch 7: 17.612291097641	
Average Loss: 0.20524385023812	


Epoch 8: 17.596704959869	
Average Loss: 0.2020497821542	


Epoch 9: 17.577955961227	
Average Loss: 0.19964882002223	


Epoch 10: 17.762266874313	
Average Loss: 0.19777197601831	


Epoch 11: 18.534914970398	
Average Loss: 0.19617452139248	


Epoch 12: 18.180015087128	
Average Loss: 0.19464074499331	


Epoch 13: 17.259115934372	
Average Loss: 0.19310638144052	


Epoch 14: 17.277981996536	
Average Loss: 0.19163403265493	


Epoch 15: 17.273786783218	
Average Loss: 0.19025300134216	


In [44]:
myFile = hdf5.open('../data_preprocessed/5-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 5

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()

input_data_test = data['input_data_test']:clone()

In [12]:
torch.manualSeed(1)
nnlm2, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_4, perp_valid_4 = train_model(train_input, train_output, nnlm2, crit, N-1, 2, 0.01, 15, 20, input_data_valid, N)

Epoch 1: 17.849504947662	
Average Loss: 0.29443654451786	


Epoch 2: 17.788382053375	
Average Loss: 0.27378908120055	


Epoch 3: 20.099086999893	
Average Loss: 0.24403272683775	


Epoch 4: 17.813330888748	
Average Loss: 0.22213200721034	


Epoch 5: 18.144514083862	
Average Loss: 0.20911243675907	


Epoch 6: 17.942358016968	
Average Loss: 0.19831868559116	


Epoch 7: 17.967022895813	
Average Loss: 0.18999367837257	


Epoch 8: 17.963499069214	
Average Loss: 0.18388471579704	


Epoch 9: 19.656719923019	
Average Loss: 0.17968586361457	


Epoch 10: 18.448729038239	
Average Loss: 0.17670111663555	


Epoch 11: 18.227150917053	
Average Loss: 0.17436727230211	


Epoch 12: 17.678957939148	
Average Loss: 0.17243752242786	


Epoch 13: 17.738479852676	
Average Loss: 0.17078575326792	


Epoch 14: 17.798008918762	
Average Loss: 0.16932815903052	


Epoch 15: 21.19105887413	
Average Loss: 0.16800128690437	


In [13]:
perp_valid_4

 1.5521
 1.5767
 1.6266
 1.6342
 1.6224
 1.6164
 1.6104
 1.6083
 1.6069
 1.6068
 1.6084
 1.6115
 1.6155
 1.6196
 1.6231
[torch.DoubleTensor of size 15]



In [45]:
myFile = hdf5.open('../data_preprocessed/6-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 6

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()

input_data_test = data['input_data_test']:clone()

In [49]:
torch.manualSeed(1)
nnlm3, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_5 = train_model(train_input, train_output, nnlm3, crit, N-1, 2, 0.01, 15, 20, input_data_valid, N)

Epoch 1: 18.536489963531	
Average Loss: 0.295621389953	


Epoch 2: 17.972072124481	
Average Loss: 0.27468023492365	


Epoch 3: 18.546638011932	
Average Loss: 0.25618872109793	


Epoch 4: 19.354636192322	
Average Loss: 0.22655003590524	


Epoch 5: 17.919018983841	
Average Loss: 0.20997346406349	


Epoch 6: 19.95677280426	
Average Loss: 0.19872725299921	


Epoch 7: 18.224895000458	
Average Loss: 0.18992938442935	


Epoch 8: 18.835289955139	
Average Loss: 0.18301274359037	


Epoch 9: 17.934269189835	
Average Loss: 0.17770018348404	


Epoch 10: 17.851098060608	
Average Loss: 0.17367809783134	


Epoch 11: 18.178263187408	
Average Loss: 0.1704792645748	


Epoch 12: 18.148210048676	
Average Loss: 0.16774662850185	


Epoch 13: 18.001205205917	
Average Loss: 0.16530856897846	


Epoch 14: 17.900924921036	
Average Loss: 0.16307442759655	


Epoch 15: 17.944164037704	
Average Loss: 0.16100459655088	
