In [4]:
require 'hdf5';
require 'nn';

In [6]:
function build_model(dwin, nchar, nclass, hid1, hid2)
    -- Model with skip layer from Bengio, standards parameters
    -- should be:
    -- dwin = 5
    -- hid1 = 30
    -- hid2 = 100

    -- To store the whole model
    local dnnlm = nn.Sequential()

    -- Layer to embedd (and put the words along the window into one vector)
    local LT = nn.Sequential()
    local LT_ = nn.LookupTable(nchar,hid1)
    LT:add(LT_)
    LT:add(nn.View(-1, hid1*dwin))

    dnnlm:add(LT)

    local concat = nn.ConcatTable()

    local lin_tanh = nn.Sequential()
    lin_tanh:add(nn.Linear(hid1*dwin,hid2))
    lin_tanh:add(nn.Tanh())

    local id = nn.Identity()

    concat:add(lin_tanh)
    concat:add(id)

    dnnlm:add(concat)
    dnnlm:add(nn.JoinTable(2))
    dnnlm:add(nn.Linear(hid1*dwin + hid2, nclass))
    dnnlm:add(nn.LogSoftMax())

    -- Loss
    local criterion = nn.ClassNLLCriterion()

    return dnnlm, criterion
end


function train_model(train_input, train_output, dnnlm, criterion, dwin, nclass, eta, nEpochs, batchSize, val_space, val)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01

    -- To store the loss
    local av_L = 0

    -- Memory allocation
    local inputs_batch = torch.DoubleTensor(batchSize,dwin)
    local targets_batch = torch.DoubleTensor(batchSize)
    local outputs = torch.DoubleTensor(batchSize, nclass)
    local df_do = torch.DoubleTensor(batchSize, nclass)
    
    local true_kag = predict_kaggle(val_space)
    
    local tr = torch.Tensor(nEpochs)
    local vrmse = torch.Tensor(nEpochs)
    
    
    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        av_L = 0
        
        -- max renorm of the lookup table
        dnnlm:get(1):get(1).weight:renorm(2,1,1)
        
        -- mini batch loop
        for t = 1, train_input:size(1), batchSize do
            -- Mini batch data
            local current_batch_size = math.min(batchSize,train_input:size(1)-t)
            inputs_batch:narrow(1,1,current_batch_size):copy(train_input:narrow(1,t,current_batch_size))
            targets_batch:narrow(1,1,current_batch_size):copy(train_output:narrow(1,t,current_batch_size))
            
            -- reset gradients
            dnnlm:zeroGradParameters()
            --gradParameters:zero()

            -- Forward pass (selection of inputs_batch in case the batch is not full, ie last batch)
            outputs:narrow(1,1,current_batch_size):copy(dnnlm:forward(inputs_batch:narrow(1,1,current_batch_size)))

            -- Average loss computation
            local f = criterion:forward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size))
            av_L = av_L +f

            -- Backward pass
            df_do:narrow(1,1,current_batch_size):copy(criterion:backward(outputs:narrow(1,1,current_batch_size), targets_batch:narrow(1,1,current_batch_size)))
            dnnlm:backward(inputs_batch:narrow(1,1,current_batch_size), df_do:narrow(1,1,current_batch_size))
            dnnlm:updateParameters(eta)
            
        end
        Av = av_L/math.floor(train_input:size(1)/batchSize)
        tr[i] = Av
        print('Average Loss: '.. Av)
        
        tt = predict_space(dnnlm, val, dwin)
        tt_k = predict_kaggle(tt)
        
        rmse = torch.sqrt(torch.mean(torch.pow(tt_k:narrow(2,2,1)-true_kag:narrow(2,2,1),2)))
        vrmse[i] = rmse
        print('RMSE on valid: '.. rmse)
        
        print('Epoch '..i..': '..timer:time().real)
        
    end
    
    return tr, vrmse
end

In [7]:
function predict_space(nnlm, dat, Nwin)

    local dat_nospace = dat
    local s = dat:size(1)
    local it = 1
    local i = 1
    local nextpred = torch.Tensor(2)

    while it<s-(Nwin-1) do
        it = it + 1
        nextpred:copy(nnlm:forward(dat_nospace:narrow(1,i,Nwin)));
        m, argm = nextpred:max(1)

        if argm[1] == 2 then
            i = i + 1
        elseif argm[1] == 1 then 
            local dat_nospace_ = torch.LongTensor(dat_nospace:size(1)+1)
            dat_nospace_:narrow(1,1,i+(Nwin-1)):copy(dat_nospace:narrow(1,1,i+(Nwin-1)))
            dat_nospace_[i+Nwin] = 1
            dat_nospace_:narrow(1,i+(Nwin-1)+2,dat_nospace:size(1)-i-(Nwin-1)):copy(dat_nospace:narrow(1,i+(Nwin-1)+1,dat_nospace:size(1)-i-(Nwin-1)))
            dat_nospace = dat_nospace_
            i = i + 2
        end 
    end
    return dat_nospace
end

In [8]:
function predict_kaggle(dat_space)
    local num_sent = 0

    for i = 5,dat_space:size(1) do
        if dat_space[i] == 2 then
            num_sent = num_sent + 1
        end
    end

    local num_spaces = torch.DoubleTensor(num_sent,2)
    local row = 1
    local count_space = 0

    for i=5,dat_space:size(1) do
        if dat_space[i] == 2 then
            num_spaces[{row, 1}] = row
            num_spaces[{row, 2}] = count_space
            count_space = 0
            row = row + 1
        elseif dat_space[i] == 1 then
            count_space = count_space + 1
        end
    end

    return num_spaces
end

In [9]:
myFile = hdf5.open('../data_preprocessed/4-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 4

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()
input_data_valid_space = data['input_data_valid']
input_data_test = data['input_data_test']:clone()

In [10]:
torch.manualSeed(1)
nnlm1, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_3, rmse_valid_3 = train_model(train_input, train_output, nnlm1, crit, N-1, 2, 0.01, 15, 20,input_data_valid_space, input_data_valid)

Average Loss: 0.2999279016966	


RMSE on valid: 8.0990007022626	
Epoch 1: 37.549407958984	


Average Loss: 0.26833430403236	


RMSE on valid: 7.2910685526007	
Epoch 2: 37.056308031082	


Average Loss: 0.24193435098863	


RMSE on valid: 6.1446307477299	
Epoch 3: 40.801877975464	


Average Loss: 0.22844307013896	


RMSE on valid: 6.0093905224322	
Epoch 4: 38.326804876328	


Average Loss: 0.21732259188103	


RMSE on valid: 5.0090736032997	
Epoch 5: 40.018903017044	




Average Loss: 0.20984310688726	


RMSE on valid: 4.8695559192086	
Epoch 6: 41.548266887665	


Average Loss: 0.20524385023812	


RMSE on valid: 4.809857642272	
Epoch 7: 37.679234981537	


Average Loss: 0.2020497821542	


RMSE on valid: 4.9241755645001	
Epoch 8: 37.560721874237	


Average Loss: 0.19964882002223	


RMSE on valid: 4.8606325208757	
Epoch 9: 37.713301897049	


Average Loss: 0.19777197601831	


RMSE on valid: 4.5511069917438	
Epoch 10: 37.841428041458	


Average Loss: 0.19617452139248	


RMSE on valid: 4.4446024589342	
Epoch 11: 37.712862968445	


Average Loss: 0.19464074499331	


RMSE on valid: 4.3843534175771	
Epoch 12: 38.203027009964	


Average Loss: 0.19310638144052	


RMSE on valid: 4.408529203773	
Epoch 13: 38.824916124344	


Average Loss: 0.19163403265493	


RMSE on valid: 4.3576394935891	
Epoch 14: 37.849529027939	


Average Loss: 0.19025300134216	


In [11]:
myFile = hdf5.open('../data_preprocessed/5-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 5

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()
input_data_valid_space = data['input_data_valid']
input_data_test = data['input_data_test']:clone()

In [12]:
torch.manualSeed(1)
nnlm2, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_4, rmse_valid_4 = train_model(train_input, train_output, nnlm2, crit, N-1, 2, 0.01, 15, 20, input_data_valid_space, input_data_valid)

Average Loss: 0.29443654451786	


RMSE on valid: 7.5064842229383	
Epoch 1: 36.957608938217	


Average Loss: 0.27378908120055	


RMSE on valid: 7.0633008969517	
Epoch 2: 38.712803840637	


Average Loss: 0.24403272683775	


RMSE on valid: 5.9641309747048	
Epoch 3: 37.881150007248	


Average Loss: 0.22213200721034	


RMSE on valid: 5.4555909903412	
Epoch 4: 38.152885913849	


Average Loss: 0.20911243675907	


RMSE on valid: 5.8734151894808	
Epoch 5: 38.157357931137	


Average Loss: 0.19831868559116	


RMSE on valid: 5.2946136905101	
Epoch 6: 38.767784833908	


Average Loss: 0.18999367837257	


RMSE on valid: 4.8250841573747	
Epoch 7: 38.72487282753	


Average Loss: 0.18388471579704	


RMSE on valid: 4.7520349669742	
Epoch 8: 38.782788038254	


Average Loss: 0.17968586361457	


RMSE on valid: 4.5683979118546	
Epoch 9: 37.913828134537	


Average Loss: 0.17670111663555	


RMSE on valid: 4.5283537874102	
Epoch 10: 37.803889036179	


Average Loss: 0.17436727230211	


RMSE on valid: 4.4941745393809	
Epoch 11: 37.829329967499	


Average Loss: 0.17243752242786	


RMSE on valid: 4.4028660598431	
Epoch 12: 37.775954008102	


Average Loss: 0.17078575326792	




RMSE on valid: 4.3477803087063	
Epoch 13: 41.250059127808	


Average Loss: 0.16932815903052	


RMSE on valid: 4.3262647707078	
Epoch 14: 38.601723909378	


Average Loss: 0.16800128690437	


RMSE on valid: 4.3135584413736	
Epoch 15: 39.222057104111	


In [13]:
myFile = hdf5.open('../data_preprocessed/6-grams.hdf5','r')
data = myFile:all()
myFile:close()

N = 6

train_input = data['input_matrix_train']
train_output = data['output_matrix_train']
input_data_train = data['input_data_train']

input_data_valid = data['input_data_valid_nospace']:clone()
input_data_valid_space = data['input_data_valid']
input_data_test = data['input_data_test']:clone()

In [14]:
torch.manualSeed(1)
nnlm3, crit = build_model(N-1, 49, 2, 16, 80)
perp_train_5, rmse_valid_5 = train_model(train_input, train_output, nnlm3, crit, N-1, 2, 0.01, 15, 20, input_data_valid_space, input_data_valid)

Average Loss: 0.295621389953	


RMSE on valid: 7.0153808779193	
Epoch 1: 38.690722942352	


Average Loss: 0.27468023492365	


RMSE on valid: 6.7282583985638	
Epoch 2: 38.674744129181	


Average Loss: 0.25618872109793	


RMSE on valid: 5.7129435977347	
Epoch 3: 37.893794059753	


Average Loss: 0.22655003590524	


RMSE on valid: 5.1633979744655	
Epoch 4: 41.063241958618	


Average Loss: 0.20997346406349	


RMSE on valid: 4.9524686882185	
Epoch 5: 41.743567943573	


Average Loss: 0.19872725299921	


RMSE on valid: 4.7067709328977	
Epoch 6: 39.319875001907	


Average Loss: 0.18992938442935	


RMSE on valid: 4.5704727935834	
Epoch 7: 39.163685798645	


Average Loss: 0.18301274359037	


RMSE on valid: 4.4053587400736	
Epoch 8: 39.138303995132	


Average Loss: 0.17770018348404	


RMSE on valid: 4.3321432420755	
Epoch 9: 42.224860906601	


Average Loss: 0.17367809783134	


RMSE on valid: 4.2078009485187	
Epoch 10: 38.793121099472	


Average Loss: 0.1704792645748	


RMSE on valid: 4.1479620083066	
Epoch 11: 38.455412149429	


Average Loss: 0.16774662850185	


RMSE on valid: 4.2105276289524	
Epoch 12: 38.431823968887	


Average Loss: 0.16530856897846	


RMSE on valid: 4.2452274483029	
Epoch 13: 38.151856184006	


Average Loss: 0.16307442759655	


RMSE on valid: 4.2755630181014	
Epoch 14: 42.683506965637	


Average Loss: 0.16100459655088	


RMSE on valid: 4.2954741070563	
Epoch 15: 38.342741966248	


In [15]:
myFile = hdf5.open('toplot.f5', 'w')
myFile:write('perp3', perp_train_3)
myFile:write('rmse3', rmse_valid_3)
myFile:write('perp4', perp_train_4)
myFile:write('rmse4', rmse_valid_4)
myFile:write('perp5', perp_train_5)
myFile:write('rmse5', rmse_valid_5)
myFile:close()

In [16]:
torch.manualSeed(1)
nnlm4, crit = build_model(N-1, 49, 2, 20, 80)
perp_train_6, rmse_valid_6 = train_model(train_input, train_output, nnlm4, crit, N-1, 2, 0.01, 15, 20, input_data_valid_space, input_data_valid)

Average Loss: 0.2949427931284	


RMSE on valid: 6.8002583020097	
Epoch 1: 38.400814056396	


Average Loss: 0.27373425207181	


RMSE on valid: 6.4982724558285	
Epoch 2: 43.661610126495	


Average Loss: 0.25465911687678	


RMSE on valid: 5.3498372389292	
Epoch 3: 43.709435224533	


Average Loss: 0.22699203202076	


RMSE on valid: 5.2213480108014	
Epoch 4: 44.140863895416	


Average Loss: 0.2091878452534	


RMSE on valid: 5.1808606780408	
Epoch 5: 50.186378955841	


Average Loss: 0.19638709598742	


RMSE on valid: 4.912507967519	
Epoch 6: 48.567532062531	


Average Loss: 0.18749549196501	


RMSE on valid: 4.7041197385877	
Epoch 7: 43.146628141403	


Average Loss: 0.18116075302491	


RMSE on valid: 4.6562472789313	
Epoch 8: 39.406399965286	


Average Loss: 0.17661182813815	


RMSE on valid: 4.5200816059614	
Epoch 9: 38.942988872528	


Average Loss: 0.17311597855571	


RMSE on valid: 4.352139446809	
Epoch 10: 39.03311085701	


Average Loss: 0.17022321592714	


RMSE on valid: 4.2453449909067	
Epoch 11: 39.844140052795	


Average Loss: 0.16776687335378	


RMSE on valid: 4.1000462487046	
Epoch 12: 39.717175006866	


Average Loss: 0.16568973068901	


RMSE on valid: 4.0737953290799	


Epoch 13: 42.313410043716	


Average Loss: 0.16392057574123	


RMSE on valid: 3.9816194460513	
Epoch 14: 43.537487983704	


Average Loss: 0.16237699344759	


RMSE on valid: 3.9523105246215	
Epoch 15: 39.288264036179	


In [17]:
myFile = hdf5.open('toplot2.f5', 'w')
myFile:write('perp6', perp_train_6)
myFile:write('rmse6', rmse_valid_6)
myFile:close()