In [1]:
require 'hdf5'
require 'rnn'

In [2]:
N = 2
myFile = hdf5.open('../data_preprocessed/'..tostring(N)..'-grams.hdf5','r')
data = myFile:all()
F_train = data['F_train']
input_data_valid = data['input_data_valid']
output_matrix_train = data['output_matrix_train']
input_matrix_train = data['input_matrix_train']
input_data_train = data['input_data_train']
input_data_valid_nospace = data['input_data_valid_nospace']
input_data_test = data['input_data_test']
myFile:close()

In [3]:
print(input_matrix_train:size())
print(output_matrix_train:size())
print(input_data_train:size())

 599903
      1
[torch.LongStorage of size 2]

 599903
[torch.LongStorage of size 1]

 599905
[torch.LongStorage of size 1]



In [185]:
-- Formating the input
-- input is a 1d tensor
function get_train_input(input, len, batch_size)
    -- Building output (we put predict a padding at the end)
    local n = input:size(1)
    
    -- Get the closer multiple of batch_size*len below n
    local factor = -math.floor(-n/(len*batch_size))
    local n_new = factor*len*batch_size
    local input_new = torch.DoubleTensor(n_new)
    local t_input, t_output
    input_new:narrow(1,1,n):copy(input)
    input_new:narrow(1,n,n_new-n+1):fill(2) -- Filling with padding
    
    -- Building output
    local output = torch.DoubleTensor(n_new)
    for i=2, n_new do
        if input_new[i] ~= 1 then
            output[i-1] = 2
        else
            output[i-1] = input_new[i]
        end
    end
    output[n_new] = 2

    -- Issue with last sequence if batch_size does not divide n
    t_input = torch.split(input_new:view(batch_size,n_new/batch_size),len, 2)
    t_output = torch.split(output:view(batch_size,n_new/batch_size),len, 2)
    return t_input, t_output
end 

In [281]:
function build_RNN(embed_dim, rho)
    return nn.Recurrent(embed_dim, nn.Linear(embed_dim, embed_dim),nn.Linear(embed_dim, embed_dim), nn.Tanh(), rho)
end

function build_LSTM(embed_dim, rho)
    return nn.FastLSTM(embed_dim, embed_dim, rho)
end

function build_GRU(embed_dim, rho, dropout_p)
    return nn.GRU(embed_dim, embed_dim, rho,dropout_p)
end

function build_rnn(embed_dim, vocab_size, batch_size, recurrent_model, len)
    local batchRNN
    local params
    local grad_params
    -- generic RNN transduced
    batchRNN = nn.Sequential()
        :add(nn.LookupTable(vocab_size, embed_dim))
        :add(nn.SplitTable(1, batch_size))
    local rec = nn.Sequencer(recurrent_model)
    rec:remember('both')
    
    batchRNN:add(rec)
    
    -- Output
    batchRNN:add(nn.Sequencer(nn.Linear(embed_dim, 2)))
    batchRNN:add(nn.Sequencer(nn.LogSoftMax()))

    -- Retrieve parameters (To do only once!!!)
    params, grad_params = batchRNN:getParameters()
    -- Initializing all the parameters between -0.05 and 0.05
    for k=1,params:size(1) do
        params[k] = torch.uniform(-0.05,0.05)
    end
    
    return batchRNN, params, grad_params
end

In [158]:
-- Building output valid matrix
output_valid = torch.DoubleTensor(input_data_valid:size(1))
for i=2, input_data_valid:size(1) do
    if input_data_valid[i] ~= 1 then
        output_valid[i-1] = 2
    else
        output_valid[i-1] = input_data_valid[i]
    end
end

# Training

In [8]:
function train_model(t_input, t_output, model, params, grad_params,
                     criterion, eta, nEpochs, batch_size, len, n)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01
    local timer
    local pred
    local loss
    local dLdPred
    local t_inputT = torch.DoubleTensor(len,batch_size)
    local t_output_table
    local delta = 0.2

    -- To store the loss
    local av_L = 0

    for i = 1, nEpochs do
        -- timing the epoch
        timer = torch.Timer()
        old_L = av_L
        av_L = 0
        
        -- mini batch loop
        for k = 1, n/(batch_size * len) do
            -- Mini batch data
                
            t_inputT:copy(t_input[k]:t())
            t_output_table = torch.split(t_output[k],1,2)
            --format the output
            for j=1,len do
                t_output_table[j] = t_output_table[j]:squeeze()
            end 
            
            -- reset gradients
            grad_params:zero()
            
            -- Forward loop
            pred = model:forward(t_inputT)
            loss = criterion:forward(pred, t_output_table)
            av_L = av_L + loss

            -- Backward loop
            dLdPred = criterion:backward(pred, t_output_table)
            model:backward(t_inputT, dLdPred)
            
            -- gradient normalization with max norm 5 (l2 norm)
            grad_params:view(grad_params:size(1),1):renorm(1,2,5)
            model:updateParameters(eta)
            
        end
            
        print('Epoch '..i..': '..timer:time().real)
        print('Average Loss: '..av_L/math.floor(n/batch_size))
        
    end
    
    if (old_L - av_L) < delta then
        eta = eta/2
        delta = delta/2
    end
    
    if (eta < 0.001) then eta = 0.1 end
end

In [198]:
function train_model_with_perp(t_input, t_output, model, model_flattened, params_flattened,
        params, grad_params, criterion, eta, nEpochs, batch_size, len, n, input_valid, output_valid, step)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01
    local timer
    local pred
    local loss
    local dLdPred
    local t_inputT = torch.DoubleTensor(len,batch_size)
    local t_output_table
    local size

    -- To store the loss
    local av_L = 0
    local perp = 0
    local old_perp = 0

    for i = 1, nEpochs do
        -- timing the epoch
        timer = torch.Timer()
        old_L = av_L
        old_perp = perp
        av_L = 0
        
        -- mini batch loop
        for k = 1, n/(batch_size * len) do
            -- Mini batch data
                
            t_inputT:copy(t_input[k]:t())
            t_output_table = torch.split(t_output[k],1,2)
            --format the output
            for j=1,len do
                t_output_table[j] = t_output_table[j]:squeeze()
            end 
            
            -- reset gradients
            grad_params:zero()
            
            -- Forward loop
            pred = model:forward(t_inputT)
            loss = criterion:forward(pred, t_output_table)
            av_L = av_L + loss

            -- Backward loop
            dLdPred = criterion:backward(pred, t_output_table)
            model:backward(t_inputT, dLdPred)
            
            -- gradient normalization with max norm 5 (l2 norm)
            grad_params:view(grad_params:size(1),1):renorm(1,2,5)
            model:updateParameters(eta)
            
        end
            
        print('Epoch '..i..': '..timer:time().real)
        print('Average Loss: '..av_L/math.floor(n/batch_size))
        -- Print perplexity validity every step of iteration
        if (i%step == 0) then
            size = input_valid:size(1) - 1
            params_flattened:copy(params)
            perp = compute_perplexity(input_valid:narrow(1,1,size):view(size,1), output_valid, model_flattened)
            print('Valid perplexity: '..perp)
            
            if old_perp - perp < 0 then
                eta = eta/2
            end

            if (eta < 0.0001) then eta = 0.1 end

        end
    end
end

In [142]:
-- Building model
batchRNN, params, grad_params = build_rnn(embed_dim, vocab_size, len)
batchRNN_valid, params_valid, grad_params_valid = build_rnn(embed_dim, vocab_size, 1, len)

crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

In [19]:
nEpochs = 3
train_model(t_input, t_output, batchRNN, params, grad_params,
                     crit, eta, nEpochs, batch_size, len, n_new)

Epoch 1: 6.8074369430542	
Average Loss: 0.31228050204274	


Epoch 2: 6.7043259143829	
Average Loss: 0.26151257461365	


Epoch 3: 6.2467911243439	
Average Loss: 0.23146731025455	


In [94]:
nEpochs = 10
train_model_with_perp(t_input, t_output, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n, input_data_valid, output_valid)

Epoch 1: 6.0373430252075	
Average Loss: 0.31127603024063	


Valid perplexity: 1.3167066034457	


Epoch 2: 6.0228929519653	
Average Loss: 0.2595806616951	


Valid perplexity: 1.272589419065	


Epoch 3: 6.0724031925201	
Average Loss: 0.23316074090171	


Valid perplexity: 1.2505899734403	


Epoch 4: 6.7879350185394	
Average Loss: 0.21455134532284	


Valid perplexity: 1.2305572804994	


Epoch 5: 6.1380410194397	
Average Loss: 0.20159015092112	


Valid perplexity: 1.2191191235031	


Epoch 6: 6.0522859096527	
Average Loss: 0.19299361409045	


Valid perplexity: 1.2106942406731	


Epoch 7: 6.0655410289764	
Average Loss: 0.18630843005958	


Valid perplexity: 1.2040019027776	


Epoch 8: 6.0618779659271	
Average Loss: 0.18083623497307	


Valid perplexity: 1.1988415676649	


Epoch 9: 6.0254361629486	
Average Loss: 0.17666558202226	


Valid perplexity: 1.1948545425466	


Epoch 10: 6.0559229850769	
Average Loss: 0.17341161985071	


Valid perplexity: 1.191752075552	


In [96]:
nEpochs = 10
train_model_with_perp(t_input, t_output, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n, input_data_valid, output_valid)

Epoch 1: 6.1608598232269	
Average Loss: 0.17066010915357	


Valid perplexity: 1.1893514698956	


Epoch 2: 6.0460610389709	
Average Loss: 0.16842283821082	


Valid perplexity: 1.1876828130608	


Epoch 3: 6.1154489517212	
Average Loss: 0.16659270468897	


Valid perplexity: 1.1864067460075	


Epoch 4: 6.0670311450958	
Average Loss: 0.16518998250815	


Valid perplexity: 1.1853287486702	


Epoch 5: 6.1075801849365	
Average Loss: 0.16408510411659	


Valid perplexity: 1.1845620295284	


Epoch 6: 6.3404068946838	
Average Loss: 0.16316840120459	


Valid perplexity: 1.1840970283144	


Epoch 7: 6.0670158863068	
Average Loss: 0.16239600637254	


Valid perplexity: 1.1837860540369	


Epoch 8: 6.0414438247681	


Average Loss: 0.16169741966653	


Valid perplexity: 1.1833747002796	


Epoch 9: 6.0382349491119	
Average Loss: 0.16102260010531	


Valid perplexity: 1.1828553687182	


Epoch 10: 6.026673078537	
Average Loss: 0.16036868317739	


Valid perplexity: 1.1826217175028	


In [97]:
nEpochs = 10
train_model_with_perp(t_input, t_output, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n, input_data_valid, output_valid)

Epoch 1: 6.1355979442596	
Average Loss: 0.15972079728122	


Valid perplexity: 1.1824428813963	


Epoch 2: 6.4839720726013	
Average Loss: 0.15909980418245	


Valid perplexity: 1.182109999531	


Epoch 3: 6.2839608192444	
Average Loss: 0.15852116560753	


Valid perplexity: 1.181668471896	


Epoch 4: 6.2338001728058	
Average Loss: 0.15796460040097	


Valid perplexity: 1.1811241082699	


Epoch 5: 6.8121101856232	
Average Loss: 0.15744148391804	


Valid perplexity: 1.1806673353204	


Epoch 6: 6.2398941516876	
Average Loss: 0.15692715807122	


Valid perplexity: 1.1801969411692	


Epoch 7: 6.1931447982788	
Average Loss: 0.15642420072222	


Valid perplexity: 1.1797258097392	


Epoch 8: 6.2221410274506	
Average Loss: 0.15592075426583	


Valid perplexity: 1.1796751236756	


Epoch 9: 6.2164981365204	
Average Loss: 0.15546064818585	


Valid perplexity: 1.179654980452	


Epoch 10: 6.8618688583374	
Average Loss: 0.15505075502542	


Valid perplexity: 1.1795317990726	


# EXP RNN

In [392]:
len = 50
batch_size = 4
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 40

t_input_new, t_output_new = get_train_input(input_data_train, len, batch_size)
n_new = len * batch_size *(#t_input_new)
print('Input size is '..n_new)

Input size is 600000	


In [270]:
-- Building model
batchRNN, params, grad_params = build_rnn(embed_dim, vocab_size, batch_size, build_RNN(embed_dim, len), len)
batchRNN_valid, params_valid, grad_params_valid = build_rnn(embed_dim, vocab_size, 1,build_RNN(embed_dim))

crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

Input size is 600000	


In [271]:
-- Adaptive learning rate
nEpochs = 20
step = 1
train_model_with_perp(t_input_new, t_output_new, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 5)

Epoch 1: 25.876962900162	
Average Loss: 0.26690913494538	


Epoch 2: 25.171590089798	
Average Loss: 0.20740503567645	


Epoch 3: 24.630791187286	
Average Loss: 0.18643517302651	


Epoch 4: 25.385226964951	
Average Loss: 0.17807200826734	


Epoch 5: 26.317702054977	
Average Loss: 0.17290693506462	


Valid perplexity: 1.1970421011916	


Epoch 6: 24.52170085907	
Average Loss: 0.1673060464136	


Epoch 7: 24.278975009918	
Average Loss: 0.16566674484942	


Epoch 8: 28.287208080292	
Average Loss: 0.16435510963808	


Epoch 9: 32.050582170486	
Average Loss: 0.16316460718602	




Epoch 10: 25.480005025864	
Average Loss: 0.16213567361777	


Valid perplexity: 1.1868415078835	


Epoch 11: 24.611994981766	
Average Loss: 0.16118626629998	


Epoch 12: 26.228096961975	
Average Loss: 0.16037952564425	


Epoch 13: 31.00887298584	
Average Loss: 0.1597093858762	


Epoch 14: 25.062748908997	
Average Loss: 0.15915718304179	


Epoch 15: 25.973461866379	
Average Loss: 0.15864680840978	


Valid perplexity: 1.1839163119006	


Epoch 16: 26.059950113297	
Average Loss: 0.15818557140883	


Epoch 17: 25.154232025146	
Average Loss: 0.15776211477455	


Epoch 18: 25.887181043625	
Average Loss: 0.15734244795882	


Epoch 19: 27.179241895676	
Average Loss: 0.15696521569348	


Epoch 20: 27.432199001312	
Average Loss: 0.15663540902626	


Valid perplexity: 1.1824420394642	


In [272]:
-- Adaptive learning rate
nEpochs = 10
step = 1
train_model_with_perp(t_input_new, t_output_new, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 27.619936943054	
Average Loss: 0.15974601422568	


Valid perplexity: 1.1855367178374	


Epoch 2: 24.746417045593	
Average Loss: 0.15638448991217	


Valid perplexity: 1.1824438751704	


Epoch 3: 25.023906946182	
Average Loss: 0.15573398220736	


Valid perplexity: 1.1819785736185	


Epoch 4: 25.480671167374	
Average Loss: 0.15540230367625	


Valid perplexity: 1.1813094890523	


Epoch 5: 24.604247093201	
Average Loss: 0.15510953272084	


Valid perplexity: 1.1811523704405	


Epoch 6: 24.404500007629	
Average Loss: 0.15480920641107	


Valid perplexity: 1.180707110681	


Epoch 7: 25.181984901428	
Average Loss: 0.15454848056418	


Valid perplexity: 1.1808885187157	


Epoch 8: 26.033724784851	
Average Loss: 0.15324498182009	


Valid perplexity: 1.1788202946797	


Epoch 9: 24.985551118851	
Average Loss: 0.15289153281754	


Valid perplexity: 1.1785937593155	


Epoch 10: 26.281076908112	
Average Loss: 0.15275839278728	


Valid perplexity: 1.1785445168725	


In [393]:
-- Adaptive learning rate
nEpochs = 10
step = 1
train_model_with_perp(t_input_new, t_output_new, batchRNN, batchRNN_valid, params_valid,
        params, grad_params, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 25.253405094147	
Average Loss: 0.15771314623241	


Valid perplexity: 1.1839716613607	


Epoch 2: 23.995304822922	
Average Loss: 0.15447399569549	


Valid perplexity: 1.1807325957025	


Epoch 3: 23.647886991501	
Average Loss: 0.15392232752519	


Valid perplexity: 1.1804579521235	


Epoch 4: 24.548258066177	
Average Loss: 0.15368239953165	


Valid perplexity: 1.1801014532801	


Epoch 5: 23.791232824326	
Average Loss: 0.15351510321408	


Valid perplexity: 1.1804442996589	


Epoch 6: 23.86344408989	
Average Loss: 0.15224096155613	


Valid perplexity: 1.1786706140464	


Epoch 7: 24.195983171463	
Average Loss: 0.15184575294419	


Valid perplexity: 1.1785939836208	


Epoch 8: 23.519762992859	
Average Loss: 0.15167006138082	


Valid perplexity: 1.1785269433268	


Epoch 9: 23.866048812866	
Average Loss: 0.15154694132977	


Valid perplexity: 1.1784334320309	


Epoch 10: 26.384864091873	
Average Loss: 0.15144445357836	


Valid perplexity: 1.1783925081262	


# EXP GRU

In [282]:
len = 30
batch_size = 16
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 40

t_input_new, t_output_new = get_train_input(input_data_train, len, batch_size)
n_new = len * batch_size *(#t_input_new)
print('Input size is '..n_new)

-- Building model
batchGRU, params_gru, grad_params_gru = build_rnn(embed_dim, vocab_size, batch_size, build_GRU(embed_dim, len), len)
batchGRU_valid, params_valid_gru, grad_params_valid_gru = build_rnn(embed_dim, vocab_size, 1,build_GRU(embed_dim))

crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

Input size is 600000	


In [283]:
-- Adaptive learning (remember changed position)
nEpochs = 5
step = 1
train_model_with_perp(t_input_new, t_output_new, batchGRU, batchGRU_valid, params_valid_gru,
        params_gru, grad_params_gru, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 5)

Epoch 1: 21.985914945602	
Average Loss: 0.34754894743853	


Epoch 2: 20.822425127029	
Average Loss: 0.24662751444763	


Epoch 3: 20.380235910416	
Average Loss: 0.22644992760149	


Epoch 4: 20.386257171631	
Average Loss: 0.21586756464063	


Epoch 5: 20.260949134827	
Average Loss: 0.20588786611453	


Valid perplexity: 1.2267422844996	


In [284]:
-- Adaptive learning (remember changed position)
nEpochs = 10
step = 2
train_model_with_perp(t_input_new, t_output_new, batchGRU, batchGRU_valid, params_valid_gru,
        params_gru, grad_params_gru, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 20.033187150955	
Average Loss: 0.19548649256202	


Epoch 2: 20.111010074615	
Average Loss: 0.18753432665153	


Valid perplexity: 1.2088579236428	


Epoch 3: 20.048751831055	
Average Loss: 0.18135815098318	


Epoch 4: 19.980526924133	
Average Loss: 0.17843319981618	


Valid perplexity: 1.2006284614829	


Epoch 5: 22.001543998718	
Average Loss: 0.17582000858789	


Epoch 6: 21.397794961929	
Average Loss: 0.17342742262632	


Valid perplexity: 1.1953346338126	


Epoch 7: 20.03279709816	
Average Loss: 0.17120608813403	


Epoch 8: 21.058824062347	
Average Loss: 0.16911203085189	


Valid perplexity: 1.1908863198291	


Epoch 9: 19.768749952316	
Average Loss: 0.1671295096868	


Epoch 10: 20.265580892563	
Average Loss: 0.16527202300677	


Valid perplexity: 1.1871044336422	


In [285]:
-- Adaptive learning (remember changed position)
nEpochs = 10
step = 2
train_model_with_perp(t_input_new, t_output_new, batchGRU, batchGRU_valid, params_valid_gru,
        params_gru, grad_params_gru, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 20.54555106163	
Average Loss: 0.1644748422442	


Epoch 2: 20.115912914276	
Average Loss: 0.1616761848937	


Valid perplexity: 1.1840898392083	


Epoch 3: 19.895653963089	
Average Loss: 0.15819938054031	


Epoch 4: 21.474240064621	
Average Loss: 0.15682596094484	


Valid perplexity: 1.1792869281535	


Epoch 5: 20.169448137283	
Average Loss: 0.15560150461672	


Epoch 6: 20.641898155212	
Average Loss: 0.15444937533606	


Valid perplexity: 1.177232832283	


Epoch 7: 19.355062961578	
Average Loss: 0.1533688281671	


Epoch 8: 20.005716085434	
Average Loss: 0.1523599688517	


Valid perplexity: 1.1754881318031	


Epoch 9: 20.25895690918	
Average Loss: 0.15141961276476	


Epoch 10: 20.001791000366	
Average Loss: 0.15054239950379	


Valid perplexity: 1.1739414781614	


In [362]:
-- Adaptive learning (remember changed position)
nEpochs = 10
step = 2
train_model_with_perp(t_input_new, t_output_new, batchGRU, batchGRU_valid, params_valid_gru,
        params_gru, grad_params_gru, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 21.13591504097	
Average Loss: 0.15102133170376	


Epoch 2: 20.824810028076	
Average Loss: 0.14967832680039	


Valid perplexity: 1.17339501455	


Epoch 3: 20.168252944946	
Average Loss: 0.14712340869834	


Epoch 4: 19.862685918808	
Average Loss: 0.14634317440166	


Valid perplexity: 1.1697229317069	


Epoch 5: 20.198061943054	
Average Loss: 0.14568765712831	


Epoch 6: 21.455843925476	
Average Loss: 0.14506850216721	


Valid perplexity: 1.1684899324422	


Epoch 7: 20.93475985527	
Average Loss: 0.14447676786644	


Epoch 8: 23.790939092636	
Average Loss: 0.14390880808384	


Valid perplexity: 1.1673607266979	


Epoch 9: 20.453790903091	
Average Loss: 0.14336208173163	


Epoch 10: 20.399837970734	
Average Loss: 0.14283445960168	


Valid perplexity: 1.1663204973306	


In [382]:
-- Adaptive learning (remember changed position)
nEpochs = 10
step = 2
train_model_with_perp(t_input_new, t_output_new, batchGRU, batchGRU_valid, params_valid_gru,
        params_gru, grad_params_gru, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, step)

Epoch 1: 22.13996386528	
Average Loss: 0.14379205863432	


Epoch 2: 20.517323970795	
Average Loss: 0.14294781335128	


Valid perplexity: 1.1665943843895	


Epoch 3: 20.261568069458	
Average Loss: 0.14069050180087	


Epoch 4: 20.730599880219	
Average Loss: 0.14011809222691	


Valid perplexity: 1.1634903073514	


Epoch 5: 21.100613117218	
Average Loss: 0.13965992055933	


Epoch 6: 21.459497928619	
Average Loss: 0.13922807224974	


Valid perplexity: 1.162639710037	


Epoch 7: 22.623461008072	
Average Loss: 0.13881433381805	


Epoch 8: 19.699815988541	
Average Loss: 0.13841564025072	


Valid perplexity: 1.1618228377842	


Epoch 9: 19.508886098862	
Average Loss: 0.13803027044745	


Epoch 10: 21.358961105347	
Average Loss: 0.13765702408165	


Valid perplexity: 1.1610467126026	


# EXP LSTM

In [227]:
len = 50
batch_size = 16
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 40

t_input_new, t_output_new = get_train_input(input_data_train, len, batch_size)
n_new = len * batch_size *(#t_input_new)
print('Input size is '..n_new)

-- Building model
batchLTM, params_lstm, grad_params_lstm = build_rnn(embed_dim, vocab_size, batch_size, build_LSTM(embed_dim, len), len)
batchLTM_valid, params_valid_lstm, grad_params_valid_lstm = build_rnn(embed_dim, vocab_size, 1,build_LSTM(embed_dim))

crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

Input size is 600000	


In [228]:
-- Adaptive learning rate
nEpochs = 5
step = 1
train_model_with_perp(t_input_new, t_output_new, batchLTM, batchLTM_valid, params_valid_lstm,
        params_lstm, grad_params_lstm, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 5)

Epoch 1: 20.500305175781	
Average Loss: 0.42686851261555	


Epoch 2: 22.319206953049	
Average Loss: 0.26914507490737	


Epoch 3: 20.252361059189	
Average Loss: 0.23543355068168	


Epoch 4: 22.399124145508	
Average Loss: 0.22355271454239	


Epoch 5: 22.436785936356	
Average Loss: 0.21356005534607	


Valid perplexity: 1.2353700014788	


In [229]:
len = 40
batch_size = 16
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 40

t_input_new, t_output_new = get_train_input(input_data_train, len, batch_size)
n_new = len * batch_size *(#t_input_new)
print('Input size is '..n_new)

-- Building model
batchLTM, params_lstm, grad_params_lstm = build_rnn(embed_dim, vocab_size, batch_size, build_LSTM(embed_dim, len), len)
batchLTM_valid, params_valid_lstm, grad_params_valid_lstm = build_rnn(embed_dim, vocab_size, 1,build_LSTM(embed_dim))

crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

Input size is 600320	


In [230]:
-- Adaptive learning rate
nEpochs = 10
step = 1
train_model_with_perp(t_input_new, t_output_new, batchLTM, batchLTM_valid, params_valid_lstm,
        params_lstm, grad_params_lstm, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 2)

Epoch 1: 21.371793031693	
Average Loss: 0.40999359046677	


Epoch 2: 20.550037145615	
Average Loss: 0.26525554284651	


Epoch 3: 20.77099609375	
Average Loss: 0.22752339722576	


Epoch 4: 20.083611965179	
Average Loss: 0.21233982497507	


Epoch 5: 20.474061965942	
Average Loss: 0.20172383091247	


Valid perplexity: 1.2214339281157	


In [231]:
-- Adaptive learning rate
nEpochs = 10
step = 1
train_model_with_perp(t_input_new, t_output_new, batchLTM, batchLTM_valid, params_valid_lstm,
        params_lstm, grad_params_lstm, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 2)

Epoch 1: 22.699866771698	
Average Loss: 0.19419458614932	


Epoch 2: 20.968109130859	
Average Loss: 0.18793614822398	


Valid perplexity: 1.2091848838919	


Epoch 3: 20.89581489563	
Average Loss: 0.18274775433131	


Epoch 4: 20.497636079788	
Average Loss: 0.18005071151586	


Valid perplexity: 1.2022368029729	


Epoch 5: 21.626466989517	
Average Loss: 0.17758307255514	


Epoch 6: 22.789066076279	
Average Loss: 0.17534566218807	


Valid perplexity: 1.1973702355909	


Epoch 7: 19.516790866852	
Average Loss: 0.17335061858939	


Epoch 8: 18.870615959167	
Average Loss: 0.17155007475586	


Valid perplexity: 1.1933553148268	


Epoch 9: 19.937397003174	
Average Loss: 0.16989552580692	


Epoch 10: 20.051936149597	
Average Loss: 0.16835741463583	


Valid perplexity: 1.1898619072821	


In [232]:
-- Adaptive learning rate
nEpochs = 10
step = 1
train_model_with_perp(t_input_new, t_output_new, batchLTM, batchLTM_valid, params_valid_lstm,
        params_lstm, grad_params_lstm, crit, eta, nEpochs, batch_size, len, n_new, input_data_valid, output_valid, 2)

Epoch 1: 20.126780033112	
Average Loss: 0.16735650833021	


Epoch 2: 19.257142066956	
Average Loss: 0.16495719005838	


Valid perplexity: 1.1863063299435	


Epoch 3: 19.587309122086	
Average Loss: 0.16227134233296	


Epoch 4: 19.77908205986	
Average Loss: 0.16107603165942	


Valid perplexity: 1.1820073811279	


Epoch 5: 19.054136037827	
Average Loss: 0.16002025257414	


Epoch 6: 19.094585895538	
Average Loss: 0.15902858172604	


Valid perplexity: 1.1799882336849	


Epoch 7: 20.017446994781	
Average Loss: 0.15808955162245	


Epoch 8: 19.609482049942	
Average Loss: 0.15719549394668	


Valid perplexity: 1.1782467302895	


Epoch 9: 19.873846054077	
Average Loss: 0.15633938148774	


Epoch 10: 19.472234964371	
Average Loss: 0.15551462341618	


Valid perplexity: 1.1766833547014	


# Predictions

In [16]:
function compute_probability_model(model, input)
    return model:forward(input:view(input:size(1), 1))
end

In [23]:
-- Method to compute manually the perplexity
function compute_perplexity(input, output, model)
    -- Last Position filled in predictions
    -- Position to predict in input
    local position_input = 1
    local probability = torch.DoubleTensor(2)
    local probability_table
    local perp = 0

    -- Build mapping
    for i = 1,input:size(1) do
        -- Line where the model appears
        -- The model remember the states before, just need to feed into it a character
        probability_table = compute_probability_model(model, input:narrow(1,i,1))
        probability:copy(probability_table[1])
        perp = perp + probability[output[i]]
    end
    -- Cutting the output
    return math.exp(-perp/input:size(1))
end   

In [51]:
-- Computing perplexity
timer = torch.Timer()
size = input_data_train:size(1)
perp = compute_perplexity(input_data_train:narrow(1,1,size):view(size,1), output_matrix_train:narrow(1,1,size), batchRNN_valid)
print('Time elasped : '..timer:time().real)
print(perp)

Time elasped : 0.17292881011963	
1.2779225376256	


In [215]:
-- Computing perplexity on valid
timer = torch.Timer()
--size = input_data_valid:size(1)
size = 10000
perp = compute_perplexity(input_data_valid:narrow(1,1,size):view(size,1), output_valid:narrow(1,1,size), batchLTM_valid)
print('Time elasped : '..timer:time().real)
print(perp)

Time elasped : 2.5755980014801	
2.0049887210591	


In [233]:
-- Prediction on test
function predict_rnn_greedy(input, len, model)
    -- Last Position filled in predictions
    local position_prediction = 1
    -- Position to predict in input
    local position_input = 1
    -- We allocate the maximum of memory that could be needed
    -- Default value is -1 (to know where predictions end afterwards)
    local predictions = torch.ones(2*input:size(1)):mul(-1)
    -- Copy the first entry
    predictions[position_prediction] = input[position_input]
    local probability = torch.zeros(2)
    local probability_table

    -- Build mapping
    while position_input < input:size(1) do
        -- Line where the model appears
        -- The model remember the states before, just need to feed into it a character
        probability_table = compute_probability_model(model, predictions:narrow(1,position_prediction, 1))
        probability:copy(probability_table[1])

        m,a = probability:max(1)

        -- Case space predicted
        position_prediction = position_prediction +1
        if (a[1] == 1) then
            predictions[position_prediction] = 1
        else
            -- Copying next character
            position_input = position_input + 1
            predictions[position_prediction] = input[position_input] 
        end
    end
    -- Cutting the output
    return predictions:narrow(1,1,position_prediction)
end   

In [100]:
function get_kaggle_format(predictions_test, N)
    -- Counting sentences
    local num_sentence = 0
    for i=N-1,predictions_test:size(1) do
        if predictions_test[i] == 2 then
            num_sentence = num_sentence + 1
        end
    end

    -- Counting space per sentence
    local num_spaces = torch.DoubleTensor(num_sentence,2)
    local row = 1
    local count_space = 0
    for i=N-1,predictions_test:size(1) do
        if predictions_test[i] == 2 then
            num_spaces[{row, 1}] = row
            num_spaces[{row, 2}] = count_space
            count_space = 0
            row = row + 1
        elseif predictions_test[i] == 1 then
            count_space = count_space + 1
        end
    end
    return num_spaces
end

In [101]:
function compute_rmse(true_kaggle, pred_kaggle)
    local rmse = 0
    for i=1,true_kaggle:size(1) do
        rmse = rmse + math.pow(true_kaggle[{i,2}] - pred_kaggle[{i,2}], 2)
    end
    return(math.sqrt(rmse/ true_kaggle:size(1)))
end

In [238]:
kaggle_true_valid = get_kaggle_format(input_data_valid,2)

In [237]:
timer = torch.Timer()
size = input_data_valid_nospace:size(1)
pred_valid_lstm = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,size), len, batchLTM_valid)
print('Time elasped : '..timer:time().real)

Time elasped : 24.515151023865	


In [239]:
kaggle_lstm_valid = get_kaggle_format(pred_valid_lstm,2)
print('RMSE LSTM after 25 epochs')
rsme_lstm = compute_rmse(kaggle_true_valid, kaggle_lstm_valid)
print(rsme_lstm)

RMSE LSTM after 25 epochs	


4.4900644363655	


In [394]:
timer = torch.Timer()
size = input_data_valid_nospace:size(1)
pred_valid_rnn = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,size), len, batchRNN_valid)
print('Time elasped : '..timer:time().real)

Time elasped : 14.468178033829	


In [395]:
kaggle_rnn_valid = get_kaggle_format(pred_valid_rnn,2)
print('RMSE RNN')
rsme_rnn = compute_rmse(kaggle_true_valid, kaggle_rnn_valid)
print(rsme_rnn)

RMSE RNN	


4.0739178179273	


In [383]:
timer = torch.Timer()
size = input_data_valid_nospace:size(1)
pred_valid_gru = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,size), len, batchGRU_valid)
print('Time elasped : '..timer:time().real)

Time elasped : 31.173984050751	


In [384]:
kaggle_gru_valid = get_kaggle_format(pred_valid_gru,2)
print('RMSE GRU')
rsme_gru = compute_rmse(kaggle_true_valid, kaggle_gru_valid)
print(rsme_gru)

RMSE GRU	


3.5130211165419	


In [428]:
-- Ensemble combinations
wgru = 26
wrnn = 2
wlstm = 0
total = wgru + wrnn + wlstm
kaggle_ensemble = (torch.mul(kaggle_gru_valid, wgru) + torch.mul(kaggle_rnn_valid, wrnn) + torch.mul(kaggle_lstm_valid, wlstm)):div(total)
-- converting to int
kaggle_ensemble:add(0.5):floor()
print('RMSE ENSEMBLE')
rsme_ensemble = compute_rmse(kaggle_true_valid, kaggle_ensemble)
print(rsme_ensemble)

RMSE ENSEMBLE	


3.511174064807	


# Test sequences prediction

In [429]:
-- Pred on test
timer = torch.Timer()
size = input_data_test:size(1)
pred_test_gru = predict_rnn_greedy(input_data_test:narrow(1,1,size), len, batchGRU_valid)
print('Time elasped : '..timer:time().real)

Time elasped : 92.837053060532	


In [430]:
-- Saaving test prediction
kaggle_test = get_kaggle_format(pred_test_gru,2)

In [432]:
-- Saving the Kaggle format output
myFile = hdf5.open('../submission/pred_gru_l30_b16_e45', 'w')
myFile:write('num_spaces', kaggle_test)
myFile:close()