In [1]:
require 'hdf5'
require 'rnn'

In [2]:
N = 2
myFile = hdf5.open('../data_preprocessed/'..tostring(N)..'-grams.hdf5','r')
data = myFile:all()
F_train = data['F_train']
input_data_valid = data['input_data_valid']
output_matrix_train = data['output_matrix_train']
input_matrix_train = data['input_matrix_train']
input_data_train = data['input_data_train']
input_data_valid_nospace = data['input_data_valid_nospace']
input_data_test = data['input_data_test']
myFile:close()

In [3]:
print(input_matrix_train:size())
print(output_matrix_train:size())
print(input_data_train:size())

 599903
      1
[torch.LongStorage of size 2]

 599903
[torch.LongStorage of size 1]

 599905
[torch.LongStorage of size 1]



In [4]:
-- Formating the input
-- Currently using a hack to have nive divisions
n = input_data_train:size(1)
n_new = n - 705
len = 50
batch_size = 16
print(n_new)
print(n_new/(batch_size*len))

599200	
749	


In [5]:
-- Issue with last sequence if batch_size does not divide n
t_input = torch.split(input_data_train:narrow(1,1,n_new):view(batch_size,n_new/batch_size),len, 2)
t_output = torch.split(output_matrix_train:narrow(1,1,n_new):view(batch_size,n_new/batch_size),len, 2)

In [6]:
function build_rnn(embed_dim, vocab_size, batch_size, len)
    local batchRNN
    local params
    local grad_params
    -- generic RNN transduced
    batchRNN = nn.Sequential()
        :add(nn.LookupTable(vocab_size, embed_dim))
        :add(nn.SplitTable(1, batch_size))
    
    batchRNN:add(nn.Sequencer(nn.Recurrent(
       embed_dim, nn.Linear(embed_dim, embed_dim), 
       nn.Linear(embed_dim, embed_dim), nn.Tanh(), len)))
    -- Output
    batchRNN:add(nn.Sequencer(nn.Linear(embed_dim, 2)))
    batchRNN:add(nn.Sequencer(nn.LogSoftMax()))
    batchRNN:remember('both')

    -- Retrieve parameters (To do only once!!!)
    params, grad_params = batchRNN:getParameters()
    
    return batchRNN, params, grad_params
end

In [26]:
function build_lstm(embed_dim, vocab_size, batch_size)
    local batchRNN
    local params
    local grad_params
    -- Fast LSTM
    batchRNN = nn.Sequential()
        :add(nn.LookupTable(vocab_size, embed_dim))
        :add(nn.SplitTable(1, batch_size))
    batchRNN:add(nn.Sequencer((nn.FastLSTM(embed_dim, embed_dim))))
    -- Output
    batchRNN:add(nn.Sequencer(nn.Linear(embed_dim, 2)))
    batchRNN:add(nn.Sequencer(nn.LogSoftMax()))
    batchRNN:remember('both')

    -- Retrieve parameters (To do only once!!!)
    params, grad_params = batchRNN:getParameters()
    
    return batchRNN, params, grad_params
end

# Training

In [8]:
function train_model(t_input, t_output, model, params, grad_params,
                     criterion, eta, nEpochs, batch_size, len, n)
    -- Train the model with a mini batch SGD
    -- standard parameters are
    -- nEpochs = 1
    -- batchSize = 32
    -- eta = 0.01
    local timer
    local pred
    local loss
    local dLdPred
    local t_inputT = torch.DoubleTensor(len,batch_size)
    local t_output_table
    local delta = 0.2

    -- To store the loss
    local av_L = 0
    
    -- Initializing all the parameters between -0.05 and 0.05
    for k=1,params:size(1) do
        params[k] = torch.uniform(-0.05,0.05)
    end

    for i = 1, nEpochs do
        -- timing the epoch
        timer = torch.Timer()
        old_L = av_L
        av_L = 0
        
        -- mini batch loop
        for k = 1, n/(batch_size * len) do
            -- Mini batch data
                
            t_inputT:copy(t_input[k]:t())
            t_output_table = torch.split(t_output[k],1,2)
            --format the output
            for j=1,len do
                t_output_table[j] = t_output_table[j]:squeeze()
            end 
            
            -- reset gradients
            grad_params:zero()
            
            -- Forward loop
            pred = model:forward(t_inputT)
            loss = criterion:forward(pred, t_output_table)
            av_L = av_L + loss

            -- Backward loop
            dLdPred = criterion:backward(pred, t_output_table)
            model:backward(t_inputT, dLdPred)
            
            -- gradient normalization with max norm 5 (l2 norm)
            grad_params:view(grad_params:size(1),1):renorm(1,2,5)
            model:updateParameters(eta)
            
        end
            
        print('Epoch '..i..': '..timer:time().real)
        print('Average Loss: '..av_L/math.floor(n/batch_size))
        
    end
    
    if (old_L - av_L) < delta then
        eta = eta/2
        delta = delta/2
    end
    
    if (eta < 0.001) then eta = 0.1 end
end

In [29]:
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 10

-- Building model
batchRNN, params, grad_params = build_rnn(embed_dim, vocab_size, len)
crit = nn.SequencerCriterion(nn.ClassNLLCriterion())

train_model(t_input, t_output, batchRNN, params, grad_params,
                     crit, eta, nEpochs, batch_size, len, n_new)

Epoch 1: 7.422128200531	
Average Loss: 0.30979213901479	


Epoch 2: 6.4211950302124	
Average Loss: 0.25576773451975	


Epoch 3: 6.3254590034485	
Average Loss: 0.22644764109874	


Epoch 4: 6.4085111618042	
Average Loss: 0.20789739247033	


Epoch 5: 6.3699600696564	
Average Loss: 0.19375478694154	


Epoch 6: 6.381441116333	
Average Loss: 0.18503419077917	


Epoch 7: 6.9063639640808	
Average Loss: 0.1792965625867	


Epoch 8: 6.5559759140015	
Average Loss: 0.17522007718129	


Epoch 9: 6.9096751213074	
Average Loss: 0.17217148793924	


Epoch 10: 6.5440270900726	
Average Loss: 0.16966121016686	


In [30]:
vocab_size = 49
embed_dim = 20
eta = 0.5
nEpochs = 10

-- Building model
batchLSTM, params_lstm, grad_params_lstm = build_lstm(embed_dim, vocab_size)
crit2 = nn.SequencerCriterion(nn.ClassNLLCriterion())

In [31]:
train_model(t_input, t_output, batchLSTM, params_lstm, grad_params_lstm,
                     crit2, eta, nEpochs, batch_size, len, n_new)

Epoch 1: 36.954732894897	
Average Loss: 0.43456628210534	


Epoch 2: 36.342836141586	
Average Loss: 0.27944201210318	


Epoch 3: 39.1511490345	
Average Loss: 0.24199334790094	


Epoch 4: 34.9020819664	
Average Loss: 0.22528050802921	


Epoch 5: 34.562178134918	
Average Loss: 0.2153748178659	


Epoch 6: 35.032074928284	
Average Loss: 0.20743909895576	


Epoch 7: 32.87624502182	
Average Loss: 0.20070673120846	


Epoch 8: 32.899857997894	
Average Loss: 0.19397212705706	


Epoch 9: 35.779467105865	
Average Loss: 0.1880882761721	


Epoch 10: 34.576239109039	
Average Loss: 0.18328521033972	



# Predictions

In [40]:
-- Copying model into one with batch_size = 1
batchRNN_valid, params_valid, grad_params_valid = build_rnn(embed_dim, vocab_size, 1, len)
params_valid:copy(params)

-- Compute perplexity on validation

In [44]:
batch_lsm, params_valid_lsm, grad_params_valid_lsm = build_lstm(embed_dim, vocab_size, 1)
params_valid_lsm:copy(params_lstm)

In [78]:
timer = torch.Timer()
size = 10000
pred_train = batchRNN_valid:forward(input_data_train:narrow(1,1,size):view(size,1))
print('Time elasped : '..timer:time().real)

Time elasped : 11.978783130646	


In [None]:
timer = torch.Timer()
size = 10000
pred_train_short = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,size), len, batchRNN_valid)
print('Time elasped : '..timer:time().real)

In [19]:
-- Compute perplexity on train
pred_train = batchRNN_valid:forward(input_data_train:view(input_data_train:size(1),1))

In [None]:
t_output_table_train = {}
--format the output
for j=1, do
    t_output_table[j] = output_matrix_train:narrodata,i,1)
end 

In [None]:
input_valid = input_data_valid:view(input_data_valid:size(1), 1)
pred_valid = batchRNN_valid:forward(input_valid)

In [79]:
t_valid_output_table = {}
--format the output
for j=1,input_valid:size(1) do
    if input_valid[j] == 1 then
        t_valid_output_table[j] = torch.DoubleTensor({1})
    else
        t_valid_output_table[j] = torch.DoubleTensor({2})
    end
end 

In [88]:
loss_valid = crit:forward(pred_valid, t_valid_output_table)

In [95]:
w = batchRNN_valid:get(1)['weight']
w_original = batchRNN:get(1)['weight']

In [90]:
-- Compute perplexity
loss_valid_avg = loss_valid/input_valid:size(1)
perp_valid = math.exp(loss_valid_avg)
print(loss_valid_avg)
print(perp_valid)

0.38958298749322	
1.4763650028245	


In [37]:
function compute_probability_model(model, input)
    return model:forward(input:view(input:size(1), 1))
end

In [58]:
-- Prediction on test
function predict_rnn_greedy(input, len, model)
    -- Last Position filled in predictions
    local position_prediction = 1
    -- Position to predict in input
    local position_input = 1
    -- We allocate the maximum of memory that could be needed
    -- Default value is -1 (to know where predictions end afterwards)
    local predictions = torch.ones(2*input:size(1)):mul(-1)
    -- Copy the first entry
    predictions[position_prediction] = input[position_input]
    local probability = torch.zeros(2)
    local probability_table

    -- Build mapping
    while position_input < input:size(1) do
        -- Line where the model appears
        -- The model remember the states before, just need to feed into it a character
        probability_table = compute_probability_model(model, predictions:narrow(1,position_prediction, 1))
        probability:copy(probability_table[1])

        m,a = probability:max(1)

        -- Case space predicted
        position_prediction = position_prediction +1
        if (a[1] == 1) then
            predictions[position_prediction] = 1
        else
            -- Copying next character
            position_input = position_input + 1
            predictions[position_prediction] = input[position_input] 
        end
    end
    -- Cutting the output
    return predictions:narrow(1,1,position_prediction)
end   

In [61]:
pred_valid_short = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,20), len, batchRNN_valid)

In [60]:
-- LSM
pred_valid_short

 14
  9
  7
  1
 18
 17
 20
  4
  5
 18
  1
 20
  3
 16
 15
  3
  7
 10
 10
  9
 20
  9
[torch.DoubleTensor of size 22]



In [62]:
-- RNN
pred_valid_short

 14
  9
  7
 18
  1
 17
 20
  4
  5
 18
  1
 20
  3
 16
  1
 15
  3
  7
 10
 10
  9
 20
  1
  9
[torch.DoubleTensor of size 24]



In [67]:

timer = torch.Timer()
size = 10000
pred_valid_short = predict_rnn_greedy(input_data_valid_nospace:narrow(1,1,size), len, batchRNN_valid)
print('Time elasped : '..timer:time().real)

Time elasped : 1.6905431747437	


In [68]:
input_data_test:size()

 367515
[torch.LongStorage of size 1]

