In [1]:
require 'hdf5'
require 'nn'

In [333]:
-- Read the train / dev / test input matrices from MM_data_cap.hdf5.
myFile = hdf5.open('../data/MM_data_cap.hdf5','r')
data = myFile:all()
-- `data` now holds everything that was read, so the handle can be released
-- before the individual matrices are pulled out of the table.
myFile:close()
input_matrix_train_cap = data['input_matrix_train_cap']
input_matrix_dev_cap   = data['input_matrix_dev_cap']
input_matrix_test_cap  = data['input_matrix_test_cap']

In [334]:
-- Read the 'sent_start' table; the training loops receive it as `sent` and use
-- its first column as a row offset and its second column as a size.
myFile = hdf5.open('../data/sent_start.hdf5','r')
data = myFile:all()
myFile:close()
sent = data['sent_start']

In [335]:
-- Build the training tensors by pairing every row r >= 2 with its predecessor:
--   train_input column 1      <- column 1 of row r
--   train_input columns 2..10 <- columns 2..10 of row r-1
--   train_output              <- column 16 of row r (a view, not a copy)
-- NOTE(review): columns 2..10 presumably encode the previous label - confirm
-- against the script that produced the HDF5 file.
nwords = input_matrix_train_cap:size(1)
train_input = torch.Tensor(nwords-1,10)
train_input[{{}, {1,1}}]:copy(input_matrix_train_cap[{{2,nwords}, {1,1}}])
train_input[{{}, {2,10}}]:copy(input_matrix_train_cap[{{1,nwords-1}, {2,10}}])
train_output = input_matrix_train_cap[{{2,nwords}, {16,16}}]

In [336]:
-- Read the 'embeddings' dataset; it is copied into the lookup table further down.
myFile = hdf5.open('../data/embeddings.hdf5','r')
data2 = myFile:all()
myFile:close()
embeddings = data2['embeddings']

In [380]:
--- Score every (conditioning class -> output class) pair for one observation.
-- For each class j in 1..C the model is run on the i-th observation together
-- with a one-hot vector for j; the resulting 1 x C output becomes row j.
-- @param observations tensor indexed by position; observations[i] must hold a
--        single element (it is viewed as 1 x 1)
-- @param i position of the observation to score
-- @param model module whose forward takes {1 x 1 tensor, 1 x C one-hot} and
--        returns a 1 x C tensor
-- @param C number of classes
-- @return C x C tensor y, where y[j] is the model output given one-hot(j)
function compute_logscore(observations, i, model, C)
    local y = torch.zeros(C, C)
    local hot_1 = torch.zeros(C)
    -- The observation does not change across classes, so view it only once.
    local word = observations[i]:view(1, 1)
    for j = 1, C do
        hot_1:zero()
        hot_1[j] = 1
        -- The one-hot width follows C (it used to be hard-coded to 9).
        y:narrow(1, j, 1):copy(model:forward({word, hot_1:view(1, C)}))
    end
    return y
end

--- Decode the highest-scoring class sequence with the Viterbi algorithm.
-- The first position is forced to class 8: the initial vector is log(one-hot)
-- (0 for class 8, -inf elsewhere) and the first step is scored with row 8 of
-- the score matrix. The original comment describes class 8 as the
-- start-of-sentence marker.
-- @param observations n x 1 tensor of observations
-- @param compute_logscore function(observations, i, model, C) returning a
--        C x C tensor whose entry [p][c] scores class c given previous class p
-- @param model model handed through to compute_logscore
-- @param C number of classes
-- @return tensor of n class indices (default tensor type)
function viterbi(observations, compute_logscore, model, C)
    local START_CLASS = 8

    -- log of a one-hot vector: 0 at START_CLASS, -inf for every other class.
    local initial = torch.zeros(C, 1)
    initial[{START_CLASS, 1}] = 1
    initial:log()

    local n = observations:size(1)
    local backpointer_table = torch.Tensor(n, C)

    -- First timestep: scores conditioned on START_CLASS as the previous class.
    -- max(2) over a C x 1 tensor leaves the values unchanged; it only fixes
    -- the C x 1 shape that the loop below expects.
    local first_scores = compute_logscore(observations, 1, model, C)
    local maxes = (initial + first_scores[START_CLASS]):max(2)

    -- Remaining timesteps: scores[c][p] = y[p][c] + maxes[p], maximised over p.
    for i = 2, n do
        local y = compute_logscore(observations, i, model, C)
        -- `scores` is local so that decoding no longer writes a global.
        local scores = y:transpose(1, 2) + maxes:view(1, C):expand(C, C)
        local backpointers
        maxes, backpointers = scores:max(2)
        backpointer_table[i] = backpointers
    end

    -- Follow the backpointers from the best final class to recover the path.
    -- Row 1 of backpointer_table is never written and never read.
    local classes = torch.Tensor(n)
    local _, best_last = maxes:max(1)
    classes[n] = best_last[1][1]
    for i = n, 2, -1 do
        classes[i-1] = backpointer_table[{i, classes[i]}]
    end

    return classes
end

--- Train the model with per-sentence, perceptron-style updates.
-- Each sentence is decoded with viterbi. At every position where the decoded
-- class differs from the gold class, a gradient of +1 on the decoded class
-- score and -1 on the gold class score is back-propagated. The accumulated
-- gradients are applied once per sentence with step size eta.
-- Relies on the file-level functions viterbi and compute_logscore.
-- @param train_input (rows x din) tensor; column 1 is fed to the first model
--        branch and columns 2..din to the second
-- @param sent 2-column table: sent[{t,1}] - 1 is the first row of sentence t
--        and sent[{t,2}] + 1 the number of rows taken (rows 2..size-1 are used)
-- @param train_output gold classes aligned with train_input
-- @param model nn module taking {1 x 1 tensor, 1 x (din-1) tensor} and
--        returning 1 x nclass scores
-- @param din number of columns of train_input
-- @param nclass number of classes
-- @param eta learning rate passed to model:updateParameters
-- @param nEpochs number of passes over the sentences
function train_model(train_input, sent, train_output, model, din, nclass, eta, nEpochs)
    local first_sent, last_sent = 2, sent:size(1) - 1

    -- Size the work buffers for the longest sentence. The previous fixed size
    -- of 100 is kept as a floor; longer sentences used to fail on narrow().
    local max_len = 100
    for t = first_sent, last_sent do
        max_len = math.max(max_len, sent[{t,2}] + 1)
    end

    -- Work buffers are local so that training no longer leaks globals.
    local inputs_batch = torch.DoubleTensor(max_len, din)
    local gold_sequence = torch.DoubleTensor(max_len)
    local high_score_seq = torch.DoubleTensor(max_len)
    local grad = torch.zeros(nclass)

    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        for t = first_sent, last_sent do
            local len = sent[{t,2}] + 1
            local start = sent[{t,1}] - 1

            inputs_batch:narrow(1, 1, len):copy(train_input:narrow(1, start, len))
            gold_sequence:narrow(1, 1, len):copy(train_output:narrow(1, start, len))

            -- reset gradients
            model:zeroGradParameters()

            -- Decode the sentence with the current parameters.
            high_score_seq:narrow(1, 1, len):copy(
                viterbi(inputs_batch:narrow(1, 1, len):narrow(2, 1, 1),
                        compute_logscore, model, nclass))

            for ii = 1, len do
                local gold, pred = gold_sequence[ii], high_score_seq[ii]
                if pred ~= gold then
                    -- backward must directly follow a forward on the same
                    -- input so the module state matches the gradient.
                    local x = {inputs_batch:narrow(1, ii, 1):narrow(2, 1, 1),
                               inputs_batch:narrow(1, ii, 1):narrow(2, 2, din - 1)}
                    grad:zero()
                    grad[gold] = -1   -- reward the correct class
                    grad[pred] = 1    -- penalise the wrongly decoded class
                    model:forward(x)
                    model:backward(x, grad:view(1, nclass))
                end
            end

            model:updateParameters(eta)
        end

        print('Epoch '..i..': '..timer:time().real)
    end
end

--- Train with per-sentence perceptron-style updates and validate every epoch.
-- Each sentence is decoded with viterbi. At a position ii where the decoded
-- class differs from the gold class:
--   * if position ii-1 was decoded correctly, the scores at ii (computed from
--     the stored context features) get +1 on the decoded class, -1 on the gold one;
--   * unless ii is the last position, the score of the gold class at ii+1 is
--     pushed up given one-hot(gold[ii]) and down given one-hot(decoded[ii]).
-- Gradients are applied once per sentence. After each epoch obs_val is decoded
-- and scored with f_score.
-- Relies on the file-level functions viterbi and compute_logscore.
-- NOTE(review): the "down" update targets gold[ii+1] rather than the decoded
-- class at ii+1, and the update at ii is skipped after a preceding error.
-- Both are kept exactly as in the original; confirm they are intended.
-- @param train_input (rows x din) tensor; column 1 is fed to the first model
--        branch and columns 2..din to the second
-- @param sent 2-column table: sent[{t,1}] - 1 is the first row of sentence t
--        and sent[{t,2}] + 1 the number of rows taken (rows 2..size-1 are used)
-- @param train_output gold classes aligned with train_input
-- @param model nn module taking {1 x 1 tensor, 1 x nclass tensor} and
--        returning 1 x nclass scores
-- @param din number of columns of train_input
-- @param nclass number of classes
-- @param eta learning rate passed to model:updateParameters
-- @param nEpochs number of passes over the sentences
-- @param obs_val validation observations handed to viterbi
-- @param true_val gold validation classes handed to f_score
-- @param f_score function(predicted, gold) returning three numbers
-- @return nEpochs x 3 tensor holding the three f_score results per epoch
function train_model2(train_input, sent, train_output, model, din, nclass, eta, nEpochs, obs_val, true_val, f_score)
    local first_sent, last_sent = 2, sent:size(1) - 1

    -- Size the work buffers for the longest sentence. The previous fixed size
    -- of 100 is kept as a floor; longer sentences used to fail on narrow().
    local max_len = 100
    for t = first_sent, last_sent do
        max_len = math.max(max_len, sent[{t,2}] + 1)
    end

    -- All working state is local so that training no longer leaks globals.
    local val_res = torch.zeros(nEpochs, 3)
    local inputs_batch = torch.DoubleTensor(max_len, din)
    local gold_sequence = torch.DoubleTensor(max_len)
    local high_score_seq = torch.DoubleTensor(max_len)
    local grad_pos = torch.zeros(nclass)
    local grad_neg = torch.zeros(nclass)
    local one_hot_true = torch.zeros(1, nclass)
    local one_hot_false = torch.zeros(1, nclass)

    -- Update at position ii using the stored context features of that row.
    -- backward directly follows forward on the same input on purpose.
    local function update_at(ii)
        local x = {inputs_batch:narrow(1, ii, 1):narrow(2, 1, 1),
                   inputs_batch:narrow(1, ii, 1):narrow(2, 2, din - 1)}
        grad_pos:zero()
        model:forward(x)
        grad_pos[gold_sequence[ii]] = -1    -- reward the correct class
        grad_pos[high_score_seq[ii]] = 1    -- penalise the decoded class
        model:backward(x, grad_pos:view(1, nclass))
    end

    -- Update at position ii+1, conditioning first on the gold class at ii and
    -- then on the decoded class at ii.
    local function update_next(ii)
        local word = inputs_batch:narrow(1, ii + 1, 1):narrow(2, 1, 1)
        local next_gold = gold_sequence[ii + 1]
        grad_neg:zero()
        grad_pos:zero()

        one_hot_true:zero()
        one_hot_true[1][gold_sequence[ii]] = 1
        model:forward({word, one_hot_true})
        grad_neg[next_gold] = -1
        model:backward({word, one_hot_true}, grad_neg:view(1, nclass))

        one_hot_false:zero()
        one_hot_false[1][high_score_seq[ii]] = 1
        model:forward({word, one_hot_false})
        grad_pos[next_gold] = 1
        model:backward({word, one_hot_false}, grad_pos:view(1, nclass))
    end

    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        for t = first_sent, last_sent do
            local len = sent[{t,2}] + 1
            local start = sent[{t,1}] - 1

            inputs_batch:narrow(1, 1, len):copy(train_input:narrow(1, start, len))
            gold_sequence:narrow(1, 1, len):copy(train_output:narrow(1, start, len))

            -- reset gradients
            model:zeroGradParameters()

            -- Decode the sentence with the current parameters.
            high_score_seq:narrow(1, 1, len):copy(
                viterbi(inputs_batch:narrow(1, 1, len):narrow(2, 1, 1),
                        compute_logscore, model, nclass))

            local previous_error = false
            for ii = 1, len do
                if high_score_seq[ii] ~= gold_sequence[ii] then
                    if not previous_error then
                        update_at(ii)
                    end
                    if ii ~= len then
                        update_next(ii)
                    end
                    previous_error = true
                else
                    previous_error = false
                end
            end

            model:updateParameters(eta)
        end

        print('Epoch '..i..': '..timer:time().real)
        -- Validation uses nclass (it used to be hard-coded to 9).
        local cl = viterbi(obs_val, compute_logscore, model, nclass)
        val_res[i][1], val_res[i][2], val_res[i][3] = f_score(cl, true_val)
        print('f-score: '.. val_res[i][1])
    end
    return val_res
end

In [376]:
--- Precision and recall over the "positive" classes (class index > 1).
-- A prediction counts as correct when it equals the gold class and the gold
-- class is positive.
-- NOTE(review): class 1 is presumably the "outside" / no-entity label - confirm.
-- @param predicted_classes indexable sequence of predicted class indices,
--        providing :size(1)
-- @param true_classes indexable sequence of gold class indices, same length
-- @return precision, recall; each is 0 (instead of NaN from 0/0) when its
--         denominator is zero
function compute_score(predicted_classes, true_classes)
    local n = predicted_classes:size(1)
    local right_pred, positive_true, positive_pred = 0, 0, 0
    for i = 1, n do
        local pred, gold = predicted_classes[i], true_classes[i]
        if pred > 1 then
            positive_pred = positive_pred + 1
        end
        if gold > 1 then
            positive_true = positive_true + 1
            if gold == pred then
                right_pred = right_pred + 1
            end
        end
    end
    -- Guard the divisions: with no positive prediction (or no positive gold
    -- label) the original 0/0 produced NaN.
    local precision = 0
    if positive_pred > 0 then
        precision = right_pred / positive_pred
    end
    local recall = 0
    if positive_true > 0 then
        recall = right_pred / positive_true
    end
    return precision, recall
end

--- F1 score together with the precision and recall it is built from.
-- @param predicted_classes see compute_score
-- @param true_classes see compute_score
-- @return f1, precision, recall; f1 is 0 when precision + recall is 0
function f_score(predicted_classes, true_classes)
    local p, r = compute_score(predicted_classes, true_classes)
    if p + r == 0 then
        -- The harmonic mean is undefined here; report 0 rather than NaN.
        return 0, p, r
    end
    return 2*p*r/(p+r), p, r
end

In [383]:
-- Lookup table with 400002 rows of width 50. The first 400000 rows are
-- overwritten with the loaded embeddings; the last two rows keep whatever
-- initialisation nn.LookupTable gives them.
LT = nn.LookupTable(400002,50)
LT.weight[{{1,400000}, {}}]:copy(embeddings)

In [384]:
-- Branch 1: look the first input up in LT and reshape the result to (rows x 50).
t1_1 = nn.Sequential():add(LT):add(nn.View(-1,50))

-- Branch 2: hand the second input through unchanged.
t1_2 = nn.Identity()

-- Apply both branches side by side to the {first, second} input table.
t1 = nn.ParallelTable():add(t1_1):add(t1_2)

-- Linear scorer from the joined 59 features to 9 outputs.
lin = nn.Linear(59,9)

-- Full model: branches -> concatenation along dimension 2 -> linear layer.
model = nn.Sequential():add(t1):add(nn.JoinTable(2)):add(lin)

In [385]:
-- Start the linear layer from all-zero weights (the bias is left untouched).
lin.weight:fill(0);

In [391]:
-- Independent copy of column 1 of the dev matrix, kept as an (n x 1) tensor.
observations = input_matrix_dev_cap[{{}, {1,1}}]:clone()

In [392]:
-- Column 16 of the dev matrix as a 1-D view (the gold classes used for scoring).
true_classes = input_matrix_dev_cap:select(2,16)

In [389]:
-- First training run: din = 10, nclass = 9, eta = 0.0001, nEpochs = 1,
-- validated on the dev observations after the epoch.
val_res = train_model2(train_input, sent, train_output, model, 10, 9, 0.0001, 1, observations, true_classes, f_score)

Epoch 1: 256.44453811646	


f-score: 0.63478260869565	


In [390]:
-- Continue training the same model object for 19 more epochs with the same
-- settings; the per-epoch validation scores are stored in val_res2.
val_res2 = train_model2(train_input, sent, train_output, model, 10, 9, 0.0001, 19, observations, true_classes, f_score)

Epoch 1: 253.30662202835	


f-score: 0.56481481481481	


Epoch 2: 246.12050509453	


f-score: 0.46610169491525	


Epoch 3: 257.61001491547	


f-score: 0.46153846153846	


Epoch 4: 249.08931493759	


f-score: 0.47577092511013	


Epoch 5: 247.63232302666	


f-score: 0.4688995215311	


Epoch 6: 247.5095949173	


f-score: 0.49090909090909	


Epoch 7: 247.81283092499	


f-score: 0.53	


Epoch 8: 248.55041408539	


f-score: 0.54639175257732	


Epoch 9: 248.59220290184	


f-score: 0.52884615384615	


Epoch 10: 249.00901293755	


f-score: 0.54455445544554	


Epoch 11: 248.41658306122	


f-score: 0.51231527093596	


Epoch 12: 247.52356481552	


f-score: 0.50485436893204	


Epoch 13: 247.83906602859	


f-score: 0.51485148514851	


Epoch 14: 247.31948900223	


f-score: 0.5049504950495	


Epoch 15: 247.84553194046	


f-score: 0.52307692307692	


Epoch 16: 247.52341294289	


f-score: 0.48756218905473	


Epoch 17: 247.16925907135	


f-score: 0.47474747474747	


Epoch 18: 247.20517706871	


f-score: 0.48275862068966	


Epoch 19: 247.14581179619	


f-score: 0.47236180904523	


In [402]:
-- First 13 gold dev classes, shown for comparison with the decoder output.
true_classes[{{1,13}}]

 8
 1
 1
 4
 1
 1
 1
 1
 1
 1
 1
 1
 9
[torch.LongTensor of size 13]



In [400]:
-- Decode the first 13 dev observations with the trained model.
viterbi(observations[{{1,13}}], compute_logscore, model, 9)

 8
 3
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 5
[torch.DoubleTensor of size 13]



In [393]:
-- Decode the whole dev set with the trained model (9 classes).
cl = viterbi(observations, compute_logscore, model, 9)

In [394]:
-- Print every value returned by f_score (F1, precision, recall) for the dev set.
print (f_score(cl, true_classes))

0.53327021909656	0.50733401955739	0.56200114025086	


In [368]:
-- Dev-set score for a run with learning rate 0.001.
-- NOTE(review): the four numbers recorded below this cell are consistent with
-- (positive_true, positive_pred, right_pred, F1), since 2*73/(108+123) = 0.632...,
-- so they appear to come from an earlier version of the scoring code that
-- also printed the counts.
print (f_score(cl, true_classes))

108	
123	
73	
0.63203463203463	


In [374]:
-- Dev-set score for a run with learning rate 0.01.
-- NOTE(review): the four numbers recorded below this cell are consistent with
-- (positive_true, positive_pred, right_pred, F1), since 2*58/(108+100) = 0.557...,
-- so they appear to come from an earlier version of the scoring code that
-- also printed the counts.
print (f_score(cl, true_classes))

108	
100	
58	
0.55769230769231	
