In [1]:
require 'hdf5'
require 'nn'

In [2]:
-- Pull every dataset out of the MM_data_cap archive, release the file handle
-- straight away, then bind the train / dev / test matrices to globals.
myFile = hdf5.open('../data/MM_data_cap.hdf5','r')
data = myFile:all()
myFile:close()
input_matrix_train_cap = data['input_matrix_train_cap']
input_matrix_dev_cap = data['input_matrix_dev_cap']
input_matrix_test_cap = data['input_matrix_test_cap']

In [17]:
-- Read the `sent_start` dataset; train_model later reads sent[{t,1}] and
-- sent[{t,2}] as the per-sentence start row and size.
myFile = hdf5.open('../data/sent_start.hdf5','r')
data = myFile:all()
myFile:close()
sent = data['sent_start']

In [8]:
-- Pair consecutive rows of the training matrix:
--   train_input[r]  = { column 1 of matrix row r+1, columns 2..10 of matrix row r }
--   train_output[r] = column 16 of matrix row r+1 (a view, not a copy)
nwords = input_matrix_train_cap:size(1)
train_input = torch.Tensor(nwords - 1, 10)
do
    local n_pairs = nwords - 1
    -- Two overlapping row windows of the matrix, shifted by one row.
    local rows_from_second = input_matrix_train_cap:narrow(1, 2, n_pairs)
    local rows_to_penultimate = input_matrix_train_cap:narrow(1, 1, n_pairs)

    train_input:narrow(2, 1, 1):copy(rows_from_second:narrow(2, 1, 1))
    train_input:narrow(2, 2, 9):copy(rows_to_penultimate:narrow(2, 2, 9))
    train_output = rows_from_second:narrow(2, 16, 1)
end

In [11]:
-- Read the `embeddings` dataset; the handle is closed as soon as the
-- contents have been loaded into memory.
myFile = hdf5.open('../data/embeddings.hdf5','r')
data2 = myFile:all()
myFile:close()
embeddings = data2['embeddings']

In [305]:
--- Score every (previous class, current class) pair at one position.
--
-- For each candidate previous class j, a one-hot vector of length C (with a 1
-- at j) is fed to the model together with the observation at position i; the
-- model output becomes row j of the result.
--
-- @param observations  tensor indexed by position; observations[i] must hold
--                      exactly one element (it is reshaped to 1x1)
-- @param i             position to score
-- @param model         module whose forward({1x1 observation, 1xC one-hot})
--                      returns C values
-- @param C             number of classes
-- @return              C x C tensor; entry [j][k] is the model output for
--                      class k when the previous class is j
function compute_logscore(observations, i, model, C)
    local y = torch.zeros(C, C)
    local hot_1 = torch.zeros(C)
    -- The observation does not depend on j, so build its 1x1 view once.
    local obs = observations[i]:view(1, 1)
    for j = 1, C do
        hot_1:zero()
        hot_1[j] = 1
        -- Reshape with C (the original literal 9 failed whenever C ~= 9).
        y:narrow(1, j, 1):copy(model:forward({obs, hot_1:view(1, C)}))
    end
    return y
end

--- Recover the highest-scoring class sequence with the Viterbi algorithm.
--
-- The first position is pinned to `start_class`: the initial distribution is
-- the log of a one-hot vector, so every other class starts at -inf.
--
-- @param observations      tensor with one row per position (callers in this
--                          file pass an n x 1 tensor)
-- @param compute_logscore  function(observations, i, model, C) returning a
--                          C x C tensor whose [j][k] entry scores class k at
--                          position i given class j at position i-1
-- @param model             passed through to compute_logscore
-- @param C                 number of classes
-- @param start_class       (optional) class assumed before/at the first
--                          position; defaults to 8, the value that used to be
--                          hard-coded
-- @return                  1-D tensor of n class indices
function viterbi(observations, compute_logscore, model, C, start_class)
    start_class = start_class or 8

    local n = observations:size(1)

    -- log(one-hot): 0 for start_class, -inf for every other class
    local initial = torch.zeros(C, 1)
    initial[{start_class, 1}] = 1
    initial:log()

    -- backpointer_table[i][k] = best previous class for class k at position i
    -- (row 1 is never written nor read)
    local backpointer_table = torch.Tensor(n, C)

    -- First timestep: start scores plus the transition row out of start_class.
    -- The max over the singleton dimension only normalises the shape to C x 1.
    local maxes, backpointers =
        (initial + compute_logscore(observations, 1, model, C)[start_class]):max(2)

    -- Remaining timesteps ("forwarding" the maxes)
    for i = 2, n do
        local edge_scores = compute_logscore(observations, i, model, C)
        -- scores[k][j] = maxes[j] + edge_scores[j][k]; kept local so it no
        -- longer leaks into the global environment
        local scores = edge_scores:transpose(1, 2) + maxes:view(1, C):expand(C, C)

        maxes, backpointers = scores:max(2)
        backpointer_table[i] = backpointers
    end

    -- Follow backpointers from the best final class to recover the path
    local classes = torch.Tensor(n)
    maxes, classes[n] = maxes:max(1)
    for i = n, 2, -1 do
        classes[i-1] = backpointer_table[{i, classes[i]}]
    end

    return classes
end

--- Train `model` one sentence at a time with perceptron-style updates.
--
-- For each sentence t in 2 .. sent:size(1)-1 the current model is decoded with
-- `viterbi`; at every position where the decoded class differs from the gold
-- class, gradients are accumulated with model:backward, and one
-- model:updateParameters(eta) step is taken per sentence.
--
-- @param train_input   tensor with `din` columns; column 1 is the model's
--                      first input, columns 2 .. nclass+1 its second input
-- @param sent          tensor read as sent[{t,1}] (start row) and sent[{t,2}]
--                      (sentence size)
-- @param train_output  gold classes, row-aligned with train_input
-- @param model         nn module taking {first input, 1 x nclass features}
--                      and returning nclass scores
-- @param din           number of columns of train_input
-- @param nclass        number of classes
-- @param eta           learning rate passed to model:updateParameters
-- @param nEpochs       number of passes over the sentences
function train_model(train_input, sent, train_output, model, din, nclass, eta, nEpochs)
    -- Scratch buffers. All of them are local (they used to leak as globals)
    -- and the per-sentence ones grow on demand instead of failing on
    -- sentences longer than the initial capacity.
    local capacity = 100
    local inputs_batch = torch.DoubleTensor(capacity, din)
    local gold_sequence = torch.DoubleTensor(capacity)
    local high_score_seq = torch.DoubleTensor(capacity)
    local grad_pos = torch.zeros(nclass)
    local grad_neg = torch.zeros(nclass)
    local pr2 = torch.zeros(nclass)

    for i = 1, nEpochs do
        -- timing the epoch
        local timer = torch.Timer()

        for t = 2, sent:size(1)-1 do
            -- The window starts one row before the sentence start and spans
            -- sent_size + 1 rows.
            local first_row = sent[{t,1}] - 1
            local len = sent[{t,2}] + 1

            if len > capacity then
                capacity = len
                inputs_batch:resize(capacity, din)
                gold_sequence:resize(capacity)
                high_score_seq:resize(capacity)
            end

            inputs_batch:narrow(1, 1, len):copy(train_input:narrow(1, first_row, len))
            gold_sequence:narrow(1, 1, len):copy(train_output:narrow(1, first_row, len))

            -- reset gradients
            model:zeroGradParameters()

            -- Decode the window with the current parameters
            high_score_seq:narrow(1, 1, len):copy(
                viterbi(inputs_batch:narrow(1, 1, len):narrow(2, 1, 1),
                        compute_logscore, model, nclass))

            -- Position 1 has no predecessor inside the window
            -- (high_score_seq[ii-1] would be index 0 and raise), so updates
            -- start at position 2.
            for ii = 2, len do
                if high_score_seq[ii] ~= gold_sequence[ii] then
                    local row = inputs_batch:narrow(1, ii, 1)
                    local features = row:narrow(2, 2, nclass)
                    local gold_input = {row:narrow(2, 1, 1), features}
                    -- NOTE(review): the previously decoded class is passed as
                    -- the model's FIRST input (the lookup index) while the
                    -- second input stays the gold-history features; and below
                    -- grad_pos is applied with gold_input, grad_neg with
                    -- pred_input. Both are kept exactly as in the original --
                    -- confirm against the intended perceptron update.
                    local pred_input = {torch.Tensor({high_score_seq[ii-1]}), features}

                    grad_neg:zero()
                    grad_pos:zero()

                    model:forward(gold_input)
                    pr2:copy(model:forward(pred_input))
                    local _, argmax = pr2:view(1, nclass):max(2)

                    grad_neg[gold_sequence[ii]] = -1
                    grad_pos[argmax[1][1]] = 1
                    model:backward(gold_input, grad_pos:view(1, nclass))
                    model:backward(pred_input, grad_neg:view(1, nclass))
                end
            end

            model:updateParameters(eta)
        end

        print('Epoch '..i..': '..timer:time().real)
    end
end

In [253]:
-- Scratch cell: allocate the same buffers train_model uses, as globals, and
-- fill the input / gold buffers for one sentence (t = 4) for inspection.
inputs_batch = torch.DoubleTensor(100, 10)
gold_sequence = torch.DoubleTensor(100)
high_score_seq = torch.DoubleTensor(100)
grad_pos = torch.zeros(9)
grad_neg = torch.zeros(9)
pr1 = torch.zeros(9)
pr2 = torch.zeros(9)

t = 4
sent_size = sent[{t,2}]

do
    -- The window starts one row before the sentence start and spans
    -- sent_size + 1 rows.
    local first_row = sent[{t,1}] - 1
    local n_rows = sent_size + 1
    inputs_batch:narrow(1, 1, n_rows):copy(train_input:narrow(1, first_row, n_rows))
    gold_sequence:narrow(1, 1, n_rows):copy(train_output:narrow(1, first_row, n_rows))
end

In [179]:
--- Precision and recall over the "positive" classes (class index > 1).
--
-- A position counts as a positive prediction when predicted_classes[i] > 1,
-- as a positive gold label when true_classes[i] > 1, and as correct when both
-- agree on a class > 1. The three counts are printed (gold positives,
-- predicted positives, correct) before the ratios are returned.
--
-- @param predicted_classes  sequence supporting :size(1) and numeric indexing
-- @param true_classes       sequence of the same length, numeric indexing
-- @return precision, recall; each is 0 (instead of NaN) when its denominator
--         is zero
function compute_score(predicted_classes, true_classes)
    local n = predicted_classes:size(1)
    local right_pred = 0
    local positive_true = 0
    local positive_pred = 0
    for i = 1, n do
        local predicted = predicted_classes[i]
        local gold = true_classes[i]
        if predicted > 1 then
            positive_pred = positive_pred + 1
        end
        if gold > 1 then
            positive_true = positive_true + 1
            if gold == predicted then
                right_pred = right_pred + 1
            end
        end
    end
    print(positive_true)
    print(positive_pred)
    print(right_pred)
    -- Guard the 0/0 cases: no positive predictions / no positive gold labels.
    local precision = positive_pred > 0 and right_pred / positive_pred or 0
    local recall = positive_true > 0 and right_pred / positive_true or 0
    return precision, recall
end
        
--- F1 score (harmonic mean of precision and recall) from compute_score.
--
-- @param predicted_classes  forwarded to compute_score
-- @param true_classes       forwarded to compute_score
-- @return 2*p*r/(p+r), or 0 when p + r is zero or not a number
function f_score(predicted_classes, true_classes)
    local p, r = compute_score(predicted_classes, true_classes)
    -- `not (x > 0)` holds for x == 0 and for NaN, so both degenerate cases
    -- return 0 instead of propagating NaN.
    if not (p + r > 0) then
        return 0
    end
    return 2*p*r/(p+r)
end

In [286]:
-- Lookup table with 400002 rows of width 50. Rows 1..400000 are overwritten
-- with `embeddings`; the last two rows keep whatever nn.LookupTable
-- initialised them to.
LT = nn.LookupTable(400002, 50)
LT.weight:sub(1, 400000):copy(embeddings)

In [287]:
-- Network: {index, 9 extra features} -> [embedding lookup | identity]
--          -> join along dim 2 (50 + 9 = 59 columns) -> Linear(59, 9).

-- Branch 1: embedding lookup, flattened to rows of width 50
t1_1 = nn.Sequential():add(LT):add(nn.View(-1,50))

-- Branch 2: features are passed through unchanged
t1_2 = nn.Identity()

-- Apply each branch to its own element of the input table
t1 = nn.ParallelTable():add(t1_1):add(t1_2)

lin = nn.Linear(59,9)

model = nn.Sequential():add(t1):add(nn.JoinTable(2)):add(lin)

In [None]:
-- Start the linear layer from all-zero weights (the bias is not touched here).
lin.weight:fill(0)

In [314]:
-- Run training; the arguments after `model` are din = 10, nclass = 9,
-- eta = 0.0001, nEpochs = 1.
train_model(train_input, sent, train_output, model, 10, 9, 0.0001, 1)

Epoch 1: 292.69423389435	



In [306]:
-- First 500 rows of column 1 of the dev matrix, copied into a 500 x 1 tensor.
observations = input_matrix_dev_cap:narrow(1, 1, 500):narrow(2, 1, 1):clone()

In [307]:
-- First 500 rows of column 16 of the dev matrix, squeezed to a 1-D tensor.
true_classes = input_matrix_dev_cap:narrow(1, 1, 500):narrow(2, 16, 1):squeeze()

In [315]:
-- Decode the dev observations with the trained model, using 9 classes.
cl = viterbi(observations, compute_logscore, model, 9)

In [316]:
-- Print the F-score of the decoded classes against the gold classes
-- (compute_score also prints its three counts first).
print (f_score(cl, true_classes))

108	
463	
17	
0.05954465849387	


In [227]:
-- Notebook expression cell: view of the first 50 decoded classes.
cl:narrow(1,1,50)

 8
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 1
 4
 4
 4
 1
 4
 4
 1
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
 4
[torch.DoubleTensor of size 50]

