In [1]:
require 'nn'
require 'rnn'
require 'cutorch'
require 'cunn'
require 'cunnx'

In [51]:
-- Build a bag-of-words encoder: embed the tokens, average the embeddings over
-- the sequence, then apply Linear + Tanh.
--   nn_vocab_module: embedding module; expected to emit a
--                    sequence-length x batch-size x embed_dim tensor
--   embed_dim:       width of the embeddings and of the Linear layer
-- Returns the assembled nn.Sequential.
function build_bowmlp(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    model:add(nn_vocab_module)
    -- Mean over dimension 1 (the sequence); third argument is sizeAverage.
    -- nInputDims (second argument) is deliberately left nil: it is a count of
    -- input dimensions, not a feature width. Passing embed_dim there makes
    -- nn.Sum reduce dimension 2 instead whenever input:dim() == embed_dim + 1
    -- (e.g. embed_dim == 2 with a 3-D input).
    model:add(nn.Sum(1, nil, true))
    model:add(nn.Linear(embed_dim, embed_dim)) -- project the averaged embedding
    model:add(nn.Tanh())                       -- nn.ReLU() did worse here
    return model
end

-- Build an LSTM encoder: embed the tokens, run an LSTM over the sequence,
-- keep the last step's output, then apply Linear + Tanh.
--   nn_vocab_module: embedding module; expected to emit a
--                    sequence-length x batch-size x embed_dim tensor
--   embed_dim:       width of the embeddings, LSTM state and Linear layer
-- Returns the assembled nn.Sequential.
function build_lstm(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    model:add(nn_vocab_module)
    -- Split along dimension 1 into a sequence-length table of
    -- batch-size x embed_dim entries. nInputDims (second argument) is
    -- deliberately omitted: it is a count of input dimensions, not a feature
    -- width. Passing embed_dim there makes nn.SplitTable split dimension 2
    -- instead whenever input:dim() == embed_dim + 1 (e.g. embed_dim == 2).
    model:add(nn.SplitTable(1))
    model:add(nn.Sequencer(nn.LSTM(embed_dim, embed_dim)))
    model:add(nn.SelectTable(-1))              -- last element of the sequence table
    model:add(nn.Linear(embed_dim, embed_dim)) -- project the last LSTM output
    model:add(nn.Tanh())                       -- nn.ReLU() did worse here
    return model
end

-- Assemble the three-branch scorer: one encoder per input (sentences,
-- summary, query), joined along dimension 2 and fed through Linear -> Max -> Tanh.
--   model:      'bow' (build_bowmlp branches) or 'lstm' (build_lstm branches)
--   vocab_size: vocabulary size passed to nn.LookupTableMaskZero
--   embed_dim:  embedding width; every branch emits embed_dim features
--   outputSize: currently unused; the final Linear is hard-coded to 2 outputs,
--               which nn.Max(2) then reduces to their maximum
--   use_cuda:   when truthy the assembled network is converted with :cuda()
-- Returns the assembled nn.Sequential.
-- Raises an error when `model` is neither 'bow' nor 'lstm'. Previously an
-- unknown name silently reused whatever the globals mod1..mod3 held from an
-- earlier call, or added nil branches.
function build_model(model, vocab_size, embed_dim, outputSize, use_cuda)
    local build_branch
    if model == 'bow' then
        print("Running BOW model")
        build_branch = build_bowmlp
    elseif model == 'lstm' then
        print("Running LSTM model")
        build_branch = build_lstm
    else
        error(string.format(
            "build_model: unknown model type '%s' (expected 'bow' or 'lstm')",
            tostring(model)), 2)
    end

    -- A single lookup-table instance is handed to all three branches, so the
    -- embedding weights are shared between them.
    -- NOTE(review): the branches also share this module's output buffer;
    -- confirm that is intended rather than per-branch clones sharing weights.
    local nn_vocab = nn.LookupTableMaskZero(vocab_size, embed_dim)

    -- Branches are kept local so repeated calls never leak or reuse state.
    local branches = {}
    for i = 1, 3 do
        branches[i] = build_branch(nn_vocab, embed_dim)
    end

    local ParallelModel = nn.ParallelTable()
    for i = 1, #branches do
        ParallelModel:add(branches[i])
    end

    local FinalMLP = nn.Sequential()
    FinalMLP:add(ParallelModel)
    FinalMLP:add(nn.JoinTable(2))               -- concatenate the branch encodings
    FinalMLP:add(nn.Linear(embed_dim * 3, 2))
    FinalMLP:add(nn.Max(2))
    FinalMLP:add(nn.Tanh())

    if use_cuda then
        return FinalMLP:cuda()
    end
    return FinalMLP
end

In [38]:
-- Build a one-example toy data set.
--   use_cuda: when truthy the index tensors are built with torch.CudaLongTensor
--             and the target is moved to the GPU with :cuda()
-- Returns sentences, summary, query (each a 1x4 index tensor transposed with
-- :t()) and yrouge, a 1x1 target drawn from torch.rand.
-- The four tensors are local so that calling this function cannot clobber
-- notebook variables of the same name; callers bind the returned values.
function build_data(use_cuda)
    -- Tensor / LongTensor stay global, as in the original, in case other
    -- cells rely on them. NOTE(review): Tensor is not used in this function.
    if use_cuda then
        Tensor = torch.CudaTensor
        LongTensor = torch.CudaLongTensor
    else
        Tensor = torch.Tensor
        LongTensor = torch.LongTensor
    end

    -- Two-example variant kept for reference:
    --     sentences = LongTensor{{0, 1, 3, 4}, {0, 2, 4, 3}}:t()
    --     summary = LongTensor{{0, 0, 1, 4}, {0, 2, 3, 1}}:t()
    --     query = LongTensor{{0, 0, 4, 3}, {0, 0, 0, 0}}:t()
    local sentences = LongTensor{{0, 1, 3, 4}}:t()
    local summary = LongTensor{{0, 0, 1, 4}}:t()
    local query = LongTensor{{0, 0, 4, 3}}:t()

    local yrouge = torch.rand(1, 1)
    if use_cuda then
        yrouge = yrouge:cuda()
    end
    return sentences, summary, query, yrouge
end

In [39]:
-- Run configuration. These are globals on purpose: later cells read them.
usecuda, model = true, 'lstm'

-- Sizes and optimisation settings.
-- NOTE(review): batch_size is not read by any cell visible here.
batch_size, vocab_size, embed_dim = 2, 4, 10
outputSize, learning_rate = 1, 0.01

In [52]:
-- Build the toy inputs/target and the network from the configuration globals.
-- Both calls draw random numbers (torch.rand target, weight initialisation),
-- so their order is kept as is.
sentences, summary, query, yrouge = build_data(usecuda)
FinalMLP  = build_model(model, vocab_size, embed_dim, outputSize, usecuda)

Running LSTM model	


In [53]:
-- Mean-squared-error loss against the yrouge target. It is moved to the GPU
-- only when usecuda is set, so it follows the same switch as build_data and
-- build_model instead of always requiring CUDA.
criterion = nn.MSECriterion()
if usecuda then
    criterion = criterion:cuda()
end

In [54]:
-- Sanity check: a single forward pass over the three inputs before training.
FinalMLP:forward({sentences, summary, query})

0.01 *
 5.5973
[torch.CudaTensor of size 1]



In [55]:
-- Output of stage 1 (the nn.ParallelTable): one tensor per branch.
FinalMLP:get(1).output

{
  1 : CudaTensor - size: 1x10
  2 : CudaTensor - size: 1x10
  3 : CudaTensor - size: 1x10
}


In [56]:
-- Output of stage 2 (nn.JoinTable(2)): the branch outputs joined along dimension 2.
FinalMLP:get(2).output

Columns 1 to 10
-0.0205  0.1043 -0.2179 -0.0956  0.3087  0.2872 -0.0344  0.1342 -0.0806 -0.2100

Columns 11 to 20
 0.0211  0.0392  0.0362 -0.0125 -0.0987 -0.1972 -0.2201  0.0715 -0.3361 -0.1878

Columns 21 to 30
-0.0327  0.4685  0.2249  0.1029  0.3277 -0.3009 -0.1599 -0.2130 -0.1268 -0.0288
[torch.CudaTensor of size 1x30]



In [57]:
-- Output of stage 3 (nn.Linear(embed_dim * 3, 2)): two scores per row.
FinalMLP:get(3).output

0.01 *
  5.6031 -7.4797
[torch.CudaTensor of size 1x2]



In [58]:
-- Output of stage 4 (nn.Max(2)): the larger of the two scores.
FinalMLP:get(4).output

0.01 *
 5.6031
[torch.CudaTensor of size 1]



In [59]:
-- Regression target returned by build_data (drawn with torch.rand(1, 1)).
yrouge

 0.4763
[torch.CudaTensor of size 1x1]



In [60]:
-- Fit the network to the single toy example for 100 epochs, logging the loss
-- every 10th epoch. preds, loss and grads stay global so they can be
-- inspected from later cells.
local inputs = {sentences, summary, query}
for epoch = 1, 100 do
    -- Forward pass and loss.
    preds = FinalMLP:forward(inputs)
    loss = criterion:forward(preds, yrouge)

    -- Clear old gradients, back-propagate, then step the parameters.
    FinalMLP:zeroGradParameters()
    grads = criterion:backward(preds, yrouge)
    FinalMLP:backward(inputs, grads)
    FinalMLP:updateParameters(learning_rate)

    if epoch % 10 == 0 then
        print(string.format("Epoch %i, loss =%6f", epoch, loss))
    end
end

Epoch 10, loss =0.070904	


Epoch 20, loss =0.027853	


Epoch 30, loss =0.011818	


Epoch 40, loss =0.005300	


Epoch 50, loss =0.002469	


Epoch 60, loss =0.001180	


Epoch 70, loss =0.000575	


Epoch 80, loss =0.000283	


Epoch 90, loss =0.000141	


Epoch 100, loss =0.000070	
