In [1]:
require 'nn'
require 'rnn'
require 'cutorch'
require 'cunn'
require 'cunnx'

In [2]:
function build_bowmlp(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    :add(nn_vocab_module)            -- returns a sequence-length x batch-size x embedDim tensor
    :add(nn.Sum(1, embed_dim, true)) -- splits into a sequence-length table with batch-size x embedDim entries
    :add(nn.Linear(embed_dim, embed_dim)) -- map last state to a score for classification
    :add(nn.Tanh())                     ---     :add(nn.ReLU()) <- this one did worse
   return model
end

function build_lstm(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    :add(nn_vocab_module)            -- returns a sequence-length x batch-size x embedDim tensor
    :add(nn.SplitTable(1, embed_dim)) -- splits into a sequence-length table with batch-size x embedDim entries
    :add(nn.Sequencer(nn.LSTM(embed_dim, embed_dim)))
    :add(nn.SelectTable(-1)) -- selects last state of the LSTM
    :add(nn.Linear(embed_dim, embed_dim)) -- map last state to a score for classification
    :add(nn.Tanh())                     ---     :add(nn.ReLU()) <- this one did worse
   return model
end

function build_model(model, vocab_size, embed_dim, outputSize, use_cuda)
    local nn_vocab = nn.LookupTableMaskZero(vocab_size, embed_dim)
    if model == 'bow' then
        print("Running BOW model")
        mod1 = build_bowmlp(nn_vocab, embed_dim)
        mod2 = build_bowmlp(nn_vocab, embed_dim)
        mod3 = build_bowmlp(nn_vocab, embed_dim)
    end
    if model == 'lstm' then         
        print("Running LSTM model")
        mod1 = build_lstm(nn_vocab, embed_dim)
        mod2 = build_lstm(nn_vocab, embed_dim)
        mod3 = build_lstm(nn_vocab, embed_dim)
    end

    local ParallelModel = nn.ParallelTable()
    ParallelModel:add(mod1)
    ParallelModel:add(mod2)
    ParallelModel:add(mod3)

    local FinalMLP = nn.Sequential()
    FinalMLP:add(ParallelModel)
    FinalMLP:add(nn.JoinTable(2))
    FinalMLP:add(nn.Linear(embed_dim * 3, 2) )
    FinalMLP:add(nn.Max(2) )
    FinalMLP:add(nn.Tanh())

    if use_cuda then
        return FinalMLP:cuda()
    else
        return FinalMLP
    end
end

In [3]:
usecuda = true
model = 'lstm'

batch_size = 2
vocab_size = 4
embed_dim = 10
outputSize = 1
learning_rate = 0.01

In [4]:
if use_cuda then
  Tensor = torch.CudaTensor
  LongTensor = torch.CudaLongTensor
else
  Tensor = torch.Tensor
  LongTensor = torch.LongTensor
end

In [5]:
-- sentences, summary, query, yrouge = build_data(usecuda)
FinalMLP  = build_model(model, vocab_size, embed_dim, outputSize, usecuda)

Running LSTM model	


In [6]:
criterion = nn.MSECriterion():cuda()

In [7]:
sentences = {{0, 1, 3, 4}, {0, 2, 4, 3}}
summaries = {{0, 0, 1, 4}, {0, 2, 3, 1}}
scores = {0.74, -0.24}
queries = {{0, 1, 4, 3}, {0, 1, 4, 3}}

minibatch = 1

sentence = LongTensor({sentences[minibatch]}):t()
summary = LongTensor({summaries[minibatch]}):t()
query = LongTensor({queries[minibatch]}):t()
yrougue = Tensor({scores[minibatch]})

In [8]:
FinalMLP:forward({sentence, summary, query})

0.01 *
-4.4842
[torch.CudaTensor of size 1]



In [9]:
FinalMLP:get(1).output

{
  1 : CudaTensor - size: 1x10
  2 : CudaTensor - size: 1x10
  3 : CudaTensor - size: 1x10
}


In [10]:
FinalMLP:get(2).output

Columns 1 to 10
 0.0785 -0.1104  0.1924  0.0999  0.2157 -0.1067 -0.3475  0.1636  0.1813  0.2357

Columns 11 to 20
 0.0524  0.3517  0.1536 -0.1130  0.2979 -0.2889 -0.2363  0.1289  0.1409  0.2291

Columns 21 to 30
-0.0942  0.2010 -0.0956  0.1286  0.0615 -0.2787 -0.1118 -0.1678  0.1824 -0.0821
[torch.CudaTensor of size 1x30]



In [11]:
FinalMLP:get(3).output

-0.2189 -0.0449
[torch.CudaTensor of size 1x2]



In [12]:
FinalMLP:get(4).output

0.01 *
-4.4872
[torch.CudaTensor of size 1]



In [13]:
yrougue

 0.7400
[torch.DoubleTensor of size 1]



In [21]:
fullpreds = {0, 0}
for epoch = 1, 100 do
    for minibatch = 1, 2 do
        sentence = LongTensor({sentences[minibatch]}):t()
        summary = LongTensor({summaries[minibatch]}):t()
        query = LongTensor({queries[minibatch]}):t()
        yrougue = Tensor({scores[minibatch]}):cuda()
        preds = FinalMLP:forward({sentence, summary, query})
        --- storing predictions
        fullpreds[minibatch] = torch.totable(preds)
        
        loss = criterion:forward(preds, yrougue)
        FinalMLP:zeroGradParameters()
        grads = criterion:backward(preds, yrougue)
        FinalMLP:backward({sentence, summary, query}, grads)
        FinalMLP:updateParameters(learning_rate)

    end
    if (epoch % 10)==0 then 
        print(string.format("Epoch %i, loss =%6f", epoch, loss))
    end
end

Epoch 10, loss =0.132912	


Epoch 20, loss =0.178267	


Epoch 30, loss =0.189686	


Epoch 40, loss =0.186478	


Epoch 50, loss =0.177294	


Epoch 60, loss =0.165713	


Epoch 70, loss =0.153201	




Epoch 80, loss =0.140383	


Epoch 90, loss =0.127561	


Epoch 100, loss =0.114925	
