In [1]:
require 'nn'
require 'rnn'
require 'cutorch'
require 'cunn'
require 'cunnx'

In [2]:
function build_bowmlp(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    :add(nn_vocab_module)            -- returns a sequence-length x batch-size x embedDim tensor
    :add(nn.Sum(1, embed_dim, true)) -- splits into a sequence-length table with batch-size x embedDim entries
    :add(nn.Linear(embed_dim, embed_dim)) -- map last state to a score for classification
    :add(nn.Tanh())                     ---     :add(nn.ReLU()) <- this one did worse
   return model
end

function build_lstm(nn_vocab_module, embed_dim)
    local model = nn.Sequential()
    :add(nn_vocab_module)            -- returns a sequence-length x batch-size x embedDim tensor
    :add(nn.SplitTable(1, embed_dim)) -- splits into a sequence-length table with batch-size x embedDim entries
    :add(nn.Sequencer(nn.LSTM(embed_dim, embed_dim)))
    :add(nn.SelectTable(-1)) -- selects last state of the LSTM
    :add(nn.Linear(embed_dim, embed_dim)) -- map last state to a score for classification
    :add(nn.Tanh())                     ---     :add(nn.ReLU()) <- this one did worse
   return model
end

function build_model(model, vocab_size, embed_dim, outputSize, use_cuda)
    local nn_vocab = nn.LookupTableMaskZero(vocab_size, embed_dim)
    if model == 'bow' then
        print("Running BOW model")
        mod1 = build_bowmlp(nn_vocab, embed_dim)
        mod2 = build_bowmlp(nn_vocab, embed_dim)
        mod3 = build_bowmlp(nn_vocab, embed_dim)
    end
    if model == 'lstm' then         
        print("Running LSTM model")
        mod1 = build_lstm(nn_vocab, embed_dim)
        mod2 = build_lstm(nn_vocab, embed_dim)
        mod3 = build_lstm(nn_vocab, embed_dim)
    end

    local ParallelModel = nn.ParallelTable()
    ParallelModel:add(mod1)
    ParallelModel:add(mod2)
    ParallelModel:add(mod3)

    local FinalMLP = nn.Sequential()
    FinalMLP:add(ParallelModel)
    FinalMLP:add(nn.JoinTable(2))
    FinalMLP:add(nn.Linear(embed_dim * 3, 2) )
    FinalMLP:add(nn.Max(1) )
    FinalMLP:add(nn.Tanh())

    if use_cuda then
        return FinalMLP:cuda()
    else
        return FinalMLP
    end
end

In [3]:
function build_data(use_cuda)    
    if use_cuda then
      Tensor = torch.CudaTensor
      LongTensor = torch.CudaLongTensor
    else
      Tensor = torch.Tensor
      LongTensor = torch.LongTensor
    end
    sentences = LongTensor{{0, 1, 3, 4}, {0, 2, 4, 3}}:t()
    summary = LongTensor{{0, 0, 1, 4}, {0, 2, 3, 1}}:t()
    query = LongTensor{{0, 0, 4, 3}, {0, 0, 0, 0}}:t()
    yrouge = torch.rand(2, 1)
    if use_cuda then
        return sentences, summary, query, yrouge:cuda()
    else
        return sentences, summary, query,  yrouge
    end 
end   

In [4]:
usecuda = true
model = 'lstm'

batch_size = 2
vocab_size = 4
embed_dim = 10
outputSize = 1
learning_rate = 0.01

In [5]:
sentences, summary, query, yrouge = build_data(usecuda)
FinalMLP  = build_model(model, vocab_size, embed_dim, outputSize, usecuda)

Running LSTM model	


In [6]:
criterion = nn.MSECriterion():cuda()

In [7]:
for epoch = 1, 100 do
    preds = FinalMLP:forward({sentences, summary, query})
    loss = criterion:forward(preds, yrouge)
    FinalMLP:zeroGradParameters()
    -- This is where it fails
    grads = criterion:backward(preds, yrouge)
    FinalMLP:backward({sentences, summary, query}, grads)
    FinalMLP:updateParameters(learning_rate)
    if (epoch % 10)==0 then 
        print(string.format("Epoch %i, loss =%6f", epoch, loss))
    end
end

Epoch 10, loss =0.330501	


Epoch 20, loss =0.232346	


Epoch 30, loss =0.169405	


Epoch 40, loss =0.127769	


Epoch 50, loss =0.099261	


Epoch 60, loss =0.079104	


Epoch 70, loss =0.064436	


Epoch 80, loss =0.053486	


Epoch 90, loss =0.045125	


Epoch 100, loss =0.038609	


In [8]:
FinalMLP:forward({sentences, summary, query})

 0.6371
 0.1516
[torch.CudaTensor of size 2]



In [9]:
FinalMLP:get(1).output

{
  1 : CudaTensor - size: 2x10
  2 : CudaTensor - size: 2x10
  3 : CudaTensor - size: 2x10
}


In [10]:
FinalMLP:get(2).output

Columns 1 to 10
 0.1126 -0.0082 -0.0498 -0.0210  0.0148  0.0800  0.0855 -0.0438 -0.1928 -0.2417
 0.0752  0.0006  0.0027 -0.0517 -0.0013  0.1085  0.1578 -0.0228 -0.2141 -0.2149

Columns 11 to 20
-0.1578  0.1624  0.1732  0.1026 -0.0580  0.2615  0.0664 -0.0732 -0.0714 -0.3815
-0.1720  0.0506  0.2035  0.1251 -0.1271  0.0673  0.1683  0.0636  0.0172 -0.3538

Columns 21 to 30
-0.1744  0.1986  0.0049 -0.0960  0.3064  0.2876 -0.0589  0.0099  0.3616 -0.0972
-0.0330  0.2032  0.0038 -0.0943  0.1777  0.1872  0.0145  0.0703  0.3125 -0.1944
[torch.CudaTensor of size 2x30]



In [16]:
FinalMLP:get(3).output

 0.7474  0.1440
 0.7532  0.1527
[torch.CudaTensor of size 2x2]



In [14]:
FinalMLP:get(4).output

 0.7532
 0.1527
[torch.CudaTensor of size 2]



In [13]:
yrouge

 0.9083
 0.2019
[torch.CudaTensor of size 2x1]



In [17]:
torch.totable(FinalMLP:get(3).output)

{
  1 : 
    {
      1 : 0.7473868727684
      2 : 0.14397549629211
    }
  2 : 
    {
      1 : 0.7532479763031
      2 : 0.15273851156235
    }
}
