# Starting model here

In [1]:
require 'torch'
require 'nn'
require 'rnn'
require 'csvigo'
require 'cutorch'
require 'cunn'
require 'cunnx'

--- Loading utility script
dofile("utils.lua")
dofile("model_utils.lua")

aurora_fn = '~/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/2012_aurora_shooting_first_sentence_numtext.csv'
nugget_fn = '~/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/aurora_nuggets_numtext.csv'
query_fn = '~/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/queries_numtext.csv'

data_file = csvigo.load({path = aurora_fn, mode = "large"})
nugget_file = csvigo.load({path = nugget_fn, mode = "large"})
query_file =  csvigo.load({path = query_fn, mode = "large"})

rK = 50
batch_size = 10
nepochs = 10
print_every = 1
embed_dim = 10
learning_rate = 0.1
usecuda = true

cuts = 4.                  --- This is the number of cuts we want
epsilon = 1.
base_explore_rate = 0.1
delta = 1./(nepochs/cuts) --- Only using epsilon greedy strategy for (nepochs/cuts)% of the epochs

torch.manualSeed(420)

function build_network(vocab_size, embed_dim)
    local model = nn.Sequential()
    :add(nn.LookupTableMaskZero(vocab_size, embed_dim)) -- will return a sequence-length x batch-size x embedDim tensor
    :add(nn.SplitTable(1, embed_dim)) -- splits into a sequence-length table with batch-size x embedDim entries
    :add(nn.Sequencer(nn.LSTM(embed_dim, embed_dim)))
    :add(nn.SelectTable(-1)) -- selects last state of the LSTM
    :add(nn.Linear(embed_dim, embed_dim)) -- map last state to a score for classification
    :add(nn.ReLU())
   return model
end

function build_model(vocab_size, embed_dim, outputSize, use_cuda)
    local mod1 = build_network(vocab_size, embed_dim)
    local mod2 = build_network(vocab_size, embed_dim)
    local mod3 = build_network(vocab_size, embed_dim)

    local mlp1 = nn.Sequential()
    mlp1:add(nn.Linear(1, embed_dim))
    mlp1:add(nn.ReLU())

    local ParallelModel = nn.ParallelTable()
    ParallelModel:add(mod1)
    ParallelModel:add(mod2)
    ParallelModel:add(mod3)
    ParallelModel:add(mlp1)

    local FinalMLP = nn.Sequential()
    FinalMLP:add(ParallelModel)
    FinalMLP:add(nn.JoinTable(2))
    FinalMLP:add( nn.Linear(embed_dim * 4, outputSize) )

    if use_cuda then
        return FinalMLP:cuda()
    else
        return FinalMLP
    end
end

vocab_sized = getVocabSize(data_file)                       --- getting length of dictionary
vocab_sizeq = getVocabSize(query_file)                      --- getting length of dictionary
vocab_size = math.max(vocab_sized, vocab_sizeq)

queries = grabNsamples(query_file, #query_file-1, nil)      --- Extracting all queries
nuggets = grabNsamples(nugget_file, #nugget_file-1, nil)    --- Extracting all samples
maxseqlend = getMaxseq(data_file)                             --- Extracting maximum sequence length
maxseqlenq = getMaxseq(query_file)                            --- Extracting maximum sequence length
maxseqlen = math.max(maxseqlenq, maxseqlend)

batchLSTM = build_model(vocab_size, embed_dim, 1, usecuda)
crit = nn.MSECriterion()

...Utils file loaded	
<csv>	parsing file: /Users/franciscojavierarceo/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/2012_aurora_shooting_first_sentence_numtext.csv	


<csv>	parsing done	
<csv>	parsing file: /Users/franciscojavierarceo/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/aurora_nuggets_numtext.csv	
<csv>	parsing done	
<csv>	parsing file: /Users/franciscojavierarceo/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/queries_numtext.csv	
<csv>	parsing done	


In [20]:
batch_size = 10
nepochs = 1 
qs = queries[3]
x = data_file
model = batchLSTM
-- crit = crit
-- epsilon
-- delta
mxl = maxseqlen
-- base_explore_rate
-- print_every
-- nuggets
-- learning_rate
K = rK
use_cuda = usecuda

In [8]:
if use_cuda then
  Tensor = torch.CudaTensor
  LongTensor = torch.CudaLongTensor
  crit = crit:cuda()
  print("Running DQN-LSTM with the GPU")
else
  Tensor = torch.Tensor
  LongTensor = torch.LongTensor
  print("Running DQN-LSTM with the CPU")
end

rscores, pscores, fscores = {}, {}, {}
yrouge = torch.totable(torch.randn(#x))
summary_list = populateOnes(#x, K)
action_list = torch.totable(torch.round(torch.rand(#x)))
preds_list = torch.totable(torch.round(torch.rand(#x)))
print("training model...")

Running DQN-LSTM with the GPU	


training model...	


In [23]:
for epoch=0, nepochs, 1 do
    loss = 0                    --- Compute a new MSE loss each time
    --- Reset the rougue each time
    r_t1 , p_t1, f_t1 = 0., 0., 0.
    --- Looping over each bach of sentences for a given query
    nbatches = torch.floor( #x / batch_size)
    for minibatch = 1, nbatches do
        if minibatch == 1 then          -- Need +1 to skip the first row
            nstart = 2
            nend = torch.round(batch_size * minibatch)
        end
        if minibatch == nbatches then 
            nstart = nend + 1
            nend = #x
        end
        if minibatch > 1 and minibatch < nbatches then 
            nstart = nend + 1
            nend = torch.round(batch_size * minibatch)
        end
        --- This step is processing the data
        x_ss  = geti_n(x, nstart, nend)
        xout  = grabNsamples(x_ss, 1, #x_ss)     --- Extracting N samples
        print('pass')
        xs  = padZeros(xout, mxl)                 --- Padding the data by the maximum length
        qs2 = padZeros({qs}, 5)
        qrep = repeatQuery(qs2[1], #xs)
        preds = geti_n(preds_list, nstart, nend)
        sumry_ss = buildSummary(preds, xs, 0)

        sentences = LongTensor(xs):t()
        summary = LongTensor(sumry_ss):t()
        query = LongTensor(qrep):t()
        actions = torch.Tensor(geti_n(action_list, nstart, nend)):resize(#xs, 1)
        labels = torch.Tensor(geti_n(yrouge, nstart, nend))

        print(actions:sum(), actions:mean(), actions:min(), actions:max())

        if use_cuda then
             actions =  actions:cuda()
             labels = labels:cuda()
        end
        myPreds = model:forward({sentences, summary, query, actions})
        loss = loss + crit:forward(myPreds, labels)
        grads = crit:backward(myPreds, labels)
        model:backward({sentences, summary, query, actions}, grads)
        model:updateParameters(learning_rate)        -- Update parameters after each minibatch
        model:zeroGradParameters()

        if use_cuda then
            myPreds = myPreds:double()
        end

        preds = policy(myPreds, epsilon, #xs)
        --- Concatenating predictions into a summary
        predsummary = buildPredSummary(preds, xs, K)
        --- Initializing rouge metrics at time {t-1} and save scores
        for i=1, #predsummary do
            --- Calculating rouge scores; Call get_i_n() to cumulatively compute rouge
            rscores[i] = rougeRecall(geti_n(predsummary, 1, i), nuggets, K) - r_t1
            pscores[i] = rougePrecision(geti_n(predsummary, 1, i), nuggets, K) - p_t1
            fscores[i] = rougeF1(geti_n(predsummary, 1, i), nuggets, K) - f_t1
            r_t1, p_t1, f_t1 = rscores[i], pscores[i], fscores[i]
        end
        --- Updating change in rouge
        yrouge = updateTable(yrouge, fscores, nstart)
        preds_list = updateTable(preds_list, preds, nstart)
        action_list = updateTable(action_list, torch.totable(actions:double()), nstart)
        
        --- Calculating last one to see actual last rouge, without delta
        rscore = rougeRecall(predsummary, nuggets, K)
        pscore = rougePrecision(predsummary, nuggets, K)
        fscore = rougeF1(predsummary, nuggets, K)
        
        perf_string = string.format(
            "Epoch %i, sum(y)/len(y) = %i/%i, {Recall = %.6f, Precision = %.6f, F1 = %.6f}", 
            epoch, sumTable(preds_list), #preds_list, rscore, pscore, fscore
            )
        print(perf_string)
    end
    epsilon = epsilon - delta           --- Decreasing the epsilon greedy strategy
    if epsilon <= 0 then                --- leave a random exploration rate
        epsilon = base_explore_rate
    end
end

pass	


5	0.55555555555556	0	1	


Epoch 0, sum(y)/len(y) = 6224/12510, {Recall = 0.000000, Precision = 0.000000, F1 = 0.000000}	


pass	


5	0.55555555555556	0	1	


Epoch 1, sum(y)/len(y) = 6224/12510, {Recall = 0.000000, Precision = 0.000000, F1 = 0.000000}	


In [26]:
predsummary[1][1]

{
  1 : 235
  2 : 218
  3 : 129
  4 : 54
  5 : 23
  6 : 248
  7 : 62
  8 : 249
  9 : 119
}


In [29]:
n2 = geti_n(nuggets, 2, #nuggets)

In [44]:
unpackZeros(xs[1]), preds[1]

{
  1 : 21
  2 : 16
  3 : 20
  4 : 2
  5 : 1
  6 : 4
  7 : 11
  8 : 22
  9 : 19
}
0	


In [41]:
summary[1]

   0
   0
   0
   0
   0
   0
   0
   0
 132
[torch.CudaLongTensor of size 9]



In [40]:
labels:size()

 9
[torch.LongStorage of size 1]



In [50]:
predsummary[1]

{
  1 : 
    {
      1 : 235
      2 : 218
      3 : 129
      4 : 54
      5 : 23
      6 : 248
      7 : 62
      8 : 249
      9 : 119
    }
  2 : 
    {
      1 : 329
      2 : 334
      3 : 332
      4 : 23
      5 : 335
      6 : 129
      7 : 333
      8 : 330
      9 : 119
    }
  3 : 
    {
      1 : 76
      2 : 406
      3 : 450
      4 : 537
      5 : 535
      6 : 534
      7 : 536
    }
  4 : 
    {
      1 : 659
      2 : 661
      3 : 474
      4 : 660
      5 : 83
      6 : 663
      7 : 57
      8 : 658
      9 : 120
    }
}


In [52]:
rougeF1(predsummary[1], nuggets, K)

0.015711645101664	


In [56]:
rougeF1(geti_n(predsummary, 1, 7)[1], nuggets, K)

0.015711645101664	


In [58]:
#sumry_ss

9	


In [129]:
function buildPredSummary(pred_action, xs) 
    local predsummary = {}
    --- This looks stupid but it's right because we have to retain
    --- the tmp1 when it's not 1, so it's a running total
    local tmp1 = {}
    for i=1, #xs do
        tmp = unpackZeros(xs[i])
        if pred_action[i]== 1 then
            predsummary[i] = tmp1
        else
            predsummary[i] = tmp1
        end
    end
    return predsummary
end

In [73]:
pxs = buildPredSummary(preds, xs)

In [78]:
unpackZeros(xs[1])

{
  1 : 21
  2 : 16
  3 : 20
  4 : 2
  5 : 1
  6 : 4
  7 : 11
  8 : 22
  9 : 19
}


In [80]:
x_ss

{
  1 : 
    {
      1 : 21 16 20 2 1 4 11 22 19 9 14 20 25 24 23 24 8 5 7 17 3 12
    }
  2 : 
    {
      1 : 235 218 129 54 23 248 62 249 119 224 245 23 57 262 5 238 174 242 216 265 83 231 266 250 257 54 265 62 221 263 258 223 244 247 1 229 224 265 241 100 220 1 243 264 3 129 1 272 1 228 264 3 236 132 237 255 225 263 24 230 261 1 228 232 240 270 3 129 1 24 260 1 252 240 270 3 236 54 265 239 24 67 28 227 266 250 257 192 222 1 129 83 269 253
    }
  3 : 
    {
      1 : 329 334 332 23 335 129 333 330 119 321 63 336
    }
  4 : 
    {
      1 : 20 437 438 119 432 192 24 359 436 434 3 433 129 431 224 440 429 140 3 435 83 430 439 441
    }
  5 : 
    {
      1 : 76 406 450 537 535 534 536
    }
  6 : 
    {
      1 : 659 661 474 660 83 663 57 658 120 656 666 657 668 664 119 24 129 665 3 24 232 669 662
    }
  7 : 
    {
      1 : 708 709 590 712 714 1 76 710 83 1 57 53 3 129 520 321 707 716 1 711
    }
  8 : 
    {
      1 : 835 24 225 24 495 836 828 830 3 138 1 831 837 83 24 829 23 24 8

In [67]:
rougeF1(geti_n(sumry_ss, 1, 2), nuggets, K)

0	


In [96]:
rougePrecision({unpackZeros(xs[1]), unpackZeros(xs[2]), unpackZeros(xs[3])}, nuggets, 3)

0.40740740740741	


In [105]:
preds[1], unpackZeros(xs[1])

0	{
  1 : 21
  2 : 16
  3 : 20
  4 : 2
  5 : 1
  6 : 4
  7 : 11
  8 : 22
  9 : 19
}


In [110]:
preds[1]

0	


In [117]:
out = {}
out[1] = zero_or_x(preds[1], unpackZeros(xs[1]))

In [126]:
out[2] =  zero_or_x(preds[2], unpackZeros(xs[2]))
out[3] =  zero_or_x(preds[3], unpackZeros(xs[3]))

In [144]:
tmp_preds  = {preds[1], preds[2], preds[3] }
tmp_xs = {unpackZeros(xs[1]), unpackZeros(xs[2]), unpackZeros(xs[3])} 

In [145]:
tmp_xs[1]

{
  1 : 21
  2 : 16
  3 : 20
  4 : 2
  5 : 1
  6 : 4
  7 : 11
  8 : 22
  9 : 19
}


In [146]:
function buildPredSummary(preds, xs)
    local out = {}
    for i=1, #xs do
        if i == 1 then 
            out[i] = zero_or_x(preds[i], unpackZeros(xs[i]))
        else 
            --- Update it by adding xs_i and out_{i-1}
            out[i] =  zero_or_x(preds[i], unpackZeros(xs[i]))
        end
    end
    return out
end

In [159]:
tst = buildPredSummary(geti_n(tmp_preds, 1, 2), tmp_xs)

In [160]:
rougeF1(tst, nuggets)

0.006514657980456	


In [173]:
for i=1, 9 do
    print(rougePrecision(buildPredSummary(geti_n(preds, 1, i), geti_n(xout, 1, i)), nuggets))
end

0.22222222222222	


0.38888888888889	
0.36842105263158	
0.35	
0.37037037037037	
0.41666666666667	


0.40540540540541	
0.39473684210526	
0.38461538461538	
