In [1]:
require 'torch'
require 'nn'
require 'rnn'
require 'csvigo'
require 'cutorch'
require 'cunn'
require 'cunnx'

In [2]:
nn_model = 'lstm'
nepochs = 200
K_tokens = 20
J_sentences = 10
batch_size = 200
thresh = 0.00
embed_dim = 50
learning_rate = 0.1
print_every = 1
usecuda = false
epsilon = 1
cuts = 4
base_explore_rate = 0.0
skip_rate = 0.
metric = "f1"

--- Loading utility script
dofile("utils.lua")
dofile("model_utils.lua")

torch.manualSeed(420)
math.randomseed(420)

data_path = '~/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/'

query_fn = data_path .. 'queries_numtext.csv'
query_file =  csvigo.load({path = query_fn, mode = "large", verbose = false})
queries = buildTermDocumentTable(query_file, nil)

aurora = {
        ['inputs'] = '2012_aurora_shooting_first_sentence_numtext2.csv', 
        ['nuggets'] = 'aurora_nuggets_numtext.csv',
        ['query'] = queries[3],
        ['query_name'] = 'aurora'
}

inputs = {
        aurora
    }
--- Only using epsilon greedy strategy for (nepochs/cuts)% of the epochs
delta = 1./(nepochs/cuts) 
crit = nn.MSECriterion()

...Utils file loaded	


In [3]:
input_path, query_file, batch_size, nepochs, inputs = data_path, query_file, batch_size, nepochs, inputs
nn_model, crit, thresh, embed_dim, epsilon, delta = nn_model, crit, thresh, embed_dim, epsilon, delta
base_explore_rate, print_every = base_explore_rate, print_every
learning_rate, J_sentences, K_tokens, use_cuda = learning_rate, J_sentences, K_tokens, usecuda
skiprate, emetric = skip_rate, metric

In [4]:
if use_cuda then
  Tensor = torch.CudaTensor
  LongTensor = torch.CudaLongTensor
  crit = crit:cuda()
  print("...running on GPU")
else
  Tensor = torch.Tensor
  LongTensor = torch.LongTensor
  print("...running on CPU")
end

print_string = string.format(
    "training model with metric = %s, learning rate = %.3f, K = %i, J = %i, threshold = %.3f, embedding size = %i",
            emetric, learning_rate, K_tokens, J_sentences, thresh, embed_dim, batch_size
            )

print(print_string)

vocab_size = 0
maxseqlen = 0
maxseqlenq = getMaxseq(query_file)

action_query_list = {}
yrougue_query_list = {}
pred_query_list = {}

--- Initializing query information
for query_id = 1, #inputs do
    input_fn = inputs[query_id]['inputs']
    nugget_fn = inputs[query_id]['nuggets']

    input_file = csvigo.load({path = input_path .. input_fn, mode = "large", verbose = false})
    nugget_file = csvigo.load({path = input_path .. nugget_fn, mode = "large", verbose = false})
    input_file = geti_n(input_file, 2, #input_file) 
    nugget_file = geti_n(nugget_file, 2, #nugget_file) 

    vocab_sized = getVocabSize(input_file)
    vocab_sizeq = getVocabSize(query_file)
    vocab_size = math.max(vocab_size, vocab_sized, vocab_sizeq)

    maxseqlend = getMaxseq(input_file)
    maxseqlen = math.max(maxseqlen, maxseqlenq, maxseqlend)
    action_list = torch.totable(torch.round(torch.rand(#input_file)))

    --- initialize the query specific lists
    action_query_list[query_id] = action_list
    yrougue_query_list[query_id] = torch.totable(torch.randn(#input_file, 1)) --- Actual
    pred_query_list[query_id] = torch.totable(torch.zeros(#input_file, 1))    --- Predicted
end
model  = build_model(nn_model, vocab_size, embed_dim, use_cuda)

...running on CPU	
training model with metric = f1, learning rate = 0.100, K = 20, J = 10, threshold = 0.000, embedding size = 50	


Running LSTM model	


In [48]:
for epoch=0, nepochs, 1 do
    loss = 0.                    --- Compute a new MSE loss each time
    --- Looping over each bach of sentences for a given query
    for query_id = 1, #inputs do
        --- Grabbing all of the input data
        qs = inputs[query_id]['query']
        input_file = csvigo.load({path = input_path .. inputs[query_id]['inputs'], mode = "large", verbose = false})
        nugget_file = csvigo.load({path = input_path .. inputs[query_id]['nuggets'], mode = "large", verbose = false})
        --- Dropping the headers
        input_file = geti_n(input_file, 2, #input_file) 
        nugget_file = geti_n(nugget_file, 2, #nugget_file) 

        --- Building table of all of the input sentences
        nuggets = buildTermDocumentTable(nugget_file, nil)
        xtdm  = buildTermDocumentTable(input_file, K_tokens)

        --- Extracting the query specific summaries, actions, and rougue
        action_list = action_query_list[query_id]
        yrougue = yrougue_query_list[query_id] 
        preds = pred_query_list[query_id]

        --- Loop over file to execute forward pass to estimate expected rougue
        for minibatch = 1, #xtdm do
            --- Notice that the actionlist is optimized at after each iteration
            local summaries = padZeros(buildCurrentSummary(action_list, xtdm, 
                                    K_tokens * J_sentences), 
                                    K_tokens * J_sentences)
            sentence = LongTensor(padZeros( {xtdm[minibatch]}, K_tokens) ):t()
            summary = LongTensor({ summaries[minibatch] }):t()
            query = LongTensor( padZeros({qs}, 5) ):t()

            --- Retrieve intermediate optimal action in model.get(3).output
            pred_rougue = model:forward({sentence, summary, query})   
            pred_actions = torch.totable(model:get(3).output)
            opt_action = (pred_actions[1][1] > pred_actions[1][2]) and 1 or 0

            -- Updating our book-keeping tables
            preds[minibatch] = pred_rougue[1]
            action_list[minibatch] = opt_action
        end

        --- Note setting the skip_rate = 0 means no random skipping of delta calculation
        yrougue = score_model(action_list, 
                            xtdm,
                            epsilon, 
                            thresh, 
                            skiprate, 
                            emetric)
        
        --- Updating variables
        pred_query_list[query_id] = preds
        yrougue_query_list[query_id] = yrougue
        action_query_list[query_id] = action_list

        --- Rerunning on the scoring on the full data and rescoring cumulatively
        --- Execute policy and evaluation based on our E[ROUGUE] after all of the minibatches
            --- Notice that pred_rougue gives us our optimal action by returning
            ---  E[ROUGUE | Select ] > E[ROUGUE | Skip]
        predsummary = buildPredSummary(action_list, xtdm, nil)
        predsummary = predsummary[#predsummary]

        rscore = rougeRecall({predsummary}, nuggets)
        pscore = rougePrecision({predsummary}, nuggets)
        fscore = rougeF1({predsummary}, nuggets)

        if (epoch % print_every)==0 then
            perf_string = string.format(
                "Epoch %i, epsilon = %.3f, sum(y)/len(y) = %i/%i, {Recall = %.6f, Precision = %.6f, F1 = %.6f}", 
                epoch, epsilon, sumTable(action_list), #action_list, rscore, pscore, fscore
                )
            print(perf_string)
        end

        --- creating the indices we want
        -- local qindices = {}
        xindices = {}
        for i=1, batch_size do
            -- qindices[i] = math.random(1, #inputs)
            xindices[i] = math.random(1, #xtdm)
        end

        summaries = padZeros(buildCurrentSummary(action_list, xtdm, 
                                    K_tokens * J_sentences), 
                                    K_tokens * J_sentences)

        --- Backward step
        for i= 1, batch_size do
            sentence = LongTensor(padZeros( {xtdm[xindices[i]]}, K_tokens) ):t()
            summary = LongTensor({summaries[xindices[i]]}):t()
            query = LongTensor(padZeros({qs}, 5)):t()

            labels = Tensor({yrougue[xindices[i]]})
            pred_rougue = Tensor({preds[xindices[i]]})

            --- Backprop model
            loss = loss + crit:forward(pred_rougue, labels)
            local grads = crit:backward(pred_rougue, labels)
            model:zeroGradParameters()
            --- For some reason doing this fixes it
            local tmp = model:forward({sentence, summary, query})
            model:backward({sentence, summary, query}, grads)
            model:updateParameters(learning_rate)
        end 
    end -- ends the query loop
    if (epsilon - delta) <= base_explore_rate then                --- and leaving a random exploration rate
        epsilon = base_explore_rate
    else 
        epsilon = epsilon - delta           --- Decreasing the epsilon greedy strategy
    end
end

Epoch 0, epsilon = 1.000, sum(y)/len(y) = 999/999, {Recall = 0.797317, Precision = 0.118764, F1 = 0.206734}	


Epoch 1, epsilon = 0.980, sum(y)/len(y) = 999/999, {Recall = 0.797317, Precision = 0.118764, F1 = 0.206734}	


Epoch 2, epsilon = 0.960, sum(y)/len(y) = 559/999, {Recall = 0.730714, Precision = 0.195538, F1 = 0.308517}	


Epoch 3, epsilon = 0.940, sum(y)/len(y) = 999/999, {Recall = 0.797317, Precision = 0.118764, F1 = 0.206734}	


In [None]:
grads = crit:backward(pred_rougue, labels)
model:zeroGradParameters()
model:backward({sentence, summary, query}, grads)
model:updateParameters(learning_rate)           

In [None]:
--- Updating variables
pred_query_list[query_id] = preds
yrougue_query_list[query_id] = yrougue
action_query_list[query_id] = opt_action

In [27]:
-- #query, #sentence, #summary, #pred_rougue, #labels
i = 1
sentence = LongTensor(padZeros( {xtdm[xindices[i]]}, K_tokens) ):t()
summary = LongTensor({summaries[xindices[i]]}):t()
query = LongTensor(padZeros({qs}, 5)):t()

labels = Tensor({yrougue[xindices[i]]})
pred_rougue = Tensor({preds[xindices[i]]})

In [37]:
--- Backprop model
loss = loss + crit:forward(pred_rougue, labels)
grads = crit:backward(pred_rougue, labels)

In [39]:
pred_rougue, labels, grads

0.01 *
 7.6549
[torch.DoubleTensor of size 1]

 0.2104
[torch.DoubleTensor of size 1]

-0.2678
[torch.DoubleTensor of size 1]



In [46]:
model:forward({sentence, summary, query})

 0.1565
[torch.DoubleTensor of size 1]



In [44]:
model:backward({sentence, summary, query}, grads)

{
  1 : LongTensor - size: 20x1
  2 : LongTensor - size: 200x1
  3 : LongTensor - size: 5x1
}


In [47]:
model:zeroGradParameters()
model:backward({sentence, summary, query}, grads)
model:updateParameters(learning_rate)

In [None]:
-- #query, #sentence, #summary, #pred_rougue, #labels
-- query, sentence, #summary, pred_rougue, labels

In [22]:
for i= 1, batch_size do
    print('running',i)
    sentence = LongTensor(padZeros( {xtdm[xindices[i]]}, K_tokens) ):t()
    summary = LongTensor({summaries[xindices[i]]}):t()
    query = LongTensor(padZeros({qs}, 5)):t()

    labels = Tensor({yrougue[xindices[i]]})
    pred_rougue = Tensor({preds[xindices[i]]})

    --- Backprop model
    loss = loss + crit:forward(pred_rougue, labels)
    local grads = crit:backward(pred_rougue, labels)
    model:zeroGradParameters()
    model:backward({sentence, summary, query}, grads)
    model:updateParameters(learning_rate)
end 

running	1	


...javierarceo/torch/install/share/lua/5.1/nn/Container.lua:67: 
In 1 module of nn.Sequential:
In 1 module of nn.ParallelTable:
In 3 module of nn.Sequential:
...iscojavierarceo/torch/install/share/lua/5.1/rnn/LSTM.lua:184: assertion failed!
stack traceback:
	[C]: in function 'assert'
	...iscojavierarceo/torch/install/share/lua/5.1/rnn/LSTM.lua:184: in function '_updateGradInput'
	...eo/torch/install/share/lua/5.1/rnn/AbstractRecurrent.lua:59: in function 'updateGradInput'
	...avierarceo/torch/install/share/lua/5.1/rnn/Sequencer.lua:121: in function <...avierarceo/torch/install/share/lua/5.1/rnn/Sequencer.lua:106>
	[C]: in function 'xpcall'
	...javierarceo/torch/install/share/lua/5.1/nn/Container.lua:63: in function 'rethrowErrors'
	...avierarceo/torch/install/share/lua/5.1/nn/Sequential.lua:55: in function <...avierarceo/torch/install/share/lua/5.1/nn/Sequential.lua:50>
	[C]: in function 'xpcall'
	...javierarceo/torch/install/share/lua/5.1/nn/Container.lua:63: in function 'rethrowErrors'
	...erarceo/torch/install/share/lua/5.1/nn/ParallelTable.lua:19: in function 'updateGradInput'
	...
	[C]: in function 'xpcall'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:210: in function <...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:174>
	...ojavierarceo/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0105660b90

WARNING: If you see a stack trace below, it doesn't point to the place where this error occurred. Please use only the one above.
stack traceback:
	[C]: in function 'error'
	...javierarceo/torch/install/share/lua/5.1/nn/Container.lua:67: in function 'rethrowErrors'
	...avierarceo/torch/install/share/lua/5.1/nn/Sequential.lua:88: in function 'backward'
	[string "for i= 1, batch_size do..."]:14: in main chunk
	[C]: in function 'xpcall'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:210: in function <...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:174>
	...ojavierarceo/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0105660b90: 