In [1]:
require 'torch'
require 'nn'
require 'rnn'
require 'csvigo'
require 'cutorch'
require 'cunn'
require 'cunnx'

In [8]:
nn_model = 'lstm'
nepochs = 200
K_tokens = 20
J_sentences = 10
batch_size = 200
thresh = 0.00
embed_dim = 50
learning_rate = 0.1
print_every = 1
usecuda = false
epsilon = 1
cuts = 4
base_explore_rate = 0.0
skip_rate = 0.
metric = "f1"

--- Loading utility script
dofile("utils.lua")
dofile("model_utils.lua")

torch.manualSeed(420)
math.randomseed(420)

data_path = '~/GitHub/DeepNLPQLearning/DO_NOT_UPLOAD_THIS_DATA/0-output/'

query_fn = data_path .. 'queries_numtext.csv'
query_file =  csvigo.load({path = query_fn, mode = "large", verbose = false})
queries = buildTermDocumentTable(query_file, nil)

aurora = {
        ['inputs'] = '2012_aurora_shooting_first_sentence_numtext2.csv', 
        ['nuggets'] = 'aurora_nuggets_numtext.csv',
        ['query'] = queries[3],
        ['query_name'] = 'aurora'
}

inputs = {
        aurora
    }
--- Only using epsilon greedy strategy for (nepochs/cuts)% of the epochs
delta = 1./(nepochs/cuts) 
crit = nn.MSECriterion()

...Utils file loaded	


In [9]:
input_path, query_file, batch_size, nepochs, inputs = data_path, query_file, batch_size, nepochs, inputs
nn_model, crit, thresh, embed_dim, epsilon, delta = nn_model, crit, thresh, embed_dim, epsilon, delta
base_explore_rate, print_every = base_explore_rate, print_every
learning_rate, J_sentences, K_tokens, use_cuda = learning_rate, J_sentences, K_tokens, usecuda
skiprate, emetric = skip_rate, metric

In [7]:
dofile("model_utils.lua")




In [10]:
model  = build_model(nn_model, vocab_size, embed_dim, use_cuda)

Running LSTM model	


model_utils.lua:56: attempt to index global 'mod1' (a nil value)
stack traceback:
	model_utils.lua:56: in function 'build_model'
	[string "model  = build_model(nn_model, vocab_size, em..."]:1: in main chunk
	[C]: in function 'xpcall'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:210: in function <...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:174>
	...ojavierarceo/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x01084b0b90: 

In [4]:
if use_cuda then
  Tensor = torch.CudaTensor
  LongTensor = torch.CudaLongTensor
  crit = crit:cuda()
  print("...running on GPU")
else
  Tensor = torch.Tensor
  LongTensor = torch.LongTensor
  print("...running on CPU")
end

print_string = string.format(
    "training model with metric = %s, learning rate = %.3f, K = %i, J = %i, threshold = %.3f, embedding size = %i",
            emetric, learning_rate, K_tokens, J_sentences, thresh, embed_dim, batch_size
            )

print(print_string)

vocab_size = 0
maxseqlen = 0
maxseqlenq = getMaxseq(query_file)

action_query_list = {}
yrougue_query_list = {}
pred_query_list = {}

--- Initializing query information
for query_id = 1, #inputs do
    input_fn = inputs[query_id]['inputs']
    nugget_fn = inputs[query_id]['nuggets']

    input_file = csvigo.load({path = input_path .. input_fn, mode = "large", verbose = false})
    nugget_file = csvigo.load({path = input_path .. nugget_fn, mode = "large", verbose = false})
    input_file = geti_n(input_file, 2, #input_file) 
    nugget_file = geti_n(nugget_file, 2, #nugget_file) 

    vocab_sized = getVocabSize(input_file)
    vocab_sizeq = getVocabSize(query_file)
    vocab_size = math.max(vocab_size, vocab_sized, vocab_sizeq)

    maxseqlend = getMaxseq(input_file)
    maxseqlen = math.max(maxseqlen, maxseqlenq, maxseqlend)
    action_list = torch.totable(torch.round(torch.rand(#input_file)))

    --- initialize the query specific lists
    action_query_list[query_id] = action_list
    yrougue_query_list[query_id] = torch.totable(torch.randn(#input_file, 1)) --- Actual
    pred_query_list[query_id] = torch.totable(torch.zeros(#input_file, 1))    --- Predicted
end

model  = build_model(model, vocab_size, embed_dim, use_cuda)

for epoch=0, nepochs, 1 do
    loss = 0.                    --- Compute a new MSE loss each time
    --- Looping over each bach of sentences for a given query
    for query_id = 1, #inputs do
        --- Grabbing all of the input data
        qs = inputs[query_id]['query']
        input_file = csvigo.load({path = input_path .. inputs[query_id]['inputs'], mode = "large", verbose = false})
        nugget_file = csvigo.load({path = input_path .. inputs[query_id]['nuggets'], mode = "large", verbose = false})
        --- Dropping the headers
        input_file = geti_n(input_file, 2, #input_file) 
        nugget_file = geti_n(nugget_file, 2, #nugget_file) 

        --- Building table of all of the input sentences
        local nuggets = buildTermDocumentTable(nugget_file, nil)
        local xtdm  = buildTermDocumentTable(input_file, K_tokens)

        --- Extracting the query specific summaries, actions, and rougue
        action_list = action_query_list[query_id]
        yrougue = yrougue_query_list[query_id] 
        preds = pred_query_list[query_id]

        --- Loop over file to execute forward pass to estimate expected rougue
        for minibatch = 1, #xtdm do
            --- Notice that the actionlist is optimized at after each iteration
            local summaries = padZeros(buildCurrentSummary(action_list, xtdm, 
                                    K_tokens * J_sentences), 
                                    K_tokens * J_sentences)
            sentence = LongTensor(padZeros( {xtdm[minibatch]}, K_tokens) ):t()
            summary = LongTensor({ summaries[minibatch] }):t()
            query = LongTensor( padZeros({qs}, 5) ):t()

            --- Retrieve intermediate optimal action in model.get(3).output
            local pred_rougue = model:forward({sentence, summary, query})   
            local pred_actions = torch.totable(model:get(3).output)
            opt_action = (pred_actions[1][1] > pred_actions[1][2]) and 1 or 0

            -- doing this way works just fine...                
            -- local labels = Tensor(yrougue[minibatch])
            -- local grads = crit:backward(pred_rougue, labels)
            -- model:zeroGradParameters()
            -- model:backward({sentence, summary, query}, grads)
            -- model:updateParameters(learning_rate)

            -- Updating our book-keeping tables
            preds[minibatch] = pred_rougue
            action_list[minibatch] = opt_action
        end
        --- Updating variables
        action_query_list[query_id] = action_list
        yrougue_query_list[query_id] = yrouge
        pred_query_list[query_id] = preds

        --- Note setting the skip_rate = 0 means no random skipping of delta calculation
        labels, opt_action = score_model(action_list, 
                                        xtdm,
                                        epsilon, 
                                        thresh, 
                                        skiprate, 
                                        emetric)

        --- Rerunning on the scoring on the full data and rescoring cumulatively
        --- Execute policy and evaluation based on our E[ROUGUE] after all of the minibatches
            --- Notice that pred_rougue gives us our optimal action by returning
            ---  E[ROUGUE | Select ] > E[ROUGUE | Skip]
        predsummary = buildPredSummary(action_list, xtdm, nil)
        predsummary = predsummary[#predsummary]

        rscore = rougeRecall({predsummary}, nuggets)
        pscore = rougePrecision({predsummary}, nuggets)
        fscore = rougeF1({predsummary}, nuggets)

        if (epoch % print_every)==0 then
            perf_string = string.format(
                "Epoch %i, epsilon = %.3f, sum(y)/len(y) = %i/%i, {Recall = %.6f, Precision = %.6f, F1 = %.6f}, query = %s", 
                epoch, epsilon, sumTable(action_list), #action_list, rscore, pscore, fscore, inputs[query_id]['query_name']
                )
            print(perf_string)
        end

        --- creating the indices we want
        -- local qindices = {}
        local xindices = {}
        for i=1, batch_size do
            -- qindices[i] = math.random(1, #inputs)
            xindices[i] = math.random(1, #xtdm)
        end

        local summaries = padZeros(buildCurrentSummary(action_list, xtdm, 
                                    K_tokens * J_sentences), 
                                    K_tokens * J_sentences)

        --- Backward step
        for i= 1, batch_size do
            print('running backprop')
            sentence = LongTensor(padZeros( {xtdm[xindices[i]]}, K_tokens) ):t()
            summary = LongTensor({summaries[xindices[i]]}):t()
            query = LongTensor(padZeros({qs}, 5)):t()

            labels = Tensor(yrougue[xindices[i]])
            pred_rougue = Tensor(preds[xindices[i]])

            print(sentence:size(), summary:size(), query:size(), pred_rougue:size(), labels:size())
            print(sentence:sum(), summary:sum(), query:sum(), pred_rougue:sum(), labels:sum())
            -- print(xtdm[xindices[i]], summaries[xindices[i]], qs, pred_rougue, labels )
            --- Backprop model
            loss = loss + crit:forward(pred_rougue, labels)
            local grads = crit:backward(pred_rougue, labels)
            model:zeroGradParameters()
            model:backward({sentence, summary, query}, grads)
            model:updateParameters(learning_rate)
            print('pass', i)
        end 
    end -- ends the query loop
    if (epsilon - delta) <= base_explore_rate then                --- and leaving a random exploration rate
        epsilon = base_explore_rate
    else 
        epsilon = epsilon - delta           --- Decreasing the epsilon greedy strategy
    end
end

...running on CPU	
training model with metric = f1, learning rate = 0.100, K = 20, J = 10, threshold = 0.000, embedding size = 50	


model_utils.lua:56: attempt to index global 'mod1' (a nil value)
stack traceback:
	model_utils.lua:56: in function 'build_model'
	[string "if use_cuda then..."]:51: in main chunk
	[C]: in function 'xpcall'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:210: in function <...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:174>
	...ojavierarceo/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...vierarceo/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	...ojavierarceo/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x01084b0b90: 