In [1]:
require 'nn'
require 'rnn'
require 'image'
require 'optim'
require 'parallel'
require 'cutorch'
require 'cunn'
require 'cunnx'
dl = require 'dataload'

In [2]:
dofile("DQN_Batch_Queries_Simulation.lua")




In [3]:
-- Setting parameters
n = 1
n_s = 5
k = 3
q = 4
a = 1
b = 10
embDim = 50
gamma = 0.0
SKIP = 1
SELECT = 2
epsilon = 1
nepochs = 400

base_explore_rate = 0.1
end_baserate = torch.round(nepochs * 0.25)
endexplorerate = 0.5

learning_rate = 1e-5
fast = true
usecuda = false
adapt = false
cuts = 4
mem_multiplier = 3
batch_size = 25
delta = cuts/nepochs
print_perf = false

In [4]:
torch.manualSeed(420)
if usecuda then
    Tensor = torch.CudaTensor
    LongTensor = torch.CudaLongTensor   
    ByteTensor = torch.CudaByteTensor
    maskLayer = nn.MaskedSelect():cuda()
    print("...running on GPU")
else
    Tensor = torch.Tensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
    maskLayer = nn.MaskedSelect()
    print("...running on CPU")
end

local SKIP = 1
local SELECT = 2

optimParams = { learningRate = learning_rate }
delta = cuts / nepochs
end_baserate = torch.round(nepochs * endexplorerate )

-- Simulating streams and queries
queries = genNbyK(n, q, a, b)

-- Note that the sentences are batched by sentence index so sentences[1] is the first sentence of each article
sentences = {}
for i=1, n_s do
    sentences[i] = genNbyK(n, k, a, b)
end

-- Optimal predicted summary
trueSummary = LongTensor(n, k * n_s):fill(0)

-- Using this to generate the optimal actions
true_actions = {}
for i=1, n_s do 
    ---- Simulating the data
    trueqValues = torch.rand(n, 2)
     ---- Generating the max values and getting the indices
    qMaxtrue, qindxtrue = torch.max(trueqValues, 2)

    --- I want to select the qindx elements for each row
    true_actions[i] = torch.zeros(n, 2):scatter(2, qindxtrue, torch.ones(trueqValues:size()))
    best_sentences = buildPredsummaryFast(true_actions[i], sentences[i], SELECT)
    trueSummary = buildTotalSummaryFast(best_sentences, trueSummary, usecuda)
end

tmptrueactions = Tensor(n_s * n, 2)
for i = 1, n_s do 
    tmptrueactions[i]:copy(true_actions[i])
end
print('true actions =')
print(tmptrueactions)
print('true summary =')
print(trueSummary)

qTokens = {}
for i=1, n do
    qTokens[i] = Tokenize(trueSummary[i]:totable())
end

-- Building the model
-- model = buildModel('bow', b, embDim, 'f1', adapt, usecuda)
model = buildModel('lstm', b, embDim, 'f1', adapt, usecuda)
params, gradParams = model:getParameters()

if adapt then 
    criterion = nn.ParallelCriterion():add(nn.MSECriterion()):add(nn.BCECriterion())
    criterion["weights"] = {1, adapt_lambda}
else 
    criterion = nn.MSECriterion()
end 

totalPredsummary = {}
qValues = {}
qActions = {}
qPreds = {}
rewards = {}
rougue_scores = {}
lossfull = {}
rouguef1 = {}

totalPredsummary = LongTensor(n, n_s * k):fill(0)

memfull = false
curr_memsize = 0
memsize = n * n_s * mem_multiplier
queryMemory = Tensor(memsize, q):fill(0)
qActionMemory = Tensor(memsize, 2):fill(0)
predSummaryMemory = Tensor(memsize, n_s * k):fill(0)
sentenceMemory = Tensor(memsize, k):fill(0)
qPredsMemory = Tensor(memsize, 2):fill(0)
qValuesMemory = Tensor(memsize, 1):fill(0)
rewardMemory = Tensor(memsize, 1):fill(0)

if adapt then
    regPreds = {}
    regMemory = Tensor(memsize, 1):fill(0) 
end
--- Initializing thingss
for i = 1, n_s do
    qPreds[i] = Tensor(n, 2):fill(0) 
    qValues[i] = Tensor(n, 1):fill(0)
    qActions[i] = Tensor(n, 2):fill(0)
    rewards[i] = Tensor(n, 1):fill(0)
    rougue_scores[i] = Tensor(n, 1):fill(0)
    if adapt then
        regPreds[i] = Tensor(n, 1):fill(0)
    end        
end 

if usecuda then
    criterion = criterion:cuda()
    model = model:cuda()
end

...running on CPU	
true actions =	
 1  0
 1  0
 0  1
 0  1
 1  0
[torch.DoubleTensor of size 5x2]

true summary =	
 9  3  4  7  3  9  0  0  0  0  0  0  0  0  0
[torch.LongTensor of size 1x15]

Running LSTM model to learn f1	


In [5]:
nClock = os.clock()
for epoch=1, nepochs do
    --- Reset things at the start of each epoch
    for i=1, n_s do
        qPreds[i]:fill(0)
        qValues[i]:fill(0)
        qActions[i]:fill(0)
        rougue_scores[i]:fill(0)
        rewards[i]:fill(0)
        totalPredsummary:fill(0)
        
        if adapt then
            regMemory[i]:fill(0)
        end        
    end

    for i=1, n_s do
        totalPreds = model:forward({queries, sentences[i], totalPredsummary})

        if adapt then 
            qPreds[i]:copy(totalPreds[1])
            regPreds[i]:copy(totalPreds[2])
        else
            qPreds[i]:copy(totalPreds)
        end

        if torch.uniform(0, 1) <= epsilon then
            -- randomly choosing actions
            xrand = torch.rand(qPreds[i]:size())
            qActions[i]:select(2, SELECT):copy(torch.ge(xrand:select(2, SELECT), xrand:select(2, SKIP)))
            qActions[i]:select(2, SKIP):copy(torch.ge(xrand:select(2, SKIP), xrand:select(2, SELECT)))
            qValues[i]:copy( maskLayer:forward({qPreds[i], qActions[i]:byte()}) )
        else 
            qMax, qindx = torch.max(qPreds[i], 2)  -- Pulling the best actions
            -- Here's the fast way to select the optimal action for each query
            qActions[i]:copy(
                qActions[i]:scatter(2, qindx, torch.ones(qPreds[i]:size())):clone()
            )
            qValues[i]:copy(
                qMax
            )
        end

        -- This is where we begin to store the data in our memory 
            -- notice that we store the reward after this part
        start_row = curr_memsize + 1
        if memsize < (start_row + n) then 
            start_row = memsize - n + 1
            end_row = start_row + n - 1
            curr_memsize = 0
            if (end_row + n) >= memsize then 
                memfull = true
            end 
        else 
            end_row = start_row + n - 1
            curr_memsize = end_row
        end
        
        -- Update memory sequentially until it's full then restart updating it
        queryMemory[{{start_row, end_row}}]:copy(queries)
        sentenceMemory[{{start_row, end_row}}]:copy(sentences[i])
        predSummaryMemory[{{start_row, end_row}}]:copy(totalPredsummary)
        
        -- Now that we've stored our memory, we can build the summary to evaluate our action
        predsummary = buildPredsummaryFast(qActions[i], sentences[i], SELECT)
        totalPredsummary = buildTotalSummaryFast(predsummary, totalPredsummary, usecuda)
        if i < n_s then
--             querytp1Memory[{{start_row, end_row}}]:copy(queries)
            sentencetp1Memory[{{start_row, end_row}}]:copy(sentences[i + 1])
            predSummarytp1Memory[{{start_row, end_row}}]:copy(totalPredsummary)
        end 
        
        qActionMemory[{{start_row, end_row}}]:copy(qActions[i])
        qPredsMemory[{{start_row, end_row}}]:copy(qPreds[i])
        qValuesMemory[{{start_row, end_row}}]:copy(qValues[i])

        if adapt then
            regMemory[{{start_row, end_row}}]:copy(regPreds[i])
        end


        for j = 1, n do
            recall, prec, f1 = rougeScores( Tokenize(totalPredsummary[j]:totable()),
                                            qTokens[j]
                )
            rougue_scores[i][j]:fill(f1)
        end

        if i == n_s then 
            rouguef1[epoch] = rougue_scores[i]:mean()
        end 

        if i > 1 then
            -- Calculating change in rougue f1
            rewards[i]:copy(rougue_scores[i] - rougue_scores[i-1])
        else 
            rewards[i]:copy(rougue_scores[i])
        end
    end
    tmp = Tensor(n_s * n, 2)
    tmpq = Tensor(n_s * n, 2) 
    for i = 1, n_s do 
        tmp[i]:copy(qActions[i])
        tmpq[i]:copy(qPreds[i])
    end
    
--     print(tmp:select(2, SELECT):clone():resize(1, 5))

    for i=1, n_s do
        -- this is how we incorporate the discount paremeter on future predictions
        if i  < n_s then
            rewardMemory[{{n * (i-1) + 1, n * i}}]:copy(
                    rewards[i] + (gamma * qValues[i + 1])
                )
        else
            -- for terminal predictions we use the final reward
            rewardMemory[{{n * (i-1) + 1, n * i}}]:copy(
                    rewards[i] 
                )
        end
    end

    if memfull then 
        memrows = memsize
    else 
        memrows = curr_memsize
    end

    if usecuda then 
        dataloader = dl.TensorLoader({
                        queryMemory[{{1, memrows}}]:cuda(), 
                        sentenceMemory[{{1, memrows}}]:cuda(), 
                        predSummaryMemory[{{1, memrows}}]:cuda(),
                        qPredsMemory[{{1, memrows}}]:cuda(), 
                        ByteTensor(memrows, 2):copy(qActionMemory[{{1, memrows}}]), 
                        qValuesMemory[{{1, memrows}}]:cuda()
                        }, 
                    rewardMemory[{{1, memrows}}]:cuda()
                )
        if adapt then            
            table.insert(dataloader['inputs'], regMemory[{{1, memrows}}]:cuda() )
        end
    else 
        dataloader = dl.TensorLoader({
                    queryMemory[{{1, memrows}}], 
                    sentenceMemory[{{1, memrows}}], 
                    predSummaryMemory[{{1, memrows}}], 
                    qPredsMemory[{{1, memrows}}], 
                    ByteTensor(memrows, 2):copy(qActionMemory[{{1, memrows}}]), 
                    qValuesMemory[{{1, memrows}}]
                    }, 
                rewardMemory[{{1, memrows}}]
            )
        if adapt then
            table.insert(dataloader['inputs'], regMemory[{{1, memrows}}] )
        end
    end
    loss = {}
    c = 1
    for k, xin, reward in dataloader:sampleiter(batch_size, memsize) do
         local function feval(params)
            gradParams:zero()
            if adapt then
                local ignore = model:forward({xin[1], xin[2], xin[3]})
                local predQOnActions = maskLayer:forward({xin[4], xin[5]}) 
                local ones = torch.ones(reward:size(1)):resize(reward:size(1))
                if usecuda then
                    ones = ones:cuda()
                end
                lossf = criterion:forward({predQOnActions, xin[7]}, {reward, ones})
                local gradOutput = criterion:backward({predQOnActions, xin[6]}, {reward, ones})
                local gradMaskLayer = maskLayer:backward({xin[4], xin[5]}, gradOutput[1])
                model:backward({xin[1], xin[2], xin[3]}, {gradMaskLayer[1], gradOutput[2]})
            else 
                local ignore = model:forward({xin[1], xin[2], xin[3]})
                local predQOnActions = maskLayer:forward({xin[4], xin[5]}) 
                lossf = criterion:forward(predQOnActions, reward)
                local gradOutput = criterion:backward(predQOnActions, reward)
                local gradMaskLayer = maskLayer:backward({xin[4], xin[5]}, gradOutput)
                model:backward({xin[1], xin[2], xin[3]}, gradMaskLayer[1])
            end 
            return lossf, gradParams
        end
        --- optim.rmsprop returns \theta, f(\theta):= loss function
         _, lossv  = optim.rmsprop(feval, params, optimParams)
        loss[c] = lossv[1]
        c = c + 1
    end

    lossfull[epoch] = Tensor(loss):sum() / #lossv
    if print_perf then
        print(
            string.format('epoch = %i; rougue = %.6f; epsilon = %.6f; loss = %.6f' , 
                epoch, rouguef1[epoch], epsilon, lossfull[epoch])
            )
    end

    if (epsilon - delta) <= base_explore_rate then
        epsilon = base_explore_rate
        if epoch > end_baserate then 
            base_explore_rate = 0.
        end
    else 
        epsilon = epsilon - delta
    end

end

In [6]:
print(string.format("Elapsed time: %.5f" % (os.clock()-nClock) ))
print(
    string.format('First rougue = %.6f; Last rougue = %.6f',
        rouguef1[1], rouguef1[nepochs]) 
    )

Elapsed time: 25.72021	
First rougue = 0.666667; Last rougue = 0.800000	


In [8]:
Plot = require 'itorch.Plot'

loss = torch.Tensor(lossfull)
rougue = torch.Tensor(rouguef1)
indices = torch.linspace(1, loss:size(1), loss:size(1)):long() 
plot = Plot():line(indices, loss, 'red', 'hi'):title('Plot of loss'):draw()
plot = Plot():line(indices, rougue, 'blue', 'hi'):title('Plot of Rougue-F1'):draw()