In [1]:
require 'nn'
require 'rnn'
require 'image'
require 'optim'

In [2]:
-- Some useful functions
--- Build an n-by-k LongTensor of random integers.
-- Every entry is drawn independently with torch.random(a, b).
-- @param n number of rows
-- @param k number of columns
-- @param a lower bound handed to torch.random
-- @param b upper bound handed to torch.random
-- @return a freshly allocated torch.LongTensor of size n x k
function genNbyK(n, k, a, b)
    -- `local` keeps the tensor from leaking into (or clobbering) a global `out`
    local out = torch.LongTensor(n, k)
    for i = 1, n do
        for j = 1, k do
            out[i][j] = torch.random(a, b)
        end
    end
    return out
end

--- Build the network that maps a {sentence, query, summary} triple to action scores.
--
-- Three encoders (one per input) are run in a ParallelTable, their outputs are
-- joined along dimension 2 and fed to a linear head with 2 outputs.
-- The query and summary encoders are clones of the sentence encoder created with
-- clone("weight", "gradWeight"), i.e. they share the fields with those names.
--
-- @param model 'bow' selects the bag-of-words encoder; any other value selects the LSTM encoder
-- @param vocabSize number of rows of the LookupTableMaskZero embedding
-- @param embeddingSize width of the embedding (and of each encoder output)
-- @param metric only used in the log message printed at construction time
-- @param adapt when truthy, return the "adaptive regularization" network, whose
--        output is a table {2-column scores, output of the Linear->LogSigmoid->SoftMax branch}
-- @param use_cuda when truthy, the network is converted with :cuda() before being returned
-- @return the assembled nn module
function buildModel(model, vocabSize, embeddingSize, metric, adapt, use_cuda)
    local isBow = (model == 'bow')

    -- Small experiments seem to show that the Tanh activations performed better
    --      than the ReLU for the bow model
    local sentenceLookup
    if isBow then
        print(string.format("Running bag-of-words model to learn %s", metric))
        sentenceLookup = nn.Sequential()
                    :add(nn.LookupTableMaskZero(vocabSize, embeddingSize))
                    :add(nn.Sum(2, 3, true)) -- Not averaging blows up model so keep this true
                    :add(nn.Tanh())
    else
        print(string.format("Running LSTM model to learn %s", metric))
        sentenceLookup = nn.Sequential()
                    :add(nn.LookupTableMaskZero(vocabSize, embeddingSize))
                    :add(nn.SplitTable(2))
                    :add(nn.Sequencer(nn.LSTM(embeddingSize, embeddingSize)))
                    :add(nn.SelectTable(-1))            -- selects last state of the LSTM
                    :add(nn.Linear(embeddingSize, embeddingSize))
                    :add(nn.ReLU())
    end
    local queryLookup = sentenceLookup:clone("weight", "gradWeight")
    local summaryLookup = sentenceLookup:clone("weight", "gradWeight")
    local pmodule = nn.ParallelTable()
                :add(sentenceLookup)
                :add(queryLookup)
                :add(summaryLookup)

    -- Default head; the two encoder variants differ only in the activation.
    -- It is built even when `adapt` is set (and then replaced below) so that the
    -- order in which layers are randomly initialised stays as it always was.
    local joinActivation
    if isBow then
        joinActivation = nn.Tanh()
    else
        joinActivation = nn.ReLU()
    end
    local nnmodel = nn.Sequential()
        :add(pmodule)
        :add(nn.JoinTable(2))
        :add(joinActivation)
        :add(nn.Linear(embeddingSize * 3, 2))

    if adapt then
        print("Adaptive regularization")
        -- NOTE(review): SoftMax over a single-column output is 1 for every row;
        -- confirm this branch is meant to end in SoftMax.
        local logmod = nn.Sequential()
            :add(nn.Linear(embeddingSize * 3, 1))
            :add(nn.LogSigmoid())
            :add(nn.SoftMax())

        local regmod = nn.Sequential()
            :add(nn.Linear(embeddingSize * 3, 2))

        local fullmod = nn.ConcatTable()
            :add(regmod)
            :add(logmod)

        nnmodel = nn.Sequential()
            :add(pmodule)
            :add(nn.JoinTable(2))
            :add(fullmod)
    end

    if use_cuda then
        return nnmodel:cuda()
    end
    return nnmodel
end

--- Count how many times each value occurs in a table (a unigram histogram).
-- @param inputdic table whose values are the tokens to count; its keys are ignored
-- @return table mapping every distinct value to its number of occurrences
function Tokenize(inputdic)
    local counts = {}
    for _, token in pairs(inputdic) do
        counts[token] = (counts[token] or 0) + 1
    end
    return counts
end

--- Unigram-overlap recall, precision and F1 between two token-count tables.
--
-- Both arguments map token -> count (the shape returned by Tokenize).
-- The overlap is the sum over tokens of min(reference count, generated count).
-- Neither argument is modified.
--
-- @param genSummary token counts of the generated summary
-- @param refSummary token counts of the reference summary
-- @return recall  overlap / total reference count (0 when the reference is empty)
-- @return prec    overlap / total generated count (0 when the generated summary is empty)
-- @return f1      harmonic mean of recall and precision (0 when both are 0)
function rougeScores(genSummary, refSummary)
    local genTotal = 0
    local refTotal = 0
    local intersection = 0

    -- A token missing from genSummary counts as 0, so no keys need to be
    -- inserted into either table to cover the union of tokens.
    for token, refCount in pairs(refSummary) do
        refTotal = refTotal + refCount
        intersection = intersection + math.min(refCount, genSummary[token] or 0)
    end
    for _, genCount in pairs(genSummary) do
        genTotal = genTotal + genCount
    end

    -- Guard before dividing so an empty side never produces NaN
    local recall = 0
    if refTotal ~= 0 then
        recall = intersection / refTotal
    end
    local prec = 0
    if genTotal ~= 0 then
        prec = intersection / genTotal
    end

    local f1 = 0
    if recall > 0 or prec > 0 then
        f1 = (2 * recall * prec) / (recall + prec)
    end
    return recall, prec, f1
end

In [3]:
--- Row-wise selection of sentences according to one-hot actions (loop version).
--
-- For every row i, the row of `inputsentences` is copied into `summary` when
-- chosenactions[i][select_index] == 1, and the row is zeroed otherwise.
-- Zeroing matters when `summary` is reused between calls: without it a row
-- selected in an earlier call would survive a later "not selected" decision,
-- which disagrees with buildPredsummaryFast (whose result is 0 on such rows).
--
-- @param summary tensor to write into (modified in place), or nil to allocate
--        a zero tensor with the size of `inputsentences`
-- @param chosenactions 2-D tensor of actions, one row per sentence
-- @param inputsentences 2-D tensor of token ids, one row per sentence
-- @param select_index column of `chosenactions` that marks "select this sentence"
-- @return `summary`
function buildPredsummary(summary, chosenactions, inputsentences, select_index)
    if summary == nil then
        summary = torch.zeros(inputsentences:size())
    end
    for i = 1, chosenactions:size(1) do
        if chosenactions[i][select_index] == 1 then
            summary[i]:copy(inputsentences[i])
        else
            -- drop whatever an earlier call may have left in this row
            summary[i]:zero()
        end
    end
    return summary
end

--- Row-wise selection of sentences according to one-hot actions (vectorised version).
--
-- The `select_index` column of `chosenactions` is broadcast across the columns
-- of `inputsentences` and multiplied element-wise with it, so a row is kept
-- when its action entry is 1 and becomes all zeros when the entry is 0.
--
-- @param summary unused; kept so the signature matches buildPredsummary
-- @param chosenactions 2-D tensor of actions, one row per sentence
--        (assumes a DoubleTensor, since it is multiplied with a DoubleTensor -- TODO confirm)
-- @param inputsentences 2-D tensor of token ids, one row per sentence
-- @param select_index column of `chosenactions` that marks "select this sentence"
-- @return a new tensor with the size of `inputsentences`
function buildPredsummaryFast(summary, chosenactions, inputsentences, select_index)
    -- locals: the notebook also uses globals named `n` and `k`, which must not be overwritten here
    local nrows = inputsentences:size(1)
    local ncols = inputsentences:size(2)

    -- select() returns a view into `chosenactions`; clone it before reshaping.
    -- NOTE(review): reshaping the un-cloned view presumably reinterprets the
    -- underlying storage instead of the selected column, which would explain
    -- the "weird indexing" seen with the clone-free variant:
    --     chosenactions:select(2, select_index):resize(1, n):view(n, 1):expand(n, k):clone()
    local column = chosenactions:select(2, select_index):clone():view(nrows, 1)
    -- expand() does not allocate, so clone again to get a writable nrows x ncols mask
    local actionmatrix = column:expand(nrows, ncols):clone()
    return actionmatrix:cmul(inputsentences:double())
end

In [4]:
--- Write each non-empty row of `predsummary` into the matching row of
-- `totalPredsummary` (loop version). `totalPredsummary` is modified in place.
--
-- For a row whose sum is > 0, the number of zero entries in the matching
-- `totalPredsummary` row is counted, and the row is written so that it ends at
-- that position.
-- NOTE(review): this equals the "largest index holding a zero" used by
-- buildTotalSummaryFast only when the zeros form a prefix of the row; it also
-- assumes at least as many zeros as `predsummary` has columns -- confirm with callers.
--
-- @param predsummary 2-D tensor, one candidate sentence per row (all-zero rows are skipped)
-- @param totalPredsummary 2-D tensor accumulating the summary, one row per example
function buildTotalSummary(predsummary, totalPredsummary)
    for i = 1, predsummary:size(1) do
        if predsummary[i]:sum() > 0 then
            local target = totalPredsummary[i]
            local maxindex = 0
            for j = 1, target:size(1) do
                if target[j] == 0 then
                    maxindex = maxindex + 1
                end
            end
            local lenx = predsummary[i]:size(1)
            -- the slice is a view, so copying into it updates totalPredsummary
            target[{{maxindex - lenx + 1, maxindex}}]:copy(predsummary[i])
        end
    end
end

--- Write each non-empty row of `predsummary` into the matching row of
-- `totalPredsummary` (masked-index version). `totalPredsummary` is modified in place.
--
-- For a row whose sum is > 0, the largest column index of `totalPredsummary`
-- that still holds a zero is located, and the row is written so that it ends
-- at that index.
-- NOTE(review): assumes the target row still contains a zero and has room for
-- a full `predsummary` row before that index -- confirm with callers.
--
-- @param predsummary 2-D tensor, one candidate sentence per row (all-zero rows are skipped)
-- @param totalPredsummary 2-D tensor accumulating the summary, one row per example
function buildTotalSummaryFast(predsummary, totalPredsummary)
    local n_l = totalPredsummary:size(2)
    -- 1..n_l, used to turn a zero-mask into column indices
    local indices = torch.linspace(1, n_l, n_l):long()
    for i = 1, predsummary:size(1) do
        if predsummary[i]:sum() > 0 then
            -- Finding the largest index with a zero
            local maxindex = torch.max(indices[torch.eq(totalPredsummary[i], 0)])
            local lenx = predsummary[i]:size(1)
            totalPredsummary[i][{{maxindex - lenx + 1, maxindex}}]:copy(predsummary[i])
        end
    end
end

In [5]:
-- Experiment configuration (notebook-wide globals)

-- Sizes: rows per batch, sentences per stream, tokens per sentence, tokens per query
n, n_s, k, q = 10, 5, 7, 5
-- Bounds handed to genNbyK for random token ids; b is also passed to buildModel as vocabSize
a, b = 1, 100
-- Embedding width passed to buildModel
embDim = 50
-- Column indices of the two actions in the n x 2 action / Q-value tensors
SKIP, SELECT = 1, 2
-- Probability of using random Q-values instead of the model's output; halved after every epoch
epsilon = 1
-- Number of training epochs
nepochs = 1000
-- true -> the training loop uses the *Fast helper functions
fast = true

-- Used in the training loop to route the loss gradient to the chosen action's Q-value
maskLayer = nn.MaskedSelect()
-- Config table handed to optim.rmsprop
optimParams = {}
optimParams.learningRate = 0.1

# Simulating the data

In [6]:
-- Simulating streams and queries
-- queries: n x q LongTensor of random token ids
queries = genNbyK(n, q, a, b)

-- Note that the sentences are batched by sentence index so sentences[1] is the first sentence of each article
-- Each sentences[i] is an n x k LongTensor of random token ids
sentences = {}
for i=1, n_s do
    sentences[i] = genNbyK(n, k, a, b)
end

-- Optimal predicted summary: n rows, room for all n_s sentences of k tokens each
trueSummary = torch.zeros(n, k * n_s)
-- Using this to generate the optimal actions
true_actions = {}
for i=1, n_s do 
    ---- Simulating the data: random "true" Q-values for the two actions
    trueqValues = torch.rand(n, 2)
    
     ---- Generating the max values and getting the indices
    qMaxtrue, qindxtrue = torch.max(trueqValues, 2)
    
    --- I want to select the qindx elements for each row
    --- (scatter writes a 1 into the column given by qindxtrue, i.e. a one-hot action per row)
    true_actions[i] = torch.zeros(n, 2):scatter(2, qindxtrue, torch.ones(trueqValues:size()))
    -- best_sentences is not defined before the first iteration, so nil is passed then
    best_sentences = buildPredsummaryFast(best_sentences, true_actions[i], sentences[i], SELECT)
    -- writes the selected sentences into trueSummary in place
    buildTotalSummaryFast(best_sentences, trueSummary)
end

-- Token counts of every row of trueSummary.
-- Note: unfilled positions hold 0, and Tokenize counts that 0 like any other token.
qTokens = {}
for i=1, n do
    qTokens[i] = Tokenize(trueSummary[i]:totable())
end

# Scoring the ROUGE metrics on the simulation

In [7]:
-- Sanity check: the first true summary scored against itself (expected output: 1 1 1)
local selfGenerated = Tokenize(trueSummary[1]:totable())
local selfReference = Tokenize(trueSummary[1]:totable())
print(rougeScores(selfGenerated, selfReference))

1	1	1	


# Building the model

In [8]:
-- Bag-of-words model; b (the upper token-id bound) is used as the vocabulary size,
-- with adaptive regularization and CUDA both switched off
model = buildModel('bow', b, embDim, 'f1', false, false)

-- params / gradParams are handed to optim.rmsprop and zeroed in feval by the training loop
params, gradParams = model:getParameters()
-- Squared-error loss between the chosen action's Q-value and the observed reward
criterion = nn.MSECriterion()

Running bag-of-words model to learn f1	


# Scoring the model on the sentences

In [9]:
-- Overrides the nepochs = 1000 set in the parameter cell
nepochs = 100

In [15]:
-- Per-sentence buffers, indexed 1..n_s (kept global: later cells read them)
totalPredsummary = {}   -- n x (n_s * k) LongTensor: summary built for sentence i
qValues = {}            -- n x 1: Q-value of the action that was taken
qActions = {}           -- one-hot action per row (n x 2 once an action has been chosen)
qPreds = {}             -- n x 2: Q-values for SKIP / SELECT
rewards = {}            -- n x 1: F1 returned by rougeScores
-- Per-epoch curves
lossfull = {}
rouguef1 = {}

for epoch = 1, nepochs do
    for i = 1, n_s do
        --- Initializing things
        if epoch == 1 then
            qPreds[i] = torch.zeros(n, 2)
            qValues[i] = torch.zeros(n, 1)
            qActions[i] = torch.zeros(n, 1)
            rewards[i] = torch.zeros(n, 1)
            totalPredsummary[i] = torch.LongTensor(n, n_s * k):fill(0)
        else
            --- Reset things
            qPreds[i]:fill(0)
            qValues[i]:fill(0)
            qActions[i]:fill(0)
            rewards[i]:fill(0)
            totalPredsummary[i]:fill(0)
        end
    end

    for i = 1, n_s do
        -- Epsilon-greedy: random Q-values with probability epsilon, model output otherwise.
        -- No throw-away forward pass is needed in the random branch any more,
        -- because feval below runs its own forward right before backward.
        if torch.uniform(0, 1) <= epsilon then
            qPreds[i]:copy(torch.rand(n, 2))
        else
            qPreds[i]:copy(model:forward({sentences[i], queries, totalPredsummary[i]}))
        end

        if fast then
            local qMax, qindx = torch.max(qPreds[i], 2)  -- Pulling the best actions
            -- Here's the fast way to select the optimal action for each query
            qActions[i] = torch.zeros(n, 2):scatter(2, qindx, torch.ones(qPreds[i]:size())):clone()
            qValues[i]:copy(qMax)
            local predsummary = buildPredsummaryFast(nil, qActions[i], sentences[i], SELECT)
            buildTotalSummaryFast(predsummary, totalPredsummary[i])
        else
            local actions = torch.zeros(n, 2)
            for j = 1, n do
                -- was `preds[j]`, an undefined name; the Q-values live in qPreds[i]
                local rowQ = qPreds[i][j]
                local choice = SKIP
                if rowQ[SELECT] > rowQ[SKIP] then
                    choice = SELECT
                end
                actions[j][choice] = 1
                -- record the taken action's value, as the fast path does via qMax
                qValues[i][j]:fill(rowQ[choice])
            end
            -- feval masks qPreds[i] with qActions[i], so it must hold the chosen actions
            qActions[i] = actions
            -- pass nil so a fresh buffer is used and no row from an earlier sentence survives
            local predsummary = buildPredsummary(nil, actions, sentences[i], SELECT)
            buildTotalSummary(predsummary, totalPredsummary[i])
        end

        -- Reward = F1 between the true summary and the summary built for sentence i.
        -- NOTE(review): each totalPredsummary[i] starts from zeros, so it only ever
        -- holds sentence i, while trueSummary accumulates all sentences; the zero
        -- padding is also counted as a token by Tokenize -- confirm both are intended.
        for j = 1, n do
            local _, _, f1 = rougeScores(Tokenize(trueSummary[j]:totable()),
                                         Tokenize(totalPredsummary[i][j]:totable()))
            rewards[i][j]:fill(f1)
        end
    end
    rouguef1[epoch] = rewards[n_s]:sum() / rewards[n_s]:size(1)

    local lossv = {}
    --- This backprops through the sentences sequentially...which is fine for now
    for i = 1, n_s do
        local input = {sentences[i], queries, totalPredsummary[i]}
        local function feval(_)
            gradParams:zero()
            local loss = criterion:forward(qValues[i], rewards[i])
            local gradOutput = criterion:backward(qValues[i], rewards[i])
            -- Route the n loss gradients to the entries of qPreds[i] picked by the action mask
            local gradMaskLayer = maskLayer:backward({qPreds[i], qActions[i]:byte()},
                                                     gradOutput:resize(rewards[i]:size(1)))
            -- nn modules cache activations from their most recent forward, and
            -- backward expects that forward to have used the same input. All
            -- forwards above ran before any backward, so refresh the state here.
            -- NOTE(review): `input` now holds the summary *after* the action was
            -- applied (as the original backward call did) -- confirm intended.
            model:forward(input)
            model:backward(input, gradMaskLayer[1])
            return loss, gradParams
        end
        local _, fs = optim.rmsprop(feval, params, optimParams)
        lossv[i] = fs[1]
    end
    lossfull[epoch] = torch.Tensor(lossv):sum() / #lossv
    epsilon = epsilon / 2.
end

In [16]:
Plot = require('itorch.Plot')

-- Turn the per-epoch Lua tables into tensors for plotting
loss = torch.Tensor(lossfull)
rougue = torch.Tensor(rouguef1)
-- x axis: 1 .. number of recorded epochs
local npoints = loss:size(1)
indices = torch.linspace(1, npoints, npoints):long()

In [17]:
-- Loss per epoch (red) and mean F1 of the last sentence per epoch (blue);
-- `plot` is overwritten by the second chart
plot = Plot():line(indices, loss, 'red', 'hi'):title('Plot of loss'):draw()
plot = Plot():line(indices, rougue, 'blue', 'hi'):title('Plot of Rougue-F1'):draw()

    # This is on a single example
    
    -- Manual forward/backward of the loss for sentence 1, outside of optim
    i=1
    xinput = {sentences[i], queries, totalPredsummary[i]}

    gradParams:zero()
    print(criterion:forward(qValues[i], rewards[i]))

    gradOutput = criterion:backward(qValues[i], rewards[i])
    -- resize to the batch size instead of a hard-coded 10, matching the training loop
    gradMaskLayer = maskLayer:backward({qPreds[i], qActions[i]:byte()}, gradOutput:resize(rewards[i]:size(1)))
    model:backward(xinput, gradMaskLayer[1])

    --- Loss and parameter gradient for one batch, in the shape optim expects.
    -- Reads the globals gradParams, criterion, maskLayer and model, and stores
    -- the loss in the global `lossf`.
    -- @param params unused
    -- @param xinput input handed to model:backward
    -- @param predQOnActions Q-values of the taken actions (criterion input)
    -- @param predQ full Q-value tensor that was masked to obtain predQOnActions
    -- @param reward criterion target
    -- @param actions_in mask handed to maskLayer together with predQ
    -- @param gradOutput unused (the gradient is recomputed inside)
    -- @return lossf, gradParams
    function feval(params, xinput, predQOnActions, predQ, reward, actions_in, gradOutput)
        gradParams:zero()
        lossf = criterion:forward(predQOnActions, reward)
        local batchSize = reward:size(1)
        local lossGrad = criterion:backward(predQOnActions, reward):resize(batchSize)
        local maskGrads = maskLayer:backward({predQ, actions_in}, lossGrad)
        model:backward(xinput, maskGrads[1])
        return lossf, gradParams
    end

    -- Run feval once on sentence 1; the trailing gradOutput argument is ignored
    -- by feval, which declares its own local of the same name
    i = 1
    xinput = {sentences[i], queries, totalPredsummary[i]}
    feval(params, xinput,  qValues[i], qPreds[i], rewards[i], qActions[i]:byte(), gradOutput)

In [None]:
--- Prepend a new batch to the replay memory and cap it at `memsize` rows.
--
-- Both `newinput` and `memory_hist` are tables laid out as
--   {{sentences, queries, summaries}, rewards, actions [, regularizer]}.
-- The new rows are concatenated in front of the history along dimension 1, so
-- rows 1..memsize of the result are the most recently added ones.
--
-- @param newinput batch to add
-- @param memory_hist existing memory
-- @param memsize maximum number of rows to keep
-- @param adapt when truthy, element [4] of both tables is stacked and returned as well
-- @param use_cuda when truthy, inputs, rewards and actions are returned as CUDA
--        tensors (actions go through a ByteTensor first)
-- @return {inputMemory, rewardMemory, actionMemory} or, with `adapt`,
--         {inputMemory, rewardMemory, actionMemory, regMemory}
function stackMemory(newinput, memory_hist, memsize, adapt, use_cuda)
    local sentMemory = torch.cat(newinput[1][1]:double(), memory_hist[1][1]:double(), 1)
    local queryMemory = torch.cat(newinput[1][2]:double(), memory_hist[1][2]:double(), 1)
    local sumryMemory = torch.cat(newinput[1][3]:double(), memory_hist[1][3]:double(), 1)
    local rewardMemory = torch.cat(newinput[2]:double(), memory_hist[2]:double(), 1)

    local regMemory
    if adapt then
        regMemory = torch.cat(newinput[4]:double(), memory_hist[4]:double(), 1)
    end

    local actionMemory
    if use_cuda then
        actionMemory = torch.cat(newinput[3]:double(), memory_hist[3]:double(), 1)
    else
        actionMemory = torch.cat(newinput[3], memory_hist[3], 1)
    end

    --- specifying rows to index
    -- Keep at most `memsize` rows, starting from the top where the newest rows are.
    -- (The previous bounds kept memsize + 1 rows once the memory was over capacity.)
    local nstart = 1
    local nend = math.min(sentMemory:size(1), memsize)

    sentMemory = sentMemory[{{nstart, nend}}]
    queryMemory = queryMemory[{{nstart, nend}}]
    sumryMemory = sumryMemory[{{nstart, nend}}]
    rewardMemory = rewardMemory[{{nstart, nend}}]
    actionMemory = actionMemory[{{nstart, nend}}]

    local inputMemory
    if use_cuda then
        inputMemory = {sentMemory:cuda(), queryMemory:cuda(), sumryMemory:cuda()}
        rewardMemory = rewardMemory:cuda()
        actionMemory = torch.ByteTensor(#actionMemory):copy(actionMemory):cuda()
    else
        -- only build the CPU table here; assigning it unconditionally used to
        -- throw away the CUDA tensors created above
        inputMemory = {sentMemory, queryMemory, sumryMemory}
    end

    if adapt then
        regMemory = regMemory[{{nstart, nend}}]
        return {inputMemory, rewardMemory, actionMemory, regMemory}
    end
    return {inputMemory, rewardMemory, actionMemory}
end    