In [1]:
require 'nn'
require 'rnn'
require 'image'
require 'optim'
require 'parallel'
require 'cutorch'
require 'cunn'
require 'cunnx'
dl = require 'dataload'

In [2]:
-- Some useful functions
function genNbyK(n, k, a, b)
    out = torch.LongTensor(n, k)
    for i=1, n do
        for j = 1, k do
            out[i][j] = torch.random(a, b)
        end
    end
    return out
end

function buildModel(model, vocabSize, embeddingSize, metric, adapt, use_cuda)
    -- Small experiments seem to show that the Tanh activations performed better\
    --      than the ReLU for the bow model
    if model == 'bow' then
        print(string.format("Running bag-of-words model to learn %s", metric))
        sentenceLookup = nn.Sequential()
                    :add(nn.LookupTableMaskZero(vocabSize, embeddingSize))
                    :add(nn.Sum(2, 3, true)) -- Not averaging blows up model so keep this true
                    :add(nn.Tanh())
    else
        print(string.format("Running LSTM model to learn %s", metric))
        sentenceLookup = nn.Sequential()
                    :add(nn.LookupTableMaskZero(vocabSize, embeddingSize))
                    :add(nn.SplitTable(2))
                    :add(nn.Sequencer(nn.LSTM(embeddingSize, embeddingSize)))
                    :add(nn.SelectTable(-1))            -- selects last state of the LSTM
                    :add(nn.Linear(embeddingSize, embeddingSize))
                    :add(nn.ReLU())
    end
    local queryLookup = sentenceLookup:clone("weight", "gradWeight") 
    local summaryLookup = sentenceLookup:clone("weight", "gradWeight")
    local pmodule = nn.ParallelTable()
                :add(sentenceLookup)
                :add(queryLookup)
                :add(summaryLookup)

    if model == 'bow' then
        nnmodel = nn.Sequential()
            :add(pmodule)
            :add(nn.JoinTable(2))
            :add(nn.Tanh())
            :add(nn.Linear(embeddingSize * 3, 2))
    else
        nnmodel = nn.Sequential()
            :add(pmodule)
            :add(nn.JoinTable(2))
            :add(nn.ReLU())
            :add(nn.Linear(embeddingSize * 3, 2))
    end

    if adapt then 
        print("Adaptive regularization")
        local logmod = nn.Sequential()
            :add(nn.Linear(embeddingSize * 3, 1))
            :add(nn.LogSigmoid())
            :add(nn.SoftMax())

        local regmod = nn.Sequential()
            :add(nn.Linear(embeddingSize * 3, 2))

        local fullmod = nn.ConcatTable()
            :add(regmod)
            :add(logmod)

        local final = nn.Sequential()
            :add(pmodule)
            :add(nn.JoinTable(2))
            :add(fullmod)

        nnmodel = final
    end

    if use_cuda then
        return nnmodel:cuda()
    end
    return nnmodel
end

function Tokenize(inputdic)
    --- This function tokenizes the words into a unigram dictionary
    local out = {}
    for k, v in pairs(inputdic) do
        if v ~= 0 then 
            if out[v] == nil then
                out[v] = 1
            else 
                out[v] = 1 + out[v]
            end
        end
    end
    return out
end

function rougeScores(genSummary, refSummary)
    local genTotal = 0
    local refTotal = 0
    local intersection = 0
    -- Inserting the missing keys
    for k, genCount in pairs(genSummary) do
        if refSummary[k] == nil then
            refSummary[k] = 0
        end
    end
    for k, refCount in pairs(refSummary) do
        local genCount = genSummary[k]
        if genCount == nil then 
            genCount = 0 
        end
        intersection = intersection + math.min(refCount, genCount)
        refTotal = refTotal + refCount
        genTotal = genTotal + genCount
    end

    recall = intersection / refTotal
    prec = intersection / genTotal
    if refTotal == 0 then
        recall = 0
    end 
    if genTotal == 0 then
        prec = 0
    end
    -- tmp = {intersection, refTotal, genTotal}
    if recall > 0 or prec > 0 then
        f1 = (2 * recall * prec) / (recall + prec)
    else 
        f1 = 0
    end
    return recall, prec, f1
end

function buildPredsummary(chosenactions, inputsentences, select_index)
    local summary = torch.zeros(inputsentences:size())
    for i=1, chosenactions:size(1) do
        -- the 2 is for the SELECT index, will have to make this more general later
        if chosenactions[i][select_index] == 1 then
            summary[i]:copy(inputsentences[i])
        end
    end    
    return summary
end

function buildPredsummaryFast(chosenactions, inputsentences, select_index)
    local n = inputsentences:size(1)
    local k = inputsentences:size(2)
    local summary = torch.zeros(inputsentences:size())
    actionmatrix = chosenactions:select(2, select_index):clone():resize(n, 1):view(n, 1):expand(n, k):clone()
    --     This line didn't work for whatever reason...gives weird indexing...
    --     actionmatrix = chosenactions:select(2, select_index):resize(1, n):view(n, 1):expand(n, k):clone()
    return actionmatrix:cmul(inputsentences:double())
end

function buildTotalSummary(predsummary, totalPredsummary)
    nps = predsummary:size(1)
    n_l = totalPredsummary:size(2)
    indices = torch.linspace(1, n_l, n_l):long() 
    for i=1, predsummary:size(1) do
        if predsummary[i]:sum() > 0 then 
            minindex = 1
            for j = 1, totalPredsummary[i]:size(1) do 
                if totalPredsummary[i][j] > 0 then
                    minindex = minindex + 1
                end
            end
            lenx = predsummary[i]:size(1)
            totalPredsummary[i][{{minindex, minindex + lenx - 1}}]:copy(predsummary[i])

        end
    end
end

function buildTotalSummaryFast(predsummary, inputTotalSummary, usecuda)
    tmpSummary = inputTotalSummary:clone()
    nps = predsummary:size(1)
    n_l = inputTotalSummary:size(2)    
    indices = torch.linspace(1, n_l, n_l):long()
    if usecuda then
        indices = indices:cuda()
    end
    for i=1, predsummary:size(1) do
        if predsummary[i]:sum() > 0 then
            -- Finding the largest index with a zero
            -- maxindex = torch.max(indices[torch.eq(totalPredsummary[i], 0)])
            -- totalPredsummary[i][{{maxindex - lenx + 1, maxindex}}]:copy(predsummary[i])
            -- Finding the smallest index with a zero
            minindex = torch.min(indices[torch.eq(tmpSummary[i], 0)])
            lenx = predsummary[i]:size(1)
            tmpSummary[i][{{minindex, minindex + lenx - 1}}]:copy(predsummary[i])
        end
    end
    return tmpSummary
end

In [10]:
-- Setting parameters
n = 10
n_s = 5
k = 3
q = 4
a = 1
b = 1000
embDim = 50
gamma = 0.0
SKIP = 1
SELECT = 2
epsilon = 1
nepochs = 100
base_explore_rate = 0.1
end_baserate = torch.round(nepochs * 0.8)

lr = 1e-8
fast = true
usecuda = false
adapt = false
cuts = 4
mem_multiplier = 1
batch_size = 25
delta = cuts/nepochs
print_perf = true

In [11]:
-- Simulating the data

if usecuda then
    Tensor = torch.CudaTensor
    LongTensor = torch.CudaLongTensor   
    ByteTensor = torch.CudaByteTensor
    maskLayer = nn.MaskedSelect():cuda()
    print("...running on GPU")
else
    maskLayer = nn.MaskedSelect()
    torch.setnumthreads(8)
    Tensor = torch.Tensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
    print("...running on CPU")
end

-- Simulating streams and queries
queries = genNbyK(n, q, a, b)

-- Note that the sentences are batched by sentence index so sentences[1] is the first sentence of each article
sentences = {}
for i=1, n_s do
    sentences[i] = genNbyK(n, k, a, b)
end
-- Optimal predicted summary
trueSummary = LongTensor(n, k * n_s):fill(0)

-- Using this to generate the optimal actions
true_actions = {}
for i=1, n_s do 
    ---- Simulating the data
    trueqValues = torch.rand(n, 2)

     ---- Generating the max values and getting the indices
    qMaxtrue, qindxtrue = torch.max(trueqValues, 2)

    --- I want to select the qindx elements for each row
    true_actions[i] = torch.zeros(n, 2):scatter(2, qindxtrue, torch.ones(trueqValues:size()))
    best_sentences = buildPredsummaryFast(true_actions[i], sentences[i], SELECT)
    trueSummary = buildTotalSummaryFast(best_sentences, trueSummary, usecuda)
end

qTokens = {}
for i=1, n do
    qTokens[i] = Tokenize(trueSummary[i]:totable())
end

...running on CPU	


# Scoring the rougue metrics on the simulation

In [12]:
print(rougeScores(Tokenize(trueSummary[1]:totable()), Tokenize(trueSummary[1]:totable())))

1	1	1	


# Building the model

In [13]:
model = buildModel('lstm', b, embDim, 'f1', adapt, usecuda)
-- model = buildModel('lstm', b, embDim, 'f1', false, usecuda)
-- model = buildModel('bow', b, embDim, 'f1', false, false)

params, gradParams = model:getParameters()
if adapt then 
    criterion = nn.ParallelCriterion():add(nn.MSECriterion()):add(nn.BCECriterion())
    criterion['weights'] = {1, 0.25}
else 
    criterion = nn.MSECriterion()
end 

if usecuda then
    criterion = criterion:cuda()
    model = model:cuda()
end

Running LSTM model to learn f1	


# Scoring the model on the sentences

In [14]:
totalPredsummary = {}
qValues = {}
qActions = {}
qPreds = {}
rewards = {}
lossfull = {}
rouguef1 = {}

totalPredsummary = LongTensor(n, n_s * k):fill(0)

memfull = false
curr_memsize = 0
memsize = n * n_s * mem_multiplier
queryMemory = Tensor(memsize, q):fill(0)
qActionMemory = Tensor(memsize, 2):fill(0)
predSummaryMemory = Tensor(memsize, n_s * k):fill(0)
sentenceMemory = Tensor(memsize, k):fill(0)
qPredsMemory = Tensor(memsize, 2):fill(0)
qValuesMemory = Tensor(memsize, 1):fill(0)
rewardMemory = Tensor(memsize, 1):fill(0)

if adapt then
    regPreds = {}
    regMemory = Tensor(memsize, 1):fill(0) 
end

--- Initializing thingss
for i = 1, n_s do
    qPreds[i] = Tensor(n, 2):fill(0) 
    qValues[i] = Tensor(n, 1):fill(0)
    qActions[i] = Tensor(n, 2):fill(0)
    rewards[i] = Tensor(n, 1):fill(0)
    if adapt then
        regPreds[i] = Tensor(n, 1):fill(0)
    end        
end 

if usecuda then
    criterion = criterion:cuda()
    model = model:cuda()
end

for epoch=1, nepochs do
    --- Reset things at the start of each epoch
    for i=1, n_s do
        qPreds[i]:fill(0)
        qValues[i]:fill(0)
        qActions[i]:fill(0)
        rewards[i]:fill(0)
        totalPredsummary:fill(0)
        if adapt then
            regMemory[i]:fill(0)
        end        
    end

    for i=1, n_s do
        totalPreds = model:forward({sentences[i], queries, totalPredsummary})
        
        if adapt then 
            qPreds[i]:copy(totalPreds[1])
            regPreds[i]:copy(totalPreds[2])
        else
            qPreds[i]:copy(totalPreds)
        end

        if torch.uniform(0, 1) <= epsilon then
            --- Need to do something here
            xrand = torch.rand(qPreds[i]:size())
            qActions[i]:select(2, SELECT):copy(torch.ge(xrand:select(2, SELECT), xrand:select(2, SKIP)))
            qActions[i]:select(2, SKIP):copy(torch.ge(xrand:select(2, SKIP), xrand:select(2, SELECT)))
            qValues[i]:copy(maskLayer:forward({totalPreds, qActions[i]:byte() }) )
        else 
            qMax, qindx = torch.max(qPreds[i], 2)  -- Pulling the best actions
            -- Here's the fast way to select the optimal action for each query
            qActions[i]:copy(qActions[i]:scatter(2, qindx, torch.ones(qPreds[i]:size())):clone())
            qValues[i]:copy(qMax)

        end 
        predsummary = buildPredsummaryFast(qActions[i], sentences[i], SELECT)
        totalPredsummary = buildTotalSummaryFast(predsummary, totalPredsummary, usecuda)
        
        for j = 1, n do
            recall, prec, f1 = rougeScores( qTokens[j],
                                            Tokenize(totalPredsummary[j]:totable()))
            rewards[i][j]:fill(f1)
        end
        if i > 1 then
            -- Calculating change in rougue f1
            rewards[i]:copy(rewards[i] - rewards[i-1])
        end
        -- here's the row indexing
        start_row = curr_memsize + 1
        if memsize < (start_row + n) then 
            start_row = memsize - n + 1
            end_row = start_row + n - 1
            memfull = true
            curr_memsize = 0
        else 
            end_row = start_row + n - 1
            curr_memsize = end_row
        end            
        -- Update memory sequentially until it's full then restart updating it
        qActionMemory[{{start_row, end_row}}]:copy(qActions[i])
        predSummaryMemory[{{start_row, end_row}}]:copy(totalPredsummary)
        sentenceMemory[{{start_row, end_row}}]:copy(sentences[i])
        qPredsMemory[{{start_row, end_row}}]:copy(qPreds[i])
        qValuesMemory[{{start_row, end_row}}]:copy(qValues[i])
        queryMemory[{{start_row, end_row}}]:copy(queries)
        
        if adapt then
            regMemory[{{start_row, end_row}}]:copy(regPreds[i])
        end        

    end
    for i=1, n_s do
        if i  < n_s then
            rewardMemory[{{n * (i-1) + 1, n * i}}]:copy(rewards[i] + gamma * rewards[i + 1] )
        else
            rewardMemory[{{n * (i-1) + 1, n * i}}]:copy(rewards[i] )
        end
    end
    -- Adding back the delta for the last one
    rouguef1[epoch] = (rewards[n_s] + rewards[ n_s - 1] ):mean()

    if memfull then 
        memrows = memsize
    else 
        memrows = curr_memsize
    end
    if usecuda then 
        dataloader = dl.TensorLoader({
                        queryMemory[{{1, memrows}}]:cuda(), 
                        sentenceMemory[{{1, memrows}}]:cuda(), 
                        predSummaryMemory[{{1, memrows}}]:cuda(),
                        qPredsMemory[{{1, memrows}}]:cuda(), 
                        ByteTensor(memrows, 2):copy(qActionMemory[{{1, memrows}}]), 
                        qValuesMemory[{{1, memrows}}]:cuda()
                        }, 
                    rewardMemory[{{1, memrows}}]:cuda()
                )
        if adapt then            
            table.insert(dataloader['inputs'], regMemory[{{1, memrows}}]:cuda() )
        end
    else 
        dataloader = dl.TensorLoader({
                    queryMemory[{{1, memrows}}], 
                    sentenceMemory[{{1, memrows}}], 
                    predSummaryMemory[{{1, memrows}}], 
                    qPredsMemory[{{1, memrows}}], 
                    ByteTensor(memrows, 2):copy(qActionMemory[{{1, memrows}}]), 
                    qValuesMemory[{{1, memrows}}]
                    }, 
                rewardMemory[{{1, memrows}}]
            )
        if adapt then
            table.insert(dataloader['inputs'], regMemory[{{1, memrows}}] )
        end
    end
    loss = {}
    c = 1
    for k, xin, reward in dataloader:sampleiter(batch_size, memsize) do
        local function feval(params)
            gradParams:zero()
            if adapt then
                local ignore = model:forward({xin[1], xin[2], xin[3]})
                local predQOnActions = maskLayer:forward({xin[4], xin[5]}) 
                ones = torch.ones(xin[6]:size(1)):resize(xin[6]:size(1))
                if usecuda then
                    ones = ones:cuda()
                end
                lossf = criterion:forward({predQOnActions, xin[7]}, {reward, ones})
                local gradOutput = criterion:backward({predQOnActions, xin[6]}, {reward, ones})
                local gradMaskLayer = maskLayer:backward({xin[4], xin[5]}, gradOutput[1])
                model:backward({xin[1], xin[2], xin[3]}, {gradMaskLayer[1], gradOutput[2]})
            else 
                local ignore = model:forward({xin[1], xin[2], xin[3]})
                local predQOnActions = maskLayer:forward({xin[4], xin[5]}) 
                lossf = criterion:forward(predQOnActions, reward)
                local gradOutput = criterion:backward(predQOnActions, reward)
                local gradMaskLayer = maskLayer:backward({xin[4], xin[5]}, gradOutput)
                model:backward({xin[1], xin[2], xin[3]}, gradMaskLayer[1])
            end 
            return lossf, gradParams
        end
        --- optim.rmsprop returns \theta, f(\theta):= loss function
         _, lossv  = optim.rmsprop(feval, params, optimParams)
        loss[c] = lossv[1]
        c = c + 1
    end

    lossfull[epoch] = Tensor(loss):sum() / #lossv
    if print_perf then
        print(
            string.format('epoch = %i; rougue = %.6f; epsilon = %.6f; loss = %.6f' , 
                epoch, rouguef1[epoch], epsilon, lossfull[epoch])
            )
    end

    if (epsilon - delta) <= base_explore_rate then
        epsilon = base_explore_rate
        if epoch > end_baserate then 
            base_explore_rate = 0.
        end
    else 
        epsilon = epsilon - delta
    end
end

epoch = 1; rougue = 0.441905; epsilon = 1.000000; loss = 0.198384	


epoch = 2; rougue = 0.585238; epsilon = 0.960000; loss = 1447.331028	


epoch = 3; rougue = 0.417778; epsilon = 0.920000; loss = 8.074948	


epoch = 4; rougue = 0.388889; epsilon = 0.880000; loss = 17.963194	


epoch = 5; rougue = 0.387698; epsilon = 0.840000; loss = 14.400504	


epoch = 6; rougue = 0.495159; epsilon = 0.800000; loss = 0.890343	


epoch = 7; rougue = 0.466667; epsilon = 0.760000; loss = 18.901718	


epoch = 8; rougue = 0.514365; epsilon = 0.720000; loss = 791.727762	


epoch = 9; rougue = 0.435000; epsilon = 0.680000; loss = 24.049262	


epoch = 10; rougue = 0.447143; epsilon = 0.640000; loss = 22.987530	


epoch = 11; rougue = 0.445476; epsilon = 0.600000; loss = 118.802661	


epoch = 12; rougue = 0.358810; epsilon = 0.560000; loss = 1954.854960	


epoch = 13; rougue = 0.398810; epsilon = 0.520000; loss = 22211.046678	


epoch = 14; rougue = 0.441667; epsilon = 0.480000; loss = 65222.256417	


epoch = 15; rougue = 0.331190; epsilon = 0.440000; loss = 290344.009201	


epoch = 16; rougue = 0.400000; epsilon = 0.400000; loss = 560910.901437	


epoch = 17; rougue = 0.330476; epsilon = 0.360000; loss = 3175989.724833	


epoch = 18; rougue = 0.331429; epsilon = 0.320000; loss = 4403616.825208	


epoch = 19; rougue = 0.527857; epsilon = 0.280000; loss = 13650054.475425	


epoch = 20; rougue = 0.183333; epsilon = 0.240000; loss = 21052903.031737	


epoch = 21; rougue = 0.000000; epsilon = 0.200000; loss = 19814403.064725	


epoch = 22; rougue = 0.000000; epsilon = 0.160000; loss = 45087595.142296	


epoch = 23; rougue = 0.000000; epsilon = 0.120000; loss = 85887885.602907	


epoch = 24; rougue = 0.000000; epsilon = 0.100000; loss = 155795181.082346	


epoch = 25; rougue = 0.263889; epsilon = 0.100000; loss = 228716791.459958	


epoch = 26; rougue = 0.000000; epsilon = 0.100000; loss = 296574104.545946	


epoch = 27; rougue = 0.347222; epsilon = 0.100000; loss = 441117176.330105	


epoch = 28; rougue = 0.000000; epsilon = 0.100000; loss = 707841326.276625	


epoch = 29; rougue = 0.456667; epsilon = 0.100000; loss = 486205373.225448	




epoch = 30; rougue = 0.497222; epsilon = 0.100000; loss = 675924388.541191	


epoch = 31; rougue = 0.497222; epsilon = 0.100000; loss = 661431478.929634	


epoch = 32; rougue = 0.521032; epsilon = 0.100000; loss = 884251156.086987	


epoch = 33; rougue = 0.521032; epsilon = 0.100000; loss = 987860068.733271	


epoch = 34; rougue = 0.521032; epsilon = 0.100000; loss = 810641869.385338	


epoch = 35; rougue = 0.521032; epsilon = 0.100000; loss = 1008855547.653472	


epoch = 36; rougue = 0.494048; epsilon = 0.100000; loss = 2126430297.230240	


epoch = 37; rougue = 0.496032; epsilon = 0.100000; loss = 1998380515.203047	


epoch = 38; rougue = 0.521032; epsilon = 0.100000; loss = 2005649654.437991	




epoch = 39; rougue = 0.704365; epsilon = 0.100000; loss = 1943267007.256223	


epoch = 40; rougue = 0.704365; epsilon = 0.100000; loss = 2174889314.264749	


epoch = 41; rougue = 0.619762; epsilon = 0.100000; loss = 3820088839.231437	


epoch = 42; rougue = 0.753492; epsilon = 0.100000; loss = 3946804873.469470	


epoch = 43; rougue = 0.704365; epsilon = 0.100000; loss = 4440669713.367082	


epoch = 44; rougue = 0.720556; epsilon = 0.100000; loss = 4686382272.299315	


epoch = 45; rougue = 0.704365; epsilon = 0.100000; loss = 6267204803.102913	


epoch = 46; rougue = 0.667381; epsilon = 0.100000; loss = 6520288798.225548	


epoch = 47; rougue = 0.704365; epsilon = 0.100000; loss = 6127623287.969687	


epoch = 48; rougue = 0.711825; epsilon = 0.100000; loss = 9638899520.740551	


epoch = 49; rougue = 0.710635; epsilon = 0.100000; loss = 9551520611.314098	


epoch = 50; rougue = 0.704365; epsilon = 0.100000; loss = 12464658634.357006	


epoch = 51; rougue = 0.687222; epsilon = 0.100000; loss = 15195031309.686562	


epoch = 52; rougue = 0.446032; epsilon = 0.100000; loss = 21980799888.524448	


epoch = 53; rougue = 0.272222; epsilon = 0.100000; loss = 25689992528.045418	


epoch = 54; rougue = 0.272222; epsilon = 0.100000; loss = 20456975359.595421	


epoch = 55; rougue = 0.272222; epsilon = 0.100000; loss = 24766878127.516457	


epoch = 56; rougue = 0.269048; epsilon = 0.100000; loss = 25651804586.747040	


epoch = 57; rougue = 0.190000; epsilon = 0.100000; loss = 27347706772.815598	


epoch = 58; rougue = 0.187619; epsilon = 0.100000; loss = 30127501361.621349	


epoch = 59; rougue = 0.572143; epsilon = 0.100000; loss = 24652445255.045803	


epoch = 60; rougue = 0.543254; epsilon = 0.100000; loss = 27382387324.296127	


epoch = 61; rougue = 0.621032; epsilon = 0.100000; loss = 25956605910.972595	


epoch = 62; rougue = 0.690476; epsilon = 0.100000; loss = 33772718050.825199	


epoch = 63; rougue = 0.658651; epsilon = 0.100000; loss = 46862691025.537338	


epoch = 64; rougue = 0.513889; epsilon = 0.100000; loss = 53430133277.558197	


epoch = 65; rougue = 0.704365; epsilon = 0.100000; loss = 33891574384.999516	


epoch = 66; rougue = 0.704365; epsilon = 0.100000; loss = 44032237052.611679	


epoch = 67; rougue = 0.704365; epsilon = 0.100000; loss = 42531155206.621048	


epoch = 68; rougue = 0.577381; epsilon = 0.100000; loss = 68995573617.905457	


epoch = 69; rougue = 0.672619; epsilon = 0.100000; loss = 52380274413.951668	


epoch = 70; rougue = 0.614127; epsilon = 0.100000; loss = 71412757138.493805	


epoch = 71; rougue = 0.672619; epsilon = 0.100000; loss = 91733927987.008759	


epoch = 72; rougue = 0.672619; epsilon = 0.100000; loss = 81342742792.708588	


epoch = 73; rougue = 0.672619; epsilon = 0.100000; loss = 89053337968.536438	


epoch = 74; rougue = 0.699921; epsilon = 0.100000; loss = 105396579591.721405	


epoch = 75; rougue = 0.622063; epsilon = 0.100000; loss = 133288587375.486053	


epoch = 76; rougue = 0.622063; epsilon = 0.100000; loss = 112047589848.883041	


epoch = 77; rougue = 0.622063; epsilon = 0.100000; loss = 107184809690.762314	


epoch = 78; rougue = 0.561429; epsilon = 0.100000; loss = 123102613418.917847	


epoch = 79; rougue = 0.272222; epsilon = 0.100000; loss = 131691827657.567444	


epoch = 80; rougue = 0.622063; epsilon = 0.100000; loss = 165839603661.298218	


epoch = 81; rougue = 0.272222; epsilon = 0.100000; loss = 154640568939.593323	


epoch = 82; rougue = 0.340317; epsilon = 0.100000; loss = 161801594127.395538	


epoch = 83; rougue = 0.272222; epsilon = 0.060000; loss = 157942462442.290344	


epoch = 84; rougue = 0.140000; epsilon = 0.020000; loss = 166733062947.333832	


epoch = 85; rougue = 0.000000; epsilon = 0.000000; loss = 175404532495.822449	


epoch = 86; rougue = 0.000000; epsilon = 0.000000; loss = 178895536854.994080	


epoch = 87; rougue = 0.000000; epsilon = 0.000000; loss = 197419860211.278076	


epoch = 88; rougue = 0.000000; epsilon = 0.000000; loss = 237447216747.040466	


epoch = 89; rougue = 0.000000; epsilon = 0.000000; loss = 232863051301.587952	


epoch = 90; rougue = 0.000000; epsilon = 0.000000; loss = 246588946278.613586	


epoch = 91; rougue = 0.000000; epsilon = 0.000000; loss = 251048560602.916870	


epoch = 92; rougue = 0.000000; epsilon = 0.000000; loss = 313214806765.825928	


epoch = 93; rougue = 0.000000; epsilon = 0.000000; loss = 333952020797.333496	


epoch = 94; rougue = 0.000000; epsilon = 0.000000; loss = 334778464221.148560	


epoch = 95; rougue = 0.000000; epsilon = 0.000000; loss = 381606558324.430786	


epoch = 96; rougue = 0.000000; epsilon = 0.000000; loss = 345872843108.208130	


epoch = 97; rougue = 0.000000; epsilon = 0.000000; loss = 413046796190.239807	


epoch = 98; rougue = 0.000000; epsilon = 0.000000; loss = 431917391440.981567	


epoch = 99; rougue = 0.000000; epsilon = 0.000000; loss = 425762789210.167480	


epoch = 100; rougue = 0.000000; epsilon = 0.000000; loss = 488463628267.096680	


In [15]:
Plot = require 'itorch.Plot'

loss = torch.Tensor(lossfull)
rougue = torch.Tensor(rouguef1)
indices = torch.linspace(1, loss:size(1), loss:size(1)):long() 
plot = Plot():line(indices, loss, 'red', 'hi'):title('Plot of loss'):draw()
plot = Plot():line(indices, rougue, 'blue', 'hi'):title('Plot of Rougue-F1'):draw()