In [1]:
require 'dpnn'
require "cunn"
require "nngraph"
require 'hdf5'
require 'xlua'
require 'optim'
require 'itorch.Plot'

In [2]:
local pre_file = hdf5.open('../dataminingcapstone/ntm40.hdf5', 'r')
pre_w1 = pre_file:read('/w1'):all():transpose(1,2):type('torch.DoubleTensor')
examples = pre_file:read('/examples'):all():add(1):transpose(1,2):type('torch.DoubleTensor')
le_in = pre_file:read('/le'):all():transpose(1,2):type('torch.DoubleTensor')

In [3]:
local pre_file = hdf5.open('../dataminingcapstone/W1_pretrain_40.hdf5', 'r')
pre_w2 = pre_file:read('/layer_2/param_0'):all():type('torch.DoubleTensor')

In [4]:
-- the gram stack
gram_stack = nn.Sequential()
le_start = nn.Dictionary(le_in:size()[1], le_in:size()[2])
le_start.weight = le_in
-- mark this not-trainable
function le_start:updateParameters(learningRate)
end
gram_stack:add(le_start)
gram_stack:add(nn.Reshape(le_in:size()[2], true))

local lt1 = nn.Linear(pre_w2:size()[1], pre_w2:size()[2])
lt1.weight = pre_w2:transpose(1,2)
lt1.bias = torch.zeros(pre_w2:size()[2])
-- No bias in the lt module
function lt1:accUpdateGradParameters(input, gradOutput, lr)
   local gradWeight = self.gradWeight
   self.gradWeight = self.weight
   self:accGradParameters(input, gradOutput, -lr)
   self.gradWeight = gradWeight
end

function lt1:accGradParameters(input, gradOutput, scale)
   scale = scale or 1
   if input:dim() == 1 then
      self.gradWeight:addr(scale, gradOutput, input)
   elseif input:dim() == 2 then
      self.gradWeight:addmm(scale, gradOutput:t(), input)
   end
end

gram_stack:add(lt1)
gram_stack:add(nn.Sigmoid())

-- the document stack
doc_stack = nn.Sequential()
local ld_1 = nn.Dictionary(pre_w1:size()[1], pre_w1:size()[2])
ld_1.weight = pre_w1
doc_stack:add(ld_1)
doc_stack:add(nn.SoftMax())
doc_stack:add(nn.Reshape(2,pre_w1:size()[2],true))
doc_stack:add(nn.SplitTable(2,3))
-- the scoring stack


din = nn.Identity()()
ld_pos, ld_neg = doc_stack(din):split(2)

ld_pos:annotate({name = 'ld_pos'})
ld_neg:annotate({name = 'ld_neg'})

g = nn.Identity()()
lt = gram_stack({g}) 

ls_pos = nn.DotProduct()({lt, ld_pos})
ls_pos:annotate({name = 'ls_pos'})
ls_neg = nn.DotProduct()({lt, ld_neg})
ls_neg:annotate({name = 'ls_neg'})

ntm = nn.gModule({g, din}, {ls_pos, ls_neg})

loss_out = nn.L1HingeEmbeddingCriterion(0.5):cuda()

In [5]:
ntm:cuda();

### Convert the input data to a dataset that can be used by sgd

In [22]:
input_meta = {
    __index = function( self, k ) 
--        local i = torch.LongTensor{k}
        return {{self.g[k], self.d[k]}, -1} 
    end
}

input_data = {
    g = examples:index(2, torch.LongTensor{2}), 
    d = examples:index(2, torch.LongTensor{1,3}),
    size = function (self) 
        return self.g:size()[1]
    end,
    shuffle = function (self, len)
        self.shuffledIndices = torch.randperm(self.g:size()[1], 'torch.LongTensor')
        self.batchIndex = 1
        self.g_batch = torch.DoubleTensor(len, self.g:size()[2])
        self.d_batch = torch.DoubleTensor(len, self.d:size()[2])
        
        self.t_batch = torch.DoubleTensor(len, 1):fill(-1)
        return {self.g_batch, self.d_batch}, self.t_batch
    end,
    next_batch = function (self, len) 
        local maxidx = math.min(self.batchIndex + len - 1, self:size())
        local batchindices = self.shuffledIndices[{{self.batchIndex, maxidx}}]
--        return {self.g:index(1, batchindices, 
--                self.d:index(1, batchindices))}
        self.g_batch:index(self.g, 1, batchindices)
        self.d_batch:index(self.d, 1, batchindices)
        self.batchIndex = self.batchIndex + len
        return {self.g_batch, self.d_batch}, self.t_batch
                --{self.g[{{self.batchIndex - len, self.batchIndex}}],
               -- self.d[{{self.batchIndex - len, self.batchIndex}}]}
    end
}
setmetatable(input_data, input_meta)

### Train

In [29]:
-- ignore this - its only used to test the loss function

trainer = nn.StochasticGradient(ntm, loss_out)
trainer.maxIteration = 1
trainer.learningRate = 0.01
n_examples = input_data:size()
i = 0
window = itorch.html("<p>Progress:</p>")
startTime = os.time()
trainer.hookExample = function(self, example) 
    if i == 0 then
        startTime = os.time()
    end
    
    i = i + 1
    
    if (i - 1) % 10000 == 0 then
--        xlua.progress(i, n_examples)
        local percCompl = i / n_examples
        local eta = ((os.time() - startTime) / percCompl) - (os.time() - startTime)
        itorch.html(string.format('<p>Progress:</p><p>%d / %d [' .. 
                string.rep("=", percCompl * 50) .. '>' .. 
                string.rep(".", 49 - (percCompl * 50)) .. '] - ETA: %d seconds - Loss: Unknown</p>', 
                    i, n_examples, eta),
            window)
    end
end
trainer.hookIteration = function(self, iteration, currentError) 
        itorch.html(string.format('<p>Iteration %d completed in %d seconds with average loss %.6f.</p>', 
                iteration, os.time() - startTime, currentError),
            window)
        window = itorch.html('<p>Progress:</p>')
end
trainer:train(input_data)

In [15]:
-- function for useful plotting interface
smart_trainer = function(model, criterion,
        input_data, n_classes,
        batch_size, cv_split, class_train, max_epochs, 
        optimizer, optimizer_params, 
        l1, l2, chart)
 
    local training_loss_history = {}
    local validation_loss_history = {}

    local function feval(x_in)
        
        local prediction = model:forward(inputs, targets)
        local losses = criterion:forward(prediction, targets)
        gradients:zero()
        local df = criterion:backward(prediction, targets)
        model:backward(inputs, df)
        
        -- regularize
        local norm,sign = torch.norm, torch.sign
        
        local loss = 0
        if type(losses) == 'number' then
            loss = losses
        else
            for i=1,#losses do loss = loss + losses[i] end
        end
        
        if l1 ~= 0 then
            loss = loss + l1 * norm(x, 1)
            gradients:add(sign(x):mul(l1))
        end
        if l2 ~= 0 then
            loss = loss + l2 * norm(x,2)^2/2       
            gradients:add( x:clone():mul(l2))
        end
        
        -- accuracy matrix
        if class_train then confusion:batchAdd(prediction, targets) end
        
--        gradients:div(n_batches)
        
        return loss, gradients
    end

    local epoch = 0
    local max_idx = input_data:size() * (1 - cv_split)
    local old_text = 'Progress:<br>'
    local window = itorch.html(old_text)
    
    while true do
        model:training()
    
        if class_train then
            train_confusion = optim.ConfusionMatrix(n_classes)
            val_confusion = optim.ConfusionMatrix(n_classes)
        end
        local startTime = os.time()
        epoch = epoch + 1
        local currentError = 0
        
        if class_train then 
            train_confusion:zero()
            val_confusion:zero()
        end

        inputs,targets = input_data:shuffle(batch_size)
        
        x, gradients = model:getParameters()
        
        for batch_index=1, max_idx, batch_size do
            collectgarbage()
            inputs, targets = input_data:next_batch(math.min(batch_size, max_idx - batch_index - 1))

            _, f_table = optimizer(feval, x, optimizer_params)

            local thisLoss = 0
            for i=1,#f_table do  thisLoss = thisLoss + f_table[i] end
            
            currentError = ((currentError * batch_index) + (thisLoss * batch_size)) / (batch_index + batch_size)

            local percCompl = math.floor(50 * batch_index / max_idx)
            local eta = ((os.time() - startTime) / (percCompl / 50)) - (os.time() - startTime)
            itorch.html(old_text .. 
                string.format('[%d / %d] [' .. 
                    string.rep("=", percCompl - 1) .. '>' .. 
                    string.rep(".", 49 - percCompl ) .. '] ETA: %d seconds - Batch Loss: %.6f - Avg. Epoch Loss: %.6f<br>', 
                        batch_index, input_data:size(), 
                        eta, 
                        thisLoss * batch_size, 
                        currentError * batch_size
                ),
                window) 
        end
        table.insert(training_loss_history, currentError)
        
        -- validation
        model:evaluate()
        local validation_loss = 0
        for batch_index=max_idx,input_data:size(),batch_size do 
            collectgarbage()
            inputs, targets = input_data:next_batch(math.min(batch_size, 
                    input_data:size() - batch_index - 1))
            local prediction = model:forward(inputs, targets)
            local losses = criterion:forward(prediction, targets)
            
            if type(losses) == 'number' then
                validation_loss = validation_loss + losses
            else
                for i=1,#losses do validation_loss = validation_loss + losses[i] end
            end
        end
        table.insert(validation_loss_history, validation_loss)
        -- report update
        old_text = old_text .. string.format('Epoch %d completed in %d seconds with training avg loss %.8f - ' ..
                                                'Val loss %.8f.<br>', 
                    epoch, os.time() - startTime, currentError, validation_loss)
        itorch.html(old_text,
                window)
        if chart then 
            plot = Plot()
            local x_vals = torch.linspace(1,epoch + 10)
            plot:line(x_vals, training_loss_history, 
                'red', 'Training Loss')
            plot:line(x_vals, validation_loss_history, 
                'blue', 'Validation Loss')
            plot:xaxis('Training vs Validation Loss')
            plot:legend(true)
            plot:gfx()
        end

        if max_epochs and epoch >= max_epochs then
            break
        end
    end
end

In [23]:
smart_trainer(ntm, loss_out,
        input_data, nil,
        10000, 0.2, false, 500, 
        optim.sgd, {learningRate = 0.01}, 
        0, 0.001, false)

[string "-- function for useful plotting interface..."]:121: attempt to call global 'Plot' (a nil value)
stack traceback:
	[string "-- function for useful plotting interface..."]:121: in function 'f'
	[string "local f = function() return smart_trainer(ntm..."]:5: in main chunk
	[C]: in function 'xpcall'
	/usr/local/share/lua/5.1/itorch/main.lua:179: in function </usr/local/share/lua/5.1/itorch/main.lua:143>
	/usr/local/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	/usr/local/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	/usr/local/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/usr/local/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/usr/local/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x010ee74b70: 

In [None]:
collectgarbage()

In [None]:
input_data:shuffle(10)

In [None]:
this_batch, targets = input_data:next_batch(10)

In [None]:
this_batch

In [None]:
prediction = ntm:forward(this_batch)

In [None]:
prediction

In [None]:
prediction = ntm:forward({torch.LongTensor{{1},{2}}, torch.LongTensor{{1,2},{3,4}}})

In [None]:
torch.LongTensor{{1},{2}}:dim()

In [None]:
test = ld_1:forward(torch.LongTensor{{1,2},{2,3}, {4,6}})

In [None]:
test2 = nn.Reshape(2,40,true):forward(test:type('torch.DoubleTensor'))

In [None]:
nn.SplitTable(2,3):forward(test2)

In [None]:
this_g = le_start:forward(this_batch[1])
this_lef = nn.Reshape(le_in:size()[2], true):forward(this_g:type('torch.DoubleTensor'))
this_lt1 = lt1:forward(this_lef)
this_lt = nn.Sigmoid():forward(this_lt1)

In [None]:
this_ld1 = ld_1:forward(this_batch[2]) 
this_ldb = nn.SoftMax():forward(this_ld1)
this_ldf = nn.Reshape(2,pre_w1:size()[2],true):forward(this_ldb)
this_lds = nn.SplitTable(2,3):forward(this_ldf)

In [None]:
nn.MM(false,true):forward({torch.DoubleTensor{{1,2,3},{7,8,9}},torch.DoubleTensor{{3,4,5}, {6,7,8}}})

In [None]:
input1 = torch.DoubleTensor{{1,2,3},{7,8,9}}
input2 = torch.DoubleTensor{{3,4,5}, {6,7,8}}
buffer = input1:new()

In [None]:
buffer:cmul(input1, input2):sum(1)

In [None]:
buffer = this_lt:new()
bob = buffer:cmul(this_lt, this_lds[1]):sum(2) 
print(bob)

In [None]:
test_lt = gram_stack:forward(this_batch[1])

In [None]:
test_ld = doc_stack:forward(this_batch[2])

In [None]:
nn.DotProduct():forward({test_lt, test_ld[1]})

In [None]:
g = nn.Identity()()
din = nn.Identity()()
-- the gram stack
le_start = nn.Dictionary(le_in:size()[1], le_in:size()[2])
le_start.weight = le_in
le = le_start({g}) 
-- mark this not-trainable
function le:updateParameters(learningRate)
end
le_flat = nn.Reshape(le_in:size()[2], true)(le)

lt1 = nn.Linear(pre_w2:size()[1], pre_w2:size()[2])
lt1.weight = pre_w2:transpose(1,2)
lt1.bias = torch.zeros(pre_w2:size()[2])
-- No bias in the lt module
function lt1:accUpdateGradParameters(input, gradOutput, lr)
   local gradWeight = self.gradWeight
   self.gradWeight = self.weight
   self:accGradParameters(input, gradOutput, -lr)
   self.gradWeight = gradWeight
end

function lt1:accGradParameters(input, gradOutput, scale)
   scale = scale or 1
   if input:dim() == 1 then
      self.gradWeight:addr(scale, gradOutput, input)
   elseif input:dim() == 2 then
      self.gradWeight:addmm(scale, gradOutput:t(), input)
   end
end
lt2 = lt1({le_flat})
lt = nn.Sigmoid()({lt2})

-- the document stack
ld_1 = nn.Dictionary(pre_w1:size()[1], pre_w1:size()[2])
ld_1.weight = pre_w1
ld_2 = ld_1({din})
ld_both = nn.SoftMax()(ld_2)

-- the scoring stack
ld_flat = nn.Reshape(2,pre_w1:size()[2],true)({ld_both})
ld_split = nn.SplitTable(2,3)(ld_flat)
ld_pos, ld_neg = ld_split:split(2)
ls_pos = nn.DotProduct()({lt, ld_pos})
ls_neg = nn.DotProduct()({lt, ld_neg})

ntm = nn.gModule({g, din}, {ls_pos, ls_neg})

loss_out = nn.L1HingeEmbeddingCriterion(0.5):cuda()