In [236]:
require 'torch';
require 'nn';
require 'optim';

### Prepare data

In [237]:
-- Load the MATLAB file through matio; the X and y fields of the result are
-- what the dataset is built from.
local matio = require 'matio'
data = matio.load('ex4data1.mat')

In [238]:
-- Bundle the feature matrix and the label vector into one dataset table.
-- data.y is indexed with { {}, 1 } to take column 1 of every row.
trainset = {
    data = data.X,
    label = data.y[{ {}, 1 }],
}

In [239]:
trainset

In [240]:
-- Make trainset indexable like a dataset: trainset[i] yields the pair
-- {features, label} for example i. __index is only consulted for keys that
-- are missing from the table itself, so data, label and size are unaffected.
setmetatable(trainset,
    {__index = function(t, i)
                return {t.data[i], t.label[i]}
        end}
);

--- Size of the dataset along one dimension of the data tensor.
-- @param dim optional dimension (1 = examples, 2 = input columns); defaults
--            to 1, so a plain trainset:size() still returns the example count
-- @return the size of self.data along dim
function trainset:size(dim)
    return self.data:size(dim or 1)
end

{
  data : DoubleTensor - size: 5000x400
  label : DoubleTensor - size: 5000
}


In [241]:
-- Standardize every input column in place: subtract its mean and divide by
-- its standard deviation. The per-column statistics are kept in the global
-- tables mean and stdv.
mean = {}
stdv = {}
-- The column count is read from the tensor instead of being hard-coded.
for i = 1, trainset.data:size(2) do
    -- Slice the column once; the in-place add/div below act on trainset.data
    -- through this slice.
    local column = trainset.data[{ {}, {i} }]
    mean[i] = column:mean()
    stdv[i] = column:std()
    --print(i .. 'th mean: ' .. mean[i])
    --print(i .. 'th std dev: ' .. stdv[i])
    column:add(-mean[i])
    -- A column with zero deviation is only centered, never divided.
    if stdv[i] ~= 0 then
        column:div(stdv[i])
    end
end

### define model

In [242]:
-- Read both dimensions straight from the data tensor so that the column
-- count really is the size of the second dimension.
n_train_data = trainset.data:size(1) -- number of training data
n_inputs = trainset.data:size(2)     -- number of cols = number of dims of input
n_outputs = 10   -- highest label = # of classes

In [243]:
-- Classifier: 400 inputs -> 25 sigmoid units -> 10 sigmoid units, finished
-- with LogSoftMax.
-- NOTE(review): the second Sigmoid squashes the scores into (0, 1) before
-- LogSoftMax is applied; confirm that this is intended.
net = nn.Sequential()
    :add(nn.Linear(400, 25))
    :add(nn.Sigmoid())
    :add(nn.Linear(25, 10))
    :add(nn.Sigmoid())
    :add(nn.LogSoftMax())

### define loss function

In [244]:
-- Run-wide settings, gathered in one table.
opt = {
    -- Name of the optimizer. NOTE(review): no visible code reads this field.
    optimization = 'sgd',
    -- Examples per minibatch; divides the training-set size when the number
    -- of iterations is computed.
    batch_size = 5000,
    -- NOTE(review): train_size and test_size are not read by any visible
    -- code; confirm whether they are still needed.
    train_size = 5000,
    test_size = 0,
    -- **Approximate** number of passes through the training data; the
    -- iteration count is calculated from this.
    epochs = 1000,
}


In [245]:
-- Negative log-likelihood loss, fed with the network output and the class labels.
criterion = nn.ClassNLLCriterion()

In [246]:
-- Flattened parameter and gradient tensors of the network; feval and the
-- optimizer read and update the model through these two tensors.
parameters, gradParameters = net:getParameters()

In [247]:
-- NOTE(review): counter is not read by any code visible here.
counter = 0

--- Closure handed to the optimizer: evaluates the loss and its gradient over
-- the whole training set.
-- @param x parameter vector to evaluate at; it is copied into `parameters`
--          when it is not that same tensor
-- @return the loss value, and gradParameters after the backward pass
feval = function(x)
  if x ~= parameters then
    parameters:copy(x)
  end

  -- Start from zeroed gradients so this evaluation does not add to the
  -- values left by the previous one.
  gradParameters:zero()

  -- Full batch: every row of the data and every label.
  local batch_inputs = trainset.data[{{}, {}}]
  local batch_targets = trainset.label[{{}}]

  -- These intermediates are local so that each optimizer step does not
  -- overwrite global variables.
  local batch_outputs = net:forward(batch_inputs)
  local batch_loss = criterion:forward(batch_outputs, batch_targets)
  local dloss_doutput = criterion:backward(batch_outputs, batch_targets)
  net:backward(batch_inputs, dloss_doutput)

  return batch_loss, gradParameters
end

### train

In [248]:
-- Optimizer function used by the training loop, followed by the state table
-- that is passed to it on every call.
optimMethod = optim.sgd
optimState = {
    learningRate = 5,
    learningRateDecay = 1e-2,
    weightDecay = 0,
    momentum = 0,
}

In [249]:
-- Run the optimizer for the requested number of epochs and record the loss
-- reported by every step.
losses = {}          -- one entry per optimizer step
epochs = opt.epochs  -- full passes over the training data
-- whole number of minibatches to process
iterations = epochs * math.ceil(n_train_data / opt.batch_size)

for step = 1, iterations do
  -- The optimizer's second return value is a table holding the loss at index 1.
  local _, fs = optimMethod(feval, parameters, optimState)
  local loss = fs[1]

  -- Report on steps 1, 11, 21, ... rather than on every step.
  if (step - 1) % 10 == 0 then
      print(string.format("minibatches processed: %6s, loss = %6.6f", step, loss))
  end
  table.insert(losses, loss)
end


minibatches processed:      1, loss = 2.302787	


minibatches processed:     11, loss = 2.008609	


minibatches processed:     21, loss = 1.847087	


minibatches processed:     31, loss = 1.771381	


minibatches processed:     41, loss = 1.727683	


minibatches processed:     51, loss = 1.699008	


minibatches processed:     61, loss = 1.678630	


minibatches processed:     71, loss = 1.663267	


minibatches processed:     81, loss = 1.651158	


minibatches processed:     91, loss = 1.641290	


minibatches processed:    101, loss = 1.633047	


minibatches processed:    111, loss = 1.626030	


minibatches processed:    121, loss = 1.619969	


minibatches processed:    131, loss = 1.614673	


minibatches processed:    141, loss = 1.610000	


minibatches processed:    151, loss = 1.605841	


minibatches processed:    161, loss = 1.602110	


minibatches processed:    171, loss = 1.598740	


minibatches processed:    181, loss = 1.595678	


minibatches processed:    191, loss = 1.592878	


minibatches processed:    201, loss = 1.590306	


minibatches processed:    211, loss = 1.587931	


minibatches processed:    221, loss = 1.585730	


minibatches processed:    231, loss = 1.583682	


minibatches processed:    241, loss = 1.581770	


minibatches processed:    251, loss = 1.579980	


minibatches processed:    261, loss = 1.578299	


minibatches processed:    271, loss = 1.576717	


minibatches processed:    281, loss = 1.575224	


minibatches processed:    291, loss = 1.573812	


minibatches processed:    301, loss = 1.572475	


minibatches processed:    311, loss = 1.571207	


minibatches processed:    321, loss = 1.570001	


minibatches processed:    331, loss = 1.568853	


minibatches processed:    341, loss = 1.567758	


minibatches processed:    351, loss = 1.566714	


minibatches processed:    361, loss = 1.565716	


minibatches processed:    371, loss = 1.564761	


minibatches processed:    381, loss = 1.563846	


minibatches processed:    391, loss = 1.562969	


minibatches processed:    401, loss = 1.562128	


minibatches processed:    411, loss = 1.561319	


minibatches processed:    421, loss = 1.560542	


minibatches processed:    431, loss = 1.559794	


minibatches processed:    441, loss = 1.559074	


minibatches processed:    451, loss = 1.558379	


minibatches processed:    461, loss = 1.557709	


minibatches processed:    471, loss = 1.557062	


minibatches processed:    481, loss = 1.556437	


minibatches processed:    491, loss = 1.555833	


minibatches processed:    501, loss = 1.555248	


minibatches processed:    511, loss = 1.554682	


minibatches processed:    521, loss = 1.554134	


minibatches processed:    531, loss = 1.553602	


minibatches processed:    541, loss = 1.553086	


minibatches processed:    551, loss = 1.552586	


minibatches processed:    561, loss = 1.552100	


minibatches processed:    571, loss = 1.551627	


minibatches processed:    581, loss = 1.551168	


minibatches processed:    591, loss = 1.550722	


minibatches processed:    601, loss = 1.550287	


minibatches processed:    611, loss = 1.549864	


minibatches processed:    621, loss = 1.549452	


minibatches processed:    631, loss = 1.549051	


minibatches processed:    641, loss = 1.548660	


minibatches processed:    651, loss = 1.548279	


minibatches processed:    661, loss = 1.547907	


minibatches processed:    671, loss = 1.547544	


minibatches processed:    681, loss = 1.547189	


minibatches processed:    691, loss = 1.546843	


minibatches processed:    701, loss = 1.546505	


minibatches processed:    711, loss = 1.546175	


minibatches processed:    721, loss = 1.545852	


minibatches processed:    731, loss = 1.545536	


minibatches processed:    741, loss = 1.545227	


minibatches processed:    751, loss = 1.544925	


minibatches processed:    761, loss = 1.544629	


minibatches processed:    771, loss = 1.544340	


minibatches processed:    781, loss = 1.544056	


minibatches processed:    791, loss = 1.543778	


minibatches processed:    801, loss = 1.543506	


minibatches processed:    811, loss = 1.543239	


minibatches processed:    821, loss = 1.542977	


minibatches processed:    831, loss = 1.542721	


minibatches processed:    841, loss = 1.542469	


minibatches processed:    851, loss = 1.542222	


minibatches processed:    861, loss = 1.541980	


minibatches processed:    871, loss = 1.541742	


minibatches processed:    881, loss = 1.541508	


minibatches processed:    891, loss = 1.541279	


minibatches processed:    901, loss = 1.541054	


minibatches processed:    911, loss = 1.540832	


minibatches processed:    921, loss = 1.540615	


minibatches processed:    931, loss = 1.540401	


minibatches processed:    941, loss = 1.540191	


minibatches processed:    951, loss = 1.539984	


minibatches processed:    961, loss = 1.539781	


minibatches processed:    971, loss = 1.539581	


minibatches processed:    981, loss = 1.539384	


minibatches processed:    991, loss = 1.539190	


### test (accuracy on the training data)

In [250]:
-- Count how many examples the trained network labels correctly.
-- NOTE: the examples scored here come from trainset, so the printed figure
-- is accuracy on the training data.
correction = 0
for i = 1, trainset:size() do
    local answer = trainset.label[i]
    local prediction = net:forward(trainset.data[i])
    -- The predicted class is the index of the largest output; a single max
    -- replaces sorting every score just to read the first index.
    local _, best = torch.max(prediction, 1)
    if answer == best[1] then
        correction = correction + 1
    end
end
print(correction, 100*correction/trainset:size() .. '%')

4746	94.92%	
