In [1]:
require 'nn'
require 'optim'
require 'mnist'
require 'dataset-mnist'

In [2]:
torch.manualSeed(0)
torch.setnumthreads(4)

In [3]:
classes = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }
geometry = { 32, 32 }

net = nn.Sequential()

net:add(nn.SpatialConvolution(1, 6, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))

net:add(nn.SpatialConvolution(6, 16, 5, 5))
net:add(nn.ReLU())
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))

net:add(nn.View(16*5*5))
net:add(nn.Linear(16*5*5, 120))
net:add(nn.ReLU())
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())
net:add(nn.Linear(84, #classes))
net:add(nn.LogSoftMax())

In [4]:
parameters, gradParameters = net:getParameters()

In [5]:
criterion = nn.ClassNLLCriterion()

In [6]:
n_training_patches = 60000
n_testing_patches = 10000

train_data = mnist.loadTrainSet(n_training_patches, geometry)
train_data:normalizeGlobal(mean, std)

test_data = mnist.loadTestSet(n_testing_patches, geometry)
test_data:normalizeGlobal(mean, std)

<mnist> done	


<mnist> done	


In [7]:
confusion = optim.ConfusionMatrix(classes)
epoch_limit = 5

In [8]:
batch_size = 10
for epoch = 1, epoch_limit do
    print("Epoch: " .. epoch)
    for t = 1, train_data:size(), batch_size do
        local inputs = torch.Tensor(batch_size, 1, geometry[1], geometry[2])
        local targets = torch.Tensor(batch_size)
        local k = 1

        for i = t, math.min(t + batch_size - 1, train_data:size()) do
            local sample = train_data[i]
            local input = sample[1]:clone()
            local _, target = sample[2]:clone():max(1)

            target = target:squeeze()
            inputs[k] = input
            targets[k] = target
            k = k + 1
        end

        local feval = function(x)
            collectgarbage()

            if x ~= parameters then
                parameters:copy(x)
            end
            gradParameters:zero()

            local outputs = net:forward(inputs)
            local f = criterion:forward(outputs, targets)
            local df_do = criterion:backward(outputs, targets)
            net:backward(inputs, df_do)

            for i = 1, batch_size do
                confusion:add(outputs[i], targets[i])
            end

            return f, gradParameters
        end

        sgd_state = sgd_state or {
            learning_rate = 0.06,
            momentum = 0,
            learning_rate_decay = 5e-7
        }
        optim.sgd(feval, parameters, sgd_state)

    end
    
    print(confusion)
    confusion:zero()
end



Epoch: 1	


ConfusionMatrix:
[[    4794       4      35      15       7      56      42     701      76     193]   80.939% 	[class: 1]
 [       8    5287     312      34       4      12      14     976      85      10]   78.419% 	[class: 2]
 [     389      42    3799      88      91      18     181    1190      76      84]   63.763% 	[class: 3]
 [     325      76     216    3571      10     138      18    1575     106      96]   58.245% 	[class: 4]
 [     343      52      68       5    3343       3     102    1528      31     367]   57.224% 	[class: 5]
 [     931      47     207     229      61    2352      88    1115     275     116]   43.387% 	[class: 6]
 [     898     156     451       1      47      66    3400     837      37      25]   57.452% 	[class: 7]
 [      66      81      97      24      53      12       5    5707      26     194]   91.093% 	[class: 8]
 [     435     202     474     283      55     176      67    1416    2652      91]   45.326% 	[class: 9]
 [     346      55      36   

ConfusionMatrix:
[[    5688       1      26       7       7      42      61      11      61      19]   96.032% 	[class: 1]
 [       1    6539      44      31       5      24      10       6      65      17]   96.989% 	[class: 2]
 [      35      36    5411      94      77      24      68      95      92      26]   90.819% 	[class: 3]
 [      18      41     113    5556       4     161       9      79      92      58]   90.621% 	[class: 4]
 [       5      18      34       4    5373       6      91      14      30     267]   91.972% 	[class: 5]
 [      40      38      31     141      22    4956      68       8      69      48]   91.422% 	[class: 6]
 [      44      36      45       0      70      65    5622       0      35       1]   94.998% 	[class: 7]
 [      27      36      83      39      47      12       0    5782      17     222]   92.291% 	[class: 8]
 [      25     129      71     131      26      80      52      28    5197     112]   88.822% 	[class: 9]
 [      41      33      20   

ConfusionMatrix:
[[    5772       1      16       3       8      25      35      12      37      14]   97.451% 	[class: 1]
 [       1    6606      38      18       8       4       3       8      43      13]   97.983% 	[class: 2]
 [      21      34    5616      67      36      13      27      70      58      16]   94.260% 	[class: 3]
 [      11      19      96    5740       3     103       7      50      59      43]   93.623% 	[class: 4]
 [       6      18      18       1    5547       3      47       9      19     174]   94.950% 	[class: 5]
 [      27      20      16      74       8    5156      36       8      46      30]   95.112% 	[class: 6]
 [      38      22      15       0      41      39    5735       0      27       1]   96.908% 	[class: 7]
 [      15      20      75      30      30      13       1    5943      11     127]   94.860% 	[class: 8]
 [      18      91      43      62      22      49      46      23    5421      76]   92.651% 	[class: 9]
 [      29      23       5   

ConfusionMatrix:
[[    5809       1      13       0       6      16      27       8      30      13]   98.075% 	[class: 1]
 [       1    6624      38      13       8       0       5       8      34      11]   98.250% 	[class: 2]
 [      18      26    5713      45      15       9      14      56      50      12]   95.888% 	[class: 3]
 [       6      10      72    5835       2      78       4      39      48      37]   95.172% 	[class: 4]
 [       5      15      14       2    5620       2      33       8      14     129]   96.200% 	[class: 5]
 [      21      12       9      56       8    5210      28       7      39      31]   96.108% 	[class: 6]
 [      29      21       9       1      29      36    5772       0      20       1]   97.533% 	[class: 7]
 [      10      18      56      22      19       7       1    6032      10      90]   96.281% 	[class: 8]
 [      18      62      38      55      16      38      36      15    5516      57]   94.274% 	[class: 9]
 [      21      17       3   

ConfusionMatrix:
[[    5835       1       9       0       4      12      22       6      21      13]   98.514% 	[class: 1]
 [       2    6638      32      13       7       0       5       9      28       8]   98.457% 	[class: 2]
 [      15      26    5758      35      12       4      10      49      42       7]   96.643% 	[class: 3]
 [       7       7      57    5879       2      69       3      33      42      32]   95.890% 	[class: 4]
 [       7      11       9       1    5675       1      21       8      11      98]   97.141% 	[class: 5]
 [      16       9       5      50       5    5235      26       7      41      27]   96.569% 	[class: 6]
 [      20      16       5       1      20      30    5805       0      20       1]   98.091% 	[class: 7]
 [       7      17      50      14      16       6       1    6074      12      68]   96.951% 	[class: 8]
 [      15      54      31      50      17      31      28      14    5563      48]   95.078% 	[class: 9]
 [      20      15       2   

In [9]:
confusion:zero()

for t = 1, test_data:size(), 10 do
    local inputs = torch.Tensor(batch_size, 1, geometry[1], geometry[2])
    local targets = torch.Tensor(batch_size)
    local k = 1
    
    for i = t, math.min(t + batch_size - 1, test_data:size()) do
        local sample = test_data[i]
        local input = sample[1]:clone()
        local _, target = sample[2]:clone():max(1)
        target = target:squeeze()
        inputs[k] = input
        targets[k] = target
        k = k + 1
    end
    
    local preds = net:forward(inputs)
    
    for i = 1, batch_size do
        confusion:add(preds[i], targets[i])
    end
end

print(confusion)

ConfusionMatrix:
[[     971       0       1       0       1       3       0       1       3       0]   99.082% 	[class: 1]
 [       0    1127       3       1       0       0       1       0       3       0]   99.295% 	[class: 2]
 [       5       5     995      11       1       1       0       5       9       0]   96.415% 	[class: 3]
 [       1       1       2     982       0       7       0       7       4       6]   97.228% 	[class: 4]
 [       1       0       3       0     959       0       1       1       2      15]   97.658% 	[class: 5]
 [       2       3       0       6       0     874       2       1       1       3]   97.982% 	[class: 6]
 [      16       4       1       0       5      25     904       0       3       0]   94.363% 	[class: 7]
 [       1       6      15       2       0       0       0     981       2      21]   95.428% 	[class: 8]
 [       7       4       2      13       6      10       0      10     910      12]   93.429% 	[class: 9]
 [       4       8       0   


  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9679
}
