## Torch tutorial

Simple tutorial to train your first neural network with torch.

As a toy dataset, we will use the common MNIST dataset. More information about MNIST can be found at http://yann.lecun.com/exdb/mnist/.

First, we import some dependencies.

In [93]:
require 'torch'
require 'nn'
require 'optim'
mnist = require 'mnist'

We load the data.

In [94]:
-- Fetch both MNIST splits in one go. Each split exposes `.data` (the images)
-- and `.label` (the digit labels), which the cells below read and modify.
-- They are kept as globals so that later notebook cells can see them.
train_data, test_data = mnist.traindataset(), mnist.testdataset()

We create the neural network architecture. It consists of a simple feedforward neural network with dropout.

In [95]:
--- Build the feedforward classifier used in this tutorial.
-- The network flattens each 28x28 image, applies three fully connected hidden
-- layers (1024, 1024 and 512 units), each followed by dropout (p = 0.5) and a
-- ReLU, and ends with a linear layer producing 10 outputs (one per class).
-- The network is built in a local variable so that calling this function does
-- not overwrite a global `model` as a side effect.
-- @return the assembled nn.Sequential container
function create_model()
    local net = nn.Sequential()
    net:add(nn.Reshape(28*28)) -- flatten each 28x28 grayscale image into a 784-element vector
    net:add(nn.Linear(28*28, 1024))
    net:add(nn.Dropout(.5))
    net:add(nn.ReLU())
    net:add(nn.Linear(1024, 1024))
    net:add(nn.Dropout(.5))
    net:add(nn.ReLU())
    net:add(nn.Linear(1024, 512))
    net:add(nn.Dropout(.5))
    net:add(nn.ReLU())
    net:add(nn.Linear(512, 10)) -- one output per digit class
    return net
end

In [96]:
-- Instantiate the network (kept global for the following cells) and show
-- its layer-by-layer summary.
model = create_model()
local summary = model:__tostring()
print(summary)

nn.Sequential {
  [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> (11) -> output]
  (1): nn.Reshape(784)
  (2): nn.Linear(784 -> 1024)
  (3): nn.Dropout(0.500000)
  (4): nn.ReLU
  (5): nn.Linear(1024 -> 1024)
  (6): nn.Dropout(0.500000)
  (7): nn.ReLU
  (8): nn.Linear(1024 -> 512)
  (9): nn.Dropout(0.500000)
  (10): nn.ReLU
  (11): nn.Linear(512 -> 10)
}	


We create the loss function. Here, we use the common cross-entropy loss, which is suitable for classification.

In [97]:
-- Cross-entropy loss used to train the classifier; kept global so the GPU and
-- training cells below can reference it.
criterion = nn.CrossEntropyCriterion()

We add one to the labels because the function `nn.CrossEntropyCriterion` expects classes starting from 1 and not 0. Thus, the class number 1 corresponds to the digit 0, ..., the class number 10 corresponds to the digit 9.

In [98]:
-- Shift every label from the 0..9 digit range to the 1..10 class range and
-- store it as a float tensor, for the test split first and then the train split.
for _, split in ipairs({test_data, train_data}) do
    split.label = (split.label + 1.0):float()
end

We define some hyperparameters.

In [1]:
-- Optimizer settings handed to the optim routine in the training loop.
-- The learning rate is given explicitly: it used to be read from the undefined
-- global `learningRate_`, which left the field nil and made the optimizer fall
-- back to its built-in default (1e-3 for optim.adagrad).
-- NOTE(review): optim.adagrad does not appear to read `momentum`; the field is
-- kept so the table stays usable with optimizers that do (e.g. optim.sgd).
optimState = {learningRate=1e-3, momentum=0.5}

batchSize = 128 -- number of samples per mini-batch
maxEpoch = 20 -- stop training after this many passes over the training set

-- Print the training loss and throughput every `display_n` steps. The value is
-- far larger than the number of steps in the run shown below, so no TRAINING
-- line is printed; lower it to see them.
display_n = 500000
test_n = 500 -- evaluate the model on the test set every `test_n` steps

In [100]:
-- Reusable buffers for one mini-batch. They are allocated once and overwritten
-- in place at every training step.

-- indices of the rows drawn for the current mini-batch
batchIndices = torch.LongTensor(batchSize)

-- The first `batchSize` rows are copied only to give the buffers the right
-- shape; their contents are replaced before use.
X_batch = train_data.data:narrow(1, 1, batchSize):clone():float() -- mini-batch images
Y_batch = train_data.label:narrow(1, 1, batchSize):clone():float() -- mini-batch labels

Optionally, we can move the model and the other variables to the GPU.

In [104]:
require 'cutorch'
require 'cunn'

cutorch.setDevice(1) -- choose which GPU to use

-- network and loss
model = model:cuda()
criterion = criterion:cuda()

-- mini-batch buffers
X_batch = X_batch:cuda()
Y_batch = Y_batch:cuda()

-- image tensors of both splits (the label tensors are not moved here)
for _, split in ipairs({train_data, test_data}) do
    split.data = split.data:cuda()
end

Function that returns the accuracy of a batch of predictions.

In [102]:
--- Fraction of predictions in a batch that match the labels.
-- The predicted class of each row is the index of its largest value along
-- dimension 2 of `output_`.
-- All intermediates are locals so that calling this function does not create
-- or overwrite globals.
-- @param output_ a `Tensor`, output from the model
-- @param y_true labels, compared element-wise with the predicted indices
-- @return number of matching predictions divided by `y_true:size(1)`
function get_accuracy(output_, y_true)
    local labels = y_true:double()
    -- torch.max returns (values, indices); only the indices are needed
    local _, predicted = torch.max(output_, 2)
    predicted = predicted:double() -- same type as the labels before comparing
    return labels:eq(predicted):sum() / labels:size(1)
end

We now train the network.

In [103]:
-- Parameters and gradients of the model, as returned by getParameters(); the
-- optimizer updates `params` and feval fills `gradParams`.
local params, gradParams = model:getParameters()
-- Number of mini-batches per pass over the training set (may be fractional).
local steps_per_epoch = train_data.label:size(1) / batchSize

--- Closure evaluated by the optimizer on the current mini-batch.
-- Defined once, outside the loop; it reads X_batch / Y_batch, which are
-- refilled in place before every optimization step.
-- @return the loss and the gradient of the loss w.r.t. the parameters
local function feval(_)
   gradParams:zero() -- gradients accumulate, so reset them first
   local outputs = model:forward(X_batch)
   local loss = criterion:forward(outputs, Y_batch)
   local gradOutputs = criterion:backward(outputs, Y_batch)
   model:backward(X_batch, gradOutputs)
   return loss, gradParams
end

local iteration = 0
while true do
   iteration = iteration + 1
   local epoch = iteration / steps_per_epoch

   -- when maxEpoch is reached, end the optimization
   if epoch > maxEpoch then
      break
   end

   -- Take a random mini batch of data
   batchIndices:random(1, train_data.data:size(1))
   X_batch:copy(train_data.data:index(1, batchIndices))
   Y_batch:copy(train_data.label:index(1, batchIndices))

   ----------------------------------------
   -------------- TRAINING ----------------
   ----------------------------------------
   local timer = torch.Timer()
   -- perform one training step; the second result holds the loss from feval
   local _, fx = optim.adagrad(feval, params, optimState)
   local datum_sec = batchSize / timer:time().real -- samples processed per second

   if iteration % display_n == 0 then
      print(string.format("TRAINING – epoch %.2f, loss = %f, %.2f datum/sec", epoch, fx[1], datum_sec))
   end

   ----------------------------------------
   ------------- EVALUATION ---------------
   ----------------------------------------
   -- Evaluate every test_n steps and once more on the last step before the
   -- loop stops. Testing "the next step would exceed maxEpoch" works for any
   -- batch size, unlike testing that the epoch equals maxEpoch exactly.
   local is_last_step = (iteration + 1) / steps_per_epoch > maxEpoch
   if (iteration - 1) % test_n == 0 or is_last_step then
      model:evaluate() -- evaluation mode. This is important to disable the dropout while testing the model
      local outputs_ = model:forward(test_data.data)
      local accuracy = get_accuracy(outputs_, test_data.label)
      print(string.format('TEST – epoch %.2f, accuracy: %.4f', epoch, accuracy))
      model:training() -- reactivate dropout
   end
end

TEST – epoch 0.00, accuracy: 0.0980	


TEST – epoch 1.07, accuracy: 0.9429	


TEST – epoch 2.14, accuracy: 0.9572	


TEST – epoch 3.20, accuracy: 0.9621	


TEST – epoch 4.27, accuracy: 0.9642	


TEST – epoch 5.34, accuracy: 0.9672	


TEST – epoch 6.40, accuracy: 0.9685	


TEST – epoch 7.47, accuracy: 0.9704	


TEST – epoch 8.54, accuracy: 0.9709	


TEST – epoch 9.60, accuracy: 0.9738	


TEST – epoch 10.67, accuracy: 0.9745	


TEST – epoch 11.74, accuracy: 0.9734	


TEST – epoch 12.80, accuracy: 0.9756	


TEST – epoch 13.87, accuracy: 0.9748	


TEST – epoch 14.94, accuracy: 0.9767	


TEST – epoch 16.00, accuracy: 0.9771	


TEST – epoch 17.07, accuracy: 0.9769	


TEST – epoch 18.14, accuracy: 0.9774	


TEST – epoch 19.20, accuracy: 0.9776	
