In [1]:
require "nn"
require "optim"

## Step 1: Building Architecture

**Building Model 1: a two layer convolutional neural network**

In [2]:
--- Build model 1: two convolution stages followed by a linear classifier.
-- Each stage is a 5x5 convolution, a ReLU and a 2x2 max-pool. The layer
-- sizes are consistent with a single-channel 32x32 input:
-- 32 -> 28 -> 14 -> 10 -> 5, giving 64 feature maps of 5x5 before the
-- classifier.
-- @return a freshly constructed nn.Sequential producing 10 class scores
function initialize_model_1()
    -- Kept local so that building a network never overwrites a global
    -- `model` that the caller may already be training.
    local net = nn.Sequential()

    -- Stage 1: 1 input plane -> 32 feature maps
    net:add(nn.SpatialConvolutionMM(1, 32, 5, 5))
    net:add(nn.ReLU())
    net:add(nn.SpatialMaxPooling(2, 2))

    -- Stage 2: 32 -> 64 feature maps
    net:add(nn.SpatialConvolutionMM(32, 64, 5, 5))
    net:add(nn.ReLU())
    net:add(nn.SpatialMaxPooling(2, 2))

    -- Flatten the 64 x 5 x 5 output of stage 2 for the classifier
    net:add(nn.Reshape(64 * 5 * 5))

    -- Output layer: linear classification into 10 classes
    net:add(nn.Linear(64 * 5 * 5, 10))

    return net
end

**Building Model 2: a one layer convolutional neural network**

In [3]:
--- Build model 2: one convolution stage followed by a linear classifier.
-- The stage is a 5x5 convolution, a ReLU and a 2x2 max-pool. The layer
-- sizes are consistent with a single-channel 32x32 input: 32 -> 28 -> 14,
-- giving 32 feature maps of 14x14 before the classifier.
-- @return a freshly constructed nn.Sequential producing 10 class scores
function initialize_model_2()
    -- Kept local so that building a network never overwrites a global
    -- `model` that the caller may already be training.
    local net = nn.Sequential()

    -- Stage 1: 1 input plane -> 32 feature maps
    net:add(nn.SpatialConvolutionMM(1, 32, 5, 5))
    net:add(nn.ReLU())
    net:add(nn.SpatialMaxPooling(2, 2))

    -- Flatten the 32 x 14 x 14 output of the (only) convolution stage
    net:add(nn.Reshape(32 * 14 * 14))

    -- Output layer: linear classification into 10 classes
    net:add(nn.Linear(32 * 14 * 14, 10))

    return net
end

**Building Model 3: only a linear classification layer**

In [4]:
--- Build model 3: a plain linear classifier with no convolution.
-- The input is flattened to 1 * 32 * 32 = 1024 values and mapped straight
-- to 10 class scores.
-- @return a freshly constructed nn.Sequential producing 10 class scores
function initialize_model_3()
    -- Kept local so that building a network never overwrites a global
    -- `model` that the caller may already be training.
    local net = nn.Sequential()
    net:add(nn.Reshape(1 * 32 * 32))
    net:add(nn.Linear(1 * 32 * 32, 10))
    return net
end

## Step 2: Train the Model

In [5]:
-- load data
-- "batches" is a project-local module that is not shown here; presumably it
-- supplies the getNextBatch method and the accuracy helper that the later
-- cells call -- TODO confirm.
require "batches"
-- Plotting package; none of the cells visible here call into it.
require 'gnuplot'
-- Deserialize the training and test sets from Torch .t7 files. The paths are
-- relative, so the notebook must be started from the directory holding ./data.
mnistTrain = torch.load("./data/trainingData.t7") 
mnistTest  = torch.load("./data/testData.t7")

In [6]:
-- Shared hyper-parameters. These stay global on purpose: the training cells
-- read them by name.

-- Number of mini-batch updates per training run.
training_step = 1000
-- Examples per training mini-batch.
train_batch_size = 100
-- Examples in the evaluation batch.
test_batch_size  = 1000
-- Accuracy is recorded every `inspection_step` updates; it must divide
-- `training_step` so the accuracy log has a whole number of slots.
inspection_step = 50
-- Step size the training cells pass to the trainers. 0.05 is the step size
-- trainModel_L1 uses for its SGD update.
learning_rate = 0.05

In [7]:
-- Evaluation batch, drawn once when this cell runs and reused by every call
-- to getModelAccuracy below.
testImages, testLabels = mnistTest:getNextBatch(test_batch_size)

--- Score `model` on the cached evaluation batch.
-- @param data  accepted but not read; the cached testImages/testLabels
--              globals are always used instead
-- @param model network whose forward() output is scored
-- @return whatever the `accuracy` helper returns for the predictions
--         (helper is defined outside this cell; presumably a fraction in
--         [0, 1] -- confirm against its definition)
function getModelAccuracy (data, model)
    return accuracy(model:forward(testImages), testLabels)
end

In [8]:
--- Run one plain gradient-descent step on a single mini-batch.
-- @param data          batch source exposing getNextBatch(batch_size),
--                      which returns images and labels
-- @param model         network to update in place
-- @param batch_size    number of examples to draw for this step
-- @param learning_rate step size passed to model:updateParameters
-- @return the same `model` object, after the update
function trainModel (data, model, batch_size, learning_rate)
    -- Everything below is local so a training step leaves no stray globals.
    local images, labels = data:getNextBatch(batch_size)
    local crit = nn.CrossEntropyCriterion()

    -- backward() accumulates into the gradient buffers, so clear them
    -- before this step instead of relying on an earlier call having left
    -- them clean. The buffers keep this batch's gradients on return.
    model:zeroGradParameters()

    -- Forward pass. The criterion's forward() must run before its
    -- backward(), although the loss value itself is not used here.
    local scores = model:forward(images)
    crit:forward(scores, labels)

    -- Backward pass: gradient of the loss w.r.t. the scores, then
    -- back-propagate through the network.
    local dScores = crit:backward(scores, labels)
    model:backward(images, dScores)

    -- Gradient-descent update
    model:updateParameters(learning_rate)

    return model
end

In [9]:
--- Run one SGD step with an L1 weight penalty on a single mini-batch.
-- The cross-entropy gradient is computed with a normal forward/backward
-- pass; the closure handed to optim.sgd then adds the L1 term to both the
-- loss and the gradient.
-- @param data          batch source exposing getNextBatch(batch_size),
--                      which returns images and labels
-- @param model         network to update in place
-- @param batch_size    number of examples to draw for this step
-- @param learning_rate SGD step size; defaults to 0.05 when nil
-- @return the same `model` object, after the update
function trainModel_L1 (data, model, batch_size, learning_rate)
    -- Weight of the L1 penalty added to the loss.
    local coefL1 = 1e-3

    local images, labels = data:getNextBatch(batch_size)

    -- Flattened views of all weights and gradients, as optim.sgd expects.
    -- NOTE(review): this is fetched on every step; the nn documentation
    -- advises calling getParameters once per network -- consider hoisting
    -- it to the caller.
    local parameters, gradParameters = model:getParameters()

    -- backward() accumulates, so start from clean gradient buffers.
    model:zeroGradParameters()

    -- Forward pass and data loss
    local crit = nn.CrossEntropyCriterion()
    local scores = model:forward(images)
    local loss = crit:forward(scores, labels)

    -- Backward pass: gradient of the loss w.r.t. the scores, then the rest
    local dScores = crit:backward(scores, labels)
    model:backward(images, dScores)

    -- Closure for optim.sgd. It adds the L1 penalty on top of the data
    -- loss and gradient computed above, so it is only correct when
    -- evaluated once per step.
    local function feval(x)
        -- adopt the optimiser's parameter vector if it passed a different one
        if x ~= parameters then
            parameters:copy(x)
        end
        -- coefL1 > 0, so coefL1 * ||w||_1 equals ||coefL1 * w||_1 without
        -- allocating a scaled copy of the parameters
        loss = loss + coefL1 * torch.norm(parameters, 1)
        -- sub-gradient of the L1 term
        gradParameters:add(torch.sign(parameters):mul(coefL1))
        return loss, gradParameters
    end

    local sgdState = {
        -- honour the caller's step size; 0.05 is the fallback when the
        -- caller passes nil
        learningRate = learning_rate or 0.05,
        momentum = 0,
        learningRateDecay = 0,
    }
    optim.sgd(feval, parameters, sgdState)

    return model
end

**Train Model 1: a two layer CNN**

In [12]:
-- Train model 1 (two-layer CNN) with the L1-regularised trainer and log its
-- test accuracy every `inspection_step` updates.
-- (A restart label used to sit here; it is currently disabled.)
--::redo::
model = initialize_model_1()
accuracy_inspection = torch.zeros(training_step / inspection_step)

for step = 1, training_step do
    model = trainModel_L1(mnistTrain, model, train_batch_size, learning_rate)

    if step % inspection_step == 0 then
        local slot = step / inspection_step
        local percent_done = step / training_step * 100

        accuracy_inspection[slot] = getModelAccuracy(mnistTest, model)
        local current_accuracy = accuracy_inspection[slot]

        print('[' .. tonumber(percent_done) .. '%]')
        print(current_accuracy)

        -- L1 norm of the model's current gradient buffer
        parameters, gradParameters = model:getParameters()
        print('L1 grad: ' .. torch.norm(gradParameters, 1))

        -- Only reports a poor run; training continues regardless.
        if current_accuracy < 0.13 then
            print('Bad Luck, Do it Again')
        end
    end
end

-- Update counts at which the accuracies above were recorded
range = torch.range(inspection_step, training_step, inspection_step)

-- Keep the results under model-specific names and persist them
trained_model_1 = model
trained_model_1_acc = accuracy_inspection
torch.save('model1.t7', trained_model_1)
torch.save('model1_acc.t7', trained_model_1_acc)

[5%]	
0.785	


L1 grad: 472.23898055948	


[10%]	
0.857	


L1 grad: 606.17457178172	


[15%]	
0.858	


L1 grad: 673.37265896262	


[20%]	
0.895	


L1 grad: 654.38515500249	


[25%]	
0.913	
L1 grad: 438.20046078911	


[30%]	
0.926	


L1 grad: 319.41686035304	


[35%]	
0.925	


L1 grad: 526.2534856516	


[40%]	
0.923	
L1 grad: 511.25362305205	


[45%]	
0.935	


L1 grad: 343.65048716251	


[50%]	
0.936	


L1 grad: 232.90540979071	


[55%]	


0.948	


L1 grad: 347.03354436258	


[60%]	
0.943	


L1 grad: 453.71017863283	


[65%]	
0.946	


L1 grad: 291.66458050385	


[70%]	
0.946	


L1 grad: 424.27079548325	


[75%]	
0.954	


L1 grad: 249.58983443451	


[80%]	
0.956	


L1 grad: 410.88083801179	


[85%]	
0.948	


L1 grad: 343.18768152034	


[90%]	
0.951	


L1 grad: 235.6337464715	


[95%]	
0.958	


L1 grad: 415.56455486533	


[100%]	
0.947	


L1 grad: 461.96493764846	


**Train Model 2: a one-layer CNN**

In [11]:
-- Train model 2 (one-layer CNN) with the L1-regularised trainer and log its
-- test accuracy every `inspection_step` updates.
-- The label below is the target of a restart jump that is currently disabled.
::redo::
model = initialize_model_2()
accuracy_inspection = torch.zeros(training_step / inspection_step)

for step = 1, training_step do
    model = trainModel_L1(mnistTrain, model, train_batch_size, learning_rate)

    if step % inspection_step == 0 then
        local slot = step / inspection_step
        local percent_done = step / training_step * 100

        accuracy_inspection[slot] = getModelAccuracy(mnistTest, model)
        local current_accuracy = accuracy_inspection[slot]

        print('[' .. tonumber(percent_done) .. '%]')
        print(current_accuracy)

        -- Abandon a run that is still near chance level. Note that the
        -- save calls after the loop still execute in that case.
        if current_accuracy < 0.12 then
            print('Bad Luck, Do it Again')
            break
        end

        -- L1 norm of the model's current gradient buffer
        parameters, gradParameters = model:getParameters()
        print('L1 grad: ' .. torch.norm(gradParameters, 1))
    end
end

-- Update counts at which the accuracies above were recorded
range = torch.range(inspection_step, training_step, inspection_step)

-- Keep the results under model-specific names and persist them
trained_model_2 = model
trained_model_2_acc = accuracy_inspection
torch.save('model2.t7', trained_model_2)
torch.save('model2_acc.t7', trained_model_2_acc)

[5%]	
0.848	


L1 grad: 470.01260191718	


[10%]	
0.879	


L1 grad: 405.80498595691	


[15%]	
0.882	


L1 grad: 575.00673092064	


[20%]	
0.88	


L1 grad: 455.66200381372	


[25%]	
0.904	
L1 grad: 404.02482257076	


[30%]	
0.909	


L1 grad: 316.85596940531	


[35%]	
0.908	


L1 grad: 380.91316877674	


[40%]	
0.893	


L1 grad: 618.97467066509	




[45%]	
0.913	


L1 grad: 305.38550376478	


[50%]	
0.916	


L1 grad: 460.83507060605	


[55%]	
0.933	


L1 grad: 443.16033089303	


[60%]	
0.916	
L1 grad: 507.61109531962	


[65%]	
0.925	


L1 grad: 308.65747375262	


[70%]	
0.923	


L1 grad: 323.68046432541	


[75%]	
0.929	


L1 grad: 499.7287328614	


[80%]	
0.922	


L1 grad: 454.00027381789	


[85%]	
0.932	
L1 grad: 370.27450729004	


[90%]	
0.934	


L1 grad: 294.25950755272	


[95%]	
0.934	


L1 grad: 363.95813796223	


[100%]	
0.929	


L1 grad: 492.83908237513	


**Train Model 3: a linear classifier**

In [10]:
-- Train model 3 (linear classifier) with the L1-regularised trainer and log
-- its test accuracy every `inspection_step` updates.
model = initialize_model_3()
accuracy_inspection = torch.zeros(training_step / inspection_step)

for step = 1, training_step do
    model = trainModel_L1(mnistTrain, model, train_batch_size, learning_rate)

    if step % inspection_step == 0 then
        local slot = step / inspection_step
        local percent_done = step / training_step * 100

        accuracy_inspection[slot] = getModelAccuracy(mnistTest, model)

        print('[' .. tonumber(percent_done) .. '%]')
        print(accuracy_inspection[slot])
    end
end

-- Update counts at which the accuracies above were recorded
range = torch.range(inspection_step, training_step, inspection_step)

-- Keep the results under model-specific names and persist them
trained_model_3 = model
trained_model_3_acc = accuracy_inspection
torch.save('model3.t7', trained_model_3)
torch.save('model3_acc.t7', trained_model_3_acc)

[5%]	
0.816	


[10%]	
0.846	


[15%]	
0.861	


[20%]	
0.867	


[25%]	
0.871	


[30%]	
0.878	


[35%]	
0.883	


[40%]	


0.884	


[45%]	
0.88	


[50%]	
0.882	


[55%]	
0.886	


[60%]	
0.885	


[65%]	
0.885	


[70%]	
0.893	


[75%]	
0.887	


[80%]	


0.887	


[85%]	
0.891	


[90%]	
0.896	


[95%]	
0.894	


[100%]	
0.895	
