# Introduction

# Data loading

First we are about to write down a script that adapt the mnist dataset to our needs, meaning :

* Transform mnist dataset into a 60000 x 784 instead of a 60000 x 28 x 28 tensor
* Normalize data between 0 and 1
* Return as shuffled version of the dataset with the corresponding labels, renaming classes as -1 or +1.

We will then consider two classes of the mnist dataset to make our classification task.

In [46]:
-- Data initialization

In [47]:
mnist = require 'mnist'
require 'nn';
train = mnist.traindataset()
labels = train.label

In [48]:
reshaped_data = torch.reshape(train.data, 60000,784)

## Shuffling the dataset using a loop

In [76]:
function mnist_munging(data,labels)
    -- We are going to time the script. It is a good habit to have.
    timer = torch.Timer()    
    times = {}
    times["reshaping"]  = timer:time().real
    -- Reshaping and normalizing
    local reshaped_data = reshaped_data:double()
    reshaped_data = reshaped_data/torch.max(reshaped_data)
    times["reshaping"] = timer:time().real - times["reshaping"]
    print("times : ")
    print(times)
    return reshaped_data
end

In [77]:
function classify_training_examples(reshaped_data,labels)
    -- Now we will gather the training examples by labels.
    timer = torch.Timer()    
    times = {}
    times["classifying"]  = timer:time().real
    -- basic type checking  -- TODO Doesn't work right now.
    if type(labels) == nil then
        return "please provide some good labels"
    end
    
    -- We create the appropriate tensors in order to stock the training examples
    local classified_examples = {}    
    for i=0,9 do
        classified_examples[i] = {}
        classified_examples[i]["data"] = {} 
        classified_examples[i]["count"] = 0
    end
    
    for i=1,(#reshaped_data)[1] do
        classified_examples[labels[i]]["count"] = classified_examples[labels[i]]["count"] + 1
        classified_examples[labels[i]]['data'][classified_examples[labels[i]]["count"]] = reshaped_data[i]
    end
    times["classifying"] = timer:time().real - times["classifying"]
    print("times : ")
    print(times)
    return classified_examples
end

In [157]:
function convert_to_tensor(data_table)
    -- This method convert the data type from a table to a Tensor
    local result_tensor = torch.Tensor(#data_table,784)
    for i=1,#data_table do
        result_tensor[i] = data_table[i]
    end
    return result_tensor
end

function get_target_classes(classified_dataset,class_1,class_2)
    times = {}
    timer = torch.Timer()
    times["global"] = timer:time().real
    
    --This method returns the targeted classes if they are included into the classes existing in the dataset.
    print("# of example of class ".. class_1 .. " : " .. classified_dataset[class_1].count)
    print("# of example of class " .. class_2 .. " : " .. classified_dataset[class_2].count)
    
    -- We then create a dataset containing all the data with the correct label
    
    local trainset = {}
    
    local look_up_trainset = {}
    -- filling up with class_1 examples 
    for i=1,classified_dataset[class_1].count do 
        look_up_trainset[i] = {}
        look_up_trainset[i]["data"] = classified_dataset[class_1].data[i]
        look_up_trainset[i]["labels"] = class_1
    end
    -- filling up with class_1 examples 
    for i=1,classified_dataset[class_2].count do 
        look_up_trainset[classified_dataset[class_1].count+i] = {}
        look_up_trainset[classified_dataset[class_1].count+i]["data"] = classified_dataset[class_2].data[i]
        look_up_trainset[classified_dataset[class_1].count+i]["labels"] = class_2
    end
    
    times["look_up_building"] = timer:time().real - times["global"]
    times["global"] = timer:time().real
    
    -- We then shuffle the lookup trainset and the labels using the same permutation
    total = classified_dataset[class_1].count + classified_dataset[class_2].count
    -- permutation template
    perm = torch.randperm(total)    
    
    local shuffled_trainset = {}
    shuffled_trainset["data"] = {}
    shuffled_trainset["labels"] = {}
    
    for i=1,total do 
       table.insert(shuffled_trainset["data"],torch.Tensor(look_up_trainset[perm[i]]["data"]))
        if look_up_trainset[perm[i]]["labels"] == class_1 then
            shuffled_trainset["labels"][i] = 1
        else 
            shuffled_trainset["labels"][i] = -1
        end
    end
    shuffled_trainset["labels"] = torch.Tensor(shuffled_trainset["labels"])
    shuffled_trainset["data"] = convert_to_tensor(shuffled_trainset["data"])

    times["shuffling"] = timer:time().real - times["global"]
    -- Adding metatable with __index function allowing heritage from torch
    
    
    setmetatable(shuffled_trainset,
    {__index = function(t, i)
                return {t.data[i], t.labels[i]}
               end})
    function shuffled_trainset:size()
        return self.data:size()
    end
    
    print("times : ")
    print(times)
    return shuffled_trainset
end

In [158]:
reshaped_data = mnist_munging(mnist.traindataset().data,mnist.traindataset().label)
classified_data = classify_training_examples(reshaped_data,labels)
training_set = get_target_classes(classified_data,0,3)

times : 	
{
  reshaping : 0.94750618934631
}


times : 	
{
  classifying : 0.34722208976746
}
# of example of class 0 : 5923	
# of example of class 3 : 6131	


times : 	
{
  look_up_building : 0.0023889541625977
  global : 0.0023980140686035
  shuffling : 0.1449031829834
}


In [153]:
d = convert_to_tensor(training_set['data'])

As you can see we used a lot of loops during our algorithm. In Python, coding this way would not have been the optimal way.. But in LUA it is!
Loops are optimized so we will use and abuse them.

## Accuracy implementation

When training 

We are about to implement a accuracy function that takes two arguments : first a list of prediction and second a list of the correct predictions and computes the prediction *accuracy* following the given formula : $$Acc =  \frac{t_p + t_n}{t_p + f_p + f_n + t_n}$$


With : 
$t_p$ and $t_n$ : Correctly labeled items
$f_p$ and $f_n$ : Not correctly labeled items

This seems plausible, since there are two actual classes in our classification problem, and our classifier attempts to label them  correctly. This is precisely the effectiveness measure often used for evaluating machine learning classification problems.

We will later see that Accuracy might not be the best error evaluation method we have at hand. 

In [53]:
reshaped_data = mnist_munging(mnist.traindataset().data,mnist.traindataset().label)
classified_data = classify_training_examples(reshaped_data,labels)
training_set = get_target_classes(classified_data,1,3)

{
  reshaping : 1.5437829494476
}


# of example of class 1 : 6742	
# of example of class 3 : 6131	


times : 	
{
  look_up_building : 0.083427906036377
  global : 0.083441972732544
  shuffling : 0.014678001403809
}


In [54]:
-- Trying accuracy with fake labels prediction
fake_predictions = torch.sign(torch.randn(#training_set["data"]))

In [55]:
function accuracy(y,out)
    -- This method computes the accuracy of a set of prediction y, compared to some expected labels
    local pred_signs = torch.sign(y)
    local correct_classification = torch.cmul(pred_signs,out)
    --local eval_params = torch.Tensor(1):fill(1):double()
    return torch.mean(correct_classification:eq(1):double())
end

In [56]:
acc = accuracy(fake_predictions,training_set["labels"])
print(acc)

0.50244698205546	


## Batch and Stochastic Gradient Descent

As we specified in [this paper](../tme1/presentation/Gradient_Descent_Optimization Techniques.pdf), there are several different version of Gradient Descent. We distinguish two main versions of Gradient Descent algorithms :

1. The **Batch** gradient Descent : Which is a version of Gradient Descent that takes all the training examples and optimize the model parameters all at once.
1. The **Stochastic** gradient Descent, that update the model parameters and updates them using the training examples one by one.

#### Model

In this section we will use the following model for our analysis

In [184]:
model = nn.Linear(784,1)
criterion = nn.MSECriterion()
model:zeroGradParameters() -- Here, we are setting the cummulative of the gradients parameters to be equal to 0
model:reset() -- I don't know what this command is for but it was in the TP TODO : Look for it

### Batch Gradient Descent

In [185]:
learning_rate= torch.random(1,10)/10000 -- We put a random weight parameter in this.
maxEpoch = 50

-- Well, might as well define a method so that we don't have to type it all the time
function train_batch(params)
    if params.times == nil then params.times = {} end
    timer = torch.Timer()
    params.times["batch_training"] = timer:time().real
    -- for k,_ in pairs(params) do print(k) end
    --assert(params.data ~= nil, 'please provide correct structure for the training set (i.e. training_set.data ~= nil)')
    --assert(params.labels ~= nil, 'please provide correct structure for the training set (i.e. training_set.labels ~= nil)')
    local batch_loss = {}    
    model:zeroGradParameters()
    for iteration=1,maxEpoch do
        loss=0
        output = model:forward(params.training_set.data)
        loss= criterion:forward(output,params.training_set.labels)
        model:backward(params.training_set.data,criterion:backward(model.output,params.training_set.labels))
        updated_parameters = model:updateParameters(learning_rate)
        batch_loss[iteration] = loss
    end
    times["batch_training"] = timer:time().real - params.times["batch_training"]
    print("times")
    print(times)
    return batch_loss
end

In [186]:
batch_params = {}
batch_params["learning_rate"] = learning_rate
batch_params["maxEpoch"] = 100
batch_params["training_set"] = training_set

In [188]:
print("loss \n")
train_batch(batch_params)

loss 
	


times	
{
  look_up_building : 0.0023889541625977
  global : 0.0023980140686035
  batch_training : 2.9678781032562
  shuffling : 0.1449031829834
}


times	
{
  look_up_building : 0.0023889541625977
  global : 0.0023980140686035
  batch_training : 2.4079020023346
  shuffling : 0.1449031829834
}
{
  1 : 0.98453662733588
  2 : 0.98144559856183
  3 : 0.97529596882648
  4 : 0.96615191033704
  5 : 0.95410799769915
  6 : 0.93928725595979
  7 : 0.92183866899693
  8 : 0.90193423724641
  9 : 0.87976568863593
  10 : 0.85554095704896
  11 : 0.82948054823825
  12 : 0.80181391363586
  13 : 0.77277594797322
  14 : 0.74260371726463
  15 : 0.71153350996543
  16 : 0.67979828663048
  17 : 0.64762558297154
  18 : 0.61523589877338
  19 : 0.58284158169471
  20 : 0.55064619160869
  21 : 0.51884430887988
  22 : 0.48762172982815
  23 : 0.45715597549626
  24 : 0.4276170264778
  25 : 0.39916818756863
  26 : 0.37196698176858
  27 : 0.34616597386028
  28 : 0.3219134293803


  29 : 0.29935372500931
  30 : 0.27862744076404
  31 : 0.25987108221214
  32 : 0.24321640142866
  33 : 0.2287893076265
  34 : 0.216708381301
  35 : 0.20708302827624
  36 : 0.20001133118501
  37 : 0.19557767467785
  38 : 0.19385023616196
  39 : 0.19487844538774
  40 : 0.19869052317516
  41 : 0.20529121165133
  42 : 0.21465980542066
  43 : 0.22674858519997
  44 : 0.24148174293314
  45 : 0.25875487077124
  46 : 0.27843506626674
  47 : 0.30036168353871
  48 : 0.32434773599189
  49 : 0.35018193145759
  50 : 0.37763129643967
}


## Visualization - 

In [182]:
require 'gnuplot'
--require 'tools'
Plot = require 'itorch.Plot'

In [183]:
-- scatter plots 
plot = Plot():circle(cat_1.x,cat_1.y, 'red', 'Category 1'):circle(cat_2.x, cat_2.y, 'blue', 'Category 2'):draw() 
plot:title('Data Points to separate'):redraw() 
plot:xaxis('random variable 1'):yaxis('random variable 2'):redraw() 
plot:legend(true) 
plot:save('out.html')

[string "-- scatter plots ..."]:2: attempt to index global 'cat_1' (a nil value)
stack traceback:
	[string "-- scatter plots ..."]:2: in main chunk
	[C]: in function 'xpcall'
	/Users/david/torch/install/share/lua/5.1/itorch/main.lua:210: in function </Users/david/torch/install/share/lua/5.1/itorch/main.lua:174>
	/Users/david/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/david/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x0100c6ad10: 

In [193]:
-- 3 : Learning Loop
learning_rate= torch.random(1,10)/10000 -- We put a random weight parameter in this.
maxEpoch = 50
all_losses={}
-- Well, might as well define a method so that we don't have to type it all the time

stoch_params = {"learning_rate":learning_rate,"maxEpoch":maxEpoch}

function train_stochastic(training_set,learning_rate,maxEpoch)
    if times ==nil then
        times = {}
        timer = torch.Timer()
        times["stoch_training"] = timer:time()
    end
    assert(training_set.data ~= nil, 'please provide correct structure for the training set (i.e. training_set.data ~= nil)')
    assert(training_set.data ~= nil, 'please provide correct structure for the training set (i.e. training_set.label ~= nil)')
    local stochastic_losses = {}    
    model:zeroGradParameters()
    for iteration=1,maxEpoch do
        loss=0
        for i=1,training_set.data:size()[1] do 
            output = model:forward(training_set.data[i])
            loss= loss + criterion:forward(output,training_set.label[i])
            model:backward(training_set.data[i],criterion:backward(model.output,training_set.label[i]))
            updated_parameters = model:updateParameters(learning_rate)
        end
        stochastic_losses[iteration] = loss
    end
    times["stoch_training"] = timer:time() - times["stoch_training"]
    print("times")
    print(times)
    return stochastic_losses
end

[string "-- 3 : Learning Loop..."]:7: '}' expected near ':': 

# Train - Test