# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Data-loading" data-toc-modified-id="Data-loading-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Data loading</a></div><div class="lev2 toc-item"><a href="#Shuffling-the-dataset-using-a-loop" data-toc-modified-id="Shuffling-the-dataset-using-a-loop-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Shuffling the dataset using a loop</a></div><div class="lev2 toc-item"><a href="#Accuracy-implementation" data-toc-modified-id="Accuracy-implementation-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Accuracy implementation</a></div><div class="lev2 toc-item"><a href="#Batch-and-Stochastic-Gradient-Descent" data-toc-modified-id="Batch-and-Stochastic-Gradient-Descent-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Batch and Stochastic Gradient Descent</a></div><div class="lev3 toc-item"><a href="#Batch-Gradient-Descent" data-toc-modified-id="Batch-Gradient-Descent-1.3.1"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>Batch Gradient Descent</a></div><div class="lev2 toc-item"><a href="#Visualization--" data-toc-modified-id="Visualization---1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Visualization -</a></div><div class="lev2 toc-item"><a href="#Stochastic-Gradient" data-toc-modified-id="Stochastic-Gradient-1.5"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>Stochastic Gradient</a></div><div class="lev1 toc-item"><a href="#Train---Test" data-toc-modified-id="Train---Test-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Train - Test</a></div><div class="lev1 toc-item"><a href="#Deeper-than-Train---Test" data-toc-modified-id="Deeper-than-Train---Test-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Deeper than Train - Test</a></div>

# Data loading

First we are about to write down a script that adapt the mnist dataset to our needs, meaning :

* Transform mnist dataset into a 60000 x 784 instead of a 60000 x 28 x 28 tensor
* Normalize data between 0 and 1
* Return as shuffled version of the dataset with the corresponding labels, renaming classes as -1 or +1.

We will then consider two classes of the mnist dataset to make our classification task.

In [2]:
-- Data initialization

In [3]:
mnist = require 'mnist'
require 'nn';
train = mnist.traindataset()
labels = train.label

In [4]:
reshaped_data = torch.reshape(train.data, 60000,784)

## Shuffling the dataset using a loop

In [5]:
function mnist_munging(data,labels)
    -- We are going to time the script. It is a good habit to have.
    timer = torch.Timer()    
    times = {}
    times["reshaping"]  = timer:time().real
    -- Reshaping and normalizing
    local reshaped_data = reshaped_data:double()
    reshaped_data = reshaped_data/torch.max(reshaped_data)
    times["reshaping"] = timer:time().real - times["reshaping"]
    print("times : ")
    print(times)
    return reshaped_data
end

In [6]:
function classify_training_examples(reshaped_data,labels)
    -- Now we will gather the training examples by labels.
    timer = torch.Timer()    
    times = {}
    times["classifying"]  = timer:time().real
    -- basic type checking  -- TODO Doesn't work right now.
    if type(labels) == nil then
        return "please provide some good labels"
    end
    
    -- We create the appropriate tensors in order to stock the training examples
    local classified_examples = {}    
    for i=0,9 do
        classified_examples[i] = {}
        classified_examples[i]["data"] = {} 
        classified_examples[i]["count"] = 0
    end
    
    for i=1,(#reshaped_data)[1] do
        classified_examples[labels[i]]["count"] = classified_examples[labels[i]]["count"] + 1
        classified_examples[labels[i]]['data'][classified_examples[labels[i]]["count"]] = reshaped_data[i]
    end
    times["classifying"] = timer:time().real - times["classifying"]
    print("times : ")
    print(times)
    return classified_examples
end

In [7]:
function convert_to_tensor(data_table)
    -- This method convert the data type from a table to a Tensor
    local result_tensor = torch.Tensor(#data_table,784)
    for i=1,#data_table do
        result_tensor[i] = data_table[i]
    end
    return result_tensor
end

function get_target_classes(classified_dataset,class_1,class_2)
    times = {}
    timer = torch.Timer()
    times["global"] = timer:time().real
    
    --This method returns the targeted classes if they are included into the classes existing in the dataset.
    print("# of example of class ".. class_1 .. " : " .. classified_dataset[class_1].count)
    print("# of example of class " .. class_2 .. " : " .. classified_dataset[class_2].count)
    
    -- We then create a dataset containing all the data with the correct label
    
    local trainset = {}
    
    local look_up_trainset = {}
    -- filling up with class_1 examples 
    for i=1,classified_dataset[class_1].count do 
        look_up_trainset[i] = {}
        look_up_trainset[i]["data"] = classified_dataset[class_1].data[i]
        look_up_trainset[i]["labels"] = class_1
    end
    -- filling up with class_1 examples 
    for i=1,classified_dataset[class_2].count do 
        look_up_trainset[classified_dataset[class_1].count+i] = {}
        look_up_trainset[classified_dataset[class_1].count+i]["data"] = classified_dataset[class_2].data[i]
        look_up_trainset[classified_dataset[class_1].count+i]["labels"] = class_2
    end
    
    times["look_up_building"] = timer:time().real - times["global"]
    times["global"] = timer:time().real
    
    -- We then shuffle the lookup trainset and the labels using the same permutation
    total = classified_dataset[class_1].count + classified_dataset[class_2].count
    -- permutation template
    perm = torch.randperm(total)    
    
    local shuffled_trainset = {}
    shuffled_trainset["data"] = {}
    shuffled_trainset["labels"] = {}
    
    for i=1,total do 
       table.insert(shuffled_trainset["data"],torch.Tensor(look_up_trainset[perm[i]]["data"]))
        if look_up_trainset[perm[i]]["labels"] == class_1 then
            shuffled_trainset["labels"][i] = 1
        else 
            shuffled_trainset["labels"][i] = -1
        end
    end
    shuffled_trainset["labels"] = torch.Tensor(shuffled_trainset["labels"])
    shuffled_trainset["data"] = convert_to_tensor(shuffled_trainset["data"])

    times["shuffling"] = timer:time().real - times["global"]
    -- Adding metatable with __index function allowing heritage from torch
    
    
    setmetatable(shuffled_trainset,
    {__index = function(t, i)
                return {t.data[i], t.labels[i]}
               end})
    function shuffled_trainset:size()
        return self.data:size()
    end
    
    print("times : ")
    print(times)
    return shuffled_trainset
end

In [8]:
reshaped_data = mnist_munging(mnist.traindataset().data,mnist.traindataset().label)
classified_data = classify_training_examples(reshaped_data,labels)
training_set = get_target_classes(classified_data,0,3)

times : 	
{
  reshaping : 2.5700118541718
}


times : 	
{
  classifying : 0.72749781608582
}
# of example of class 0 : 5923	
# of example of class 3 : 6131	


times : 	
{
  look_up_building : 0.0042030811309814
  global : 0.0042128562927246
  shuffling : 0.69526505470276
}


In [9]:
function mnist_pipeline(class_1,class_2)
    reshaped_data = mnist_munging(mnist.traindataset().data,mnist.traindataset().label)
    classified_data = classify_training_examples(reshaped_data,labels)
    training_set = get_target_classes(classified_data,class_1,class_2)
    return training_set
end

As you can see we used a lot of loops during our algorithm. In Python, coding this way would not have been the optimal way.. But in LUA it is!
Loops are optimized so we will use and abuse them.

## Accuracy implementation

We are about to implement a accuracy function that takes two arguments : first a list of prediction and second a list of the correct predictions and computes the prediction *accuracy* following the given formula : $$Acc =  \frac{t_p + t_n}{t_p + f_p + f_n + t_n}$$


With : 
$t_p$ and $t_n$ : Correctly labeled items
$f_p$ and $f_n$ : Not correctly labeled items

This seems plausible, since there are two actual classes in our classification problem, and our classifier attempts to label them  correctly. This is precisely the effectiveness measure often used for evaluating machine learning classification problems.

We will later see that Accuracy might not be the best error evaluation method we have at hand. 

In [10]:
reshaped_data = mnist_munging(mnist.traindataset().data,mnist.traindataset().label)
classified_data = classify_training_examples(reshaped_data,labels)
training_set = get_target_classes(classified_data,1,3)

times : 	
{
  reshaping : 2.3850400447845
}


times : 	
{
  classifying : 0.39362597465515
}
# of example of class 1 : 6742	
# of example of class 3 : 6131	


times : 	
{
  look_up_building : 0.0035409927368164
  global : 0.0035498142242432
  shuffling : 0.33586311340332
}


In [11]:
-- Trying accuracy with fake labels prediction
fake_predictions = torch.sign(torch.randn(#training_set["data"]))

In [12]:
function accuracy(output,labels)
    -- This method computes the accuracy of a set of prediction y, compared to some expected labels
    local pred_signs = torch.sign(output)
    local correct_classification = torch.cmul(pred_signs,labels)
    --local eval_params = torch.Tensor(1):fill(1):double()
    return torch.mean(correct_classification:eq(1):double())
end

## Batch and Stochastic Gradient Descent

As we specified in [this paper](../tme1/presentation/Gradient_Descent_Optimization Techniques.pdf), there are several different version of Gradient Descent. We distinguish two main versions of Gradient Descent algorithms :

1. The **Batch** gradient Descent : Which is a version of Gradient Descent that takes all the training examples and optimize the model parameters all at once.
1. The **Stochastic** gradient Descent, that update the model parameters and updates them using the training examples one by one.

### Batch Gradient Descent

In [13]:
-- Here, we are setting the cummulative of the gradients parameters to be equal to 0

-- Well, might as well define a method so that we don't have to type it all the time
function train_batch(params)
    if params.times == nil then params.times = {} end

    model = params.model or nn.Linear(params.training_set:size()[2],1)
    criterion = params.criterion or nn.MSECriterion()
    maxEpoch =  params.maxEpoch or 50

    model:zeroGradParameters()
    model:reset()
    
    --model
    timer = torch.Timer()
    params.times["batch_training"] = timer:time().real
    -- for k,_ in pairs(params) do print(k) end
    --assert(params.data ~= nil, 'please provide correct structure for the training set (i.e. training_set.data ~= nil)')
    --assert(params.labels ~= nil, 'please provide correct structure for the training set (i.e. training_set.labels ~= nil)')
    local batch_loss = {}    
    batch_loss["iteration"]= {}
    batch_loss['training'] = {}
    batch_loss["accuracy"] = {}
    for iteration=1,maxEpoch do

        model:zeroGradParameters()
        local loss = 0
        output = model:forward(params.training_set.data)
        loss = criterion:forward(output,params.training_set.labels) --/params["training_set"]:size()[1]
        model:backward(params.training_set.data,criterion:backward(model.output,params.training_set.labels))
        updated_parameters = model:updateParameters(learning_rate)
        
        -- for prints
        table.insert(batch_loss["accuracy"],accuracy(output,params.training_set["labels"]))
        table.insert(batch_loss["iteration"],iteration)
        table.insert(batch_loss["training"],loss)
    end
    times["batch_training"] = timer:time().real - params.times["batch_training"]
    print("times")
    print(times)
    return batch_loss
end

In [14]:
--- Standard declaration
learning_rate= torch.random(1,10)/10000 -- We put a random weight parameter in this.
maxEpoch = 50

-- Batch parameters 
batch_params = {}
batch_params["learning_rate"] = learning_rate
batch_params["maxEpoch"] = 500
batch_params["training_set"] = training_set
batch_params["model"] = nn.Linear(training_set:size()[2],1)

In [15]:
batch_loss = train_batch(batch_params)

times	
{
  look_up_building : 0.0035409927368164
  global : 0.0035498142242432
  batch_training : 37.601534128189
  shuffling : 0.33586311340332
}


## Visualization - 

In [16]:
require 'gnuplot'
--require 'tools'
Plot = require 'itorch.Plot'

In [17]:
-- scatter plots 
plot = Plot():circle(batch_loss["iteration"],batch_loss["accuracy"],'red','accuracy'):circle(batch_loss["iteration"],batch_loss["training"],"blue","loss"):draw()
plot:title('loss and accuracy plot'):redraw() 
plot:xaxis('iteration'):yaxis('MSE - Accuracy'):redraw() 
--plot:legend(true) 
plot:save('out.html')

## Stochastic Gradient

In [18]:
 require 'nn'
 require 'gnuplot'
 require 'tools' 

In [19]:
local load_mnist = require 'load_mnist'
xtrain,ytrain=load_mnist.get_train(2,3)
xtest,ytest=load_mnist.get_test(2,3)

In [20]:
--xtrain,ytrain,xtest,ytest = load_mnist.get(2,3)
--gnuplot.imagesc(xtrain[1]:reshape(28,28))

In [1]:

-- 1: Creation du jeux de données
local DIMENSION=xtrain:size(2) -- dimension d'entrée
local N=xtrain:size(1) -- nombre de points d'apprentissage

function accuracy(y,out)
	local precision = 0
	for i=1, out:size(1)do
		if out[i]*y[i] > 0 then	precision = precision+1
		end
	end
	return precision/out:size(1)
end


local model= nn.Linear(DIMENSION,1)
model:reset(0.1)
local criterion= nn.MSECriterion()


-- 3 : Boucle d'apprentissage
local learning_rate= 1e-3 
local maxEpoch= 500
local all_losses={}
local timer = torch.Timer()

for iteration=1,maxEpoch do
------ Evaluation de la loss moyenne 
-- TODO
local loss=0  
local x,y,out,delta

---- calcul de la loss moyenne 
for j=1,xtrain:size(1) do
  x=xtrain[j]
  y=ytrain[j]
  out=model:forward(x)
  loss=loss+criterion:forward(out,y)
end  

loss=loss/xtrain:size(1)	
all_losses[iteration]=loss  --stockage de la loss moyenne (pour dessin)


-- version gradient stochastique
-- TODO    
model:zeroGradParameters()
local idx = math.random(xtrain:size(1));
x=xtrain[idx]
y=ytrain[idx]      	
out=model:forward(x)
loss=criterion:forward(out,y)
delta=criterion:backward(out,y)
model:backward(x,delta) 
model:updateParameters(learning_rate)  


-- plot de la frontiere ou plot du loss (utiliser l'un ou l'autre)
--plot(xs,ys,model,100)  -- uniquement si DIMENSION=2
--gnuplot.plot(torch.Tensor(all_losses)) 
end

local function prediction(xtest,ytest)
output = model:forward(xtest)
return accuracy(ytest,output)
  end

print(timer:time().real)
print(prediction(xtest,ytest))


[string "..."]:3: attempt to index global 'xtrain' (a nil value)
stack traceback:
	[string "..."]:3: in main chunk
	[C]: in function 'xpcall'
	/Users/david/torch/install/share/lua/5.1/itorch/main.lua:210: in function </Users/david/torch/install/share/lua/5.1/itorch/main.lua:174>
	/Users/david/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/Users/david/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/david/torch/install/share/lua/5.1/itorch/main.lua:389: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x01022cad10: 

# Train - Test

When we are creating a machine learning model, we are evaluating its performance at generalization and not . Thus, we evaluate it on a __test__ dataset, which is that contains data that the model has never seen before.

That is the the mnist dataset comes with a test dataset.

In [24]:
TRAIN_DIM = 0.8
TEST_DIM = 1 - TRAIN_DIM

train_set = ttraining_set:size()[1]*TRAIN_DIM
test_set = training_set

In [None]:
training_set.data[{}]

# Deeper than Train - Test