# Project 1

#### By Arnaud Savary and Jeremie Guy

## Data import

In [34]:
import dlc_practical_prologue as prologue

# We import 1000 pairs of digits for the training and the testing inputs, targets and classes
number_of_pairs = 1000 
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets( 
    number_of_pairs)

# Quick verification of the sizes:
print('Size of train_input:', train_input.size())
print('Size of train_target:', train_target.size())
print('Size of train_classes:', train_classes.size())
print('Size of test_input:', test_input.size())
print('Size of test_target:', test_target.size())
print('Size of test_classes:', test_classes.size())

Size of train_input: torch.Size([1000, 2, 14, 14])
Size of train_target: torch.Size([1000])
Size of train_classes: torch.Size([1000, 2])
Size of test_input: torch.Size([1000, 2, 14, 14])
Size of test_target: torch.Size([1000])
Size of test_classes: torch.Size([1000, 2])


## CNN

In [35]:
import torch
from torch import nn
from torch.nn import functional as F

In [36]:
# We define a convolutional network that take batches of 2 images as input
# The input therfore is N * 2 * 14 * 14 
class ConvNet(nn.Module):
    def __init__(self, initial_layers=2, final_layers=2):
        super().__init__()

        # We define here the layers of the network
        self.conv1 = nn.Conv2d(initial_layers, 32, kernel_size=3)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(256, 200)
        self.batchnorm3 = nn.BatchNorm1d(200)
        self.fc2 = nn.Linear(200, final_layers)
        self.batchnorm4 = nn.BatchNorm1d(final_layers)
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.3)
        

    def forward(self, x):
        # Forward step of the network, we entwine activation functions with the previous layers and apply 
        # batch normalization and dropout
        x = self.dropout1(self.batchnorm1(F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))))
        x = self.dropout1(self.batchnorm2(F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))))
        x = self.dropout2(self.batchnorm3(F.relu(self.fc1(x.view(-1, 256)))))
        x = self.dropout2(self.batchnorm4(self.fc2(x)))
        return x

## CNN with Weight Sharing

##### We create a unique class for weight sharing, wether auxiliary loss is used or not

In [37]:
# We define a convolutional network that take batches of 2 images as input
# The input therfore is N * 2 * 14 * 14 
# It uses the previously defined CNN as a base and implement the weight
# sharing mechanism. It can output auxiliary losses if the corresponding 
# boolean is set to True.
class WeightSharing(nn.Module):
    def __init__(self, auxiliary_loss = False):
        super().__init__()
        # We need to use the previously defined CNN but with only 1 node
        # for the initialization layer and 10 nodes for the final layer
        # Those 10 final layers will then be merged and we will apply a 
        # few additional layers before making the final prediction
        self.sharedConvNet = ConvNet(initial_layers=1, final_layers=10)
        self.auxiliary_loss = auxiliary_loss

        # Those layers are here for the final prediction, once the 2 CNN
        # have been merged
        self.fc1 = nn.Linear(20,100)
        self.batchnorm1 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100,20)
        self.batchnorm2 = nn.BatchNorm1d(20)
        self.fc3 = nn.Linear(20,2)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        # We split the input in 2 separate set of images
        image1 = x.narrow(1,0,1)
        image2 = x.narrow(1,1,1)

        # We call the CNN on thoses sets and merge the results
        output1 = self.sharedConvNet(image1)
        output2 = self.sharedConvNet(image2)
        outputCat = torch.cat((output1, output2), 1)

        # We apply our previously defined layers, as well as ReLu, batch 
        # normalization and dropout
        outputCat = self.dropout(self.batchnorm1(F.relu(self.fc1(outputCat))))
        outputCat = self.dropout(self.batchnorm2(F.relu(self.fc2(outputCat))))
        outputCat = self.fc3(outputCat)

        # If we want to use auxiliary losses we return the outputs of the 2 CNN
        if self.auxiliary_loss:
            return outputCat, output1, output2
        else:
            return outputCat

## Model training and number of errors

In [38]:
# This function trains a given model with the given datasets, mini_batch_size, lr, alpha and gamma.
# It prints the accumulated loss if asked to do so and returns it.
def train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = 5e-1,  alpha = 0.3, gamma = 0.5, auxiliary_loss = False, printAccLoss = False):
    losses = []
    nb_epochs = 25
    optimizer = torch.optim.SGD(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    for e in range(nb_epochs):
        acc_loss = 0

        # Loops on the mini batches
        for b in range(0, train_input.size(0), mini_batch_size):
            # If we want to use auxiliary loss, we compute the losses of the merged and separated data
            # and combine them with a weighted sum
            if auxiliary_loss:
                # Extracts the targets of the separated images
                trainTargetImage1 = (train_classes.narrow(1,0,1)).squeeze()
                trainTargetImage2 = (train_classes.narrow(1,1,1)).squeeze()

                outputCat, output1, output2 = model(train_input.narrow(0, b, mini_batch_size))

                lossCat = criterion(outputCat, train_target.narrow(0, b, mini_batch_size))
                loss1 = criterion(output1, trainTargetImage1.narrow(0, b, mini_batch_size))
                loss2 = criterion(output2, trainTargetImage2.narrow(0, b, mini_batch_size))

                loss = alpha*loss1 + alpha*loss2 + gamma*lossCat

            else:
                output = model(train_input.narrow(0, b, mini_batch_size))
                loss = criterion(output, train_target.narrow(0, b, mini_batch_size))

            # We stores the accumulated loss for future plots
            acc_loss += loss.item()
            losses.append(acc_loss)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if printAccLoss:
            print(e, acc_loss)

    return losses

# This function returns the number of errors made by the given model on the given dataset and mini_batch_size
def compute_nb_errors(model, input, target, classes, mini_batch_size, auxiliary_loss = False):
    nb_errors = 0

    # We plit the data in mini-batch
    for b in range(0, input.size(0), mini_batch_size):
        # While we don't need to compute loss here, we separate the auxiliary loss model because we need to
        # get rid of the unwanted returns
        if auxiliary_loss:
            output, _, _ = model(input.narrow(0, b, mini_batch_size))
        else:
            output = model(input.narrow(0, b, mini_batch_size))

        # We extract the predicted class and compare it to thr target to compute the number of errors    
        _, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if target[b + k] != predicted_classes[k]:
                nb_errors = nb_errors + 1

    return nb_errors

## Models testing

In [39]:
import time
import numpy as np

# This functions runs the model 10 times and prints the mean and std on the number of errors.
# It can also print the time for each run according to the corresponding boolean.
def test_model(model_name, print_times=False):
    train_error_ratios = []
    test_error_ratios = []

    auxiliary_loss = False
    number_of_runs = 10
    mini_batch_size = 50

    # Extracts the best parameters for the model
    bestLr, bestAlpha, bestGamma = tune_model(model_name)

    for run in range(number_of_runs):
        # For each run we create a new model to allow a random weight initialization
        if model_name == 'ConvNet':
            model = ConvNet()
        elif model_name == 'WeightSharing':
            model = WeightSharing()
        elif model_name == 'WeightSharingWithAuxiliaryLoss':
            auxiliary_loss=True
            model = WeightSharing(auxiliary_loss)
        else:
            raise ValueError('Please use one of the implemented methods: ConvNet, WeightSharing, WeightSharingWithAuxiliaryLoss')
        
        # We also extract a new random dataset
        train_input, train_target, _, test_input, test_target, _ = prologue.generate_pair_sets(number_of_pairs)

        # Model training
        if print_times: startTime = time.time()
        model.train(True)
        train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = bestLr,  alpha = bestAlpha, gamma = bestGamma, auxiliary_loss=auxiliary_loss)
        model.train(False)

        if print_times: endTime = time.time()

        # Extracts the number of errors made by the model
        nb_train_errors = compute_nb_errors(model, train_input, train_target, train_classes, mini_batch_size,auxiliary_loss=auxiliary_loss)
        nb_test_errors = compute_nb_errors(model, test_input, test_target, test_classes, mini_batch_size, auxiliary_loss=auxiliary_loss)

        # Divides the number of errors by the length of the dataset to get a percentage
        train_error_ratios.append(nb_train_errors/train_input.size(0))
        test_error_ratios.append(nb_test_errors/test_input.size(0))

        print('--- Run', run, '---')
        print('- train error ratio:', nb_train_errors/train_input.size(0))
        print('- test error ratio:', nb_test_errors/test_input.size(0))
        if print_times: print('- time for training:', str(endTime-startTime))

    print('----END----')
    print('Train error mean ratio:', np.mean(train_error_ratios))
    print("Train error ratio's std:", np.std(train_error_ratios))
    print('Test errors mean ratio:', np.mean(test_error_ratios))
    print("Train error ratio's std:", np.std(train_error_ratios))


## Model tuning

In [40]:
from torch import split

# This function returns the best paramaters for the given model name on an average of 5 rounds with the methode named grid search
def tune_model(model_name, number_of_rounds=5):
    auxiliary_loss=False
    mini_batch_size = 50
    
    # Diferent values for each parameter
    lrs = [5e-2, 1e-1, 5e-1, 1]
    alphas = [0.2, 0.5, 0.8, 1]
    gammas = [0.2, 0.5, 0.8, 1]

    # Stores the best parameter
    bestLr = 0
    bestAlpha = 0
    bestGamma = 0
    best_percentage_validation_errors = 1

    # We loop on every combination of parameters
    for lr in lrs:
        if model_name == 'WeightSharingWithAuxiliaryLoss':
            for alpha in alphas:
                for gamma in gammas:
                    percentage_validation_errors = []
                    for _ in range(number_of_rounds):
                        auxiliary_loss=True

                        # We generate a new model and dataset
                        model = WeightSharing(auxiliary_loss)
                        input, target, classes, _, _, _ = prologue.generate_pair_sets(number_of_pairs)
                        
                        # We split the datasets in training and validation input, target, class
                        train_input, validation_input = split(input, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])
                        train_target, validation_target = split(target, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])
                        train_classes, validation_classes = split(classes, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])
                        
                        # We train the model
                        model.train(True)
                        train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = lr, alpha = alpha, gamma = gamma, auxiliary_loss=auxiliary_loss)

                        # We compute the number of errors and append it to a list
                        model.train(False)
                        nb_validation_errors = compute_nb_errors(model, validation_input, validation_target, validation_classes, mini_batch_size, auxiliary_loss=auxiliary_loss)
                        percentage_validation_errors.append(nb_validation_errors/len(validation_input))

                    # If the mean of the number of errors on the 5 runs is better than what we had, we store the current parameters
                    if np.mean(percentage_validation_errors) < best_percentage_validation_errors:
                        best_percentage_validation_errors = np.mean(percentage_validation_errors)
                        bestLr = lr
                        bestAlpha = alpha
                        bestGamma = gamma
                        
        else:
            percentage_validation_errors = []
            for _ in range(number_of_rounds):
                # We generate a new model and dataset
                if model_name == 'ConvNet':
                    model = ConvNet()
                elif model_name == 'WeightSharing':
                    model = WeightSharing()
                else:
                    raise ValueError('Please use one of the implemented methods: ConvNet, WeightSharing, WeightSharingWithAuxiliaryLoss')
                input, target, classes, _, _, _ = prologue.generate_pair_sets(number_of_pairs)

                # We split the datasets in training and validation input, target, class
                train_input, validation_input = split(input, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])
                train_target, validation_target = split(target, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])
                train_classes, validation_classes = split(classes, [int(number_of_pairs*0.5), int(number_of_pairs*0.5)])

                # We train the model
                model.train(True)
                train_model(model, train_input, train_target, train_classes, mini_batch_size)

                # We compute the number of errors and append it to a list
                model.train(False)
                nb_validation_errors = compute_nb_errors(model, validation_input, validation_target, validation_classes, mini_batch_size)
                percentage_validation_errors.append(nb_validation_errors/len(validation_input))
            
            # If the mean of the number of errors on the 5 runs is better than what we had, we store the current parameters
            if np.mean(percentage_validation_errors) < best_percentage_validation_errors:
                best_percentage_validation_errors = np.mean(percentage_validation_errors)
                bestLr = lr

    print('--- Best parameters ---')
    print('lr:', bestLr, ', alpha:', bestAlpha, ', gamma:',bestGamma)
    print('Mean percentage error on validation set :', best_percentage_validation_errors)
    
    return bestLr, bestAlpha, bestGamma

    

### CNN

In [41]:
test_model('ConvNet')

--- Best parameters ---
lr: 0.1 , alpha: 0 , gamma: 0
Mean percentage error on validation set : 0.1988
--- Run 0 ---
- train error ratio: 0.0
- test error ratio: 0.162
--- Run 1 ---
- train error ratio: 0.001
- test error ratio: 0.159
--- Run 2 ---
- train error ratio: 0.001
- test error ratio: 0.181
--- Run 3 ---
- train error ratio: 0.001
- test error ratio: 0.166
--- Run 4 ---
- train error ratio: 0.001
- test error ratio: 0.184
--- Run 5 ---
- train error ratio: 0.0
- test error ratio: 0.178
--- Run 6 ---
- train error ratio: 0.002
- test error ratio: 0.172
--- Run 7 ---
- train error ratio: 0.0
- test error ratio: 0.157
--- Run 8 ---
- train error ratio: 0.003
- test error ratio: 0.159
--- Run 9 ---
- train error ratio: 0.001
- test error ratio: 0.173
----END----
Train error mean ratio: 0.0010000000000000002
Train error ratio's std: 0.000894427190999916
Test errors mean ratio: 0.1691
Train error ratio's std: 0.000894427190999916


### CNN with Weight Sharing

In [42]:
test_model('WeightSharing')

--- Best parameters ---
lr: 0.1 , alpha: 0 , gamma: 0
Mean percentage error on validation set : 0.15960000000000002
--- Run 0 ---
- train error ratio: 0.005
- test error ratio: 0.111
--- Run 1 ---
- train error ratio: 0.006
- test error ratio: 0.127
--- Run 2 ---
- train error ratio: 0.015
- test error ratio: 0.112
--- Run 3 ---
- train error ratio: 0.008
- test error ratio: 0.121
--- Run 4 ---
- train error ratio: 0.011
- test error ratio: 0.129
--- Run 5 ---
- train error ratio: 0.001
- test error ratio: 0.132
--- Run 6 ---
- train error ratio: 0.006
- test error ratio: 0.114
--- Run 7 ---
- train error ratio: 0.006
- test error ratio: 0.138
--- Run 8 ---
- train error ratio: 0.005
- test error ratio: 0.154
--- Run 9 ---
- train error ratio: 0.009
- test error ratio: 0.13
----END----
Train error mean ratio: 0.0072
Train error ratio's std: 0.0036276714294434104
Test errors mean ratio: 0.12679999999999997
Train error ratio's std: 0.0036276714294434104


### CNN with Weight Sharing and Auxiliary Loss

In [43]:
test_model('WeightSharingWithAuxiliaryLoss')

--- Best parameters ---
lr: 0.5 , alpha: 0.8 , gamma: 0.2
Mean percentage error on validation set : 0.0668
--- Run 0 ---
- train error ratio: 0.162
- test error ratio: 0.234
--- Run 1 ---
- train error ratio: 0.146
- test error ratio: 0.203
--- Run 2 ---
- train error ratio: 0.103
- test error ratio: 0.19
--- Run 3 ---
- train error ratio: 0.211
- test error ratio: 0.255
--- Run 4 ---
- train error ratio: 0.14
- test error ratio: 0.217
--- Run 5 ---
- train error ratio: 0.183
- test error ratio: 0.231
--- Run 6 ---
- train error ratio: 0.177
- test error ratio: 0.198
--- Run 7 ---
- train error ratio: 0.156
- test error ratio: 0.232
--- Run 8 ---
- train error ratio: 0.137
- test error ratio: 0.19
--- Run 9 ---
- train error ratio: 0.177
- test error ratio: 0.237
----END----
Train error mean ratio: 0.1592
Train error ratio's std: 0.028452767879417282
Test errors mean ratio: 0.21870000000000003
Train error ratio's std: 0.028452767879417282
