# Project 1


## Description

## Data import

In [236]:
import dlc_practical_prologue as prologue

# We import 1000 pairs of digits for the training and the testing inputs, targets and classes
number_of_pairs = 1000 
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets( 
    number_of_pairs)

# Quick verification of the sizes:
print('Size of train_input:', train_input.size())
print('Size of train_target:', train_target.size())
print('Size of train_classes:', train_classes.size())
print('Size of test_input:', test_input.size())
print('Size of test_target:', test_target.size())
print('Size of test_classes:', test_classes.size())

Size of train_input: torch.Size([1000, 2, 14, 14])
Size of train_target: torch.Size([1000])
Size of train_classes: torch.Size([1000, 2])
Size of test_input: torch.Size([1000, 2, 14, 14])
Size of test_target: torch.Size([1000])
Size of test_classes: torch.Size([1000, 2])


## CNN

In [237]:
import torch
from torch import nn
from torch.nn import functional as F

In [238]:
class ConvNet(nn.Module):
    def __init__(self, initial_layers=2, final_layers=2):
        super().__init__()
        self.conv1 = nn.Conv2d(initial_layers, 32, kernel_size=3)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(256, 200)
        self.batchnorm3 = nn.BatchNorm1d(200)
        self.fc2 = nn.Linear(200, final_layers)
        self.batchnorm4 = nn.BatchNorm1d(final_layers)
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.3)
        

    def forward(self, x):
        x = self.dropout1(self.batchnorm1(F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))))
        x = self.dropout1(self.batchnorm2(F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))))
        x = self.dropout2(self.batchnorm3(F.relu(self.fc1(x.view(-1, 256)))))
        x = self.dropout2(self.batchnorm4(self.fc2(x)))
        return x

## CNN with Weight Sharing

#### We define the same class wether we use auxiliary loss or not

In [239]:
class WeightSharing(nn.Module):
    def __init__(self, auxiliary_loss = False):
        super().__init__()
        self.sharedConvNet = ConvNet(initial_layers=1, final_layers=10)
        self.auxiliary_loss = auxiliary_loss

        self.fc1 = nn.Linear(20,100)
        self.batchnorm1 = nn.BatchNorm1d(100)
        self.fc2 = nn.Linear(100,20)
        self.batchnorm2 = nn.BatchNorm1d(20)
        self.fc3 = nn.Linear(20,2)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        image1 = x.narrow(1,0,1)
        image2 = x.narrow(1,1,1)

        output1 = self.sharedConvNet(image1)
        output2 = self.sharedConvNet(image2)
        outputCat = torch.cat((output1, output2), 1)

        outputCat = self.dropout(self.batchnorm1(F.relu(self.fc1(outputCat))))
        outputCat = self.dropout(self.batchnorm2(F.relu(self.fc2(outputCat))))
        outputCat = self.fc3(outputCat)

        
        if self.auxiliary_loss:
            return outputCat, output1, output2
        else:
            return outputCat

## Model training and number of errors

In [240]:
def train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = 5e-1,  alpha = 0.3, gamma = 0.5, printAccLoss = False, auxiliary_loss = False):
    criterion = nn.CrossEntropyLoss()
    nb_epochs = 25
    optimizer = torch.optim.SGD(model.parameters(), lr = lr)

    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            if auxiliary_loss:
                trainTargetImage1 = (train_classes.narrow(1,0,1)).squeeze()
                trainTargetImage2 = (train_classes.narrow(1,1,1)).squeeze()

                outputCat, output1, output2 = model(train_input.narrow(0, b, mini_batch_size))
                lossCat = criterion(outputCat, train_target.narrow(0, b, mini_batch_size))
                loss1 = criterion(output1, trainTargetImage1.narrow(0, b, mini_batch_size))
                loss2 = criterion(output2, trainTargetImage2.narrow(0, b, mini_batch_size))

                loss = alpha*loss1 + alpha*loss2 + gamma*lossCat

            else:
                output = model(train_input.narrow(0, b, mini_batch_size))
                loss = criterion(output, train_target.narrow(0, b, mini_batch_size))

            acc_loss = acc_loss + loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


        if printAccLoss:
            print(e, acc_loss)

def compute_nb_errors(model, input, target, mini_batch_size, auxiliary_loss = False):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        if auxiliary_loss:
            outputCat, _, _ = model(input.narrow(0, b, mini_batch_size))
            _, predicted_classes = outputCat.max(1)
            for k in range(mini_batch_size):
                if target[b + k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1
        else:
            output = model(input.narrow(0, b, mini_batch_size))
            _, predicted_classes = output.max(1)
            for k in range(mini_batch_size):
                if target[b + k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1


    return nb_errors

## Models testing

In [241]:
import time
import numpy as np

mini_batch_size = 50

def test_model(model_name, print_times=False, print_accuracy=False):
    train_error_ratios = []
    test_error_ratios = []
    auxiliary_loss = False
    number_of_runs = 10

    for run in range(number_of_runs):
        if model_name == 'ConvNet':
            model = ConvNet()
        elif model_name == 'WeightSharing':
            model = WeightSharing()
        elif model_name == 'WeightSharingWithAuxiliaryLoss':
            auxiliary_loss=True
            model = WeightSharing(auxiliary_loss)
        else:
            raise ValueError('Please use one of the implemented methods: ConvNet, WeightSharing, WeightSharingWithAuxiliaryLoss')
            
        train_input, train_target, _, test_input, test_target, _ = prologue.generate_pair_sets(number_of_pairs)

        if print_times: startTime = time.time()
        model.train(True)
        train_model(model, train_input, train_target, train_classes, mini_batch_size, auxiliary_loss=auxiliary_loss)
        model.train(False)

        if print_times: endTime = time.time()

        nb_train_errors = compute_nb_errors(model, train_input, train_target, mini_batch_size,auxiliary_loss=auxiliary_loss)
        nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size, auxiliary_loss=auxiliary_loss)
        train_error_ratios.append(nb_train_errors/train_input.size(0))
        test_error_ratios.append(nb_test_errors/test_input.size(0))

        if print_accuracy:
            print('--- Run', run, '---')
            print('- train error ratio:', nb_train_errors/train_input.size(0))
            print('- test error ratio:', nb_test_errors/test_input.size(0))
        if print_times: print('- time for training:', str(endTime-startTime))
    if print_accuracy: 
        print('----END----')
        print('Train error mean ratio:', np.mean(train_error_ratios))
        print('Test errors mean ratio:', np.mean(test_error_ratios))

    return train_error_ratios, test_error_ratios


## Model tuning

In [242]:
def tune_model(model_name):
    auxiliary_loss=False
    if model_name == 'ConvNet':
        model = ConvNet()
    elif model_name == 'WeightSharing':
        model = WeightSharing()
    elif model_name == 'WeightSharingWithAuxiliaryLoss':
        auxiliary_loss=True
        model = WeightSharing(auxiliary_loss)
    else:
        raise ValueError('Please use one of the implemented methods: ConvNet, WeightSharing, WeightSharingWithAuxiliaryLoss')
    
    lrs = []
    alphas = []
    gammas = []
    for lr in lrs:
        if model_name == 'WeightSharingWithAuxiliaryLoss':
            for alpha in alphas:
                for gamma in gammas:
                    train_input, train_target, train_classes, _, _, _ = prologue.generate_pair_sets(number_of_pairs)
                    model.train(True)
                    train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = lr, alpha = alpha, gamma = gamma)
                    model.train(False)
        else:
            train_input, train_target, train_classes, _, _, _ = prologue.generate_pair_sets(number_of_pairs)
            model.train(True)
            train_model(model, train_input, train_target, train_classes, mini_batch_size, lr = lr)
            model.train(False)

    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size, auxiliary_loss=auxiliary_loss)

### CNN

In [243]:
train_error_ratios, test_error_ratios = test_model('ConvNet', print_accuracy=True)

--- Run 0 ---
- train error ratio: 0.007
- test error ratio: 0.155


KeyboardInterrupt: 

### CNN with Weight Sharing

In [244]:
train_error_ratios, test_error_ratios = test_model('WeightSharing', print_accuracy=True)

--- Run 0 ---
- train error ratio: 0.006
- test error ratio: 0.116
--- Run 1 ---
- train error ratio: 0.006
- test error ratio: 0.112
--- Run 2 ---
- train error ratio: 0.011
- test error ratio: 0.132
--- Run 3 ---
- train error ratio: 0.001
- test error ratio: 0.122
--- Run 4 ---
- train error ratio: 0.009
- test error ratio: 0.117
--- Run 5 ---
- train error ratio: 0.002
- test error ratio: 0.108
--- Run 6 ---
- train error ratio: 0.013
- test error ratio: 0.131
--- Run 7 ---
- train error ratio: 0.003
- test error ratio: 0.101
--- Run 8 ---
- train error ratio: 0.013
- test error ratio: 0.144
--- Run 9 ---
- train error ratio: 0.018
- test error ratio: 0.12
----END----
Train error mean ratio: 0.0082
Test errors mean ratio: 0.12029999999999999


### CNN with Weight Sharing and Auxiliary Loss

In [245]:
train_error_ratios, test_error_ratios = test_model('WeightSharingWithAuxiliaryLoss', print_accuracy=True)

--- Run 0 ---
- train error ratio: 0.011
- test error ratio: 0.149
--- Run 1 ---
- train error ratio: 0.008
- test error ratio: 0.129
--- Run 2 ---
- train error ratio: 0.005
- test error ratio: 0.146
--- Run 3 ---
- train error ratio: 0.006
- test error ratio: 0.116
--- Run 4 ---
- train error ratio: 0.012
- test error ratio: 0.144
--- Run 5 ---
- train error ratio: 0.013
- test error ratio: 0.143
--- Run 6 ---
- train error ratio: 0.007
- test error ratio: 0.105
--- Run 7 ---
- train error ratio: 0.021
- test error ratio: 0.131
--- Run 8 ---
- train error ratio: 0.012
- test error ratio: 0.146
--- Run 9 ---
- train error ratio: 0.007
- test error ratio: 0.14
----END----
Train error mean ratio: 0.0102
Test errors mean ratio: 0.13489999999999996
