# Project 1


## Description

## Data import

In [110]:
import dlc_practical_prologue as prologue

# We import 1000 pairs of digits for the training and the testing inputs, targets and classes
number_of_pairs = 1000 
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets( 
    number_of_pairs)

# Quick verification of the sizes:
print('Size of train_input:', train_input.size())
print('Size of train_target:', test_target.size())
print('Size of train_classes:', test_classes.size())
print('Size of test_input:', test_input.size())
print('Size of test_target:', test_target.size())
print('Size of test_classes:', test_classes.size())



Size of train_input: torch.Size([1000, 2, 14, 14])
Size of train_target: torch.Size([1000])
Size of train_classes: torch.Size([1000, 2])
Size of test_input: torch.Size([1000, 2, 14, 14])
Size of test_target: torch.Size([1000])
Size of test_classes: torch.Size([1000, 2])


## CNN

In [111]:
import torch
from torch import nn
from torch.nn import functional as F

In [112]:
class ConvNet(nn.Module):
    def __init__(self, nb_hidden=200, initial_layers=2, final_layers=2):
        super().__init__()
        self.conv1 = nn.Conv2d(initial_layers, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, final_layers)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

## CNN with Weight Sharing

#### We define the same class wether we use auxiliary loss or not

In [113]:
class WeightSharing(nn.Module):
    def __init__(self, auxiliary_loss = False):
        super().__init__()
        self.sharedConvNet = ConvNet(initial_layers=1, final_layers=10)
        self.auxiliary_loss = auxiliary_loss

        self.fc1 = nn.Linear(20,100)
        self.fc2 = nn.Linear(100,20)
        self.fc3 = nn.Linear(20,2)
    
    def forward(self, x):
        image1 = x.narrow(1,0,1)
        image2 = x.narrow(1,1,1)

        output1 = self.sharedConvNet(image1)
        output2 = self.sharedConvNet(image2)
        outputCat = torch.cat((output1, output2), 1)

        outputCat = F.relu(self.fc1(outputCat))
        outputCat = F.relu(self.fc2(outputCat))
        outputCat = self.fc3(outputCat)

        
        if self.auxiliary_loss:
            return outputCat, output1, output2
        else:
            return outputCat

## Model training and number of errors

In [114]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs = 50, printAccLoss = False, auxiliary_loss = False, alpha = 0.3, beta = 0.5):
    criterion = nn.CrossEntropyLoss()
    eta = 1e-1

    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            if auxiliary_loss:
                trainTargetImage1 = (train_classes.narrow(1,0,1)).squeeze()
                trainTargetImage2 = (train_classes.narrow(1,1,1)).squeeze()

                outputCat, output1, output2 = model(train_input.narrow(0, b, mini_batch_size))
                lossCat = criterion(outputCat, train_target.narrow(0, b, mini_batch_size))
                loss1 = criterion(output1, trainTargetImage1.narrow(0, b, mini_batch_size))
                loss2 = criterion(output2, trainTargetImage2.narrow(0, b, mini_batch_size))

                loss = alpha*loss1 + alpha*loss2 + beta*lossCat

            else:
                output = model(train_input.narrow(0, b, mini_batch_size))
                loss = criterion(output, train_target.narrow(0, b, mini_batch_size))

            acc_loss = acc_loss + loss.item()

            model.zero_grad()
            loss.backward()

            with torch.no_grad():
                for p in model.parameters():
                    p -= eta * p.grad

        if printAccLoss:
            print(e, acc_loss)

def compute_nb_errors(model, input, target, mini_batch_size, auxiliary_loss = False):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        if auxiliary_loss:
            outputCat, _, _ = model(input.narrow(0, b, mini_batch_size))
            _, predicted_classes = outputCat.max(1)
            for k in range(mini_batch_size):
                if target[b + k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1
        else:
            output = model(input.narrow(0, b, mini_batch_size))
            _, predicted_classes = output.max(1)
            for k in range(mini_batch_size):
                if target[b + k] != predicted_classes[k]:
                    nb_errors = nb_errors + 1


    return nb_errors

## Models testing

In [115]:
import time
mini_batch_size = 50
number_of_runs = 10

### CNN

In [116]:
for k in range(number_of_runs):
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(number_of_pairs)
    startTime = time.time()
    model = ConvNet()
    midTime = time.time()
    train_model(model, train_input, train_target, mini_batch_size)
    endTime = time.time()
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0), nb_test_errors, test_input.size(0)))
    print('Time for model:', str(midTime-startTime) + ', time for training:', endTime-midTime)

test error Net 44.50% 445/1000
Time for model: 0.0019998550415039062, time for training: 4.278506517410278
test error Net 55.20% 552/1000
Time for model: 0.0020008087158203125, time for training: 4.29086446762085
test error Net 44.70% 447/1000
Time for model: 0.0020017623901367188, time for training: 4.339862823486328
test error Net 44.10% 441/1000
Time for model: 0.001998424530029297, time for training: 4.524110794067383
test error Net 43.10% 431/1000
Time for model: 0.0020003318786621094, time for training: 4.432950496673584
test error Net 56.70% 567/1000
Time for model: 0.0010001659393310547, time for training: 4.378861904144287
test error Net 42.70% 427/1000
Time for model: 0.0020008087158203125, time for training: 4.3442223072052
test error Net 44.90% 449/1000
Time for model: 0.0019998550415039062, time for training: 4.294865369796753
test error Net 45.10% 451/1000
Time for model: 0.0019998550415039062, time for training: 4.459066867828369
test error Net 53.50% 535/1000
Time for m

### CNN with Weight Sharing

In [117]:
for k in range(number_of_runs):
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(number_of_pairs)
    startTime = time.time()
    model = WeightSharing()
    midTime = time.time()
    train_model(model, train_input, train_target, mini_batch_size)
    endTime = time.time()
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0), nb_test_errors, test_input.size(0)))
    print('Time for model:', str(midTime-startTime) + ', time for training:', endTime-midTime)

test error Net 54.90% 549/1000
Time for model: 0.001999378204345703, time for training: 9.065340995788574
test error Net 56.80% 568/1000
Time for model: 0.002000570297241211, time for training: 9.872064590454102
test error Net 42.20% 422/1000
Time for model: 0.002000093460083008, time for training: 9.534501314163208
test error Net 44.40% 444/1000
Time for model: 0.0019989013671875, time for training: 10.395581007003784
test error Net 56.50% 565/1000
Time for model: 0.001999378204345703, time for training: 9.296208143234253
test error Net 47.50% 475/1000
Time for model: 0.0020003318786621094, time for training: 9.911978006362915
test error Net 55.10% 551/1000
Time for model: 0.003000974655151367, time for training: 8.804882287979126
test error Net 56.30% 563/1000
Time for model: 0.002000570297241211, time for training: 9.042134046554565
test error Net 53.10% 531/1000
Time for model: 0.002000093460083008, time for training: 9.699244260787964
test error Net 46.70% 467/1000
Time for model:

### CNN with Weight Sharing and Auxiliary Loss

In [118]:
for k in range(number_of_runs):
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(number_of_pairs)
    startTime = time.time()
    model = WeightSharing(auxiliary_loss=True)
    midTime = time.time()
    train_model(model, train_input, train_target, mini_batch_size, auxiliary_loss=True)
    endTime = time.time()
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size, auxiliary_loss=True)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0), nb_test_errors, test_input.size(0)))
    print('Time for model:', str(midTime-startTime) + ', time for training:', endTime-midTime)

test error Net 52.90% 529/1000
Time for model: 0.0020008087158203125, time for training: 8.667993307113647
test error Net 53.70% 537/1000
Time for model: 0.00299835205078125, time for training: 10.951169967651367
test error Net 54.70% 547/1000
Time for model: 0.002000093460083008, time for training: 11.293402194976807
test error Net 57.00% 570/1000
Time for model: 0.0030007362365722656, time for training: 10.639906883239746
test error Net 54.70% 547/1000
Time for model: 0.003000020980834961, time for training: 9.984194040298462
test error Net 60.50% 605/1000
Time for model: 0.003000974655151367, time for training: 10.346497058868408
test error Net 53.10% 531/1000
Time for model: 0.0009999275207519531, time for training: 9.057891845703125
test error Net 54.20% 542/1000
Time for model: 0.0020012855529785156, time for training: 9.096790313720703
test error Net 55.90% 559/1000
Time for model: 0.003000020980834961, time for training: 9.236965417861938
test error Net 54.10% 541/1000
Time for