## Imports

In [37]:
import torch
from torch import nn
from torch.nn import functional as F
import time
from math import sqrt

import matplotlib.pyplot as plt

import dlc_practical_prologue

## Useful functions

In [4]:
def mean(x):
    """Return the arithmetic mean of the values in the sized iterable ``x``."""
    total = sum(x)
    count = len(x)
    return total / count

def var(x):
    """Return the population variance of ``x`` (sum of squared deviations divided by ``len(x)``)."""
    center = mean(x)
    squared_deviations = [(value - center) ** 2 for value in x]
    return sum(squared_deviations) / len(x)

In [5]:
def number_parameters(model):
    """Return the number of scalar parameters of ``model``.

    Every tensor yielded by ``model.parameters()`` contributes its element count.
    """
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total

## Load data

In [2]:
# Number of image pairs requested from the generator (used for both the train and the test set).
N = 1000

(x_train, y_train, y_train_classes,
 x_test, y_test, y_test_classes) = dlc_practical_prologue.generate_pair_sets(N)

# Shape checks on the training tensors: a pair of 14x14 images, one target and two digit classes per sample.
assert tuple(x_train.shape) == (N, 2, 14, 14)    # float32
assert tuple(y_train.shape) == (N,)              # int64
assert tuple(y_train_classes.shape) == (N, 2)    # int64

# The test tensors must have the same layout.
assert tuple(x_test.shape) == (N, 2, 14, 14)
assert tuple(y_test.shape) == (N,)
assert tuple(y_test_classes.shape) == (N, 2)

In [3]:
# if torch.cuda.is_available():  
#     dev = "cuda:0"
# else:  
#     dev = "cpu"  
# device = torch.device(dev)
# x_train = x_train.to(device)
# y_train = y_train.to(device)
# y_train_classes = y_train_classes.to(device)
# x_test = x_test.to(device)
# y_test = y_test.to(device)
# y_test_classes = y_test_classes.to(device)

## Normalize input

In [46]:
# The statistics are taken from the training pixels only and reused for the test set.
tmp_list = torch.flatten(x_train).tolist()

# mean() and var() are pure-Python loops over every pixel, so compute each
# statistic once instead of once per normalised tensor.
train_mean = mean(tmp_list)
train_std = sqrt(var(tmp_list))

x_train = x_train.sub(train_mean).div(train_std)
x_test = x_test.sub(train_mean).div(train_std)

## Define models

In [6]:
class model_0(nn.Module):
    """Multi-layer perceptron made only of fully connected layers.

    The input is flattened per sample, passed through ReLU-activated hidden
    layers and a final linear layer followed by a sigmoid. The output is a
    1-D tensor with one value per sample.

    Author's note: works well with lr=0.001.

    Args:
        ns: layer widths, input size first and output size last; each pair of
            consecutive entries defines one ``nn.Linear``.
    """

    def __init__(self, ns = [392,138,55,1]):
        super().__init__()
        linear_layers = []
        for idx in range(len(ns) - 1):
            linear_layers.append(nn.Linear(ns[idx], ns[idx + 1]))
        self.layers = nn.ModuleList(linear_layers)

    def forward(self, x):
        # Merge everything but the batch dimension into one feature vector.
        hidden = x.flatten(1)
        for idx in range(len(self.layers) - 1):
            hidden = torch.relu(self.layers[idx](hidden))

        # Last layer: no ReLU, sigmoid instead; flatten to one value per sample.
        return torch.sigmoid(self.layers[-1](hidden)).flatten()
# Report the parameter count of model_0 built with its default layer widths.
print(number_parameters(model_0()))

61935


In [7]:
class model_1(nn.Module):
    """Small convolutional baseline working on both images at once (2 input channels).

    Three leaky-ReLU 3x3 convolutions, one 2x2 max-pool and a two-layer dense
    head ending in a sigmoid. The output has shape (batch, 1).

    Author's note: works well with lr=0.0001.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        # For 14x14 inputs: 16 -> 14 -> 12 after the convolutions, 6x6 after pooling (64 * 6 * 6 = 2304).
        self.dense1 = nn.Linear(2304, 10)
        self.dense2 = nn.Linear(10, 1)

    def forward(self, x):
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.leaky_relu(conv(x))

        features = F.max_pool2d(x, kernel_size=2).flatten(1)
        hidden = F.leaky_relu(self.dense1(features))
        return torch.sigmoid(self.dense2(hidden))

# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_1()
model_test(x_train[0:2])
print(number_parameters(model_test))

46501


In [8]:
class model_2(nn.Module):
    """Deeper convolutional baseline working on both images at once (2 input channels).

    Five leaky-ReLU 3x3 convolutions, one 2x2 max-pool and a two-layer dense
    head ending in a sigmoid. The output has shape (batch, 1).

    Author's note: works well with lr=0.0001.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3)
        self.conv5 = nn.Conv2d(64, 64, kernel_size=3)

        # For 14x14 inputs: 16 -> 14 -> 12 -> 10 -> 8 after the convolutions, 4x4 after pooling (64 * 4 * 4 = 1024).
        self.dense1 = nn.Linear(1024, 10)
        self.dense2 = nn.Linear(10, 1)

    def forward(self, x):
        conv_stack = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
        for conv in conv_stack:
            x = F.leaky_relu(conv(x))

        features = F.max_pool2d(x, kernel_size=2).flatten(1)
        hidden = F.leaky_relu(self.dense1(features))
        return torch.sigmoid(self.dense2(hidden))

# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_2()
model_test(x_train[0:2])
print(number_parameters(model_test))

107557


In [9]:
class model_weight_sharing(nn.Module):
    """CNN that sends each image of the pair through the same convolutional stack.

    The two per-image feature vectors are concatenated (first image first) and
    fed to a two-layer dense head ending in a sigmoid. The output is a 1-D
    tensor with one value per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3, padding = 2)
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)
        self.conv3 = nn.Conv2d(40, 60, kernel_size=3)

        # For 14x14 images each branch yields 60 * 2 * 2 = 240 features; two branches give 480.
        self.dense1 = nn.Linear(480, 40)
        self.dense2 = nn.Linear(40, 1)

    def forward(self, x):
        branch_features = []
        for channel in (0, 1):
            # Slice (rather than index) so the channel dimension is kept for Conv2d.
            digit = x[:, channel:channel + 1, ...]
            digit = F.leaky_relu(self.conv1(digit))
            digit = F.max_pool2d(self.conv2(digit), kernel_size=2)
            digit = F.max_pool2d(self.conv3(digit), kernel_size=2)
            branch_features.append(digit.flatten(1))

        merged = torch.cat(branch_features, 1)
        hidden = F.leaky_relu(self.dense1(merged))
        return torch.sigmoid(self.dense2(hidden)).flatten()
    
# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_weight_sharing()
model_test(x_train[0:2])
print(number_parameters(model_test))

48381


In [10]:
class model_1_ws(nn.Module):
    """CNN that splits the input into its two images and processes them identically and independently.

    Both images go through the same three leaky-ReLU convolutions and a 2x2
    max-pool; the two feature vectors are concatenated (first image first) and
    passed to a dense head ending in a sigmoid. The output has shape (batch, 1).

    Author's note: works well with lr=0.0001.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        # For 14x14 images each branch yields 64 * 6 * 6 = 2304 features; two branches give 4608.
        self.dense1 = nn.Linear(4608, 10)
        self.dense2 = nn.Linear(10, 1)

    def forward(self, x):
        shared_convs = (self.conv1, self.conv2, self.conv3)

        branch_features = []
        for channel in (0, 1):
            # Slice (rather than index) so the channel dimension is kept for Conv2d.
            image = x[:, channel:channel + 1, ...]
            for conv in shared_convs:
                image = F.leaky_relu(conv(image))
            image = F.max_pool2d(image, kernel_size=2)
            branch_features.append(image.flatten(1))

        merged = torch.cat(branch_features, 1)
        hidden = F.leaky_relu(self.dense1(merged))
        return torch.sigmoid(self.dense2(hidden))
    
# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_1_ws()
model_test(x_train[0:2])
print(number_parameters(model_test))

69397


In [11]:
class model_weight_sharing_with_auxiliary_loss(nn.Module):
    """Weight-sharing CNN that also predicts the digit shown in each image.

    The input is split into its two images, which are processed identically
    and independently by the same convolutional stack. The per-image features
    feed (a) a shared digit classifier, whose outputs are returned for the
    auxiliary losses, and (b) after concatenation, the final comparison head.

    Author's note: works well with lr=0.0001.

    Returns (from ``forward``):
        x:  1-D tensor of sigmoid outputs, one per sample.
        d1: (batch, 10) raw class scores for the first image.
        d2: (batch, 10) raw class scores for the second image.

    The digit outputs are unnormalised scores because ``nn.CrossEntropyLoss``
    applies log-softmax itself; feeding it softmax outputs normalises twice
    and flattens the gradients. Apply ``torch.softmax(d, -1)`` to obtain
    probabilities.
    """

    def __init__(self):
        super(model_weight_sharing_with_auxiliary_loss, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3)
        self.conv2 = nn.Conv2d(20, 40, kernel_size=3)
        self.conv3 = nn.Conv2d(40, 40, kernel_size=3)

        # Treatment of the auxiliary outputs (classifier shared by both digits).
        # For 14x14 images each branch ends with 40 * 1 * 1 = 40 features.
        self.dense1_digit = nn.Linear(40, 32)
        self.dense2_digit = nn.Linear(32, 10)

        # Treatment of the final output (both feature vectors concatenated: 80).
        self.dense1 = nn.Linear(80, 30)
        self.dense2 = nn.Linear(30, 1)

    def forward(self, x):
        features = []
        for channel in (0, 1):
            # Slice (rather than index) so the channel dimension is kept for Conv2d.
            d = F.leaky_relu(self.conv1(x[:, channel:channel + 1, ...]))
            d = F.max_pool2d(self.conv2(d), kernel_size=2)
            d = F.max_pool2d(self.conv3(d), kernel_size=2)
            features.append(torch.flatten(d, 1))

        # Auxiliary heads: raw class scores, no softmax (see class docstring).
        d1 = self.dense2_digit(F.leaky_relu(self.dense1_digit(features[0])))
        d2 = self.dense2_digit(F.leaky_relu(self.dense1_digit(features[1])))

        x = torch.cat(features, 1)
        x = F.leaky_relu(self.dense1(x))
        x = torch.flatten(torch.sigmoid(self.dense2(x)))

        return x, d1, d2
    
# Report the parameter count of a freshly built model.
print(number_parameters(model_weight_sharing_with_auxiliary_loss()))

25983


In [12]:
# Author's notes: works well with lr=0.0001.
# Makes sense for deep networks !!! (to avoid vanishing gradient)
# TODO: with or without weight sharing for the classifier?
class model_auxiliary_loss(nn.Module):
    """CNN on the 2-channel pair with an extra prediction taken halfway through the network.

    ``forward`` returns ``(x, x_)``: the final sigmoid prediction and an
    auxiliary sigmoid prediction computed from the features available after
    the second convolution. Both are 1-D tensors with one value per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 20, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(20, 40, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(40, 20, kernel_size=5, padding=2)
        self.conv4 = nn.Conv2d(20, 40, kernel_size=5, padding=2)

        # Auxiliary head; for 14x14 inputs the first pooling leaves 40 * 7 * 7 = 1960 features.
        self.dense1_ = nn.Linear(1960, 10)
        self.dense2_ = nn.Linear(10, 1)

        # Final head; the second pooling leaves 40 * 3 * 3 = 360 features.
        self.dense1 = nn.Linear(360, 10)
        self.dense2 = nn.Linear(10, 1)

    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2)

        # Auxiliary output, computed from the intermediate feature map.
        aux_hidden = F.leaky_relu(self.dense1_(x.flatten(1)))
        x_ = torch.sigmoid(self.dense2_(aux_hidden)).flatten()

        x = F.leaky_relu(self.conv3(x))
        x = F.max_pool2d(self.conv4(x), kernel_size=2)

        hidden = F.leaky_relu(self.dense1(x.flatten(1)))
        return torch.sigmoid(self.dense2(hidden)).flatten(), x_

# Report the parameter count of a freshly built model.
print(number_parameters(model_auxiliary_loss()))

84362


In [13]:
class model_1_aux(nn.Module):
    """CNN on the 2-channel pair with two extra digit-classification heads.

    The three heads (final comparison, first digit, second digit) all read the
    same flattened convolutional features.

    Returns (from ``forward``):
        x:  (batch, 1) sigmoid output of the comparison head.
        d1: (batch, 10) raw class scores for the first digit.
        d2: (batch, 10) raw class scores for the second digit.

    The digit outputs are unnormalised scores because ``nn.CrossEntropyLoss``
    applies log-softmax itself; feeding it softmax outputs normalises twice
    and flattens the gradients. Apply ``torch.softmax(d, -1)`` to obtain
    probabilities.
    """

    def __init__(self):
        super(model_1_aux, self).__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        # For 14x14 inputs the pooled feature map holds 64 * 6 * 6 = 2304 values.
        self.dense_final = nn.Linear(2304, 1)
        self.dense_d1 = nn.Linear(2304, 10)
        self.dense_d2 = nn.Linear(2304, 10)

    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = F.leaky_relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size=2)

        x = torch.flatten(x, 1)

        # Auxiliary heads: raw class scores, no softmax (see class docstring).
        d1 = self.dense_d1(x)
        d2 = self.dense_d2(x)
        x = torch.sigmoid(self.dense_final(x))

        return x, d1, d2

# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_1_aux()
model_test(x_train[0:2])
print(number_parameters(model_test))

71845


In [23]:
class model_best(nn.Module):
    """Weight-sharing digit classifier whose two score vectors drive the final comparison.

    Each image of the pair goes through the same convolutions and dense layers,
    producing 10 class scores. The two score vectors are concatenated, passed
    through a leaky ReLU and a single linear layer with a sigmoid.

    Returns (from ``forward``):
        x:  (batch, 1) sigmoid output of the comparison head.
        d1: (batch, 10) raw class scores for the first image.
        d2: (batch, 10) raw class scores for the second image.

    The digit outputs are unnormalised scores because ``nn.CrossEntropyLoss``
    applies log-softmax itself; feeding it softmax outputs normalises twice
    and flattens the gradients. Apply ``torch.softmax(d, -1)`` to obtain
    probabilities.
    """

    def __init__(self):
        super(model_best, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        # No pooling: for 14x14 images the feature map is 64 * 12 * 12 = 9216 values.
        self.dense1 = nn.Linear(9216, 64)
        self.dense2 = nn.Linear(64, 10)

        # Comparison head over the two concatenated 10-class score vectors.
        self.dense3 = nn.Linear(20, 1)

    def forward(self, x):
        digit_scores = []
        for channel in (0, 1):
            # Slice (rather than index) so the channel dimension is kept for Conv2d.
            d = x[:, channel:channel + 1, ...]
            for conv in (self.conv1, self.conv2, self.conv3):
                d = F.leaky_relu(conv(d))
            d = F.leaky_relu(self.dense1(torch.flatten(d, 1)))
            digit_scores.append(self.dense2(d))

        d1, d2 = digit_scores

        x = F.leaky_relu(torch.cat((d1, d2), 1))
        x = torch.sigmoid(self.dense3(x))

        # d1 / d2 are returned as raw scores, no softmax (see class docstring).
        return x, d1, d2

# Smoke test: run a forward pass on the first two training samples (a size
# mismatch between layers would raise here), then print the parameter count.
model_test = model_best()
model_test(x_train[0:2])
print(number_parameters(model_test))

613855


## Training/Evaluation function

In [48]:
def train_model(model, train_input, train_target, digit_target,
                 epochs, optimizer,
                 loss_function = nn.BCELoss(), digit_loss = nn.CrossEntropyLoss(),
                 batch_size=10,
                 auxiliary = False):
    """Train ``model`` in place with mini-batches and return the summed loss of every epoch.

    Args:
        model: network to train. Without ``auxiliary`` it returns one tensor;
            with ``auxiliary`` it returns ``(output, digit1, digit2)`` or
            ``(output, aux_output)``.
        train_input: input tensor, split along dim 0 into batches.
        train_target: binary targets aligned with ``train_input``.
        digit_target: per-sample digit classes; column 0 is compared with
            ``digit1`` and column 1 with ``digit2``.
        epochs: number of passes over the data.
        optimizer: optimizer already bound to ``model.parameters()``.
        loss_function: loss for the main output (and for a single auxiliary
            output), applied to flattened predictions and float targets.
        digit_loss: loss applied directly to each digit output.
        batch_size: number of samples per batch.
        auxiliary: whether the model returns auxiliary outputs to be trained.

    Returns:
        List with one entry per epoch: the sum of the batch losses
        (auxiliary terms included).

    Raises:
        ValueError: if ``auxiliary`` is set and the model does not return a
            2- or 3-element tuple/list.
    """
    aux_weight = 0.4   # weight of the auxiliary losses in the total loss

    # Inspired by the exercise solution
    model.train()
    losses = []

    for e in range(epochs):
        sum_loss = 0
        sum_aux_losses = []   # running sum of each auxiliary loss over the epoch

        # Batch the *arguments*: the three tensors must be split together so
        # that inputs, targets and digit classes stay aligned (e.g. after the
        # caller shuffled them with a common permutation).
        batches = zip(train_input.split(batch_size),
                      train_target.split(batch_size),
                      digit_target.split(batch_size))

        for x_batch, y_batch, y_digit_batch in batches:
            target = y_batch.float()

            if auxiliary:
                outputs = model(x_batch)
                if not isinstance(outputs, (tuple, list)) or len(outputs) not in (2, 3):
                    raise ValueError("auxiliary training expects the model to return "
                                     "(output, aux_output) or (output, digit1, digit2)")
                output, *aux_outputs = outputs

                loss_final = loss_function(torch.flatten(output), target)

                if len(aux_outputs) == 2:
                    # One digit classification loss per image of the pair.
                    aux_losses = [digit_loss(scores, y_digit_batch[..., k])
                                  for k, scores in enumerate(aux_outputs)]
                else:
                    # Single auxiliary prediction of the same binary target.
                    aux_losses = [loss_function(torch.flatten(aux_outputs[0]), target)]

                loss_total = loss_final + aux_weight * sum(aux_losses)

                if not sum_aux_losses:
                    sum_aux_losses = [0.0] * len(aux_losses)
                sum_aux_losses = [running + batch_loss.item()
                                  for running, batch_loss in zip(sum_aux_losses, aux_losses)]

            else:
                output = torch.flatten(model(x_batch))
                loss_total = loss_function(output, target)

            optimizer.zero_grad()
            loss_total.backward()
            optimizer.step()

            sum_loss += loss_total.item()

        losses.append(sum_loss)

        # Progress line; the carriage return makes each epoch overwrite the previous one.
        if len(sum_aux_losses) == 2:
            print("Epoch %i: loss %.3f --- loss_d1 %.2f --- loss_d2 %.2f" \
                  % (e, sum_loss, sum_aux_losses[0], sum_aux_losses[1]), end="\r")
        elif len(sum_aux_losses) == 1:
            print("Epoch %i: loss %.3f --- loss_aux %.2f" \
                  % (e, sum_loss, sum_aux_losses[0]), end="\r")
        else:
            print("Epoch %i : loss %.3f" % (e, sum_loss), end = "\r")
    return losses

In [49]:
def evaluate_model(model, test_input, test_target, auxiliary = False):
    """Return the classification accuracy (between 0 and 1) of ``model`` on a test set.

    The model is switched to evaluation mode (and left in it), run once on the
    whole ``test_input`` without gradient tracking, and its outputs are
    thresholded: a value strictly above 0.5 predicts class 1, anything else
    class 0.

    Args:
        model: network to evaluate.
        test_input: input tensor passed to the model in a single call.
        test_target: binary targets, one per sample.
        auxiliary: set when the model returns a tuple; only its first element
            (the main prediction) is scored.

    Returns:
        Fraction of samples whose thresholded prediction equals the target.
    """
    model.eval()

    with torch.no_grad():
        out = model(test_input)
        if auxiliary:
            out = out[0]

        # reshape (not view) so non-contiguous outputs are accepted too.
        preds_proba = out.reshape(-1)

        # A single threshold gives every sample a class: an output of exactly
        # 0.5 used to stay at 0.5 and could never match a 0/1 target.
        preds = (preds_proba > 0.5).float()

        # Flatten the targets as well so a column-shaped target cannot broadcast.
        targets = test_target.reshape(-1).float()
        accuracy = (preds == targets).sum().item()/preds.size(0)
    return accuracy

## Experiment parametrization

In [56]:
def experiment_model(model_name, build_model,
                     number_training = 10, epochs = 25,
                     lr = 0.001, batch_size = 50):
    """Train and evaluate several fresh instances of a model and report the statistics.

    For every attempt a new model is built, trained with Adam on a random
    permutation of the global training set (``x_train``, ``y_train``,
    ``y_train_classes``) and scored on the global test set (``x_test``,
    ``y_test``).

    Args:
        model_name: label used in the printed report.
        build_model: zero-argument callable returning a new model.
        number_training: number of independent attempts.
        epochs: epochs per attempt.
        lr: learning rate handed to ``torch.optim.Adam``.
        batch_size: mini-batch size used for training.

    Returns:
        List of test accuracies in percent, one per attempt.

    Raises:
        ValueError: if ``number_training`` is smaller than 1 (no statistics
            could be computed).
    """
    if number_training < 1:
        raise ValueError("number_training must be at least 1")

    accuracies = []
    times = []

    # A throw-away instance tells whether the architecture returns auxiliary
    # outputs (a tuple) and provides the parameter count for the report.
    dummy_model = build_model()
    with torch.no_grad():
        auxiliary = isinstance(dummy_model(x_train[0:1]), tuple)

    ################################ TRAINING  ################################################

    print("Starting %i training of %i epochs, with model '%s' containing %i parameters. \n" %
          (number_training, epochs, model_name, number_parameters(dummy_model)))

    del dummy_model

    # Derived from the data itself rather than from a separate global constant.
    n_samples = x_train.size(0)

    for i_train in range(number_training):
        model = build_model()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # One permutation shared by inputs, targets and digit classes keeps them aligned.
        indices_shuffle = torch.randperm(n_samples)

        # Only the training call is timed, matching the "training time" label below.
        start = time.perf_counter()
        train_model(model, x_train[indices_shuffle], y_train[indices_shuffle], y_train_classes[indices_shuffle],
                    epochs = epochs, optimizer=optimizer, batch_size = batch_size, auxiliary = auxiliary)
        times.append(time.perf_counter() - start)

        accuracy = evaluate_model(model, x_test, y_test, auxiliary = auxiliary)

        accuracy*=100
        print("Attempt", i_train + 1, "- accuracy %.2f%%"%accuracy)
        accuracies.append(accuracy)

    # The value in parentheses is the variance over the attempts.
    print("\nExperiment results :")
    print("Accuracy mean : %.2f%% (%.1f)" % (mean(accuracies), var(accuracies)))
    print("Average training time : %.1f seconds (%.1f)\n\n" % (mean(times), var(times)))

    return accuracies

## Run experiments

In [51]:
# Accumulates one (accuracies, (model_name, model_builder)) entry per experiment.
# It has to exist before the run cell appends to it, otherwise a fresh kernel
# fails with a NameError. Re-running this cell discards the collected results.
results = []
model_list = [('Simple Net', model_0),
             ('Auxiliary Loss', model_auxiliary_loss),
             ('Weight Sharing', model_weight_sharing),
             ('Auxiliary Loss + Weight Sharing', model_weight_sharing_with_auxiliary_loss)]

In [57]:
# NOTE(review): this replaces any previously defined model_list, and the label
# 'Model best' is attached to model_1 (not model_best) -- confirm which of the
# two was intended.
model_list = [('Model best', model_1)]

# Each entry is unpacked into experiment_model(model_name, build_model); the
# returned accuracies are stored next to the entry that produced them.
# `results` must already be a list when this cell runs.
for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

Starting 10 training of 25 epochs, with model 'Model best' containing 46501 parameters. 

Attempt 1 - accuracy 81.50%
Attempt 2 - accuracy 81.10%
Attempt 3 - accuracy 82.30%
Attempt4 : loss 0.990 4 - accuracy 82.00%
Attempt 5 - accuracy 83.00%
Attempt4 : loss 1.300 6 - accuracy 79.40%
Attempt 7 - accuracy 82.40%
Attempt 8 - accuracy 83.10%
Attempt 9 - accuracy 73.90%
Attempt 10 - accuracy 83.20%

Experiment results :
Accuracy mean : 81.19% (7.1)
Average training time : 13.3 seconds (5.0)




## Results comparison between models

In [None]:
# One curve per experiment: the test accuracy (in percent) of each attempt.
# The index suffix keeps labels unique when a model was run more than once.
for i, (result, model_params) in enumerate(results):
    plt.plot(result, label = model_params[0] + "_" + str(i))

# Build the legend once, after every curve exists (and only if there is one).
if results:
    plt.legend()

plt.xlabel("Attempt")
plt.ylabel("Test accuracy (%)")
plt.title("Test accuracy per attempt")
plt.show()