In [2]:
import torch
import math
import dlc_practical_prologue as prologue
from torch import optim
from torch import Tensor
from torch import nn
from torch.nn import functional as F

In [3]:
# Generate the train and test sets.
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [4]:
# This model performs each digit classification with 2 different CNNs (so no weight sharing)        
class No_Weight_Sharing_Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Layers that handle digit classification with first CNN
        self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1_1 = nn.Linear(256, 200)
        self.fc2_1 = nn.Linear(200, 10)
        
        # Layers that handle digit classification with second CNN
        self.conv1_2 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1_2 = nn.Linear(256, 200)
        self.fc2_2 = nn.Linear(200, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 300)
        self.fc4 = nn.Linear(300, 300)
        self.fc5 = nn.Linear(300, 2)
        
    def cnn1(self, x):
        x = F.relu(F.max_pool2d(self.conv1_1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2_1(x), kernel_size=2))
        x = F.relu(self.fc1_1(x.view(-1, 256)))
        x = self.fc2_1(x)
        return x
    
    def cnn2(self, x):
        x = F.relu(F.max_pool2d(self.conv1_2(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2_2(x), kernel_size=2))
        x = F.relu(self.fc1_2(x.view(-1, 256)))
        x = self.fc2_2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])
        
        output_1 = self.cnn1(input_1)
        output_2 = self.cnn2(input_2)
        
        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return comparison   
    
# Model Definition 

        
class Simple_Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Layers that handle digit classification 
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 300)
        self.fc4 = nn.Linear(300, 300)
        self.fc5 = nn.Linear(300, 2)
        
    def cnn(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])
        
        output_1 = self.cnn(input_1)
        output_2 = self.cnn(input_2)
        
        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return comparison

class Auxiliary_Loss_Net_Optimized(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Layers that handle digit classification 
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4)
        self.fc1 = nn.Linear(64, 100)
        self.fc2 = nn.Linear(100, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 100)
        self.fc4 = nn.Linear(100, 100)
        self.fc5 = nn.Linear(100, 2)
        
    def cnn(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.view(-1, 64)))
        x = self.fc2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
                
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])

        
        output_1 = self.cnn(input_1)
        output_2 = self.cnn(input_2)
        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return output_1, output_2, comparison
    
class Auxiliary_Loss_Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Layers that handle digit classification 
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 300)
        self.fc4 = nn.Linear(300, 300)
        self.fc5 = nn.Linear(300, 2)
        
    def cnn(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
        
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])

        output_1 = self.cnn(input_1)
        output_2 = self.cnn(input_2)

        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return output_1, output_2, comparison
    
class Auxiliary_Loss_Dropout_Net(nn.Module):
    def __init__(self, dropout_rate):
        super().__init__()
        
        # Layers that handle digit classification 
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 300)
        self.fc4 = nn.Linear(300, 300)
        self.fc5 = nn.Linear(300, 2)
        self.dropout = nn.Dropout(dropout_rate)
        
    def cnn(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = self.dropout(x)
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = self.dropout(x)
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = F.relu(self.fc4(x))
        x = self.dropout(x)
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
        
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])
        
        output_1 = self.cnn(input_1)
        output_2 = self.cnn(input_2)
        
        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return output_1, output_2, comparison

In [128]:
def train_model_simple_net(model, train_input, train_target, mini_batch_size, nb_epochs = 100, use_optimizer= None, _print=False):
    """Uses mseloss"""
    criterion = nn.MSELoss()
    eta = 1e-3
    if use_optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=eta)
    if use_optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=eta)
    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            target = train_target.narrow(0, b, mini_batch_size).reshape(output.shape).float()
            
            loss = criterion(output, target)
            acc_loss = acc_loss + loss.item()
 
            model.zero_grad()
            loss.backward()
            
            if use_optimizer != None :
                optimizer.step()
            else :
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
        if _print:
            print(e, acc_loss)
            
def train_model_simple_net_2(model, train_input, train_target, mini_batch_size, nb_epochs = 100, use_optimizer= None, _print=False):
    """Uses cross entropy loss"""
    criterion = nn.CrossEntropyLoss()
    eta = 1e-3
    if use_optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=eta)
    if use_optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=eta)
    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            target = train_target.narrow(0, b, mini_batch_size).long()
            loss = criterion(output, target)
            acc_loss = acc_loss + loss.item()
 
            model.zero_grad()
            loss.backward()
            
            if use_optimizer != None :
                optimizer.step()
            else :
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
        if _print:
            print(e, acc_loss)
            
def train_model_auxiliary_loss(model, train_input, train_target, train_classes, mini_batch_size, nb_epochs = 100, use_optimizer= None, _print=False):
    criterion_auxilary = nn.CrossEntropyLoss()
    criterion_final = nn.MSELoss()
    
    eta = 1e-3
    if use_optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=eta)
    if use_optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=eta)
    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            digit_1, digit_2, comparison = model(train_input.narrow(0, b, mini_batch_size))
            
            target_comparison = train_target.narrow(0, b, mini_batch_size).reshape(comparison.shape).float()
            
            target_digit_1, target_digit_2 = train_classes.narrow(0, b, mini_batch_size)[:,0], train_classes.narrow(0, b, mini_batch_size)[:,1]
            loss1 = criterion_auxilary(digit_1, target_digit_1)
            loss2 = criterion_auxilary(digit_2, target_digit_2)
            loss3 = criterion_final(comparison, target_comparison)
            acc_loss = acc_loss + loss1.item() + loss2.item() + loss3.item()
 
            model.zero_grad()
            loss1.backward(retain_graph=True)
            loss2.backward(retain_graph=True)
            loss3.backward()
            
            if use_optimizer != None :
                optimizer.step()
            else :
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
        if _print :
            print(e, acc_loss)
        
def train_model_auxiliary_loss_2(model, train_input, train_target, train_classes, mini_batch_size, nb_epochs = 100, use_optimizer= None, _print=False):
    criterion_auxilary = nn.CrossEntropyLoss()
    criterion_final = nn.CrossEntropyLoss()
    
    eta = 1e-3
    if use_optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=eta)
    if use_optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=eta)
    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            digit_1, digit_2, comparison = model(train_input.narrow(0, b, mini_batch_size))
            
            target_comparison = train_target.narrow(0, b, mini_batch_size).long()
            
            target_digit_1, target_digit_2 = train_classes.narrow(0, b, mini_batch_size)[:,0], train_classes.narrow(0, b, mini_batch_size)[:,1]
            loss1 = criterion_auxilary(digit_1, target_digit_1)
            loss2 = criterion_auxilary(digit_2, target_digit_2)
            loss3 = criterion_final(comparison, target_comparison)
            acc_loss = acc_loss + loss1.item() + loss2.item() + loss3.item()
 
            model.zero_grad()
            loss1.backward(retain_graph=True)
            loss2.backward(retain_graph=True)
            loss3.backward()
            
            if use_optimizer != None :
                optimizer.step()
            else :
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
        if _print :
            print(e, acc_loss)


def train_model_auxiliary_loss_optimized(model, train_input, train_target, train_classes, mini_batch_size, nb_epochs = 100, use_optimizer= None, _print=False):
    criterion_auxilary = nn.CrossEntropyLoss()
    criterion_final = nn.MSELoss()
    
    eta = 1e-3
    if use_optimizer == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=eta)
    if use_optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=eta)
    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, train_input.size(0), mini_batch_size):
            digit_1, digit_2, comparison = model(train_input.narrow(0, b, mini_batch_size))

            target_comparison = train_target.narrow(0, b, mini_batch_size).reshape(comparison.shape).float()
            
            target_digit_1, target_digit_2 = train_classes.narrow(0, b, mini_batch_size)[:,0], train_classes.narrow(0, b, mini_batch_size)[:,1]
            loss1 = criterion_auxilary(digit_1, target_digit_1)
            loss2 = criterion_auxilary(digit_2, target_digit_2)
            loss3 = criterion_final(comparison, target_comparison)
            acc_loss = acc_loss + loss1.item() + loss2.item() + loss3.item()
 
            model.zero_grad()
            loss1.backward(retain_graph=True)
            loss2.backward(retain_graph=True)
            loss3.backward()
            
            if use_optimizer != None :
                optimizer.step()
            else :
                with torch.no_grad():
                    for p in model.parameters():
                        p -= eta * p.grad
        if _print :
            print(e, acc_loss)
        

In [9]:
def compute_nb_errors_simple_net(model, input, target, mini_batch_size):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if target[b + k, predicted_classes[k]] <= 0:
                nb_errors = nb_errors + 1

    return nb_errors

def compute_nb_errors_auxilary_loss(model, input, target, mini_batch_size):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        _, _, output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if target[b + k, predicted_classes[k]] <= 0:
                nb_errors = nb_errors + 1

    return nb_errors

In [18]:
model_total = Simple_Net()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_simple_net_2(model_total, train_input, train_target, mini_batch_size=250, nb_epochs=25, use_optimizer="adam")


test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
nb_test_errors = compute_nb_errors_simple_net(model_total, test_input, test_target_total, mini_batch_size=250)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

print("_________________________")

for i in range(10):
    input_to_test = test_input[i]
    first_label = test_classes[i][0]
    second_label = test_classes[i][1] 
    s = input_to_test.shape
    output = model_total(input_to_test.reshape([1,s[0], s[1], s[2]]))
    _, predicted_classes = output.max(1)
    print(f"Predicted : {first_label} {'>' if predicted_classes.item() == 0 else '<'} {second_label}")

test error Net 16.20% 162/1000
_________________________
Predicted : 0 < 7
Predicted : 4 > 1
Predicted : 8 < 7
Predicted : 2 < 9
Predicted : 5 < 4
Predicted : 0 < 0
Predicted : 7 < 6
Predicted : 1 < 9
Predicted : 6 < 8
Predicted : 1 < 7


In [40]:
model_total = Simple_Net()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_simple_net_2(model_total, train_input, train_target, mini_batch_size=250, nb_epochs=25, use_optimizer="adam")


test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
nb_test_errors = compute_nb_errors_simple_net(model_total, test_input, test_target_total, mini_batch_size=250)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

print("_________________________")

for i in range(10):
    input_to_test = test_input[i]
    first_label = test_classes[i][0]
    second_label = test_classes[i][1] 
    s = input_to_test.shape
    output = model_total(input_to_test.reshape([1,s[0], s[1], s[2]]))
    _, predicted_classes = output.max(1)
    print(f"Predicted : {first_label} {'>' if predicted_classes.item() == 0 else '<'} {second_label}")

test error Net 14.40% 144/1000
_________________________
Predicted : 4 > 0
Predicted : 2 > 2
Predicted : 7 > 0
Predicted : 7 > 2
Predicted : 1 < 7
Predicted : 9 > 0
Predicted : 6 > 0
Predicted : 8 > 1
Predicted : 7 > 2
Predicted : 6 < 7


In [94]:
model_auxiliary = Auxiliary_Loss_Net()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_auxiliary_loss(model_auxiliary, train_input, train_target_one_hot, train_classes, mini_batch_size=250, nb_epochs=25, use_optimizer="adam")

test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
nb_test_errors = compute_nb_errors_auxilary_loss(model_auxiliary, test_input, test_target_total, mini_batch_size=250)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

test error Net 11.10% 111/1000


In [114]:
model_auxiliary = Auxiliary_Loss_Net()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_auxiliary_loss_optimized(model_auxiliary, train_input, train_target_one_hot, train_classes, mini_batch_size=250, nb_epochs=25, use_optimizer="adam")

test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
nb_test_errors = compute_nb_errors_auxilary_loss(model_auxiliary, test_input, test_target_total, mini_batch_size=250)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

b 0
train input narrow torch.Size([250, 2, 14, 14])
input 1 torch.Size([250, 1, 14, 14])
input 2 torch.Size([250, 1, 14, 14])
conv 1 torch.Size([250, 32, 6, 6])
conv 2 torch.Size([250, 64, 2, 2])
fct 1 torch.Size([250, 200])
fc 2 torch.Size([250, 10])
conv 1 torch.Size([250, 32, 6, 6])
conv 2 torch.Size([250, 64, 2, 2])
fct 1 torch.Size([250, 200])
fc 2 torch.Size([250, 10])
output 1 torch.Size([250, 10])
output 2 torch.Size([250, 10])
digit 1 torch.Size([250, 10])
digit 2 torch.Size([250, 10])
comparision torch.Size([250, 2])
b 250
train input narrow torch.Size([250, 2, 14, 14])
input 1 torch.Size([250, 1, 14, 14])
input 2 torch.Size([250, 1, 14, 14])
conv 1 torch.Size([250, 32, 6, 6])
conv 2 torch.Size([250, 64, 2, 2])
fct 1 torch.Size([250, 200])
fc 2 torch.Size([250, 10])
conv 1 torch.Size([250, 32, 6, 6])
conv 2 torch.Size([250, 64, 2, 2])
fct 1 torch.Size([250, 200])
fc 2 torch.Size([250, 10])
output 1 torch.Size([250, 10])
output 2 torch.Size([250, 10])
digit 1 torch.Size([250, 

KeyboardInterrupt: 

In [143]:
model_auxiliary = Auxiliary_Loss_Net_Optimized()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_auxiliary_loss_optimized(model_auxiliary, train_input, train_target_one_hot, train_classes, mini_batch_size=250, nb_epochs=25, use_optimizer="adam")

test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
nb_test_errors = compute_nb_errors_auxilary_loss(model_auxiliary, test_input, test_target_total, mini_batch_size=250)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

test error Net 14.10% 141/1000


In [20]:
# Benchmark of the basic network with Adam optimizer
nb_trials = 10
N = 1000
performances = []
for trial in range(nb_trials):
    
    # Generate Data 
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)
    train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
    test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
    
    # Define the model 
    model_total = Simple_Net()
    
    # Train the model
    train_model_simple_net(model_total, train_input, train_target_one_hot, mini_batch_size=250, 
                      nb_epochs=25, use_optimizer="adam")
    
    # Evaluate performances 
    nb_test_errors = compute_nb_errors_simple_net(model_total, test_input, test_target_total, mini_batch_size=250)
    print('test error Net {:d} {:0.2f}% {:d}/{:d}'.format(trial, (100 * nb_test_errors) / test_input.size(0),
                                                          nb_test_errors, test_input.size(0)))
    performances.append(nb_test_errors)
    
mean_perf = 100 * sum(performances) / (N * nb_trials)

print(f"Average precision of this architecture {mean_perf}")

test error Net 0 18.90% 189/1000
test error Net 1 19.10% 191/1000
test error Net 2 17.70% 177/1000
test error Net 3 15.00% 150/1000
test error Net 4 18.70% 187/1000
test error Net 5 18.90% 189/1000
test error Net 6 18.90% 189/1000
test error Net 7 19.10% 191/1000
test error Net 8 17.90% 179/1000
test error Net 9 18.40% 184/1000
Average precision of this architecture 18.26


In [35]:
"""
model_total = Simple_Net()

train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
train_model_simple_net_2(model_total, train_input, train_target,
                        mini_batch_size=250, nb_epochs=25, use_optimizer="adam")


test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
"""
# Benchmark of the basic network with Adam optimizer for second version
nb_trials = 10
N = 1000
performances = []
for trial in range(nb_trials):
    
    # Generate Data 
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)
    train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
    test_target_total = prologue.convert_to_one_hot_labels(test_input, test_target)
    
    # Define the model 
    model_total = Simple_Net()

    # Train the model
    train_model_simple_net_2(model_total, train_input, train_target, mini_batch_size=250, 
                      nb_epochs=25, use_optimizer="adam")
    
    # Evaluate performances 
    nb_test_errors = compute_nb_errors_simple_net(model_total, test_input, test_target_total, mini_batch_size=250)
    print('test error Net {:d} {:0.2f}% {:d}/{:d}'.format(trial, (100 * nb_test_errors) / test_input.size(0),
                                                          nb_test_errors, test_input.size(0)))
    performances.append(nb_test_errors)
    
mean_perf = 100 * sum(performances) / (N * nb_trials)
print(f"Average precision of this architecture {mean_perf}")

test error Net 0 16.00% 160/1000
test error Net 1 16.70% 167/1000
test error Net 2 17.80% 178/1000
test error Net 3 15.30% 153/1000
test error Net 4 17.00% 170/1000
test error Net 5 15.30% 153/1000
test error Net 6 16.10% 161/1000
test error Net 7 17.20% 172/1000
test error Net 8 17.00% 170/1000
test error Net 9 15.40% 154/1000
Average precision of this architecture 16.38


In [22]:
def benchmark_model(model, train_function, evaluate_function, nb_trials=20, N=1000, mini_batch_size=250, nb_epochs=25, model_requires_target_and_classes=False, one_hot_train_target=True, _print=False):
    # Benchmark of the basic network with Adam optimizer
    performances = []
    for trial in range(nb_trials):

        # Generate Data 
        train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)
        if one_hot_train_target:
            train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
        else :
            train_target_one_hot = train_target
        test_target_one_hot = prologue.convert_to_one_hot_labels(test_input, test_target)

        # Define the model 
        model_total = model()

        # Train the model
        if model_requires_target_and_classes : 
            train_function(model_total, train_input, train_target_one_hot, train_classes, mini_batch_size=mini_batch_size,
                           nb_epochs=nb_epochs, use_optimizer="adam", _print=_print)
        else :
            train_function(model_total, train_input, train_target_one_hot, mini_batch_size=mini_batch_size,
                           nb_epochs=nb_epochs, use_optimizer="adam", _print=_print)

        # Evaluate performances 
        nb_test_errors = evaluate_function(model_total, test_input, test_target_one_hot, mini_batch_size=mini_batch_size)
        print('test error Net trial {:d} {:0.2f}% {:d}/{:d}'.format(trial, (100 * nb_test_errors) / test_input.size(0),
                                                              nb_test_errors, test_input.size(0)))
        performances.append(nb_test_errors)

    mean_perf = 100 * sum(performances) / (N * nb_trials)
    print(f"Average precision of this architecture {mean_perf}%")
    
    std_dev = math.sqrt(sum(list(map(lambda x : x - mean_perf,performances))))/nb_trials
    print(f"With standard deviation of  {std_dev}")

In [23]:
print("Benchmark of the model with no Weight Sharing")
benchmark_model(No_Weight_Sharing_Net, train_model_simple_net, compute_nb_errors_simple_net)

Benchmark of the model with no Weight Sharing
test error Net trial 0 21.90% 219/1000
test error Net trial 1 19.80% 198/1000
test error Net trial 2 20.60% 206/1000
test error Net trial 3 20.50% 205/1000
test error Net trial 4 25.70% 257/1000
test error Net trial 5 14.00% 140/1000
test error Net trial 6 19.40% 194/1000
test error Net trial 7 20.00% 200/1000
test error Net trial 8 21.80% 218/1000
test error Net trial 9 18.60% 186/1000
test error Net trial 10 17.70% 177/1000
test error Net trial 11 19.20% 192/1000
test error Net trial 12 19.00% 190/1000
test error Net trial 13 21.00% 210/1000
test error Net trial 14 16.10% 161/1000
test error Net trial 15 21.80% 218/1000
test error Net trial 16 17.20% 172/1000
test error Net trial 17 24.10% 241/1000
test error Net trial 18 19.00% 190/1000
test error Net trial 19 20.40% 204/1000
Average precision of this architecture 19.89%
With standard deviation of  2.9917386249470397


In [24]:
print("Benchmark of the model with no Weight Sharing CrossEntropyLoss")
benchmark_model(No_Weight_Sharing_Net, train_model_simple_net_2, compute_nb_errors_simple_net, one_hot_train_target=False)

Benchmark of the model with no Weight Sharing CrossEntropyLoss
test error Net trial 0 15.60% 156/1000
test error Net trial 1 17.50% 175/1000
test error Net trial 2 16.00% 160/1000
test error Net trial 3 16.40% 164/1000
test error Net trial 4 16.90% 169/1000
test error Net trial 5 15.80% 158/1000
test error Net trial 6 16.80% 168/1000
test error Net trial 7 17.10% 171/1000
test error Net trial 8 17.30% 173/1000
test error Net trial 9 17.40% 174/1000
test error Net trial 10 16.60% 166/1000
test error Net trial 11 17.60% 176/1000
test error Net trial 12 18.40% 184/1000
test error Net trial 13 18.10% 181/1000
test error Net trial 14 18.40% 184/1000
test error Net trial 15 20.90% 209/1000
test error Net trial 16 18.50% 185/1000
test error Net trial 17 17.90% 179/1000
test error Net trial 18 16.00% 160/1000
test error Net trial 19 15.50% 155/1000
Average precision of this architecture 17.235%
With standard deviation of  2.7849147204178437


In [27]:
print("Benchmark of the model with Weight Sharing MSE")
benchmark_model(Simple_Net, train_model_simple_net, compute_nb_errors_simple_net)

print("")
print("Benchmark of the model with Weight Sharing CrossEntropyLoss")
benchmark_model(Simple_Net, train_model_simple_net_2, compute_nb_errors_simple_net, one_hot_train_target=False)

Benchmark of the model with Weight Sharing MSE
test error Net trial 0 17.80% 178/1000
test error Net trial 1 17.20% 172/1000
test error Net trial 2 19.90% 199/1000
test error Net trial 3 18.40% 184/1000
test error Net trial 4 14.60% 146/1000
test error Net trial 5 19.70% 197/1000
test error Net trial 6 17.20% 172/1000
test error Net trial 7 14.00% 140/1000
test error Net trial 8 16.60% 166/1000
test error Net trial 9 16.50% 165/1000
test error Net trial 10 19.10% 191/1000
test error Net trial 11 14.40% 144/1000
test error Net trial 12 22.50% 225/1000
test error Net trial 13 17.00% 170/1000
test error Net trial 14 18.50% 185/1000
test error Net trial 15 18.20% 182/1000
test error Net trial 16 16.30% 163/1000
test error Net trial 17 17.40% 174/1000
test error Net trial 18 17.30% 173/1000
test error Net trial 19 17.10% 171/1000
Average precision of this architecture 17.485%
With standard deviation of  2.8050401066651434

Benchmark of the model with Weight Sharing CrossEntropyLoss
test err

In [50]:
print("Benchmark of the model with Weight Sharing and an auxiliary loss MSE")
benchmark_model(Auxiliary_Loss_Net, train_model_auxiliary_loss, compute_nb_errors_auxilary_loss, model_requires_target_and_classes=True)
print("Benchmark of the model with Weight Sharing and an auxiliary loss Cross Entropy Loss")
benchmark_model(Auxiliary_Loss_Net, train_model_auxiliary_loss_2, compute_nb_errors_auxilary_loss, model_requires_target_and_classes=True, one_hot_train_target=False)

Benchmark of the model with Weight Sharing and an auxiliary loss MSE
test error Net trial 0 10.20% 102/1000
test error Net trial 1 10.40% 104/1000
test error Net trial 2 11.00% 110/1000
test error Net trial 3 7.90% 79/1000
test error Net trial 4 11.40% 114/1000
test error Net trial 5 10.40% 104/1000
test error Net trial 6 10.90% 109/1000
test error Net trial 7 8.50% 85/1000
test error Net trial 8 12.50% 125/1000
test error Net trial 9 9.30% 93/1000
test error Net trial 10 10.00% 100/1000
test error Net trial 11 11.70% 117/1000
test error Net trial 12 10.30% 103/1000
test error Net trial 13 8.30% 83/1000
test error Net trial 14 10.30% 103/1000
test error Net trial 15 11.30% 113/1000
test error Net trial 16 11.20% 112/1000
test error Net trial 17 10.20% 102/1000
test error Net trial 18 9.50% 95/1000
test error Net trial 19 10.80% 108/1000
Average precision of this architecture 10.305%
With standard deviation of  2.1534275005209715
Benchmark of the model with Weight Sharing and an auxilia

In [67]:
def benchmark_dropout_model(model, rate, train_function, evaluate_function, nb_trials=20, N=1000, mini_batch_size=250, nb_epochs=25, model_requires_target_and_classes=False, one_hot_train_target=True, _print=False):
    performances = []
    for trial in range(nb_trials):

        # Generate Data 
        train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)
        if one_hot_train_target:
            train_target_one_hot = prologue.convert_to_one_hot_labels(train_input, train_target)
        else :
            train_target_one_hot = train_target
        test_target_one_hot = prologue.convert_to_one_hot_labels(test_input, test_target)

        # Define the model 
        model_total = model(rate)

        # Train the model
        if model_requires_target_and_classes : 
            train_function(model_total, train_input, train_target_one_hot, train_classes, mini_batch_size=mini_batch_size,
                           nb_epochs=nb_epochs, use_optimizer="adam", _print=_print)
        else :
            train_function(model_total, train_input, train_target_one_hot, mini_batch_size=mini_batch_size,
                           nb_epochs=nb_epochs, use_optimizer="adam", _print=_print)

        # Evaluate performances 
        nb_test_errors = evaluate_function(model_total, test_input, test_target_one_hot, mini_batch_size=mini_batch_size)
        print('test error Net trial {:d} {:0.2f}% {:d}/{:d}'.format(trial, (100 * nb_test_errors) / test_input.size(0),
                                                              nb_test_errors, test_input.size(0)))
        performances.append(nb_test_errors)

    mean_perf = 100 * sum(performances) / (N * nb_trials)
    print(f"Average precision of this architecture {mean_perf}%")
    
    std_dev = math.sqrt(sum(list(map(lambda x : x - mean_perf,performances))))/nb_trials
    print(f"With standard deviation of  {std_dev}")

In [68]:
for i in dropout_rate:
    print(f"Weight sharing, MSE Loss, dropout {i}")
    benchmark_dropout_model(Auxiliary_Loss_Dropout_Net, i, train_model_auxiliary_loss_2, compute_nb_errors_auxilary_loss, nb_trials=10, model_requires_target_and_classes=True, one_hot_train_target=False)
    print(f"Weight sharing, cross entropy loss, dropout {i}")
    benchmark_dropout_model(Auxiliary_Loss_Dropout_Net, i, train_model_auxiliary_loss_2, compute_nb_errors_auxilary_loss, nb_trials=10, model_requires_target_and_classes=True, one_hot_train_target=False)
    

Weight sharing, MSE Loss, dropout 0.0
test error Net trial 0 10.80% 108/1000
test error Net trial 1 10.30% 103/1000
test error Net trial 2 8.40% 84/1000
test error Net trial 3 11.40% 114/1000
test error Net trial 4 11.80% 118/1000
test error Net trial 5 11.10% 111/1000
test error Net trial 6 10.10% 101/1000
test error Net trial 7 11.30% 113/1000
test error Net trial 8 12.60% 126/1000
test error Net trial 9 12.20% 122/1000
Average precision of this architecture 11.0%
With standard deviation of  3.146426544510455
Weight sharing, cross entropy loss, dropout 0.0
test error Net trial 0 10.80% 108/1000
test error Net trial 1 9.40% 94/1000
test error Net trial 2 10.50% 105/1000
test error Net trial 3 10.00% 100/1000
test error Net trial 4 12.00% 120/1000
test error Net trial 5 12.00% 120/1000
test error Net trial 6 10.80% 108/1000
test error Net trial 7 10.30% 103/1000
test error Net trial 8 9.50% 95/1000
test error Net trial 9 10.40% 104/1000
Average precision of this architecture 10.57%
Wit

In [71]:
from torchsummary import summary
model = No_Weight_Sharing_Net()

summary(model)

ModuleNotFoundError: No module named 'torchsummary'

In [8]:
class Auxiliary_Loss_Net_Optimized_2(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Layers that handle digit classification 
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 10)
        
        # Layers that handle comparisson 
        self.fc3 = nn.Linear(20, 128)
        self.fc4 = nn.Linear(128, 128)
        self.fc5 = nn.Linear(128, 2)
        
    def cnn(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x
    
    def mlp(self, x):
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
    def forward(self, x):
        s = x.shape
                
        input_1 = x[:,0,:,:].reshape([s[0],1,s[2],s[3]])
        input_2 = x[:,1,:,:].reshape([s[0],1,s[2],s[3]])

        
        output_1 = self.cnn(input_1)
        output_2 = self.cnn(input_2)
        concatenated = torch.cat((output_1, output_2), 1)
        
        comparison = self.mlp(concatenated)
        return output_1, output_2, comparison

def compute_nb_parameters(model, name = "Model"):
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    pytorch_total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    print(f'{name} has {pytorch_total_params} parameters. {pytorch_total_trainable_params} of them are trainable')
    
compute_nb_parameters(Auxiliary_Loss_Net_Optimized_2())

Model has 72460 parameters. 72460 of them are trainable
