In [1]:
import torch
import matplotlib.pyplot as plt
import dlc_practical_prologue as prologue
from torch import nn
from torch.nn import functional as F

## Generating the dataset

In [2]:
nbr_pairs = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(nbr_pairs)

print('train_input size =', train_input.size())
print('train_target size =', train_target.size()) #The boolean telling if the two pairs are the same or not 
print('train_classes size =', train_classes.size())
print('test_input size =', test_input.size())
print('test_target size =', test_target.size())
print('test_classes size =', test_classes.size())

train_input size = torch.Size([1000, 2, 14, 14])
train_target size = torch.Size([1000])
train_classes size = torch.Size([1000, 2])
test_input size = torch.Size([1000, 2, 14, 14])
test_target size = torch.Size([1000])
test_classes size = torch.Size([1000, 2])


## Utilities

### Train Model

In [3]:
def train_model_NOaux(model, train_input, train_target, nb_epochs, batch_size, criterion, eta): 
    optimizer = torch.optim.Adam(model.parameters(), lr = eta)
    for e in range(nb_epochs):
        if (e % 10 == 0 and e > 0):
            eta = eta/10
            optimizer = torch.optim.Adam(model.parameters(), lr = eta)
        for step_ in range(0,train_input.size(0),batch_size):                              
            output = model(train_input[step_:step_+batch_size])
            loss = criterion(output, train_target[step_:step_+batch_size])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def train_model_aux(model, train_input, train_target, nb_epochs, batch_size, criterion, eta, lambda_):   
    optimizer = torch.optim.Adam(model.parameters(), lr = eta)
    for e in range(nb_epochs):
        if (e % 10 == 0 and e > 0):
            eta = eta/10
            optimizer = torch.optim.Adam(model.parameters(), lr = eta)
        for step_ in range(0,train_input.size(0),batch_size):                              
            output_target, output_im1, output_im2 = model(train_input[step_:step_+batch_size])
            loss_target = criterion(output_target, train_target[step_:step_+batch_size])
            loss_im1 = criterion(output_im1, train_classes[step_:step_+batch_size,0])
            loss_im2 = criterion(output_im2, train_classes[step_:step_+batch_size,1])
            loss = loss_target + lambda_*(loss_im1 + loss_im2)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

### Number of error

In [4]:
def compute_nb_errors_NOaux(model, data_input, data_target, mini_batch_size): 
    nb_errors = 0
    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.data.max(1)
        for k in range(mini_batch_size):
            if data_target[b + k] != predicted_classes[k]:
                nb_errors = nb_errors + 1
    return nb_errors

def compute_nb_errors_aux(model, data_input, data_target, mini_batch_size): 
    nb_errors = 0
    for b in range(0, data_input.size(0), mini_batch_size):
        output,_,_ = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.data.max(1)
        for k in range(mini_batch_size):
            if data_target[b + k] != predicted_classes[k]:
                nb_errors = nb_errors + 1
    return nb_errors

## Architectures

### Shallow

In [5]:
class Shallow_NOsharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Shallow_NOsharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        x_1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x_2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))
        x = torch.cat([x_1, x_2],1)
        x = self.fc2(x)       
        return x
    
class Shallow_sharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Shallow_sharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)
        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            fc_image.append(x1)
        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc2(x)       
        return x
    
class Shallow_NOsharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Shallow_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        x1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))
        
        aux1 = F.softmax(self.fc_aux1(x1),1)
        aux2 = F.softmax(self.fc_aux2(x2),1)
        
        x = torch.cat([x1, x2],1)
        x = self.fc2(x)       
        return x, aux1, aux2
    
class Shallow_sharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Shallow_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            fc_image.append(x1)
        
        aux1 = F.softmax(self.fc_aux1(fc_image[0]),1)
        aux2 = F.softmax(self.fc_aux2(fc_image[1]),1)
        
        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc2(x)       
        return x, aux1, aux2      

### MLP

In [6]:
class MLP_NOsharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(MLP_NOsharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        self.fc2_1 = nn.Linear(hidden,hidden)
        self.fc2_2 = nn.Linear(hidden,hidden)
        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        x1_1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x1_2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))
        x2_1 = self.act_fun(self.fc2_1(x1_1))
        x2_2 = self.act_fun(self.fc2_2(x1_2))
        x = torch.cat([x2_1, x2_2],1)
        x = self.fc3(x)       
        return x

class MLP_sharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(MLP_sharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)
        self.fc2 = nn.Linear(hidden,hidden)
        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            x2 = self.act_fun(self.fc2(x1))
            fc_image.append(x2)
        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc3(x)       
        return x
    
class MLP_NOsharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(MLP_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        self.fc2_1 = nn.Linear(hidden,hidden)
        self.fc2_2 = nn.Linear(hidden,hidden)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        x1_1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x1_2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))
        x2_1 = self.act_fun(self.fc2_1(x1_1))
        x2_2 = self.act_fun(self.fc2_2(x1_2))
        
        aux1 = F.softmax(self.fc_aux1(x2_1),1)
        aux2 = F.softmax(self.fc_aux2(x2_2),1)
        
        x = torch.cat([x2_1, x2_2],1)
        x = self.fc3(x)       
        return x, aux1, aux2
    
class MLP_sharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(MLP_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)
        self.fc2 = nn.Linear(hidden,hidden)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            x2 = self.act_fun(self.fc2(x1))
            fc_image.append(x2)
            
        aux1 = F.softmax(self.fc_aux1(fc_image[0]),1)
        aux2 = F.softmax(self.fc_aux2(fc_image[1]),1)
        
        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc3(x)       
        return x, aux1, aux2

### Deep 1

In [7]:
class Deep_NOsharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_2 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_2 = nn.Conv2d(32, 64, kernel_size=3)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        x1_1 = self.act_fun(F.max_pool2d(self.conv1_1(x[:,0,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
        x2_1 = self.act_fun(F.max_pool2d(self.conv2_1(x1_1), kernel_size=2, stride=2))
        
        x1_2 = self.act_fun(F.max_pool2d(self.conv1_2(x[:,1,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
        x2_2 = self.act_fun(F.max_pool2d(self.conv2_2(x1_2), kernel_size=2, stride=2))
        
        x = torch.cat([x2_1, x2_2],1)
        x = self.act_fun(self.fc1(x.view(-1, 512)))
        x = self.fc2(x)
        return x

class Deep_sharing_NOaux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_sharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        conv_images = []
        for image in range(2):
            first_conv = self.act_fun(F.max_pool2d(self.conv1(x[:,image,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
            conv_images.append(self.act_fun(F.max_pool2d(self.conv2(first_conv), kernel_size=2, stride=2)))
        
        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(x.view(-1, 512)))
        x = self.fc2(x)
        return x
    
class Deep_NOsharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_2 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_2 = nn.Conv2d(32, 64, kernel_size=3)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(256, 10)
        self.fc_aux2 = nn.Linear(256, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        x1_1 = self.act_fun(F.max_pool2d(self.conv1_1(x[:,0,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
        x2_1 = self.act_fun(F.max_pool2d(self.conv2_1(x1_1), kernel_size=2, stride=2))
        
        x1_2 = self.act_fun(F.max_pool2d(self.conv1_2(x[:,1,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
        x2_2 = self.act_fun(F.max_pool2d(self.conv2_2(x1_2), kernel_size=2, stride=2))

        aux1 = F.softmax(self.fc_aux1(x2_1.view(-1,256)),1)
        aux2 = F.softmax(self.fc_aux2(x2_2.view(-1,256)),1)
        
        x = torch.cat([x2_1, x2_2],1)
        x = self.act_fun(self.fc1(x.view(-1, 512)))
        x = self.fc2(x)
        return x, aux1, aux2
    
class Deep_sharing_aux(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(256, 10)
        self.fc_aux2 = nn.Linear(256, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        conv_images = []
        for image in range(2):
            first_conv = self.act_fun(F.max_pool2d(self.conv1(x[:,image,:,:].view(100,1,14,14)), kernel_size=2, stride=2))
            conv_images.append(self.act_fun(F.max_pool2d(self.conv2(first_conv), kernel_size=2, stride=2)))
            
        aux1 = F.softmax(self.fc_aux1(conv_images[0].view(-1,256)),1)
        aux2 = F.softmax(self.fc_aux2(conv_images[1].view(-1,256)),1)
        
        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(x.view(-1, 512)))
        x = self.fc2(x)
        return x, aux1, aux2

### Deep 2

In [8]:
class Deep_NOsharing_NOaux2(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_NOaux2, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv1_2 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2_1 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv2_2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_2 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.conv4_1 = nn.Conv2d(64, 128, kernel_size=2)
        self.conv4_2 = nn.Conv2d(64, 128, kernel_size=2)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        x1_1 = self.act_fun(self.conv1_1(x[:,0,:,:].view(100,1,14,14)))
        x2_1 = self.act_fun(self.conv2_1(x1_1))
        x3_1 = self.act_fun(self.conv3_2(x2_1))
        x4_1 = self.act_fun(F.max_pool2d(self.conv4_1(x3_1), kernel_size=2, stride=2))
        
        x1_2 = self.act_fun(self.conv1_2(x[:,1,:,:].view(100,1,14,14)))
        x2_2 = self.act_fun(self.conv2_2(x1_2))
        x3_2 = self.act_fun(self.conv3_2(x2_2))
        x4_2 = self.act_fun(F.max_pool2d(self.conv4_2(x3_2), kernel_size=2, stride=2))

        x = torch.cat([x4_1, x4_2],1)
        x = self.act_fun(self.fc1(x.view(-1, 1024)))
        x = self.fc2(x)
        return x

class Deep_sharing_NOaux2(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_sharing_NOaux2, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        conv_images = []
        for image in range(2):
            x1 = self.act_fun(self.conv1(x[:,image,:,:].view(100,1,14,14)))
            x2 = self.act_fun(self.conv2(x1))
            x3 = self.act_fun(self.conv3(x2))
            x4 = self.act_fun(F.max_pool2d(self.conv4(x3), kernel_size=2, stride=2))
            conv_images.append(x4)
        
        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(x.view(-1, 1024)))
        x = self.fc2(x)
        return x
    
class Deep_NOsharing_aux2(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_aux2, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv1_2 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2_1 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv2_2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_2 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.conv4_1 = nn.Conv2d(64, 128, kernel_size=2)
        self.conv4_2 = nn.Conv2d(64, 128, kernel_size=2)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(512, 10)
        self.fc_aux2 = nn.Linear(512, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        x1_1 = self.act_fun(self.conv1_1(x[:,0,:,:].view(100,1,14,14)))
        x2_1 = self.act_fun(self.conv2_1(x1_1))
        x3_1 = self.act_fun(self.conv3_2(x2_1))
        x4_1 = self.act_fun(F.max_pool2d(self.conv4_1(x3_1), kernel_size=2, stride=2))
        
        x1_2 = self.act_fun(self.conv1_2(x[:,1,:,:].view(100,1,14,14)))
        x2_2 = self.act_fun(self.conv2_2(x1_2))
        x3_2 = self.act_fun(self.conv3_2(x2_2))
        x4_2 = self.act_fun(F.max_pool2d(self.conv4_2(x3_2), kernel_size=2, stride=2))

        aux1 = F.softmax(self.fc_aux1(x4_1.view(-1,512)),1)
        aux2 = F.softmax(self.fc_aux2(x4_2.view(-1,512)),1)
        
        x = torch.cat([x4_1, x4_2],1)
        x = self.act_fun(self.fc1(x.view(-1, 1024)))
        x = self.fc2(x)
        return x, aux1, aux2
    
class Deep_sharing_aux2(nn.Module):
    def __init__(self, hidden, act_fun):
        super(Deep_sharing_aux2, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)
        
        # For classification with classes
        self.fc_aux1 = nn.Linear(512, 10)
        self.fc_aux2 = nn.Linear(512, 10)
        
        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        conv_images = []
        for image in range(2):
            x1 = self.act_fun(self.conv1(x[:,image,:,:].view(100,1,14,14)))
            x2 = self.act_fun(self.conv2(x1))
            x3 = self.act_fun(self.conv3(x2))
            x4 = self.act_fun(F.max_pool2d(self.conv4(x3), kernel_size=2, stride=2))
            conv_images.append(x4)
            
        aux1 = F.softmax(self.fc_aux1(conv_images[0].view(-1,512)),1)
        aux2 = F.softmax(self.fc_aux2(conv_images[1].view(-1,512)),1)
        
        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(x.view(-1, 1024)))
        x = self.fc2(x)
        return x, aux1, aux2

## Models generation

In [9]:
def predictions(input_, target, hidden_units, eta, lambda_, model_type = 'Shallow', sharing = False, 
                aux = False,  nb_epochs = 25, mini_batch_size = 100, criterion = nn.CrossEntropyLoss()):
    if(model_type == 'Shallow'):
        if(sharing):
            if(aux): model = Shallow_sharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = Shallow_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
        else:
            if(aux): model = Shallow_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = Shallow_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
    elif(model_type == 'MLP'):
        if(sharing):
            if(aux): model = MLP_sharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = MLP_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
        else:
            if(aux): model = MLP_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = MLP_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
    elif(model_type == 'Deep1'):
        if(sharing):
            if(aux): model = Deep_sharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = Deep_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
        else:
            if(aux): model = Deep_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
            else: model = Deep_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
    elif(model_type == 'Deep2'):
        if(sharing):
            if(aux): model = Deep_sharing_aux2(hidden = hidden_units, act_fun = F.relu)
            else: model = Deep_sharing_NOaux2(hidden = hidden_units, act_fun = F.relu)
        else:
            if(aux): model = Deep_NOsharing_aux2(hidden = hidden_units, act_fun = F.relu)
            else: model = Deep_NOsharing_NOaux2(hidden = hidden_units, act_fun = F.relu)
                
    if(aux): 
        train_model_aux(model, input_[:700], target[:700], nb_epochs, mini_batch_size, criterion, eta, lambda_)
        accuracy = 1 - compute_nb_errors_aux(model, input_[700:], target[700:], mini_batch_size)/len(target[700:])
    else: 
        train_model_NOaux(model, input_[:700], target[:700], nb_epochs, mini_batch_size, criterion, eta)
        accuracy = 1 - compute_nb_errors_NOaux(model, input_[700:], target[700:], mini_batch_size)/len(target[700:])
    
    return accuracy, model

In [15]:
def create_dict():
    results = {'Shallow':{'NOsharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'sharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'NOsharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1},
                        'sharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1}},
               'MLP':{'NOsharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'sharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'NOsharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1},
                        'sharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1}},  
               'Deep1':{'NOsharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'sharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'NOsharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1},
                        'sharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1}},
               'Deep2':{'NOsharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'sharing_NOaux':{'Acc': 1, 'eta': 1, 'hidden': 1},
                        'NOsharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1},
                        'sharing_aux':{'Acc': 1, 'eta': 1, 'hidden': 1, 'lambda': 1}}  
              }
    return results

def fill_results(results, type_model, sharing_flag, aux_flag, acc, eta, hidden, lambda_):
    if(sharing_flag):
        if(aux_flag):
            results[type_model]['sharing_aux']['Acc'] = acc
            results[type_model]['sharing_aux']['eta'] = eta
            results[type_model]['sharing_aux']['hidden'] = hidden
            results[type_model]['sharing_aux']['lambdas'] = lambda_
        else: 
            results[type_model]['sharing_NOaux']['Acc'] = acc
            results[type_model]['sharing_NOaux']['eta'] = eta
            results[type_model]['sharing_NOaux']['hidden'] = hidden
            results[type_model]['sharing_NOaux']['lambdas'] = lambda_
    else:
        if(aux_flag):
            results[type_model]['NOsharing_aux']['Acc'] = acc
            results[type_model]['NOsharing_aux']['eta'] = eta
            results[type_model]['NOsharing_aux']['hidden'] = hidden
            results[type_model]['NOsharing_aux']['lambdas'] = lambda_
        else: 
            results[type_model]['NOsharing_NOaux']['Acc'] = acc
            results[type_model]['NOsharing_NOaux']['eta'] = eta
            results[type_model]['NOsharing_NOaux']['hidden'] = hidden
            results[type_model]['NOsharing_NOaux']['lambdas'] = lambda_
    return results

def grid_search_(lambdas, etas, hidden_units, train_input, train_target, test_input, test_target):
    type_models = ['Shallow', 'MLP', 'Deep1', 'Deep2']
    sharing_flags = [True, False]
    aux_flags = [True, False]
    acc_test = torch.zeros(len(type_models),len(sharing_flags),len(aux_flags))
    #results = create_dict()
    
    i = 0

    for t, type_model in enumerate(type_models):
        for s, sharing_flag in enumerate(sharing_flags):
            for a, aux_flag in enumerate(aux_flags):
                i += 1
                print(i, '/ 16')
                print('Training architecture...')
                performances = torch.zeros(len(lambdas),len(hidden_units),len(etas))
                for l, lambda_ in enumerate(lambdas):
                    for h, hidden in enumerate(hidden_units):
                        for e, eta in enumerate(etas):
                            acc, _ = predictions(train_input, train_target, hidden.item(), eta.item(), 
                                              lambda_.item(), model_type = type_model, 
                                              sharing = sharing_flag, aux = aux_flag)
                            performances[l,h,e] = acc
                best_performance = torch.max(performances)
                best_idx = (performances == best_performance).nonzero();
                
                best_eta = etas[best_idx[0,2]].item()
                best_hidden = hidden_units[best_idx[0,1]].item()
                best_lambda = lambdas[best_idx[0,0]].item()
                
                #results = fill_results(results, type_model, sharing_flag, aux_flag, 
                #                       best_performance.item(), best_eta, 
                #                       best_hidden, best_lambda)
                
                print('Testing architecture...')
                _, model = predictions(train_input, train_target, best_hidden, best_eta, best_lambda, 
                                      model_type = type_model, sharing = sharing_flag, aux = aux_flag)
                
                if(aux_flag):
                    acc_test[t,s,a] = 1 - compute_nb_errors_aux(model, test_input, test_target, 100)/len(test_target)
                else: 
                    acc_test[t,s,a] = 1 - compute_nb_errors_NOaux(model, test_input, test_target, 100)/len(test_target)
    return acc_test

In [16]:
lambdas = torch.tensor([0.5])
etas = torch.tensor([1e-3])
hidden_units = torch.tensor([10])

acc_test = grid_search_(lambdas, etas, hidden_units, train_input, train_target, test_input, test_target)


1 / 16
Training architecture...
Testing architecture...
2 / 16
Training architecture...
Testing architecture...
3 / 16
Training architecture...
Testing architecture...
4 / 16
Training architecture...
Testing architecture...
5 / 16
Training architecture...
Testing architecture...
6 / 16
Training architecture...
Testing architecture...
7 / 16
Training architecture...
Testing architecture...
8 / 16
Training architecture...
Testing architecture...
9 / 16
Training architecture...
Testing architecture...
10 / 16
Training architecture...
Testing architecture...
11 / 16
Training architecture...
Testing architecture...
12 / 16
Training architecture...
Testing architecture...
13 / 16
Training architecture...
Testing architecture...
14 / 16
Training architecture...
Testing architecture...
15 / 16
Training architecture...
Testing architecture...
16 / 16
Training architecture...
Testing architecture...


In [17]:
print(acc_test)

tensor([[[0.7370, 0.7450],
         [0.7080, 0.7380]],

        [[0.7090, 0.7200],
         [0.7050, 0.7460]],

        [[0.5260, 0.5260],
         [0.4740, 0.8070]],

        [[0.8480, 0.5260],
         [0.4740, 0.5260]]])


In [None]:
mini_batch_size = 100
nb_epochs = 25
criterion = nn.CrossEntropyLoss()

eta = 0.001
hidden_units = 300

# Shallow
model_shallow_NOsharing_NOaux = Shallow_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_shallow_sharing_NOaux = Shallow_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_shallow_NOsharing_aux = Shallow_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
model_shallow_sharing_aux = Shallow_sharing_aux(hidden = hidden_units, act_fun = F.relu)

#MLP
model_MLP_NOsharing_NOaux = MLP_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_MLP_sharing_NOaux = MLP_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_MLP_NOsharing_aux = MLP_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
model_MLP_sharing_aux = MLP_sharing_aux(hidden = hidden_units, act_fun = F.relu)

#Deep1
model_deep_NOsharing_NOaux = Deep_NOsharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_deep_sharing_NOaux = Deep_sharing_NOaux(hidden = hidden_units, act_fun = F.relu)
model_deep_NOsharing_aux = Deep_NOsharing_aux(hidden = hidden_units, act_fun = F.relu)
model_deep_sharing_aux = Deep_sharing_aux(hidden = hidden_units, act_fun = F.relu)

#Deep2
model_deep_NOsharing_NOaux2 = Deep_NOsharing_NOaux2(hidden = hidden_units, act_fun = F.relu)
model_deep_sharing_NOaux2 = Deep_sharing_NOaux2(hidden = hidden_units, act_fun = F.relu)
model_deep_NOsharing_aux2 = Deep_NOsharing_aux2(hidden = hidden_units, act_fun = F.relu)
model_deep_sharing_aux2 = Deep_sharing_aux2(hidden = hidden_units, act_fun = F.relu)

## Models training

In [16]:
#Shallow
train_model_NOaux(model_shallow_NOsharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_NOaux(model_shallow_sharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_aux(model_shallow_NOsharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)
train_model_aux(model_shallow_sharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)

#MLP
train_model_NOaux(model_MLP_NOsharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_NOaux(model_MLP_sharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_aux(model_MLP_NOsharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)
train_model_aux(model_MLP_sharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)

#Deep1
train_model_NOaux(model_deep_sharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_NOaux(model_deep_NOsharing_NOaux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_aux(model_deep_NOsharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)
train_model_aux(model_deep_sharing_aux, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)

#Deep2
train_model_NOaux(model_deep_sharing_NOaux2, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_NOaux(model_deep_NOsharing_NOaux2, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta)
train_model_aux(model_deep_NOsharing_aux2, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)
train_model_aux(model_deep_sharing_aux2, train_input, train_target,nb_epochs, mini_batch_size, criterion, eta, 0.5)


## Model evaluation

In [18]:
#Shallow
train_acc_shallow_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_shallow_NOsharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_shallow_sharing_NOaux  = 1 - compute_nb_errors_NOaux(model_shallow_sharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_shallow_NOsharing_aux  = 1 - compute_nb_errors_aux(model_shallow_NOsharing_aux, train_input, train_target, mini_batch_size)
train_acc_shallow_sharing_aux  = 1 - compute_nb_errors_aux(model_shallow_sharing_aux, train_input, train_target, mini_batch_size)

#MLP
train_acc_MLP_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_MLP_NOsharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_MLP_sharing_NOaux = 1 - compute_nb_errors_NOaux(model_MLP_sharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_MLP_NOsharing_aux = 1 - compute_nb_errors_aux(model_MLP_NOsharing_aux, train_input, train_target, mini_batch_size)
train_acc_MLP_sharing_aux = 1 - compute_nb_errors_aux(model_MLP_sharing_aux, train_input, train_target, mini_batch_size)

#Deep1
train_acc_deep_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_deep_NOsharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_deep_sharing_NOaux = 1 - compute_nb_errors_NOaux(model_deep_sharing_NOaux, train_input, train_target, mini_batch_size)
train_acc_deep_NOsharing_aux = 1 - compute_nb_errors_aux(model_deep_NOsharing_aux, train_input, train_target, mini_batch_size)
train_acc_deep_sharing_aux = 1 - compute_nb_errors_aux(model_deep_sharing_aux, train_input, train_target, mini_batch_size)

#Deep2
train_acc_deep_NOsharing_NOaux2 = 1 - compute_nb_errors_NOaux(model_deep_NOsharing_NOaux2, train_input, train_target, mini_batch_size)
train_acc_deep_sharing_NOaux2 = 1 - compute_nb_errors_NOaux(model_deep_sharing_NOaux2, train_input, train_target, mini_batch_size)
train_acc_deep_NOsharing_aux2 = 1 - compute_nb_errors_aux(model_deep_NOsharing_aux2, train_input, train_target, mini_batch_size)
train_acc_deep_sharing_aux2 = 1 - compute_nb_errors_aux(model_deep_sharing_aux2, train_input, train_target, mini_batch_size)

print('Train performance shallow NOsharing NOaux = ', train_acc_shallow_NOsharing_NOaux)
print('Train performance shallow sharing Noaux = ', train_acc_shallow_sharing_NOaux)
print('Train performance shallow NOsharing aux = ', train_acc_shallow_NOsharing_aux)
print('Train performance shallow sharing aux = ', train_acc_shallow_sharing_aux)

print('Train performance MLP NOsharing NOaux = ', train_acc_MLP_NOsharing_NOaux)
print('Train performance MLP sharing Noaux = ', train_acc_MLP_sharing_NOaux)
print('Train performance MLP NOsharing aux = ', train_acc_MLP_NOsharing_aux)
print('Train performance MLP sharing aux = ', train_acc_MLP_sharing_aux)

print('Train performance Deep NOsharing NOaux = ', train_acc_deep_NOsharing_NOaux)
print('Train performance Deep sharing Noaux = ', train_acc_deep_sharing_NOaux)
print('Train performance Deep NOsharing aux = ', train_acc_deep_NOsharing_aux)
print('Train performance Deep sharing aux = ', train_acc_deep_sharing_aux)

print('Train performance Deep NOsharing NOaux 2 = ', train_acc_deep_NOsharing_NOaux2)
print('Train performance Deep sharing Noaux 2 = ', train_acc_deep_sharing_NOaux2)
print('Train performance Deep NOsharing aux 2 = ', train_acc_deep_NOsharing_aux2)
print('Train performance Deep sharing aux 2 = ', train_acc_deep_sharing_aux2)

#Shallow
test_acc_shallow_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_shallow_NOsharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_shallow_sharing_NOaux = 1 - compute_nb_errors_NOaux(model_shallow_sharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_shallow_NOsharing_aux = 1 - compute_nb_errors_aux(model_shallow_NOsharing_aux, test_input, test_target, mini_batch_size)
test_acc_shallow_sharing_aux = 1 - compute_nb_errors_aux(model_shallow_sharing_aux, test_input, test_target, mini_batch_size)

#MLP
test_acc_MLP_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_MLP_NOsharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_MLP_sharing_NOaux = 1 - compute_nb_errors_NOaux(model_MLP_sharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_MLP_NOsharing_aux = 1 - compute_nb_errors_aux(model_MLP_NOsharing_aux, test_input, test_target, mini_batch_size)
test_acc_MLP_sharing_aux = 1 - compute_nb_errors_aux(model_MLP_sharing_aux, test_input, test_target, mini_batch_size)

#Deep1
test_acc_deep_NOsharing_NOaux = 1 - compute_nb_errors_NOaux(model_deep_NOsharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_deep_sharing_NOaux = 1 - compute_nb_errors_NOaux(model_deep_sharing_NOaux, test_input, test_target, mini_batch_size)
test_acc_deep_NOsharing_aux = 1 - compute_nb_errors_aux(model_deep_NOsharing_aux, test_input, test_target, mini_batch_size)
test_acc_deep_sharing_aux = 1 - compute_nb_errors_aux(model_deep_sharing_aux, test_input, test_target, mini_batch_size)

#Deep2
test_acc_deep_NOsharing_NOaux2 = 1 - compute_nb_errors_NOaux(model_deep_NOsharing_NOaux2, test_input, test_target, mini_batch_size)
test_acc_deep_sharing_NOaux2 = 1 - compute_nb_errors_NOaux(model_deep_sharing_NOaux2, test_input, test_target, mini_batch_size)
test_acc_deep_NOsharing_aux2 = 1 - compute_nb_errors_aux(model_deep_NOsharing_aux2, test_input, test_target, mini_batch_size)
test_acc_deep_sharing_aux2 = 1 - compute_nb_errors_aux(model_deep_sharing_aux2, test_input, test_target, mini_batch_size)

print('')
print('Test performance shallow NOsharing NOaux = ', test_acc_shallow_NOsharing_NOaux)
print('Test performance shallow sharing Noaux = ', test_acc_shallow_sharing_NOaux)
print('Test performance shallow NOsharing aux = ', test_acc_shallow_NOsharing_aux)
print('Test performance shallow sharing aux = ', test_acc_shallow_sharing_aux)

print('Test performance MLP NOsharing NOaux = ', test_acc_MLP_NOsharing_NOaux)
print('Test performance MLP sharing Noaux = ', test_acc_MLP_sharing_NOaux)
print('Test performance MLP NOsharing aux = ', test_acc_MLP_NOsharing_aux)
print('Test performance MLP sharing aux = ', test_acc_MLP_sharing_aux)

print('Test performance Deep NOsharing NOaux = ', test_acc_deep_NOsharing_NOaux)
print('Test performance Deep sharing Noaux = ', test_acc_deep_sharing_NOaux)
print('Test performance Deep NOsharing aux = ', test_acc_deep_NOsharing_aux)
print('Test performance Deep sharing aux = ', test_acc_deep_sharing_aux)

print('Test performance Deep NOsharing NOaux 2 = ', test_acc_deep_NOsharing_NOaux2)
print('Test performance Deep sharing Noaux 2 = ', test_acc_deep_sharing_NOaux2)
print('Test performance Deep NOsharing aux 2 = ', test_acc_deep_NOsharing_aux2)
print('Test performance Deep sharing aux 2 = ', test_acc_deep_sharing_aux2)

Train performance shallow NOsharing NOaux =  1.0
Train performance shallow sharing Noaux =  1.0
Train performance shallow NOsharing aux =  1.0
Train performance shallow sharing aux =  0.999
Train performance MLP NOsharing NOaux =  1.0
Train performance MLP sharing Noaux =  1.0
Train performance MLP NOsharing aux =  1.0
Train performance MLP sharing aux =  1.0
Train performance Deep NOsharing NOaux =  0.979
Train performance Deep sharing Noaux =  0.998
Train performance Deep NOsharing aux =  0.995
Train performance Deep sharing aux =  0.999
Train performance Deep NOsharing NOaux 2 =  0.999
Train performance Deep sharing Noaux 2 =  0.999
Train performance Deep NOsharing aux 2 =  0.999
Train performance Deep sharing aux 2 =  0.999

Test performance shallow NOsharing NOaux =  0.7969999999999999
Test performance shallow sharing Noaux =  0.802
Test performance shallow NOsharing aux =  0.7989999999999999
Test performance shallow sharing aux =  0.808
Test performance MLP NOsharing NOaux =  0.8