In [1]:
import torch
import matplotlib.pyplot as plt
import dlc_practical_prologue as prologue
from torch import nn
from torch.nn import functional as F

## Generating the dataset

In [2]:
# Number of image pairs requested for the training set and for the test set.
nbr_pairs = 1000
(train_input, train_target, train_classes,
 test_input, test_target, test_classes) = prologue.generate_pair_sets(nbr_pairs)

# Print the shape of every tensor returned by the generator.
# NOTE(review): the exact meaning of `*_target` is defined by
# prologue.generate_pair_sets, which is not visible here - confirm there.
for _caption, _tensor in (
    ('train_input size =', train_input),
    ('train_target size =', train_target),    # one binary label per pair
    ('train_classes size =', train_classes),  # one class label per image of the pair
    ('test_input size =', test_input),
    ('test_target size =', test_target),
    ('test_classes size =', test_classes),
):
    print(_caption, _tensor.size())

train_input size = torch.Size([1000, 2, 14, 14])
train_target size = torch.Size([1000])
train_classes size = torch.Size([1000, 2])
test_input size = torch.Size([1000, 2, 14, 14])
test_target size = torch.Size([1000])
test_classes size = torch.Size([1000, 2])


## Utilities

### Train Model

In [3]:
def train_model_NOaux(model, train_input, train_target, nb_epochs, batch_size, criterion, eta):
    """Train ``model`` in place with Adam on the main target only.

    Args:
        model: module mapping a mini-batch of inputs to a single output tensor.
        train_input: training inputs, sliced along dimension 0.
        train_target: targets aligned with ``train_input``.
        nb_epochs: number of passes over the data.
        batch_size: number of samples per optimisation step.
        criterion: loss called as ``criterion(output, target)``.
        eta: initial learning rate.

    Returns:
        None. The parameters of ``model`` are updated in place.
    """
    learning_rate = eta
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    n_samples = train_input.size(0)

    for epoch in range(nb_epochs):
        # Every 10 epochs (except the very first) the learning rate is divided
        # by 10 and a brand-new Adam optimizer is created with that rate.
        if epoch > 0 and epoch % 10 == 0:
            learning_rate = learning_rate / 10
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            optimizer.zero_grad()
            batch_loss = criterion(model(train_input[start:stop]), train_target[start:stop])
            batch_loss.backward()
            optimizer.step()

def train_model_aux(model, train_input, train_target, nb_epochs, batch_size, criterion, eta, lambda_, classes=None):
    """Train ``model`` in place with Adam on the main target plus two auxiliary losses.

    The total loss of a mini-batch is
    ``loss_target + lambda_ * (loss_im1 + loss_im2)`` where the two auxiliary
    terms compare the model's per-image outputs with the per-image class
    labels.

    Args:
        model: module returning ``(output_target, output_im1, output_im2)``.
        train_input: training inputs, sliced along dimension 0.
        train_target: main targets aligned with ``train_input``.
        nb_epochs: number of passes over the data.
        batch_size: number of samples per optimisation step.
        criterion: loss called as ``criterion(output, target)`` for all three terms.
        eta: initial learning rate.
        lambda_: weight of the summed auxiliary losses.
        classes: optional tensor whose row ``i`` holds the class label of each
            of the two images of sample ``i`` (columns 0 and 1). When omitted,
            the notebook-level global ``train_classes`` is used, which is the
            historical behaviour; in that case the rows of ``train_input``
            must line up with the rows of that global.

    Returns:
        None. The parameters of ``model`` are updated in place.

    Raises:
        ValueError: if ``classes`` has fewer rows than ``train_input``.
    """
    if classes is None:
        # Backward-compatible fallback on the notebook global.
        classes = train_classes
    if classes.size(0) < train_input.size(0):
        raise ValueError('classes has fewer rows than train_input')

    optimizer = torch.optim.Adam(model.parameters(), lr = eta)
    for e in range(nb_epochs):
        # Every 10 epochs (except the first) divide the learning rate by 10
        # and restart Adam with the new rate.
        if (e % 10 == 0 and e > 0):
            eta = eta/10
            optimizer = torch.optim.Adam(model.parameters(), lr = eta)
        for step_ in range(0, train_input.size(0), batch_size):
            batch = slice(step_, step_ + batch_size)
            output_target, output_im1, output_im2 = model(train_input[batch])
            loss_target = criterion(output_target, train_target[batch])
            loss_im1 = criterion(output_im1, classes[batch, 0])
            loss_im2 = criterion(output_im2, classes[batch, 1])
            loss = loss_target + lambda_*(loss_im1 + loss_im2)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

### Number of errors

In [4]:
def compute_nb_errors_NOaux(model, data_input, data_target, mini_batch_size):
    """Count the samples whose predicted class differs from ``data_target``.

    The predicted class of a sample is the index of the largest value along
    dimension 1 of the model output.

    Args:
        model: callable mapping a mini-batch of inputs to a 2-D score tensor.
        data_input: inputs, sliced along dimension 0.
        data_target: expected class index of every sample.
        mini_batch_size: number of samples evaluated per forward pass; the
            last batch may be smaller when the dataset size is not a multiple
            of it.

    Returns:
        int: number of misclassified samples.
    """
    nb_errors = 0
    # Evaluation only: no need to track gradients.
    with torch.no_grad():
        for start in range(0, data_input.size(0), mini_batch_size):
            stop = start + mini_batch_size
            # Plain slicing clamps at the end of the tensor, so a trailing
            # partial batch is handled instead of raising like `narrow` does.
            output = model(data_input[start:stop])
            _, predicted_classes = output.max(1)
            nb_errors += (predicted_classes != data_target[start:stop]).sum().item()
    return nb_errors

def compute_nb_errors_aux(model, data_input, data_target, mini_batch_size):
    """Count misclassified samples for a model that also returns auxiliary outputs.

    Only the first element of the model's 3-tuple output is used; the
    predicted class is the index of its largest value along dimension 1.

    Args:
        model: callable returning ``(output, aux1, aux2)`` for a mini-batch.
        data_input: inputs, sliced along dimension 0.
        data_target: expected class index of every sample.
        mini_batch_size: number of samples evaluated per forward pass; the
            last batch may be smaller when the dataset size is not a multiple
            of it.

    Returns:
        int: number of misclassified samples.
    """
    nb_errors = 0
    # Evaluation only: no need to track gradients.
    with torch.no_grad():
        for start in range(0, data_input.size(0), mini_batch_size):
            stop = start + mini_batch_size
            # Plain slicing clamps at the end of the tensor, so a trailing
            # partial batch is handled instead of raising like `narrow` does.
            output, _, _ = model(data_input[start:stop])
            _, predicted_classes = output.max(1)
            nb_errors += (predicted_classes != data_target[start:stop]).sum().item()
    return nb_errors

## Architectures

### Shallow

In [5]:
class Shallow_NOsharing_NOaux(nn.Module):
    """Single-hidden-layer pair classifier: one encoder per image, no auxiliary output.

    Each of the two input channels is flattened to 196 values, passed through
    its own ``Linear(196, hidden)`` layer and ``act_fun``; both feature
    vectors are concatenated and mapped to 2 scores.
    """

    def __init__(self, hidden, act_fun):
        super().__init__()
        self.act_fun = act_fun
        # Independent first layers, one per image of the pair.
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        # Output layer applied to the concatenated features of both images.
        self.fc2 = nn.Linear(2 * hidden, 2)

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs."""
        encoders = (self.fc1_1, self.fc1_2)
        features = [
            self.act_fun(layer(x[:, channel, :, :].view(-1, 196)))
            for channel, layer in enumerate(encoders)
        ]
        return self.fc2(torch.cat(features, 1))
    
class Shallow_sharing_NOaux(nn.Module):
    """Single-hidden-layer pair classifier: shared encoder, no auxiliary output.

    Both input channels are flattened to 196 values and passed through the
    same ``Linear(196, hidden)`` layer and ``act_fun``; the two feature
    vectors are concatenated and mapped to 2 scores.
    """

    def __init__(self, hidden, act_fun):
        super().__init__()
        self.act_fun = act_fun
        # Single first layer reused for both images (weight sharing).
        self.fc1 = nn.Linear(196, hidden)
        # Output layer applied to the concatenated features of both images.
        self.fc2 = nn.Linear(2 * hidden, 2)

    def _encode(self, image):
        """Flatten one image per sample and apply the shared hidden layer."""
        return self.act_fun(self.fc1(image.view(-1, 196)))

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs."""
        first = self._encode(x[:, 0, :, :])
        second = self._encode(x[:, 1, :, :])
        return self.fc2(torch.cat((first, second), 1))
    
class Shallow_NOsharing_aux(nn.Module):
    """Single-hidden-layer pair classifier: one encoder per image, with auxiliary heads.

    Each input channel is flattened to 196 values and passed through its own
    ``Linear(196, hidden)`` layer and ``act_fun``. The concatenated features
    give the 2 main scores; each image's features also feed a
    ``Linear(hidden, 10)`` auxiliary head.
    """

    def __init__(self, hidden, act_fun):
        super(Shallow_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)

        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs.

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        x1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))

        aux1 = self.fc_aux1(x1)
        aux2 = self.fc_aux2(x2)

        x = torch.cat([x1, x2],1)
        x = self.fc2(x)
        return x, aux1, aux2
    
class Shallow_sharing_aux(nn.Module):
    """Single-hidden-layer pair classifier: shared encoder, with auxiliary heads.

    Both input channels are flattened to 196 values and passed through the
    same ``Linear(196, hidden)`` layer and ``act_fun``. The concatenated
    features give the 2 main scores; each image's features also feed its own
    ``Linear(hidden, 10)`` auxiliary head.
    """

    def __init__(self, hidden, act_fun):
        super(Shallow_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)

        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc2 = nn.Linear(hidden*2,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs.

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            fc_image.append(x1)

        aux1 = self.fc_aux1(fc_image[0])
        aux2 = self.fc_aux2(fc_image[1])

        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc2(x)
        return x, aux1, aux2

### MLP

In [6]:
class MLP_NOsharing_NOaux(nn.Module):
    """Two-hidden-layer pair classifier: one encoder per image, no auxiliary output.

    Each input channel is flattened to 196 values and passed through its own
    ``Linear(196, hidden)`` and ``Linear(hidden, hidden)`` layers, each
    followed by ``act_fun``; both feature vectors are concatenated and mapped
    to 2 scores.
    """

    def __init__(self, hidden, act_fun):
        super().__init__()
        self.act_fun = act_fun
        # Independent two-layer encoders, one per image of the pair.
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        self.fc2_1 = nn.Linear(hidden, hidden)
        self.fc2_2 = nn.Linear(hidden, hidden)
        # Output layer applied to the concatenated features of both images.
        self.fc3 = nn.Linear(2 * hidden, 2)

    def _encode(self, image, first_layer, second_layer):
        """Flatten one image per sample and apply the two given hidden layers."""
        hidden_1 = self.act_fun(first_layer(image.view(-1, 196)))
        return self.act_fun(second_layer(hidden_1))

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs."""
        left = self._encode(x[:, 0, :, :], self.fc1_1, self.fc2_1)
        right = self._encode(x[:, 1, :, :], self.fc1_2, self.fc2_2)
        return self.fc3(torch.cat((left, right), 1))

class MLP_sharing_NOaux(nn.Module):
    """Two-hidden-layer pair classifier: shared encoder, no auxiliary output.

    Both input channels are flattened to 196 values and passed through the
    same ``Linear(196, hidden)`` and ``Linear(hidden, hidden)`` layers, each
    followed by ``act_fun``; the two feature vectors are concatenated and
    mapped to 2 scores.
    """

    def __init__(self, hidden, act_fun):
        super().__init__()
        self.act_fun = act_fun
        # Two-layer encoder reused for both images (weight sharing).
        self.fc1 = nn.Linear(196, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        # Output layer applied to the concatenated features of both images.
        self.fc3 = nn.Linear(2 * hidden, 2)

    def _encode(self, image):
        """Flatten one image per sample and apply the shared two-layer encoder."""
        hidden_1 = self.act_fun(self.fc1(image.view(-1, 196)))
        return self.act_fun(self.fc2(hidden_1))

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs."""
        features = tuple(self._encode(x[:, channel, :, :]) for channel in range(2))
        return self.fc3(torch.cat(features, 1))
    
class MLP_NOsharing_aux(nn.Module):
    """Two-hidden-layer pair classifier: one encoder per image, with auxiliary heads.

    Each input channel is flattened to 196 values and passed through its own
    two linear layers, each followed by ``act_fun``. The concatenated features
    give the 2 main scores; each image's features also feed a
    ``Linear(hidden, 10)`` auxiliary head.
    """

    def __init__(self, hidden, act_fun):
        super(MLP_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1_1 = nn.Linear(196, hidden)
        self.fc1_2 = nn.Linear(196, hidden)
        self.fc2_1 = nn.Linear(hidden,hidden)
        self.fc2_2 = nn.Linear(hidden,hidden)

        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs.

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        x1_1 = self.act_fun(self.fc1_1(x[:,0,:,:].view(-1,196)))
        x1_2 = self.act_fun(self.fc1_2(x[:,1,:,:].view(-1,196)))
        x2_1 = self.act_fun(self.fc2_1(x1_1))
        x2_2 = self.act_fun(self.fc2_2(x1_2))

        aux1 = self.fc_aux1(x2_1)
        aux2 = self.fc_aux2(x2_2)

        x = torch.cat([x2_1, x2_2],1)
        x = self.fc3(x)
        return x, aux1, aux2
    
class MLP_sharing_aux(nn.Module):
    """Two-hidden-layer pair classifier: shared encoder, with auxiliary heads.

    Both input channels are flattened to 196 values and passed through the
    same two linear layers, each followed by ``act_fun``. The concatenated
    features give the 2 main scores; each image's features also feed its own
    ``Linear(hidden, 10)`` auxiliary head.
    """

    def __init__(self, hidden, act_fun):
        super(MLP_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.fc1 = nn.Linear(196, hidden)
        self.fc2 = nn.Linear(hidden,hidden)

        # For classification with classes
        self.fc_aux1 = nn.Linear(hidden, 10)
        self.fc_aux2 = nn.Linear(hidden, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc3 = nn.Linear(hidden*2,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs.

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        fc_image = []
        for image in range(2):
            x1 = self.act_fun(self.fc1(x[:,image,:,:].view(-1,196)))
            x2 = self.act_fun(self.fc2(x1))
            fc_image.append(x2)

        aux1 = self.fc_aux1(fc_image[0])
        aux2 = self.fc_aux2(fc_image[1])

        x = torch.cat([fc_image[0],fc_image[1]],1)
        x = self.fc3(x)
        return x, aux1, aux2

### Deep 1

In [7]:
class Deep_NOsharing_NOaux(nn.Module):
    """Two-convolution pair classifier: one encoder per image, no auxiliary output.

    Each image goes through its own conv(3x3, 32) -> max-pool(2) -> ``act_fun``
    -> conv(3x3, 64) -> max-pool(2) -> ``act_fun`` stack. For 14x14 inputs each
    branch yields 64x2x2 = 256 features; the 512 concatenated features feed
    ``Linear(512, hidden)`` and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_2 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_2 = nn.Conv2d(32, 64, kernel_size=3)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def _branch(self, image, conv_a, conv_b):
        """Apply the two conv/pool/activation stages to a single-channel batch."""
        h = self.act_fun(F.max_pool2d(conv_a(image), kernel_size=2, stride=2))
        return self.act_fun(F.max_pool2d(conv_b(h), kernel_size=2, stride=2))

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs (any batch size)."""
        # Slicing with i:i+1 keeps the channel dimension, so the batch size is
        # never hard-coded.
        x2_1 = self._branch(x[:, 0:1, :, :], self.conv1_1, self.conv2_1)
        x2_2 = self._branch(x[:, 1:2, :, :], self.conv1_2, self.conv2_2)

        x = torch.cat([x2_1, x2_2],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x

class Deep_sharing_NOaux(nn.Module):
    """Two-convolution pair classifier: shared encoder, no auxiliary output.

    Both images go through the same conv(3x3, 32) -> max-pool(2) -> ``act_fun``
    -> conv(3x3, 64) -> max-pool(2) -> ``act_fun`` stack. For 14x14 inputs each
    image yields 64x2x2 = 256 features; the 512 concatenated features feed
    ``Linear(512, hidden)`` and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_sharing_NOaux, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs (any batch size)."""
        conv_images = []
        for image in range(2):
            # image:image+1 keeps the channel dimension, so the batch size is
            # never hard-coded.
            single = x[:, image:image + 1, :, :]
            first_conv = self.act_fun(F.max_pool2d(self.conv1(single), kernel_size=2, stride=2))
            conv_images.append(self.act_fun(F.max_pool2d(self.conv2(first_conv), kernel_size=2, stride=2)))

        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x
    
class Deep_NOsharing_aux(nn.Module):
    """Two-convolution pair classifier: one encoder per image, with auxiliary heads.

    Each image goes through its own conv(3x3, 32) -> max-pool(2) -> ``act_fun``
    -> conv(3x3, 64) -> max-pool(2) -> ``act_fun`` stack. For 14x14 inputs each
    branch yields 256 features, which feed a ``Linear(256, 10)`` auxiliary
    head; the 512 concatenated features feed ``Linear(512, hidden)`` and
    ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_aux, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_2 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_2 = nn.Conv2d(32, 64, kernel_size=3)

        # For classification with classes
        self.fc_aux1 = nn.Linear(256, 10)
        self.fc_aux2 = nn.Linear(256, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def _branch(self, image, conv_a, conv_b):
        """Apply the two conv/pool/activation stages to a single-channel batch."""
        h = self.act_fun(F.max_pool2d(conv_a(image), kernel_size=2, stride=2))
        return self.act_fun(F.max_pool2d(conv_b(h), kernel_size=2, stride=2))

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs (any batch size).

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        # Slicing with i:i+1 keeps the channel dimension, so the batch size is
        # never hard-coded.
        x2_1 = self._branch(x[:, 0:1, :, :], self.conv1_1, self.conv2_1)
        x2_2 = self._branch(x[:, 1:2, :, :], self.conv1_2, self.conv2_2)

        aux1 = self.fc_aux1(torch.flatten(x2_1, 1))
        aux2 = self.fc_aux2(torch.flatten(x2_2, 1))

        x = torch.cat([x2_1, x2_2],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x, aux1, aux2
    
class Deep_sharing_aux(nn.Module):
    """Two-convolution pair classifier: shared encoder, with auxiliary heads.

    Both images go through the same conv(3x3, 32) -> max-pool(2) -> ``act_fun``
    -> conv(3x3, 64) -> max-pool(2) -> ``act_fun`` stack. For 14x14 inputs each
    image yields 256 features, which feed that image's ``Linear(256, 10)``
    auxiliary head; the 512 concatenated features feed ``Linear(512, hidden)``
    and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_sharing_aux, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)

        # For classification with classes
        self.fc_aux1 = nn.Linear(256, 10)
        self.fc_aux2 = nn.Linear(256, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(512, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs (any batch size).

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        conv_images = []
        for image in range(2):
            # image:image+1 keeps the channel dimension, so the batch size is
            # never hard-coded.
            single = x[:, image:image + 1, :, :]
            first_conv = self.act_fun(F.max_pool2d(self.conv1(single), kernel_size=2, stride=2))
            conv_images.append(self.act_fun(F.max_pool2d(self.conv2(first_conv), kernel_size=2, stride=2)))

        aux1 = self.fc_aux1(torch.flatten(conv_images[0], 1))
        aux2 = self.fc_aux2(torch.flatten(conv_images[1], 1))

        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x, aux1, aux2

### Deep 2

In [8]:
class Deep_NOsharing_NOaux2(nn.Module):
    """Four-convolution pair classifier: one encoder per image, no auxiliary output.

    Each image goes through its own conv(5x5, 16) -> conv(3x3, 32) ->
    conv(3x3, 64) -> conv(2x2, 128) -> max-pool(2) stack with ``act_fun``
    after every stage. For 14x14 inputs each branch yields 128x2x2 = 512
    features; the 1024 concatenated features feed ``Linear(1024, hidden)``
    and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_NOaux2, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv1_2 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2_1 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv2_2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_2 = nn.Conv2d(32, 64, kernel_size=3)

        self.conv4_1 = nn.Conv2d(64, 128, kernel_size=2)
        self.conv4_2 = nn.Conv2d(64, 128, kernel_size=2)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def _branch(self, image, conv_a, conv_b, conv_c, conv_d):
        """Apply the four convolution stages of one branch to a single-channel batch."""
        h = self.act_fun(conv_a(image))
        h = self.act_fun(conv_b(h))
        h = self.act_fun(conv_c(h))
        return self.act_fun(F.max_pool2d(conv_d(h), kernel_size=2, stride=2))

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs (any batch size)."""
        # Slicing with i:i+1 keeps the channel dimension, so the batch size is
        # never hard-coded. Each image uses only its own layers (conv*_1 for
        # image 1, conv*_2 for image 2).
        x4_1 = self._branch(x[:, 0:1, :, :], self.conv1_1, self.conv2_1, self.conv3_1, self.conv4_1)
        x4_2 = self._branch(x[:, 1:2, :, :], self.conv1_2, self.conv2_2, self.conv3_2, self.conv4_2)

        x = torch.cat([x4_1, x4_2],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x

class Deep_sharing_NOaux2(nn.Module):
    """Four-convolution pair classifier: shared encoder, no auxiliary output.

    Both images go through the same conv(5x5, 16) -> conv(3x3, 32) ->
    conv(3x3, 64) -> conv(2x2, 128) -> max-pool(2) stack with ``act_fun``
    after every stage. For 14x14 inputs each image yields 128x2x2 = 512
    features; the 1024 concatenated features feed ``Linear(1024, hidden)``
    and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_sharing_NOaux2, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        """Return the 2-column score tensor for a batch ``x`` of image pairs (any batch size)."""
        conv_images = []
        for image in range(2):
            # image:image+1 keeps the channel dimension, so the batch size is
            # never hard-coded.
            x1 = self.act_fun(self.conv1(x[:, image:image + 1, :, :]))
            x2 = self.act_fun(self.conv2(x1))
            x3 = self.act_fun(self.conv3(x2))
            x4 = self.act_fun(F.max_pool2d(self.conv4(x3), kernel_size=2, stride=2))
            conv_images.append(x4)

        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x
    
class Deep_NOsharing_aux2(nn.Module):
    """Four-convolution pair classifier: one encoder per image, with auxiliary heads.

    Each image goes through its own conv(5x5, 16) -> conv(3x3, 32) ->
    conv(3x3, 64) -> conv(2x2, 128) -> max-pool(2) stack with ``act_fun``
    after every stage. For 14x14 inputs each branch yields 512 features,
    which feed a ``Linear(512, 10)`` auxiliary head; the 1024 concatenated
    features feed ``Linear(1024, hidden)`` and ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_NOsharing_aux2, self).__init__()
        self.act_fun = act_fun
        self.conv1_1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv1_2 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2_1 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv2_2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3_1 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_2 = nn.Conv2d(32, 64, kernel_size=3)

        self.conv4_1 = nn.Conv2d(64, 128, kernel_size=2)
        self.conv4_2 = nn.Conv2d(64, 128, kernel_size=2)

        # For classification with classes
        self.fc_aux1 = nn.Linear(512, 10)
        self.fc_aux2 = nn.Linear(512, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def _branch(self, image, conv_a, conv_b, conv_c, conv_d):
        """Apply the four convolution stages of one branch to a single-channel batch."""
        h = self.act_fun(conv_a(image))
        h = self.act_fun(conv_b(h))
        h = self.act_fun(conv_c(h))
        return self.act_fun(F.max_pool2d(conv_d(h), kernel_size=2, stride=2))

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs (any batch size).

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        # Slicing with i:i+1 keeps the channel dimension, so the batch size is
        # never hard-coded. Each image uses only its own layers (conv*_1 for
        # image 1, conv*_2 for image 2).
        x4_1 = self._branch(x[:, 0:1, :, :], self.conv1_1, self.conv2_1, self.conv3_1, self.conv4_1)
        x4_2 = self._branch(x[:, 1:2, :, :], self.conv1_2, self.conv2_2, self.conv3_2, self.conv4_2)

        aux1 = self.fc_aux1(torch.flatten(x4_1, 1))
        aux2 = self.fc_aux2(torch.flatten(x4_2, 1))

        x = torch.cat([x4_1, x4_2],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x, aux1, aux2
    
class Deep_sharing_aux2(nn.Module):
    """Four-convolution pair classifier: shared encoder, with auxiliary heads.

    Both images go through the same conv(5x5, 16) -> conv(3x3, 32) ->
    conv(3x3, 64) -> conv(2x2, 128) -> max-pool(2) stack with ``act_fun``
    after every stage. For 14x14 inputs each image yields 512 features,
    which feed that image's ``Linear(512, 10)`` auxiliary head; the 1024
    concatenated features feed ``Linear(1024, hidden)`` and
    ``Linear(hidden, 2)``.
    """

    def __init__(self, hidden, act_fun):
        super(Deep_sharing_aux2, self).__init__()
        self.act_fun = act_fun
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)

        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)

        # For classification with classes
        self.fc_aux1 = nn.Linear(512, 10)
        self.fc_aux2 = nn.Linear(512, 10)

        # After concatenation of the features from image 1 and image 2
        self.fc1 = nn.Linear(1024, hidden)
        self.fc2 = nn.Linear(hidden,2)

    def forward(self, x):
        """Return ``(scores, aux1, aux2)`` for a batch ``x`` of image pairs (any batch size).

        ``aux1`` and ``aux2`` are raw (unnormalised) 10-way scores: the
        notebook trains them with ``nn.CrossEntropyLoss``, which applies
        log-softmax itself, so no softmax is applied here.
        """
        conv_images = []
        for image in range(2):
            # image:image+1 keeps the channel dimension, so the batch size is
            # never hard-coded.
            x1 = self.act_fun(self.conv1(x[:, image:image + 1, :, :]))
            x2 = self.act_fun(self.conv2(x1))
            x3 = self.act_fun(self.conv3(x2))
            x4 = self.act_fun(F.max_pool2d(self.conv4(x3), kernel_size=2, stride=2))
            conv_images.append(x4)

        aux1 = self.fc_aux1(torch.flatten(conv_images[0], 1))
        aux2 = self.fc_aux2(torch.flatten(conv_images[1], 1))

        x = torch.cat([conv_images[0], conv_images[1]],1)
        x = self.act_fun(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(x)
        return x, aux1, aux2

## Models generation

In [9]:
def model_training(input_, target, hidden_units, eta, lambda_, model_type = 'Shallow', sub_model = 'NOsharing_NOaux', 
                   nb_epochs = 25, mini_batch_size = 100, criterion = nn.CrossEntropyLoss(), nb_train = 700):
    """Build one architecture, train it on a prefix of the data and validate on the rest.

    Args:
        input_: inputs; the first ``nb_train`` rows are used for training and
            the remaining rows for validation.
        target: main targets aligned with ``input_``.
        hidden_units: value passed as ``hidden`` to the model constructor.
        eta: initial learning rate.
        lambda_: weight of the auxiliary losses (only used by ``*_aux`` models).
        model_type: one of ``'Shallow'``, ``'MLP'``, ``'Deep1'``, ``'Deep2'``.
        sub_model: one of ``'NOsharing_NOaux'``, ``'sharing_NOaux'``,
            ``'NOsharing_aux'``, ``'sharing_aux'``.
        nb_epochs: number of training epochs.
        mini_batch_size: batch size for both training and validation.
        criterion: loss passed to the training routine.
        nb_train: number of leading samples used for training (default 700,
            the value that used to be hard-coded).

    Returns:
        tuple: ``(accuracy, model)`` where ``accuracy`` is the fraction of
        correctly classified validation samples.

    Raises:
        ValueError: if ``model_type`` or ``sub_model`` is not a known name.
    """
    architectures = {
        'Shallow': {'NOsharing_NOaux': Shallow_NOsharing_NOaux,
                    'sharing_NOaux': Shallow_sharing_NOaux,
                    'NOsharing_aux': Shallow_NOsharing_aux,
                    'sharing_aux': Shallow_sharing_aux},
        'MLP': {'NOsharing_NOaux': MLP_NOsharing_NOaux,
                'sharing_NOaux': MLP_sharing_NOaux,
                'NOsharing_aux': MLP_NOsharing_aux,
                'sharing_aux': MLP_sharing_aux},
        'Deep1': {'NOsharing_NOaux': Deep_NOsharing_NOaux,
                  'sharing_NOaux': Deep_sharing_NOaux,
                  'NOsharing_aux': Deep_NOsharing_aux,
                  'sharing_aux': Deep_sharing_aux},
        'Deep2': {'NOsharing_NOaux': Deep_NOsharing_NOaux2,
                  'sharing_NOaux': Deep_sharing_NOaux2,
                  'NOsharing_aux': Deep_NOsharing_aux2,
                  'sharing_aux': Deep_sharing_aux2},
    }
    # Fail early with a clear message instead of hitting an unbound `model` later.
    if model_type not in architectures:
        raise ValueError('Unknown model_type: ' + repr(model_type))
    if sub_model not in architectures[model_type]:
        raise ValueError('Unknown sub_model: ' + repr(sub_model))
    model = architectures[model_type][sub_model](hidden = hidden_units, act_fun = F.relu)

    fit_input, fit_target = input_[:nb_train], target[:nb_train]
    val_input, val_target = input_[nb_train:], target[nb_train:]

    if(sub_model == 'NOsharing_aux' or sub_model == 'sharing_aux'): 
        # NOTE: no per-image class labels are passed here; train_model_aux
        # obtains them on its own.
        train_model_aux(model, fit_input, fit_target, nb_epochs, mini_batch_size, criterion, eta, lambda_)
        nb_errors = compute_nb_errors_aux(model, val_input, val_target, mini_batch_size)
    else: 
        train_model_NOaux(model, fit_input, fit_target, nb_epochs, mini_batch_size, criterion, eta)
        nb_errors = compute_nb_errors_NOaux(model, val_input, val_target, mini_batch_size)
    accuracy = 1 - nb_errors/len(val_target)

    return accuracy, model

In [10]:
def create_dict():
    """Return an empty result table: one fresh dict per (model type, sub-model) pair.

    The outer keys are the four model types and the inner keys the four
    sub-model names, in the same order as they are written below.
    """
    type_models = ('Shallow', 'MLP', 'Deep1', 'Deep2')
    sub_models = ('NOsharing_NOaux', 'sharing_NOaux', 'NOsharing_aux', 'sharing_aux')
    return {
        type_model: {sub_model: {} for sub_model in sub_models}
        for type_model in type_models
    }

def fill_results(results, type_model, sub_model, acc, eta, hidden, lambda_):
    """Store the selected hyper-parameters of one architecture in ``results``.

    Writes ``'Acc'``, ``'eta'`` and ``'hidden'`` into
    ``results[type_model][sub_model]``; ``'lambda'`` is written only for the
    two sub-models whose name ends in ``_aux``.

    Returns:
        The same ``results`` object, modified in place.
    """
    entry = results[type_model][sub_model]
    entry['Acc'] = acc
    entry['eta'] = eta
    entry['hidden'] = hidden
    # lambda_ is only meaningful for the models trained with auxiliary losses.
    if sub_model in ('NOsharing_aux', 'sharing_aux'):
        entry['lambda'] = lambda_
    return results

def grid_search_(lambdas, etas, hidden_units, train_input, train_target, test_input, test_target):
    """Pick the best hyper-parameters of each of the 16 architectures by grid search.

    For every (model type, sub-model) pair, ``model_training`` is called for
    each combination of the candidate values and the combination with the
    highest returned accuracy is kept (the first one in case of ties).

    Args:
        lambdas: 1-D tensor of candidate auxiliary-loss weights. It is only
            swept for the ``*_aux`` sub-models; the other sub-models ignore
            the weight, so they are trained with the first value only instead
            of being retrained identically once per candidate.
        etas: 1-D tensor of candidate learning rates.
        hidden_units: 1-D tensor of candidate hidden sizes.
        train_input: inputs forwarded to ``model_training``.
        train_target: targets forwarded to ``model_training``.
        test_input: unused; kept for interface compatibility.
        test_target: unused; kept for interface compatibility.

    Returns:
        dict: nested ``results[type_model][sub_model]`` table filled through
        ``fill_results``.
    """
    type_models = ['Shallow', 'MLP', 'Deep1', 'Deep2']
    sub_models = ['NOsharing_NOaux', 'sharing_NOaux', 'NOsharing_aux', 'sharing_aux']
    results = create_dict()

    i = 0

    for type_model in type_models:
        for sub_model in sub_models:
            i += 1
            print('Getting hyper-parameters for architecture', i, '/ 16...')
            uses_aux = (sub_model == 'NOsharing_aux' or sub_model == 'sharing_aux')
            # lambda_ has no effect without auxiliary losses: a single value
            # is enough there and avoids redundant trainings.
            candidate_lambdas = lambdas if uses_aux else lambdas[:1]
            performances = torch.zeros(len(candidate_lambdas),len(hidden_units),len(etas))
            for l, lambda_ in enumerate(candidate_lambdas):
                for h, hidden in enumerate(hidden_units):
                    for e, eta in enumerate(etas):
                        acc, _ = model_training(train_input, train_target, hidden.item(), eta.item(), 
                                              lambda_.item(), model_type = type_model, 
                                              sub_model = sub_model)
                        performances[l,h,e] = acc
            best_performance = torch.max(performances)
            # Indices (lambda, hidden, eta) of every best cell; the first row is used.
            best_idx = (performances == best_performance).nonzero()

            best_eta = etas[best_idx[0,2]].item()
            best_hidden = hidden_units[best_idx[0,1]].item()
            best_lambda = candidate_lambdas[best_idx[0,0]].item()

            results = fill_results(results, type_model, sub_model, best_performance.item(), 
                                       best_eta, best_hidden, best_lambda)
    return results

In [11]:
# Candidate values explored by the grid search.
lambdas = torch.tensor([0.25, 0.5, 0.75, 1])
etas = torch.tensor([0.1, 0.01, 0.001])
hidden_units = torch.tensor([50, 100, 200, 300])
HP = grid_search_(lambdas, etas, hidden_units, train_input, train_target, test_input, test_target)

# Keep a textual copy of the selected hyper-parameters; the context manager
# closes the file even if the write fails.
with open("HP.txt","w") as f:
    f.write( str(HP) )

# `results` only exists inside grid_search_; the returned table is `HP`.
print(HP)


Getting hyper-parameters for architecture 1 / 16...
Getting hyper-parameters for architecture 2 / 16...
Getting hyper-parameters for architecture 3 / 16...
Getting hyper-parameters for architecture 4 / 16...
Getting hyper-parameters for architecture 5 / 16...
Getting hyper-parameters for architecture 6 / 16...
Getting hyper-parameters for architecture 7 / 16...
Getting hyper-parameters for architecture 8 / 16...
Getting hyper-parameters for architecture 9 / 16...
Getting hyper-parameters for architecture 10 / 16...
Getting hyper-parameters for architecture 11 / 16...
Getting hyper-parameters for architecture 12 / 16...
Getting hyper-parameters for architecture 13 / 16...
Getting hyper-parameters for architecture 14 / 16...
Getting hyper-parameters for architecture 15 / 16...
Getting hyper-parameters for architecture 16 / 16...


In [12]:
# Retrain every architecture with its selected hyper-parameters on freshly
# generated data, n_iter times, and record the test accuracy of each run.
type_models = ['Shallow', 'MLP', 'Deep1', 'Deep2']
sub_models = ['NOsharing_NOaux', 'sharing_NOaux', 'NOsharing_aux', 'sharing_aux']
n_iter = 10
all_values = torch.zeros(n_iter, len(type_models), len(sub_models))

for j in range(n_iter):
    print('Iteration', j+1, '/ 10...')
    # New data set for every iteration; these names are notebook globals.
    (train_input, train_target, train_classes,
     test_input, test_target, test_classes) = prologue.generate_pair_sets(nbr_pairs)
    acc_test = torch.zeros(len(type_models), len(sub_models))
    i = 0
    for t, type_model in enumerate(type_models):
        for s, sub_model in enumerate(sub_models):
            i += 1
            print('    Architecture', i, '/ 16...')
            uses_aux = sub_model in ('NOsharing_aux', 'sharing_aux')

            best_hidden = HP[type_model][sub_model]['hidden']
            best_eta = HP[type_model][sub_model]['eta']
            # Only the models with auxiliary losses have a stored lambda.
            best_lambda = HP[type_model][sub_model]['lambda'] if uses_aux else 0

            _, model = model_training(train_input, train_target, best_hidden, best_eta, best_lambda,
                                      model_type = type_model, sub_model = sub_model)

            # Pick the error counter matching the model's output format.
            count_errors = compute_nb_errors_aux if uses_aux else compute_nb_errors_NOaux
            acc_test[t,s] = 1 - count_errors(model, test_input, test_target, 100)/len(test_target)
    all_values[j] = acc_test

Iteration 1 / 10...
    Architecture 1 / 16...
    Architecture 2 / 16...
    Architecture 3 / 16...
    Architecture 4 / 16...
    Architecture 5 / 16...
    Architecture 6 / 16...
    Architecture 7 / 16...
    Architecture 8 / 16...
    Architecture 9 / 16...
    Architecture 10 / 16...
    Architecture 11 / 16...
    Architecture 12 / 16...
    Architecture 13 / 16...
    Architecture 14 / 16...
    Architecture 15 / 16...
    Architecture 16 / 16...
Iteration 2 / 10...
    Architecture 1 / 16...
    Architecture 2 / 16...
    Architecture 3 / 16...
    Architecture 4 / 16...
    Architecture 5 / 16...
    Architecture 6 / 16...
    Architecture 7 / 16...
    Architecture 8 / 16...
    Architecture 9 / 16...
    Architecture 10 / 16...
    Architecture 11 / 16...
    Architecture 12 / 16...
    Architecture 13 / 16...
    Architecture 14 / 16...
    Architecture 15 / 16...
    Architecture 16 / 16...
Iteration 3 / 10...
    Architecture 1 / 16...
    Architecture 2 / 16...
    Arch

In [2]:
# Persist the raw accuracies, then print their mean and standard deviation
# over the first dimension of `all_values` (one entry per iteration).
torch.save(all_values, 'results')
for _title, _statistic in (('Mean :', torch.mean), ('Std :', torch.std)):
    print(_title)
    print(_statistic(all_values, 0))

Mean :


NameError: name 'all_values' is not defined