# CNN Siamese network

In [1]:
# Important imports
from helper_functions import *
from torch import nn
from torch.autograd import Variable
from torch.nn import functional as F

In [2]:
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(N)



## Preprocessing data 

We split the input images into two different images with which we are going to feed our siamese Network.

In [3]:
train_input1, train_input2, test_input1, test_input2, train_classes1,\
train_classes2, test_classes1, test_classes2 = split_img_data(train_input, test_input,\
                                                              train_classes, test_classes)

ValueError: not enough values to unpack (expected 8, got 4)

We do some preprocessing :

- We multiply our inputs by 0.9 in order to prevent from the vanishing gradients with tanh.
- We convert the classes in to one hot encoded labels for the training
- we normalize our input data

In [12]:
train_input1 = 0.9*train_input1
train_input2 = 0.9*train_input2

test_input1 = 0.9*test_input1
test_input2 = 0.9*test_input2

#train_classes1 = convert_to_one_hot_labels(train_input1, train_classes1)
#train_classes2 = convert_to_one_hot_labels(train_input2, train_classes2)

#test_classes1 = convert_to_one_hot_labels(test_input1, test_classes1)
#test_classes2 = convert_to_one_hot_labels(test_input2, test_classes2)

train_input1, test_input1 = normalize(train_input1, test_input1)
train_input2, test_input2 = normalize(train_input2, test_input2)

We reshape the data by adding the channel dimension in order to feed the convolutionnal layers

In [13]:
train_input1 = torch.unsqueeze(train_input1, 1)
print("Dimension of train_input1: {}".format(torch.unsqueeze(train_input1, 1).shape))

test_input1 = torch.unsqueeze(test_input1, 1)
print("Dimension of test_input1: {}".format(torch.unsqueeze(test_input1, 1).shape))

train_input2 = torch.unsqueeze(train_input2, 1)
print("Dimension of train_input2: {}".format(torch.unsqueeze(train_input2, 1).shape))

test_input2 = torch.unsqueeze(test_input2, 1)
print("Dimension of test_input2: {}".format(torch.unsqueeze(test_input2, 1).shape))

Dimension of train_input1: torch.Size([1000, 1, 1, 14, 14])
Dimension of test_input1: torch.Size([1000, 1, 1, 14, 14])
Dimension of train_input2: torch.Size([1000, 1, 1, 14, 14])
Dimension of test_input2: torch.Size([1000, 1, 1, 14, 14])


## Building Siamese model

We build the model and some functions that will help us train it and compute its errors.

In [14]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        nb_hidden = 100
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_bn = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 32, kernel_size=2)
        self.conv2_bn = nn.BatchNorm2d(32)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.conv3_bn = nn.BatchNorm2d(64)
        
        self.drop1 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(64, nb_hidden)
        self.drop2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(nb_hidden, 10)
        self.drop3 = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(nb_hidden*2, 2)
        self.fc4 = nn.Sigmoid()

    def forward(self, x, y):
        x = F.relu(F.max_pool2d(self.conv1_bn(self.conv1(x)), kernel_size=2))
        y = F.relu(F.max_pool2d(self.conv1_bn(self.conv1(y)), kernel_size=2))
        
        x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), kernel_size=2))
        y = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(y)), kernel_size=2))
        
        x = F.relu(self.conv3_bn(self.conv3(x)))
        y = F.relu(self.conv3_bn(self.conv3(y)))
        
        x = self.drop1(x)
        y = self.drop1(y)
        
        x = F.relu(self.fc1(x.view(-1, 64)))
        y = F.relu(self.fc1(y.view(-1, 64)))
        
        x = self.drop2(x)
        y = self.drop2(y)
        
        
        binary_target = torch.cat([x, y], 1)


        x = self.fc2(x)
        y = self.fc2(y)
        
        #
        x = self.fc4(x)
        y = self.fc4(y)
        binary_target = self.drop3(binary_target)
        binary_target = self.fc3(binary_target)
        binary_target = self.fc4(binary_target)
        return x, y, binary_target
    

def train_model(model, train_input1, train_input2, train_target1, train_target2, train_target3, mini_batch_size, digit_scalar, binary_target_scalar):
    criterion = nn.CrossEntropyLoss() # Crossentropy MSELoss 
    eta = 1e-1
    optimizer = torch.optim.SGD(model.parameters(), lr=eta, momentum=0) # check the lectures

    for e in range(25):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output_x, output_y, output_binary_target = model(train_input1.narrow(0, b, mini_batch_size),\
                                                             train_input2.narrow(0, b, mini_batch_size))
            
            loss_x = criterion(output_x, train_target1.narrow(0, b, mini_batch_size).long())
            loss_y = criterion(output_y, train_target2.narrow(0, b, mini_batch_size).long())
            loss_binary_target = criterion(output_binary_target, train_target3.narrow(0, b, mini_batch_size).long())
            loss = digit_scalar*(loss_x + loss_y) + binary_target_scalar*loss_binary_target
            model.zero_grad()
            loss.backward()
            sum_loss = sum_loss + loss.item()
            optimizer.step()


        
def compute_nb_errors(prediction, target):
    errors = 0
    for (a,b) in zip(prediction, target):
        if a.float() != b.float():
            errors+=1
    return errors/len(prediction)*100
            

## Training of the model

In [15]:
list_weights = [0.1, 0.01, 0.5, 1]
dicto = {}
for a in list_weights:
    for b in list_weights:
        model = Net2()
        for k in range(15):
            train_model(model, train_input1, train_input2, train_classes1, train_classes2, train_target, mini_batch_size, a, b)
            model.eval()
            _, _, prediction = model(test_input1, test_input2)
            #print(compute_nb_errors(prediction.max(1)[1], test_target.float()))
            if k==9:
                print(compute_nb_errors(prediction.max(1)[1], test_target.float()))
                dicto[('digit_scalar: '+str(a), 'target_scalar: '+str(b))] = compute_nb_errors(prediction.max(1)[1], test_target.float())
            
            
            
            
    
    

14.499999999999998
18.7
17.0
15.1
19.2
28.1
19.5
18.6
13.4
16.2
11.700000000000001
12.2
12.1
21.3
10.7
12.1


In [16]:
dicto

{('digit_scalar: 0.1', 'target_scalar: 0.1'): 14.499999999999998,
 ('digit_scalar: 0.1', 'target_scalar: 0.01'): 18.7,
 ('digit_scalar: 0.1', 'target_scalar: 0.5'): 17.0,
 ('digit_scalar: 0.1', 'target_scalar: 1'): 15.1,
 ('digit_scalar: 0.01', 'target_scalar: 0.1'): 19.2,
 ('digit_scalar: 0.01', 'target_scalar: 0.01'): 28.1,
 ('digit_scalar: 0.01', 'target_scalar: 0.5'): 19.5,
 ('digit_scalar: 0.01', 'target_scalar: 1'): 18.6,
 ('digit_scalar: 0.5', 'target_scalar: 0.1'): 13.4,
 ('digit_scalar: 0.5', 'target_scalar: 0.01'): 16.2,
 ('digit_scalar: 0.5', 'target_scalar: 0.5'): 11.700000000000001,
 ('digit_scalar: 0.5', 'target_scalar: 1'): 12.2,
 ('digit_scalar: 1', 'target_scalar: 0.1'): 12.1,
 ('digit_scalar: 1', 'target_scalar: 0.01'): 21.3,
 ('digit_scalar: 1', 'target_scalar: 0.5'): 10.7,
 ('digit_scalar: 1', 'target_scalar: 1'): 12.1}

{('digit_scalar: 0.1', 'target_scalar: 0.1'): 21.0,
 ('digit_scalar: 0.1', 'target_scalar: 0.01'): 19.3,
 ('digit_scalar: 0.1', 'target_scalar: 0.5'): 17.7,
 ('digit_scalar: 0.1', 'target_scalar: 1'): 17.2,
 ('digit_scalar: 0.1', 'target_scalar: 2'): 16.7,
 ('digit_scalar: 0.1', 'target_scalar: 3'): 16.900000000000002,
 ('digit_scalar: 0.01', 'target_scalar: 0.1'): 15.4,
 ('digit_scalar: 0.01', 'target_scalar: 0.01'): 15.1,
 ('digit_scalar: 0.01', 'target_scalar: 0.5'): 17.5,
 ('digit_scalar: 0.01', 'target_scalar: 1'): 17.0,
 ('digit_scalar: 0.01', 'target_scalar: 2'): 18.4,
 ('digit_scalar: 0.01', 'target_scalar: 3'): 16.900000000000002,
 ('digit_scalar: 0.5', 'target_scalar: 0.1'): 15.299999999999999,
 ('digit_scalar: 0.5', 'target_scalar: 0.01'): 15.0,
 ('digit_scalar: 0.5', 'target_scalar: 0.5'): 16.5,
 ('digit_scalar: 0.5', 'target_scalar: 1'): 14.099999999999998,
 ('digit_scalar: 0.5', 'target_scalar: 2'): 15.7,
 ('digit_scalar: 0.5', 'target_scalar: 3'): 17.0,
 ('digit_scalar: 1', 'target_scalar: 0.1'): 16.3,
 ('digit_scalar: 1', 'target_scalar: 0.01'): 15.4,
 ('digit_scalar: 1', 'target_scalar: 0.5'): 17.5,
 ('digit_scalar: 1', 'target_scalar: 1'): 15.8,
 ('digit_scalar: 1', 'target_scalar: 2'): 16.5,
 ('digit_scalar: 1', 'target_scalar: 3'): 13.5,
 ('digit_scalar: 2', 'target_scalar: 0.1'): 15.8,
 ('digit_scalar: 2', 'target_scalar: 0.01'): 14.299999999999999,
 ('digit_scalar: 2', 'target_scalar: 0.5'): 16.2,
 ('digit_scalar: 2', 'target_scalar: 1'): 14.299999999999999,
 ('digit_scalar: 2', 'target_scalar: 2'): 13.4,
 ('digit_scalar: 2', 'target_scalar: 3'): 17.7,
 ('digit_scalar: 3', 'target_scalar: 0.1'): 15.9,
 ('digit_scalar: 3', 'target_scalar: 0.01'): 15.8,
 ('digit_scalar: 3', 'target_scalar: 0.5'): 14.7,
 ('digit_scalar: 3', 'target_scalar: 1'): 15.2,
 ('digit_scalar: 3', 'target_scalar: 2'): 16.6,
 ('digit_scalar: 3', 'target_scalar: 3'): 15.5}