# Mini Project 1: MNSIT Pair Comparison

In [34]:
import torch
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
import dlc_practical_prologue as prologue
import pandas as pd
from IPython.display import display, Markdown


In [9]:
from torch import nn
import torch

class Print(torch.nn.Module):
    def forward(self, x):
        print(x.shape)
        return x

#method to flatten the images
class Flatten(torch.nn.Module):
    def forward(self, x):
        batch_size = x.shape[0]
        return torch.reshape(x, (batch_size, -1))
## Architectures: 

# The architecture represents the part of the model that recognizes the images.

# Deep

# Note: arch1,arch2,arch3 are functions that instanciate a new architecture not  architectures !
arch1 = lambda :  nn.Sequential(                     # input shape (100, 2, 14, 14)
            nn.Conv2d(
                in_channels=1,              # input height
                out_channels=16,            # n_filters
                kernel_size=3,              # filter size
                stride=1,                   # filter movement/step
                padding=1,                  # if want same width and length of this image after Conv2d, padding=(kernel_size-1)/2 if stride=1
            ),                              # output shape (16, 14, 14)
            nn.ReLU(),                      # activation
            nn.MaxPool2d(kernel_size=2),    # choose max value in 2x2 area, output shape (16, 7, 7)
            nn.Dropout2d(0.05),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, 5, 1, 4),     # output shape (32, 7, 7)
            nn.ReLU(),                      # activation
            nn.MaxPool2d(2),                # output shape (32, 7, 7)
            nn.Dropout2d(0.05),
            nn.BatchNorm2d(32),
            Flatten(),        
            nn.Linear(800,20),      # fully connected layer, output 10 classes
            nn.Dropout(0.1),
            nn.Linear(20, 10) ,             # fully connected layer, output 10 classes
            nn.BatchNorm1d(10),
            nn.Dropout(0.1),
            nn.ReLU())


# Fully connected

arch2 = lambda  : nn.Sequential(                      # input shape (1, 28, 28)
            Flatten(),
            nn.Linear(56, 128), 
            nn.ReLU(),
            nn.Linear(128, 128), 
            nn.ReLU(),
            nn.Linear(128, 56),
            nn.ReLU(),
            nn.Linear(56, 10),              
            nn.ReLU()
            )



# Deep with sigmoids

arch3 =  lambda  :  nn.Sequential(         # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,              # input height
                out_channels=16,            # n_filters
                kernel_size=2,              # filter size
                stride=1,                   # filter movement/step
                padding=4,                  # if want same width and length of this image after Conv2d, padding=(kernel_size-1)/2 if stride=1
            ),                              # output shape (16, 28, 28)
            nn.Sigmoid(),                   # activation
            nn.MaxPool2d(kernel_size=2),    # choose max value in 2x2 area, output shape (16, 14, 14)
            nn.Dropout2d(0.05),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, 5, 1, 4),     # output shape (32, 14, 14)
            nn.Sigmoid(),                   # activation
            nn.MaxPool2d(2),                # output shape (32, 7, 7)
            nn.Dropout2d(0.05),
            nn.BatchNorm2d(32),
            nn.Linear(32 * 7 * 7, 20),      # fully connected layer, output 10 classes
            nn.Dropout(0.1),
            nn.Linear(20, 10) ,             # fully connected layer, output 10 classes
            nn.BatchNorm1d(10),
            nn.Dropout(0.1),
            nn.ReLU())








class CNN(nn.Module):
    ''' To create this class use CNN(True, 'deep) for example.
    other options are False, 'deep with sigmoids' and 'fully connected'. '''
 
    def __init__(self, weight_sharing, architecture):
        super(CNN, self).__init__()
        self.weight_sharing = weight_sharing

        # select the proper architecture
        if architecture == 'deep':
            # arch_copy is used with no weight sharing only
            self.arch = arch1()
            self.arch_copy = arch1()
        if architecture == 'deep with sigmoids':
            self.arch = arch3()
            self.arch_copy = arch3()
        if architecture == 'fully connected':
            self.arch = arch2()
            self.arch_copy = arch2()

        self.fc = nn.Sequential(
            nn.Linear(20, 1) ,  # fully connected layer, output 10 classes
            nn.BatchNorm1d(1),
            nn.Dropout(0.05),
            nn.Sigmoid()
        )

    def forward(self, x):
        #first convolutional layer


        _x = torch.reshape(x[:,0,:,:], (-1, 1, 14, 14))
        _x1 = self.arch(_x)
        _x = torch.reshape(x[:,1,:,:], (-1, 1, 14, 14))

        # if there is no weight sharing use the arch_copy layers
        if self.weight_sharing: _x2 = self.arch(_x)
        else: _x2 = self.arch_copy(_x)

        #concatenate and retrun auxilary output
        _x = torch.cat((_x1, _x2), 1)   
        aux_out = (_x1, _x2)


        #fc layer to merge the two recognitions
        _x = self.fc(_x)
        
        # we print _x[:,0] because otherwise _x is of size (N,1) which is not usefull 
        # it should be of size (N)
        return aux_out, _x[:,0]






In [54]:
DATA=pd.DataFrame(columns = ['architecture', 
                             'training mode',
                             'weight sharing', 
                             'accuracy',
                             'digit recognition accuracy',
                             'model'])

for architecture in ['deep', 'fully connected', 'deep with sigmoids']:
    for weight_sharing in [True, False]:
        for training_mode in ['without auxiliar loss', 'with auxiliar loss']:
            
            model = CNN(weight_sharing, architecture)
            
            DATA = DATA.append({'architecture':architecture,
                               'training mode': training_mode,
                               'weight sharing': weight_sharing,
                               'accuracy': [],
                               'digit recognition accuracy':[],
                               'model':model},
                               ignore_index=True)


In [62]:
DATA

Unnamed: 0,architecture,training mode,weight sharing,accuracy,digit recognition accuracy,model
0,deep,without auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...
1,deep,with auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...
2,deep,without auxiliar loss,False,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...
3,deep,with auxiliar loss,False,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...
4,fully connected,without auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Flatten(...
5,fully connected,with auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Flatten(...
6,fully connected,without auxiliar loss,False,[],[],CNN(\n (arch): Sequential(\n (0): Flatten(...
7,fully connected,with auxiliar loss,False,[],[],CNN(\n (arch): Sequential(\n (0): Flatten(...
8,deep with sigmoids,without auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...
9,deep with sigmoids,with auxiliar loss,True,[],[],CNN(\n (arch): Sequential(\n (0): Conv2d(1...


In [58]:
# Load the data
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(1000)

In [59]:
# un hot-encode train and test classes to use the function nn.CrossEntropyLoss() 
train_classes = train_classes.argmax(dim = 2)
test_classes = test_classes.argmax(dim = 2)

RuntimeError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [63]:
test_classes

tensor([[3, 2],
        [0, 2],
        [7, 9],
        ...,
        [9, 3],
        [4, 0],
        [7, 9]])

In [17]:
# train and test classes to categorical
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return torch.Tensor(np.eye(num_classes, dtype='uint8')[y]).type(torch.LongTensor)
train_classes, test_classes = to_categorical(train_classes, 10), to_categorical(test_classes, 10)

In [18]:
# function to compute classes accuracy
def accuracy_classes(predicted, target):
    '''
    computes the accuracy of the predicted classes in %
    '''

    predicted_1 = predicted[0]
    predicted_2 = predicted[1]
    predicted_1 = predicted_1.argmax(dim=1)
    predicted_2 = predicted_2.argmax(dim=1)
    target_1=target[:,0]
    target_2=target[:,1]
    return ( 100 -( ( (target_1 != predicted_1) | (target_2 != predicted_2 ) ).sum() ).item() /target_1.shape[0] * 100 )



In [19]:
def accuracy_comparison(predicted, target):
    '''computes accuracy for output'''
    return( (torch.abs(predicted - target) < 0.5).sum().float() / target.shape[0] * 100)



In [68]:
loss_function = lambda x, y : x+y

data_row = DATA.iloc[0]

# dictionnary to store the values
d= {'comparison loss':[],
           'recognition loss':[],
           'comparison acc':[],
           'recognition acc':[]}


# Training of the model
eta= 0.0001
mini_batch_size = 100
epochs = 5

criterion_aux = nn.CrossEntropyLoss() # criterion for digit recognition
criterion_main = torch.nn.BCELoss() # criterion for digit comparison

# use adam optimizer for SGD
optimizer = torch.optim.Adam(model.parameters(), lr=eta)

# compute minibatch test target
minibatch_test_target = test_target.narrow(0, 0, mini_batch_size)
minibatch_test_input = test_input.narrow(0, 0, mini_batch_size)


# print total number of epochs
print('epoch: (../ '+str(epochs)+' )')

# necessary for loss_function
aux_validation_acc_item = 0

for e in range(0, epochs):
    #print current epoch
    print(str(e), sep=' ', end=' ', flush=True)

    # We do this with mini-batches
    for b in range(0, train_input.size(0), mini_batch_size):

        mini_batch_input = train_input.narrow(0, b, mini_batch_size)
        mini_batch_target = train_target.narrow(0, b, mini_batch_size) #classification labels Nx1
        mini_batch_target_aux = train_classes.narrow(0, b, mini_batch_size) #binary 'what number are these images' Nx20


        #output_aux is the Nx20 output of the second fc layer corresponding to what image pairs were predicted
        #output is the Nx1 output corresponding to: if image 0 or image 1 is bigger
        output_aux, output = model(mini_batch_input)  
        loss_aux = criterion_aux(output_aux[0], mini_batch_target_aux[:,0]) +\
        criterion_aux(output_aux[1], mini_batch_target_aux[:,1])
        loss_main = criterion_main(output, mini_batch_target.float())

        optimizer.zero_grad()
        loss = loss_function( loss_main, loss_aux )
        loss.backward()
        optimizer.step()



    # compute validation loss and accuracy

    with torch.no_grad():


        #compute outputs for test data
        validation_output_aux, validation_output = model(test_input)

        # compute loss for test data
        main_validation_loss = criterion_main( validation_output, test_target.float()) 
        aux_validation_loss = criterion_aux( validation_output_aux[0], test_classes[:,0].long()) + criterion_aux(validation_output_aux[1], test_classes[:,0].long())

        # compute accuracy for test and train data
        main_validation_acc_item = accuracy_comparison( validation_output, test_target.float())
        aux_validation_acc_item = accuracy_classes(validation_output_aux, test_classes)


        # append to arrays
        # save results in d
      

        d['comparison loss'].append(main_validation_loss.item())
        d['recognition loss'].append(aux_validation_loss.item())
        d['comparison acc'].append(main_validation_acc_item)
        d['recognition acc'].append(aux_validation_acc_item)

display(Markdown('''### Model:
 - '''+data_row['architecture']+'''
 - '''+data_row['training mode']+'''
 - '''+('with weight sharing' if data_row['weight sharing'] else 'without weight sharing')+'''
'''))


        
# plotting accuracy
plt.figure(figsize=(12,7))


description =  'model of type '+data_row['architecture']+',\n with lr= '+str(eta)+\
(', with weight sharing, ' if data_row['weight sharing'] else ', without weight sharing, ') +\
    'and loss function '+ data_row['training mode'] 

plt.title('Accuracy curves of the '+ description)

plt.plot(d['comparison acc'], label='comparison acc')
plt.plot(d['recognition acc'], label='recognition acc')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('accuracy  in %')
plt.show()

# plotting loss
plt.figure(figsize=(12,7))

plt.title('Loss curves of the '+description)


plt.plot(d['comparison loss'], label='comparison loss')
plt.plot(d['recognition loss'], label='recognition loss')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
plt.yscale('log')
plt.show()



epoch: (../ 5 )
0 

RuntimeError: size mismatch, m1: [22400 x 7], m2: [1568 x 20] at /opt/conda/conda-bld/pytorch_1549628766161/work/aten/src/TH/generic/THTensorMath.cpp:940