In [1]:
import import_ipynb
import dlc_practical_prologue as prologue
import matplotlib.pyplot as plt


import torch
from torch import nn
from torch.nn import init
from torch.utils.data import TensorDataset, DataLoader


def standardize(x, mu, std):
    '''
    Standardize data to zero-mean and unit variance.
    Parameters
    -------
    x
        Data to be standardized
    mu
        Mean
    std
        Standard deviation
    Returns
    -------
    tensor
        Standardized data
    '''
    return x.sub_(mu).div_(std)


def load_data(N=1000, batch_size=50, seed=42):
    '''
    Load training and test data from MNIST
    Data: pairs of MNIST images
    Label: 1 if first digit is lesser or equal than the second, 0 otherwise
    Parameters
    -------
    N
        Number of examples to generate for each set
    batch_size
        Batch size (for loading datasets into DataLoader type)
    seed
        Random seed (for reproducibility)
    Returns
    -------
    train_loader
        DataLoader containing training examples, binary labels and true image classes
    test_loader
        DataLoader containing test examples, binary labels and true image classes
    '''
    
    # Generate pairs
    trainX, trainY, trainC, testX, testY, testC = prologue.generate_pair_sets(N)
    
    # Retrieve mean and standard deviation of training set
    mu, std = trainX.mean(), trainX.std()
    
    # Standardize data
    trainX, testX = [standardize(x, mu, std) for x in [trainX, testX]]

    # Assemble all data
    train_data = TensorDataset(trainX, trainY, trainC)
    test_data = TensorDataset(testX, testY, testC)
    
    # Load data in DataLoader and shuffle training set
    torch.manual_seed(seed) # For reproducibility
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)
    return train_loader, test_loader

importing Jupyter notebook from dlc_practical_prologue.ipynb


In [2]:
train_loader, test_loader = load_data()

In [4]:
import torch
from torch import nn


class CNN(nn.Module):
    '''
    Baseline Convolutional Neural Network
    Attributes
    -------
    conv1, conv2
        First, second convolutional layers
    fc1, fc2, fc3
        First, second and third fully-connected layers
    classifier
        Final fully-connected layer, rendering the binary prediction
    drop
        Dropout layer, applied after each linear layer before classification (excepting fc3)
    pool
        Max-Pooling layer
    relu
        ReLU activation
    sigmoid
        Sigmoid activation (for classification layer)
    '''
    
    def __init__(self, verbose=True):
        '''
        Initialize the CNN
        Parameters
        -------
        verbose
            If true, prints number of parameters in the model
        '''
        
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(2, 24, kernel_size=3)
        self.conv2 = nn.Conv2d(24, 49, kernel_size=3)
        
        # fully connected layers
        self.fc1 = nn.Linear(196, 128)
        self.fc2 = nn.Linear(128, 20)
        self.fc3 = nn.Linear(20, 10)
        self.classifier = nn.Linear(10, 1)
        
        # Regularizers
        self.drop = nn.Dropout(0.2)
        self.pool = nn.MaxPool2d(2,2)
        
        # Activation functions
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
        if verbose:
            print(f'{self._get_name()} - Number of parameters: {self.count_params()}')

    def count_params(self):
        '''
        Counts number of parameters in model
        '''
        return sum(p.numel() for p in self.parameters())
    
    def forward(self, x):
        '''
        Forward pass
        Parameters
        -------
        x
            Input to the model, dimension: Nx2x14x14
        Returns
        -------
        tensor
            Binary classification output, dimension: Nx1
        tensor
            Auxiliary classification (None as the network is not Siamese)
            (Siamese networks cannot benefit from auxiliary training)
        '''
        
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        
        x = self.relu(self.fc1(x.flatten(start_dim=1)))
        x = self.drop(x)
        
        x = self.relu(self.fc2(x))
        x = self.drop(x)
        
        x = self.relu(self.fc3(x.flatten(start_dim=1)))
        
        x = self.sigmoid(self.classifier(x))
        return x.squeeze()

In [25]:
class SiameseCNN(nn.Module):
    '''
    Siamese Convolutional Neural Network
    Attributes
    -------
    conv1, conv2
        First, second convolutional layers
    fc1, fc2, fc3
        First, second and third fully-connected layers
    classifier
        Final fully-connected layer, rendering the binary prediction
    drop
        Dropout layer, applied after each linear layer before classification (excepting fc3)
    pool
        Max-Pooling layer
    relu
        ReLU activation
    sigmoid
        Sigmoid activation (for classification layer)
    '''
    
    def __init__(self, verbose=True):
        '''
        Initialize the S-CNN
        Parameters
        -------
        verbose
            If true, prints number of parameters in the model
        '''
        super(SiameseCNN, self).__init__()
        
        # Siamese block
        self.conv1 = nn.Conv2d(1, 24, kernel_size=3)
        self.conv2 = nn.Conv2d(24, 49, kernel_size=3)
        self.fc1 = nn.Linear(196, 128)
        self.fc2 = nn.Linear(128, 10)
        
        # Decision block
        self.fc3 = nn.Linear(20, 10)
        self.classifier = nn.Linear(10, 1)
        
        # Regularizers: Dropout, Max-Pooling
        self.drop = nn.Dropout(0.2)
        self.pool = nn.MaxPool2d(2,2)
        
        # Activation functions
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
        if verbose:
            print(f'{self._get_name()} - Number of parameters: {self.count_params()}  \n')

    def count_params(self):
        '''
        Counts number of parameters in model
        '''
        return sum(p.numel() for p in self.parameters())
    
    def siamese_block(self, x):
        '''
        Pass a single image through Siamese block
        Parameters
        -------
        x
            Single image input, dimension: Nx14x14
        Returns
        -------
        x
            Siamese block output, dimension: Nx1x10
        '''
        x = self.pool(self.relu(self.conv1(x.unsqueeze(1))))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.fc1(x.flatten(start_dim=1)))
        x = self.drop(x)
        x = self.relu(self.fc2(x))
        return x
    
    def forward(self, x):
        '''
        Forward pass
        Parameters
        -------
        x
            Input to the model, dimension: Nx2x14x14
        Returns
        -------
        x
            Binary classification output, dimension: Nx1
        aux
            Auxiliary classification output, dimension: Nx2x10
        '''
        
        x1, x2 = x.unbind(1)
        
        x1, x2 = self.siamese_block(x1), self.siamese_block(x2)
        
        # Collect auxiliary classification output
        # Dimension of aux: Nx2x10
        aux = torch.stack([x1, x2], dim=1)
        
        # Concatenate outputs from Siamese blocks
        # Dimension of x: Nx20
        x = torch.cat([x1, x2], dim=1)
        
        x = self.drop(x)
        x = self.relu(self.fc3(x.flatten(start_dim=1)))
        x = self.sigmoid(self.classifier(x))
        return x.squeeze()

In [26]:
model = SiameseCNN()
alpha=0
eta= 1e-3 
decay= 1e-2
aux = None

SiameseCNN - Number of parameters: 37600  



In [27]:
import torch
import time
from torch import nn, optim


def train(net, train_loader, alpha, eta, decay,
          n_epochs=25, verbose=False, plotting=False):
    '''
    Train a neural network
    Parameters
    -------
    model
        The neural network
    train_loader
        The training set (DataLoader)
    alpha
        Auxiliary loss coefficient for Siamese networks (0, 0.5 or 1s)
        Not taken into account for non-Siamese networks
    eta
        Learning rate
    decay
        L2-regularization coefficient
    n_epochs
        Number of epochs
    verbose
        If true, print loss at each epoch
    plotting
        If true, collects training accuracy at each epoch for future plotting
    Returns
    -------
    tr_losses (tensor)
        Training losses collected at each epoch
    tr_accuracies (tensor)
        Training accuracies collected at each epoch
        If plotting is False, tr_accuracies will only consist of zeros.
    '''
    
    aux_crit = nn.CrossEntropyLoss()
    binary_crit = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=eta, weight_decay=decay)

    tr_losses = torch.zeros(n_epochs)
    tr_accuracies = torch.zeros(n_epochs)

    for e in range(n_epochs):
        # Reset training/validation loss
        tr_loss = 0

        # Training mode
        net.train()

        for (trainX, trainY, trainC) in train_loader:
            # Forward pass
            out = net(trainX)

            # Binary classification loss
            binary_loss = binary_crit(out, trainY.float())

            # Compute auxiliary loss for Siamese netwoks
            if aux is not None:
                # Separate outputs and target classes for each image
                aux1, aux2 = aux.unbind(1)
                c1, c2 = trainC.unbind(1)

                # Auxiliary loss
                aux_loss = aux_crit(aux1, c1) + aux_crit(aux2, c2)
            else:
                # Total loss
                aux_loss = 0
                
            # Total loss = Binary loss + alpha*auxiliary loss
            total_loss = binary_loss + alpha*aux_loss
            tr_loss += total_loss.item()

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        if plotting:
            # Collect accuracy data for later plotting
            tr_accuracies[e] = compute_accuracy(net, train_loader)

        # Collect loss data
        tr_losses[e] = tr_loss

        if verbose:
            print('Epoch %d/%d, Binary loss: %.3f, Auxiliary loss: %.3f' %
                  (e+1, n_epochs, binary_loss, aux_loss))

    return tr_losses, tr_accuracies


In [28]:
tr_losses, tr_accuracies = train(model, train_loader, alpha, eta, decay)

In [29]:
def compute_accuracy(net, data_loader):
    '''
    Compute accuracy of the network on a dataset.
    Accuracy = (1/N) * sum(predicted_label_i == true_label_i), i = 1, ..., N
    Parameters
    -------
    net
        The model/network.
    data_loader
        The training/test set.
    Returns
    -------
    tensor
         The accuracy of the model.
    '''
    
    acc = 0.
    total = 0
    net.eval()
    with torch.no_grad():
        for (X, y, _) in data_loader:
            out = net(X)
            acc += ((out > 0.5) == y).float().sum().item()
            total += len(y)
    return acc/total

In [30]:

import torch
from torch import nn
import matplotlib.pyplot as plt
import time
import warnings


def run_train(model, alpha, eta, decay, plotting=False, verbose=True, seed=14):
    '''
    Run a single training.
    Parameters
    -------
    model
        The neural network
    alpha
        The auxiliary loss coefficient
    eta
        Learning rate for training
    decay
        L2-regularization coefficient
    plotting
        If true, plots training loss and training accuracy at each epoch
    verbose
        If true, gives additional information during training (loss at each epoch)
    seed
        Random seed (for reproducibility)
    '''

    # Generate data
    torch.manual_seed(seed) # For reproducbility
    train_loader, test_loader = load_data(seed=seed)

    # Apply training mode and weight initialization
    model.train()
    #model.apply(weight_initialization)

    # Train model
    start = time.time()
    tr_loss, tr_acc = train(model, train_loader, alpha=alpha,
                            eta=eta, decay=decay,
                            verbose=verbose, plotting=plotting)

    print('\n Training ended. Training time: %.2f s \n' % (time.time()-start))

    model.eval() # Disable dropout layers for testing
    final_train_accuracy = compute_accuracy(model, train_loader)
    final_test_accuracy = compute_accuracy(model, test_loader)

    # Visualize data if plotting
    if plotting:
        train_visualization(model, tr_loss, tr_acc, final_test_accuracy)

    print('Train accuracy: %.4f // Test accuracy: %.4f' %
         (final_train_accuracy, final_test_accuracy))

In [31]:
run_train(model, alpha, eta, decay)

Epoch 1/25, Binary loss: 0.250, Auxiliary loss: 0.000
Epoch 2/25, Binary loss: 0.321, Auxiliary loss: 0.000
Epoch 3/25, Binary loss: 0.336, Auxiliary loss: 0.000
Epoch 4/25, Binary loss: 0.271, Auxiliary loss: 0.000
Epoch 5/25, Binary loss: 0.340, Auxiliary loss: 0.000
Epoch 6/25, Binary loss: 0.233, Auxiliary loss: 0.000
Epoch 7/25, Binary loss: 0.289, Auxiliary loss: 0.000
Epoch 8/25, Binary loss: 0.192, Auxiliary loss: 0.000
Epoch 9/25, Binary loss: 0.193, Auxiliary loss: 0.000
Epoch 10/25, Binary loss: 0.189, Auxiliary loss: 0.000
Epoch 11/25, Binary loss: 0.106, Auxiliary loss: 0.000
Epoch 12/25, Binary loss: 0.241, Auxiliary loss: 0.000
Epoch 13/25, Binary loss: 0.260, Auxiliary loss: 0.000
Epoch 14/25, Binary loss: 0.116, Auxiliary loss: 0.000
Epoch 15/25, Binary loss: 0.108, Auxiliary loss: 0.000
Epoch 16/25, Binary loss: 0.153, Auxiliary loss: 0.000
Epoch 17/25, Binary loss: 0.124, Auxiliary loss: 0.000
Epoch 18/25, Binary loss: 0.066, Auxiliary loss: 0.000
Epoch 19/25, Binary