In [13]:
import random
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm


# Seed PyTorch's global random number generator (it drives the random weight
# initialisation done by weights_init) so that runs are reproducible
torch.manual_seed(1234)

<torch._C.Generator at 0x7f84c0155810>

In [None]:
def weights_init(m):
    """Initialise a linear layer with small Gaussian weights and a zero bias.

    Intended to be passed to ``nn.Module.apply``, which calls it once per
    sub-module. Modules that are not exactly ``nn.Linear`` are left untouched.

    Args:
        m: The module to (possibly) initialise in place.
    """
    # Exact type match (not isinstance) so subclasses of nn.Linear keep
    # their own initialisation, exactly as before.
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0.0, std=1e-3)
        # Layers built with bias=False have no bias tensor to reset.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

def update_lr(optimizer, lr):
    """Overwrite the learning rate of every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts.
        lr: The new learning rate stored under the ``'lr'`` key of each group.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)


#--------------------------------
# Device configuration
#--------------------------------

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device: %s' % device)

#--------------------------------
# Hyper-parameters
#--------------------------------

In [None]:
# Defaults for a single run. The grid-search cell further down overwrites
# num_epochs, hidden_size and learning_rate for every configuration it tries.
input_size = 32 * 32 * 3          # length of one flattened input image
hidden_size = [50]                # widths of the hidden layers
num_classes = 10
num_epochs = 10
batch_size = 200
learning_rate = 1e-3
learning_rate_decay = 0.95        # factor applied to the learning rate after each epoch
reg = 0.001                       # passed to the optimizer as weight_decay
num_training = 49000              # samples in the training split
num_validation = 1000             # samples in the validation split
train = True                      # gates the training branch of the grid search

#-------------------------------------------------
# Load the CIFAR-10 dataset
#-------------------------------------------------

In [None]:
# Convert images to tensors, then normalise every channel with mean 0.5 and
# standard deviation 0.5
norm_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training portion of CIFAR-10; download=False means the files must already
# be present under datasets/
cifar_dataset = torchvision.datasets.CIFAR10(
    root='datasets/',
    train=True,
    transform=norm_transform,
    download=False,
)

# Test portion of CIFAR-10, read from the same root with the same transform
test_dataset = torchvision.datasets.CIFAR10(
    root='datasets/',
    train=False,
    transform=norm_transform,
)

#-------------------------------------------------
# Prepare the training and validation splits
#-------------------------------------------------

In [None]:
# The first `num_training` samples form the training split
mask = [idx for idx in range(num_training)]
train_dataset = torch.utils.data.Subset(cifar_dataset, mask)

# The following `num_validation` samples form the validation split
mask = [idx for idx in range(num_training, num_training + num_validation)]
val_dataset = torch.utils.data.Subset(cifar_dataset, mask)

#-------------------------------------------------
# Data loader
#-------------------------------------------------

In [None]:
# Only the training loader shuffles; validation and test are read in dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset, batch_size=batch_size, shuffle=False)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

#======================================================================================
# Q4: Implementing multi-layer perceptron in PyTorch
#======================================================================================

So far we have implemented a two-layer network using numpy by explicitly writing down the forward computation and by deriving and implementing the equations for the backward computation. This process can be tedious to extend to large network architectures.

Popular deep-learning libraries like PyTorch and TensorFlow allow us to quickly implement complicated neural network architectures. They provide pre-defined layers which can be used as building blocks to define our network. They also provide automatic differentiation, which allows us to define only the forward pass and let the library perform back-propagation for us.

In this question we will implement a multi-layer perceptron using the PyTorch library. Please complete the code for the MultiLayerPerceptron class and for training and evaluating the model. Once you can train the two-layer model, experiment with adding more layers and report your observations.
#--------------------------------------------------------------------------------------


#-------------------------------------------------
# Fully connected neural network with a configurable number of hidden layers
#-------------------------------------------------

In [None]:
class MultiLayerPerceptron(nn.Module):
    """Fully connected network input_size -> hidden_layers[0] -> ... -> num_classes.

    Every hidden linear layer is followed by ``activation_function``; the final
    linear layer outputs raw class scores with no activation. The very same
    ``activation_function`` object is reused after every hidden layer.

    Args:
        input_size: Number of features of each input sample.
        hidden_layers: Non-empty sequence with the width of each hidden layer.
        num_classes: Number of output scores.
        activation_function: Module applied after every hidden linear layer.

    Raises:
        ValueError: If ``hidden_layers`` is empty.
    """

    def __init__(self, input_size, hidden_layers, num_classes, activation_function):
        super().__init__()
        #################################################################################
        # TODO: Initialize the modules required to implement the mlp with the layer     #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        # Make use of linear and relu layers from the torch.nn module                   #
        #################################################################################

        layers = [] #Use the layers list to store a variable number of layers

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        if len(hidden_layers) == 0:
            raise ValueError("ERROR:  Check the number of layers")

        # Consecutive entries of `widths` are the (in, out) sizes of each hidden layer
        widths = [input_size, *hidden_layers]
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), activation_function]

        # Output layer: last hidden width -> class scores, without an activation
        layers.append(nn.Linear(widths[-1], num_classes))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Enter the layers into nn.Sequential, so the model may "see" them
        # Note the use of * in front of layers
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw class scores."""
        #################################################################################
        # TODO: Implement the forward pass computations                                 #
        # Note that you do not need to use the softmax operation at the end.            #
        # Softmax is only required for the loss computation and the criterion used below#
        # nn.CrossEntropyLoss() already integrates the softmax and the log loss together#
        #################################################################################

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        scores = self.layers(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return scores

In [None]:
# Candidate values for each hyper-parameter
functions = [nn.Sigmoid(), nn.ReLU()]
learning_rates = [1e-3, 1e-2]
Epochs = [10]

# Hidden-layer configurations are the growing prefixes of `numbers`:
# [500], [500, 300], ..., [500, 300, 250, 100, 50, 25]
numbers = [500, 300, 250, 100, 50, 25]
num_layers = [numbers[:depth] for depth in range(1, len(numbers) + 1)]

hyper_parameters = {
    'number_layers': num_layers,
    'activation_funct': functions,
    'epochs': Epochs,
    'learning_R': learning_rates,
}

grid = ParameterGrid(hyper_parameters)

Results = []
count = 0

# Show how many configurations the grid contains
print(len(grid))

In [None]:
#-------------------------------------------------
# Hyper-parameter grid for the full sweep. This redefines the grid of the
# preview cell above and additionally compares 10 against 20 epochs.
#-------------------------------------------------
functions = [nn.Sigmoid(),nn.ReLU()]
learning_rates = [1e-3, 1e-2]
numbers = [500, 300, 250, 100, 50, 25]
# Growing prefixes of `numbers`: [500], [500, 300], ... (one to six hidden layers)
num_layers = [numbers[:i] for i in range(1, len(numbers)+1)]
Epochs = [10, 20]

hyper_parameters = {
    'number_layers' : num_layers,
    'activation_funct' : functions,
    'epochs' : Epochs,
    'learning_R' : learning_rates
 }

grid = ParameterGrid(hyper_parameters)


Results = []

count = 0

# Per-configuration checkpoints are written next to the notebook instead of to
# a machine-specific absolute path, so the sweep runs on any machine.
checkpoint_dir = Path('models')
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# Best configuration seen so far, selected on validation accuracy only.
best_val_accuracy = float('-inf')
best_params = None


def evaluate_accuracy(model, loader):
    """Return the accuracy (in percent) of ``model`` on the batches of ``loader``.

    Each image batch is flattened to (batch, features) before the forward pass.
    The model is switched to eval mode for the measurement and put back into
    its previous mode afterwards. Returns 0.0 when ``loader`` yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation (saves memory)
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            ####################################################
            # TODO: Implement the evaluation code              #
            # 1. Pass the images to the model                  #
            # 2. Get the most confident predicted class        #
            ####################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # Pass the images to the model
            images = images.view(images.size(0), -1)

            # Get the most confident predicted class
            predicted = torch.argmax(model(images), dim=1)

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    model.train(was_training)
    return 100 * correct / total if total else 0.0


def train_model(model, num_epochs, learning_rate):
    """Train ``model`` on ``train_loader`` and return its final validation accuracy.

    Weights are re-initialised with ``weights_init``; the model is then trained
    with Adam (weight decay ``reg``) and cross-entropy loss. After every epoch
    the learning rate is multiplied by ``learning_rate_decay`` and the accuracy
    on ``val_loader`` is printed.

    Args:
        model: The network to train in place (already on ``device``).
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Initial learning rate.

    Returns:
        Validation accuracy in percent, measured after the last epoch.
    """
    model.apply(weights_init)
    model.train() #set dropout and batch normalization layers to training mode

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)

    # Train the model
    lr = learning_rate
    total_step = len(train_loader)
    val_accuracy = None
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            #################################################################################
            # TODO: Implement the training code                                             #
            # 1. Pass the images to the model                                               #
            # 2. Compute the loss using the output and the labels.                          #
            # 3. Compute gradients and update the model using the optimizer                 #
            # Use examples in https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
            #################################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # Flatten each image and run the forward pass
            images = images.view(images.size(0), -1)
            logits = model(images)

            # compute the loss
            loss = criterion(logits, labels)

            # Compute gradients and update the model using the optimizer
            optimizer.zero_grad() # Prevent gradients from accumulating
            loss.backward()
            optimizer.step()

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Code to update the lr
        lr *= learning_rate_decay
        update_lr(optimizer, lr)

        val_accuracy = evaluate_accuracy(model, val_loader)
        print('Validataion accuracy is: {} %'.format(val_accuracy))

    if val_accuracy is None:
        # num_epochs == 0: no epoch ran, so score the freshly initialised model
        val_accuracy = evaluate_accuracy(model, val_loader)
    return val_accuracy


##################################################################################
# TODO: Now that you can train a simple two-layer MLP using above code, you can  #
# easily experiment with adding more layers and different layer configurations   #
# and let the pytorch library handle computing the gradients                     #
#                                                                                #
# Experiment with different number of layers (at least from 2 to 5 layers) and   #
# record the final validation accuracies Report your observations on how adding  #
# more layers to the MLP affects its behavior. Try to improve the model          #
# configuration using the validation performance as the guidance. You can        #
# experiment with different activation layers available in torch.nn, adding      #
# dropout layers, if you are interested. Use the best model on the validation    #
# set, to evaluate the performance on the test set once and report it            #
##################################################################################

for params in tqdm(grid):
    num_epochs = params['epochs']
    hidden_size = params['number_layers']
    act_func = params['activation_funct']
    learning_rate = params['learning_R']


    model = MultiLayerPerceptron(input_size, hidden_size, num_classes, act_func).to(device)

    print()

    print('Layers: {}, learning_rate: {}, Activation Function: {}'
                       .format(len(hidden_size)+1, learning_rate, act_func))

    print()

    if not train:
        # Nothing was trained, so there is no accuracy to record for this
        # configuration (previously this path ended in a NameError).
        continue

    # The score stored as 'val_acc' is the accuracy on the validation split.
    # The test set is deliberately not touched during the sweep: evaluate it
    # once, for the best configuration only.
    final_accuracy = train_model(model, num_epochs, learning_rate)

    # One checkpoint per configuration ...
    torch.save(model.state_dict(), str(checkpoint_dir / 'model{}.ckpt'.format(count)))

    # ... and 'model.ckpt' always holds the best model found so far
    if final_accuracy > best_val_accuracy:
        best_val_accuracy = final_accuracy
        best_params = params
        torch.save(model.state_dict(), 'model.ckpt')

    # Save result in a dataframe ('epochs' is part of the grid, so it is
    # recorded to keep the rows distinguishable)
    Results.append({'Layers': len(hidden_size)+1, 'learning_rate': learning_rate,
                    'Activation Function': str(act_func), 'epochs': num_epochs,
                    'val_acc': str(final_accuracy)+'%'})
    out_rs = pd.DataFrame(Results)
    # Rewritten after every configuration so partial results survive an interruption
    out_rs.to_csv('grid_search.csv', index=False)

    count  += 1
    print()

if best_params is not None:
    print('Best validation accuracy: {} % with {}'.format(best_val_accuracy, best_params))
print('done')

In [31]:
# Write the collected grid-search rows to disk. `df` is never defined in this
# notebook, so the frame is built from `Results`, the list filled by the sweep.
pd.DataFrame(Results).to_csv('grid_search.csv', index=False)