
## <font color=red> You should not import any new libraries. Your code should run with python=3.x</font>

#### <font color=red>For lab assignment, you will work with two datasets. The trained weights need to be saved and shared with us in a folder called models with the name ./models/{dataset_name}_weights.pkl. Your predict function should load these weights, initialize the DNN and predict the labels.</font>

- Your solutions will be auto-graded. Hence we request you to follow the instructions.
- Modify the code only between 
```
## TODO
## END TODO
```
- In addition to above changes, you can play with arguments to the functions for generating plots
- We will run the auto grading scripts with private test data

In [15]:
import numpy as np
from matplotlib import pyplot as plt
import math
import pickle as pkl

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 

Preprocessing

In [16]:
def preprocessing(X):
  """
  Implement Normalization for input image features

  Args:
  X : numpy array of shape (n_samples, 784)
   
  Returns:
  X_out: numpy array of shape (n_samples, 784) after normalization
  """
  X_out = None
  
  ## TODO
    
  X_out = (X - np.mean(X, axis=0))/(np.std(X, axis=0) + 1e-9)
  
  ## END TODO

  assert X_out.shape == X.shape

  return X_out

### Split data into train/val

In [17]:
def split_data(X, Y, train_ratio=0.8):
    '''
    Split data into train and validation sets
    The first floor(train_ratio*n_sample) samples form the train set
    and the remaining the test set

    Args:
    X - numpy array of shape (n_samples, n_features)
    Y - numpy array of shape (n_samples, 1)
    train_ratio - fraction of samples to be used as training data

    Returns:
    X_train, Y_train, X_val, Y_val
    '''
    # Try Normalization and scaling and store it in X_transformed
    X_transformed = X

    ## TODO
    
    X_transformed = preprocessing(X)
    
    ## END TODO

    assert X_transformed.shape == X.shape

    num_samples = len(X)
    indices = np.arange(num_samples)
    num_train_samples = math.floor(num_samples * train_ratio)
    train_indices = np.random.choice(indices, num_train_samples, replace=False)
    val_indices = list(set(indices) - set(train_indices))
    X_train, Y_train, X_val, Y_val = X_transformed[train_indices], Y[train_indices], X_transformed[val_indices], Y[val_indices]
  
    return X_train, Y_train, X_val, Y_val

#Flatten the input

In [18]:
class FlattenLayer:
    '''
    This class converts a multi-dimensional into 1-d vector
    '''
    def __init__(self, input_shape):
        
         '''
         Args:
          input_shape : Original shape, tuple of ints
         '''
         self.input_shape = input_shape

    def forward(self, input):
        '''
        Converts a multi-dimensional into 1-d vector
        Args:
          input : training data, numpy array of shape (n_samples , self.input_shape)

        Returns:
          input: training data, numpy array of shape (n_samples , -1)
        '''
        ## TODO
        #Modify the return statement to return flattened input
        self.input = input.flatten()
        self.input = self.input.reshape(1, self.input.shape[0])
        return self.input
        
        ## END TODO
        
    
    def backward(self, output_error, learning_rate):
        '''
        Converts back the passed array to original dimention 
        Args:
        output_error :  numpy array 
        learning_rate: float

        Returns:
        output_error: A reshaped numpy array to allow backward pass
        '''
        ## TODO
        #Modify the return statement to return reshaped array
        return output_error.reshape(self.input_shape)
        ## END TODO
        

#Fully Connected Layer

In [19]:
class FCLayer:
    '''
    Implements a fully connected layer  
    '''
    def __init__(self, input_size, output_size):
        '''
        Args:
         input_size : Input shape, int
         output_size: Output shape, int 
        '''
        self.input_size = input_size
        self.output_size = output_size
        ## TODO
        #initialize weights and biases
        self.weights = np.random.randn(self.input_size, self.output_size)*np.sqrt(2/(self.input_size + self.output_size))
        self.bias = np.random.randn(1, self.output_size)
        ## END TODO

    def forward(self, input):
        '''
        Performs a forward pass of a fully connected network
        Args:
          input : training data, numpy array of shape (n_samples , self.input_size)

        Returns:
           numpy array of shape (n_samples , self.output_size)
        '''
        ## TODO
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        self.inputs = input
        return np.matmul(input, self.weights) + self.bias
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO

        #Modify the return statement to return numpy array resulting from backward pass
        dw = np.matmul(self.inputs.T, output_error)
        db = np.sum(output_error, axis=0)
        self.weights -= learning_rate*dw
        self.bias -= learning_rate*db
        return np.matmul(output_error, self.weights.T)

        ## END TODO

In [20]:
class ActivationLayer:
    '''
    Implements a Activation layer which applies activation function on the inputs. 
    '''
    def __init__(self, activation, activation_prime):
         '''
          Args:
          activation : Name of the activation function (sigmoid,tanh or relu)
          activation_prime: Name of the corresponding function to be used during backpropagation (sigmoid_prime,tanh_prime or relu_prime)
         '''
         self.activation = activation
         self.activation_prime = activation_prime
    
    def forward(self, input):
        '''
        Applies the activation function 
        Args:
          input : numpy array on which activation function is to be applied

        Returns:
           numpy array output from the activation function
        '''
        ## TODO
        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        self.inputs = input
        return self.activation(input)
        ## END TODO
        

    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a fully connected network along with updating the parameter 
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO
        #Modify the return statement to return numpy array resulting from backward pass
        return np.multiply(output_error,self.activation_prime(self.inputs))
        ## END TODO

In [21]:

class SoftmaxLayer:
    '''
      Implements a Softmax layer which applies softmax function on the inputs. 
    '''
    def __init__(self, input_size):
        self.input_size = input_size
    
    def forward(self, input):
        '''
        Applies the softmax function 
        Args:
          input : numpy array on which softmax function is to be applied

        Returns:
           numpy array output from the softmax function
        '''
        ## TODO

        #Modify the return statement to return numpy array of shape (n_samples , self.output_size)
        self.inputs = input
        self.outputs = np.exp(input)/(np.sum(np.exp(input))+1e-12)
        return self.outputs
        ## END TODO
        
    def backward(self, output_error, learning_rate):
        '''
        Performs a backward pass of a Softmax layer
        Args:
          output_error :  numpy array 
          learning_rate: float

        Returns:
          Numpy array resulting from the backward pass
        '''
        ## TODO

        #Modify the return statement to return numpy array resulting from backward pass
        #back propagation for softmax layer
        #jacobian matrix for softmax
        
        jacobian = np.diagflat(self.outputs) - np.matmul(self.outputs.T, self.outputs)
        res = np.matmul(jacobian, output_error.T)
        return res.T
        ## END TODO

In [22]:
def sigmoid(x):
    '''
    Sigmoid function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying simoid function
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return 1/(1 + np.exp(-x))
    ## END TODO

def sigmoid_prime(x):
    '''
     Implements derivative of Sigmoid function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Sigmoid function
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return sigmoid(x)*(1-sigmoid(x))
    ## END TODO

def tanh(x):
    '''
    Tanh function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying tanh function
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return np.tanh(x)
    ## END TODO

def tanh_prime(x):
    '''
     Implements derivative of Tanh function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of Tanh function
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return 1 - np.square(np.tanh(x))
    ## END TODO

def relu(x):
    '''
    ReLU function 
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying ReLU function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.maximum(0, x)
    ## END TODO

def relu_prime(x):
    '''
     Implements derivative of ReLU function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of ReLU function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return np.where(x > 0, 1, 0)
    ## END TODO

In [23]:
def mse(y_true, y_pred):
    '''
    MSE loss
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return np.mean(np.square(y_true - y_pred))
    ## END TODO

def mse_prime(y_true, y_pred):
    '''
    Implements derivative of MSE function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of MSE function
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    return 2*(y_true - y_pred)/y_true.shape[1]
    ## END TODO

def cross_entropy(y_true, y_pred):
    '''
    Cross entropy loss 
    Args:
        y_true :  Ground truth labels, numpy array 
        y_true :  Predicted labels, numpy array 
    Returns:
       loss : float
    '''
    ## TODO
    #Modify the return statement to return numpy array resulting from backward pass
    # return cross entropy loss
    return -1*np.mean(np.sum(np.multiply(y_true, np.log(y_pred)), axis=1))
    
    ## END TODO

def cross_entropy_prime(y_true, y_pred):
    '''
    Implements derivative of cross entropy function, for the backward pass
    Args:
        x :  numpy array 
    Returns:
        Numpy array after applying derivative of cross entropy function
    '''
    ## TODO

    #Modify the return statement to return numpy array resulting from backward pass
    return -y_true/y_pred
    ## END TODO

Fit function

In [24]:
def fit(X_train, Y_train,dataset_name):

    '''
    Create and trains a feedforward network

    Do not forget to save the final weights of the feed forward network to a file. Use these weights in the `predict` function 
    Args:
        X_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        Y_train -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.
        dataset_name -- name of the dataset (flowers or mnist)
    
    '''
     
    #Note that this just a template to help you create your own feed forward network 
    ## TODO

    #define your network
    #This network would work only for mnist

    network = [
        FlattenLayer(input_shape=(28, 28)),
        FCLayer(28*28, 12),
        ActivationLayer(tanh, tanh_prime),
        FCLayer(12, 10),
        SoftmaxLayer(10)
    ] # This creates feed forward 


    # Choose appropriate learning rate and no. of epoch
    epochs = 50
    learning_rate = 0.01

    #implement one hot encoding of Y_train

    #implement batch gradient descent
    for epoch in range(epochs):
        error = 0
        acc = 0
        for x, y_true in zip(X_train, Y_train):
            # forward
            y_onehot = np.eye(10)[y_true]
            output = x
            for layer in network:
                output = layer.forward(output)
            
            # error (display purpose only)
            error += cross_entropy(y_onehot, output)
            
            acc += np.sum(np.argmax(output, axis=1) == y_true)

            # backward
            output_error = cross_entropy_prime(y_onehot, output)
            for layer in reversed(network):
                output_error = layer.backward(output_error, learning_rate)
        
        error /= len(X_train)
        acc /= len(X_train)
        print('%d/%d, error=%f acc=%f' % (epoch + 1, epochs, error, acc))

    # Save the weights
    with open('./models/%s_weights.pkl' % dataset_name, 'wb') as f:
        pkl.dump(network, f)
    
    ## END TODO


Loading datasets

In [25]:
dataset = "mnist" 
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_mnist = pkl.load(file)
    print(f"train_x -- {train_mnist[0].shape}; train_y -- {train_mnist[1].shape}")

mx_tr, my_tr, mx_val, my_val = split_data(train_mnist[0], train_mnist[1])
fit(mx_tr, my_tr, 'mnist')

dataset = "flowers" # "mnist"/"flowers"
with open(f"./data/{dataset}_train.pkl", "rb") as file:
    train_flowers = pkl.load(file)
    print(f"train_x -- {train_flowers[0].shape}; train_y -- {train_flowers[1].shape}")

fx_tr, fy_tr, fx_val, fy_val = split_data(train_flowers[0], train_flowers[1]) 
fit(fx_tr, fy_tr, 'flowers')

train_x -- (60000, 28, 28); train_y -- (60000,)
train_x -- (2936, 2048); train_y -- (2936,)


In [26]:
def predict(X_test, dataset_name):
    """
    input.shape[0], input.shape[1]*input.shape[2]
    X_test -- np array of share (num_test, 2048) for flowers and (num_test, 28, 28) for mnist.

    

    This is the only function that we will call from the auto grader. 

    This function should only perform inference, please donot train your models here.
    
    Steps to be done here:
    1. Load your trained weights from ./models/{dataset_name}_weights.pkl
    2. Ensure that you read weights using only the libraries we have given above.
    3. Initialize your model with your trained weights
    4. Compute the predicted labels and return it

    Please provide us the complete code you used for training including any techniques
    like data augmentation etc. that you have tried out. 

    Return:
    Y_test - nparray of shape (num_test,)
    """
    Y_test = np.zeros(X_test.shape[0],)

    ## TODO

    #predict using your trained model
    #load your trained weights
    with open("./models/{}_weights.pkl".format(dataset_name), "rb") as file:
        network = pkl.load(file)
    
    preprocessing(X_test)
    error = 0
    for i in range(X_test.shape[0]):
        output = X_test[i]
        for layer in network:
            output = layer.forward(output)
        
        Y_test[i] = np.argmax(output, axis=1)
    
    ## END TODO
    assert Y_test.shape == (X_test.shape[0],) and type(Y_test) == type(X_test), "Check what you return"
    return Y_test

In [27]:
yt = predict(mx_tr, 'mnist')
acc = np.mean(yt == my_tr)
print(f"Accuracy on MNIST Training set: {acc}")

yt = predict(mx_val, 'mnist')
acc = np.mean(yt == my_val)
print(f"Accuracy on MNIST Validation set: {acc}")

yt = predict(fx_tr, 'flowers')
acc = np.mean(yt == fy_tr)
print(f"Accuracy on FLOWERS Training set: {acc}")

yt = predict(fx_val, 'flowers')
acc = np.mean(yt == fy_val)
print(f"Accuracy on FLOWERS Validation set: {acc}")

Accuracy on MNIST Training set: 0.9425208333333334
Accuracy on MNIST Validation set: 0.9394166666666667
Accuracy on FLOWERS Training set: 0.9510221465076661
Accuracy on FLOWERS Validation set: 0.9472789115646258
