## MA23M002 - ABHINAV T K <br> CS6910 - Assignment 1

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
#from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [2]:
# Load the Fashion MNIST dataset
(x, y), (x_test, y_test) = fashion_mnist.load_data()

# Change the shape of the data to (60000, 784)
x = x.reshape(x.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Normalize the data
x = x/255.0
x_test = x_test/255.0

# Splitting the data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=42)

In [3]:
# Dataset analysis and data preprocessing

# no. of classes
c = len(np.unique(y_train))
print("Total no. of classes = ", c)

# Input features
inp_features = x_train.shape[1]
print("Number of input features = ", inp_features)

# training samples size
m = x_train.shape[0]
print("Training samples = ", m)

# validation samples size
m_val = x_val.shape[0]
print("Validation samples = ", m_val)

# test samples size
m_test = x_test.shape[0]
print("Test samples = ", m_test)

# Class names - the index of the class names corresponds to the class label
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


Total no. of classes =  10
Number of input features =  784
Training samples =  54000
Validation samples =  6000
Test samples =  10000


In [4]:
# One hot encoding y
y_train_encoded = np.eye(np.max(y_train) + 1)[y_train].T
y_val_encoded = np.eye(np.max(y_val) + 1)[y_val].T
y_test_encoded = np.eye(np.max(y_test) + 1)[y_test].T


# Neural Network Architecture

In [5]:
# Defining activation functions and their derivatives

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return sigmoid(x) * (1-sigmoid(x))

def relu(x):
    return np.maximum(0,x)

def relu_grad(x):
    return 1*(x>0)

def tanh(x):
    return np.tanh(x)

def tanh_grad(x):
    return (1 - (np.tanh(x)**2))

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

def softmax_grad(x):
    return softmax(x) * (1-softmax(x))


In [6]:
# Initializing parameters W and b

def initialize_parameters(nn_layers):
  '''
  nn_layers: a list containing the number of neurons of each layer - where each layer no. is the index of the list
  '''
  np.random.seed(32)
  parameters = {}                         # dictionary to hold weights and biases of each layer
  for i in range(1, len(nn_layers)):
    parameters["W"+str(i)] = np.random.rand(nn_layers[i], nn_layers[i-1])*0.01
    parameters["b"+str(i)] = np.random.rand(nn_layers[i], 1)*0.01
  return parameters

In [7]:
# Forward propagation
def forward_propagation(x, nn_layers, parameters):

  l = len(nn_layers)  # 5 # No. of neural network layers, including input and output layers

  a = {}              # dictionary to hold hidden layer (pre-activation)
  h = {}              # dictionary to hold hidden layer (activation)

  h[0] = x.T  # input layer
  #print(h[0].shape)
  for i in range(1, l-1):
    W = parameters["W"+str(i)]    # weights of hidden layer i
    b = parameters["b"+str(i)]    # bias of hidden layer i

    a[i] = np.matmul(W,h[i-1]) + b

    h[i] = sigmoid(a[i])          # activation for hidden layers

  # output layer
  W = parameters["W"+str(l-1)]    # weights of hidden layer i
  b = parameters["b"+str(l-1)]    # bias of hidden layer i
  a[l-1] = np.matmul(W,h[l-2]) + b          # activation function for output layer

  y_hat = softmax(a[l-1])
  return y_hat, h, a                # Returns y_hat, h, a

In [8]:
# Backpropagation
def back_propagation(y_hat, y, h, a, nn_layers, parameters, batch_size):
  l = len(nn_layers)
  grads = {}            # dictionary to store gradient of loss function wrt parameters and hidden layer neurons

  # Computing gradient wrt output layer
  grads["grada"+str(l-1)] = y_hat - y

  for i in range(l-1,0,-1):
    grads["gradW" + str(i)] = (1/batch_size)*np.dot(grads["grada" + str(i)], h[i-1].T)
    grads["gradb" + str(i)] = (1/batch_size)*np.sum(grads["grada" + str(i)], axis=1, keepdims=True)
    if i>1:
      grads["grada"+str(i-1)] = np.matmul(parameters["W" + str(i)].T, grads["grada" + str(i)]) * sigmoid_grad(a[i-1])   # Computing gradients wrt hidden layers
  return grads

In [9]:
def update_parameters(parameters, grads , lr):
    l = len(parameters) // 2
    for i in range(1, l + 1):
        parameters["W" + str(i)] = parameters["W" + str(i)] - lr * grads["gradW" + str(i)]
        parameters["b" + str(i)] = parameters["b" + str(i)] - lr * grads["gradb" + str(i)]
    return parameters

In [10]:
def compute_loss(y, y_hat, batch_size, parameters):
  loss = (1/batch_size)*(-1.0 * np.sum(np.multiply(y, np.log(y_hat))))
  return loss

In [11]:
# training model
hidden_layers = [128, 64, 32]
nn_layers = [inp_features] + hidden_layers + [c]
parameters = initialize_parameters(nn_layers)
iter = 0
epochs = 10
lr = 0.01 # learning rate
batch_size = 64

while iter < epochs:
  iter += 1
  for i in range(0, x_train.shape[0], batch_size):
    batch_count = batch_size
    if i + batch_size > x_train.shape[0]: # the last mini-batch might contain fewer than "batch_size" examples
      batch_count = x_train.shape[0] - i + 1

    y_hat, h, a = forward_propagation(x_train[i:i+batch_size,:], nn_layers, parameters)
    grads = back_propagation(y_hat, y_train_encoded[:,i:i+batch_size], h, a, nn_layers, parameters, batch_size)
    parameters = update_parameters(parameters, grads, lr)

  # Mean loss for the full training set
  y_hat, _, _ = forward_propagation(x_train, nn_layers, parameters)
  cost = compute_loss(y_train_encoded, y_hat, 54000, parameters)
  y_hat = np.argmax(y_hat, axis=0)
  accuracy = np.mean(y_hat == y_train_encoded)
  print("Epochs = ", iter, "\tTraining cost:", cost, "\tAccuracy:", accuracy)

Epochs =  1 	Training cost: 2.302679361987962 	Accuracy: 0.1
Epochs =  2 	Training cost: 2.3026778773319814 	Accuracy: 0.1
Epochs =  3 	Training cost: 2.3026764068354377 	Accuracy: 0.1
Epochs =  4 	Training cost: 2.302674973154227 	Accuracy: 0.1
Epochs =  5 	Training cost: 2.302673574290012 	Accuracy: 0.1
Epochs =  6 	Training cost: 2.302672208268539 	Accuracy: 0.1
Epochs =  7 	Training cost: 2.302670873135556 	Accuracy: 0.1
Epochs =  8 	Training cost: 2.302669566949644 	Accuracy: 0.1
Epochs =  9 	Training cost: 2.302668287774903 	Accuracy: 0.1
Epochs =  10 	Training cost: 2.302667033673437 	Accuracy: 0.1
