Question 2 (10 Marks)

Implement a feedforward neural network which takes images from the fashion-mnist data as input and outputs a probability distribution over the 10 classes.

Your code should be flexible such that it is easy to change the number of hidden layers and the number of neurons in each hidden layer.

In [98]:
#Importing all the libraries that will be used
from keras.datasets import fashion_mnist
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import random




In [99]:

#Load the fashion MNIST data


(X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
#Normalizing the data
X_train_com = X_train/255.0
X_test = X_test/255.0
np.random.seed(137)
encoder = OneHotEncoder()

#Splitting to get 10% data as validation set
#X_train, X_val, Y_train, Y_val = train_test_split(X_train_com, Y_train_com, test_size=0.1, random_state=137)

Y_train_unencoded = Y_train
#One hot encoding of the class labels



Y_train = encoder.fit_transform(np.expand_dims(Y_train,1)).toarray()






In [100]:
class FeedForwardNN():
  """
  This class implements the forward propagation functionality of a feedforward neural network. It supports flexible hidden layer sizes and four different activation functions.

  Attributes:
      hidden_layer_sizes: A list containing the sizes of the hidden layers (the length of the list indicates the number of hidden layers in the network).
  """
  def __init__(self, hidden_layer_sizes):
    self.weights = {}
    self.biases = {}
    self.hidden_layer_sizes = hidden_layer_sizes
    self.input_layer_size=0
    self.output_layer_size=1
    # The array of layer sizes will be initialized after obtaining the input and output layer sizes
    self.layer_sizes = []

    self.A = {}
    self.H = {}



  def initialize_weights(self):
    """
    Initializes the weights between the layers of the network. Weights are randomly initialized.
    """
    self.layer_sizes = [self.input_layer_size] + self.hidden_layer_sizes + [self.output_layer_size]
    len_hl=len(self.hidden_layer_sizes)
    weight_counts = len_hl +1
    # np.random.seed(137)
    # np.random.RandomState(137)
    i=0
    while i<(weight_counts):
      # Initialize weights for each layer randomly as a matrix of size (previous layer size) * (next layer size)
      self.weights[i+1] = np.random.randn(self.layer_sizes[i], self.layer_sizes[i+1])
     # Initialize biases for each layer to 0 as a matrix of size 1 * (next layer size)
      self.biases[i+1] = np.zeros((1, self.layer_sizes[i+1]))
      i+=1

  def calculate_activation(self, name, X):

    if name=="ReLU":
      s=self.ReLU(X)
      return s

    if name=="sigmoid":
      s=self.sigmoid(X)
      return s

    if name=="identity":
      s=self.identity(X)
      return s


    if name=="tanh":
      s=self.tanh(X)
      return s





  #calculate the ReLU function
  def ReLU(self,X):
    return X * (X > 0)
  # calculating the softmax function

  #function used
  def u1():
    return 1
  def softmax(self, X):
    X_max = np.max(X)
    exponentials = np.exp(X - X_max)
    return exponentials / np.sum(exponentials)




  # Utility function to calculate the sigmoid function
  def sigmoid(self, X):
    clipped_X = np.clip(X, -500, 500)
    return 1.0 / (1.0 + np.exp(-clipped_X))

  def identity(self, X):
  # Utility to calculate identity function
    return X

  #calculating the cross entropy funtion
  def cross_entropy(self,Y_true,Y_pred):
    loss=np.multiply(Y_pred,Y_true)
    loss=loss[loss!=0]
    loss=-np.log(loss)
    loss=np.mean(loss)
    return loss



  #calculating the tanh funtion
  def tanh(self,X):
    return np.tanh(X)



  """
   Performing forward propagation in the data X.
  """
  def forward_prop(self, activation, X):


    self.H = {}
    #set i=0 for the while loop
    i=0
    self.A = {}
    #Make a row vector intialize the output from input layer as H[0] into a single row
    self.H[0] = X.reshape(1,-1)

    #formula for calculation
    #formula applied:a(x) = W_x*h(x-1) + b and h(a(x)) = activation(a(x)) for hidden layer

    while i<(len(self.hidden_layer_sizes)):
      mat = np.matmul(self.H[i], self.weights[i+1])
      self.A[i+1] =  mat + self.biases[i+1]
      cal=self.calculate_activation(activation, self.A[i+1])
      self.H[i+1] = cal
      i+=1

    # calculating hadamard and softmax at the output layer
    # for the output layer a(x) and h(a(x)) = softmax(a(x))
    mul=np.matmul(self.H[len(self.hidden_layer_sizes)], self.weights[len(self.hidden_layer_sizes)+1])
    self.A[len(self.hidden_layer_sizes)+1] = mul + self.biases[len(self.hidden_layer_sizes)+1]

    self.H[len(self.hidden_layer_sizes)+1] = self.softmax(self.A[len(self.hidden_layer_sizes)+1])
    return


  def fit(self, activation, X, Y):
    """
    This method trains the model with the given data (X, Y) by performing a single forward pass and producing the probabilities calculated at the output layer.
    """

    # columns in output (label count)
    self.output_layer_size = Y.shape[1]

    # features in data(features)
    self.input_layer_size = X.shape[1]*X.shape[1]

    output_prob = []
    self.initialize_weights()

    for x, y in zip(X, Y):
      #Forward Propogation
      self.forward_prop(activation, x)

      output_prob.append(self.H[len(self.hidden_layer_sizes)+1][0])
    return output_prob



In [101]:
#Sample implementation using 3 hidden layers of sizes 100, 256 and 512 respectively

model = FeedForwardNN([128,128,128])
activation_name = "sigmoid"
class_predictions = model.fit(activation_name, X_train, Y_train)
#Class probabilities for 0th indexed image

np.set_printoptions(suppress=True)
print(class_predictions[0])

[0.00126001 0.42737141 0.51574302 0.00017754 0.00000039 0.00000002
 0.00138624 0.00199597 0.00003026 0.05203515]
