<a href="https://colab.research.google.com/github/moni6264/deeplearning_cs6910/blob/main/CS6910_Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import mnist, fashion_mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

Question 1: Download the Fashion-MNIST dataset and plot one sample image for each class, as shown in the grid below. Use `from keras.datasets import fashion_mnist` to get the Fashion-MNIST dataset.

In [2]:
def prepare_dataset():
    """Load Fashion-MNIST and return train/validation/test arrays.

    Steps performed:
      1. Load the dataset through ``fashion_mnist.load_data()``.
      2. Flatten every image into one row and divide the pixels by 255.0.
      3. Hold out 10% of the training portion as a validation set
         (``random_state=42``).
      4. Build a one-hot matrix of the training labels with one column per
         example and 10 rows (the number of classes is hard-coded).
      5. Print the shapes of every split.

    Returns:
        tuple: ``(X_train, y_train_encode, y_train, X_val, y_val, X_test,
        y_test)``. The feature matrices are returned transposed, so each
        column is one example; ``y_train_encode`` has shape
        ``(10, n_train)``; the label vectors are 1-D.
    """
    (images, labels), (X_test, y_test) = fashion_mnist.load_data()

    # Flatten each image to a single row, then rescale the pixel intensities.
    images = images.reshape(images.shape[0], -1) / 255.0
    X_test = X_test.reshape(X_test.shape[0], -1) / 255.0

    X_train, X_val, y_train, y_val = train_test_split(
        images, labels, test_size=0.1, random_state=42
    )

    # One-hot encode the training labels: row = class, column = example.
    n_train = y_train.shape[0]
    y_train_encode = np.zeros((10, n_train))
    y_train_encode[y_train, np.arange(n_train)] = 1

    # Report the shape of every split, in the same order as before.
    report = (
        ("Number of examples in training set", (X_train, y_train_encode, y_train)),
        ("No of examples in validation set", (X_val, y_val)),
        ("No of examples in test set", (X_test, y_test)),
    )
    for heading, arrays in report:
        print(heading)
        for array in arrays:
            print(array.shape)

    return X_train.T, y_train_encode, y_train.T, X_val.T, y_val.T, X_test.T, y_test.T


# Load the data once at notebook scope. prepare_dataset() returns the feature
# matrices transposed (see its return statement), so each column of X_train,
# X_val and X_test is one example.
X_train, y_train_encode, y_train ,X_val, y_val, X_test, y_test = prepare_dataset()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
Number of examples in training set
(54000, 784)
(10, 54000)
(54000,)
No of examples in validation set
(6000, 784)
(6000,)
No of examples in test set
(10000, 784)
(10000,)


In [3]:
def sigmoid(x):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: scalar or NumPy array.

    Returns:
        ``1 / (1 + exp(-x))`` with the same shape as ``x``.
    """
    denominator = 1. + np.exp(-x)
    return 1. / denominator
def relu(x):
    """Rectified linear unit, applied element-wise.

    Args:
        x: scalar or NumPy array.

    Returns:
        The element-wise maximum of 0 and ``x``.
    """
    floor = 0
    return np.maximum(floor, x)
def tanh(x):
    """Hyperbolic tangent, applied element-wise.

    Delegates to ``np.tanh``. The previous hand-written expression had the
    numerator and denominator swapped, i.e. it computed coth(x), which
    divides by zero at ``x == 0`` and does not match ``tanh_derivative``.
    ``np.tanh`` also stays finite for large ``|x|`` where ``exp`` overflows.

    Args:
        x: scalar or NumPy array.

    Returns:
        ``tanh(x)`` with the same shape as ``x``; values lie in [-1, 1].
    """
    return np.tanh(x)
def softmax(x):
    """Column-wise softmax.

    Each column of ``x`` is normalised independently so that it sums to 1.
    The column maximum is subtracted before exponentiating; this leaves the
    result mathematically unchanged but prevents ``exp`` from overflowing
    (and the output turning into NaN) for large logits.

    Args:
        x: array whose columns are the vectors to normalise.

    Returns:
        float64 array of the same shape as ``x`` with non-negative entries
        and columns summing to 1.
    """
    logits = np.asarray(x, dtype=float)
    shifted = logits - np.max(logits, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)


# Derivatives of the activation functions
def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid, evaluated element-wise at ``x``.

    Args:
        x: scalar or NumPy array of pre-activation values.

    Returns:
        ``sigmoid(x) * (1 - sigmoid(x))`` with the same shape as ``x``.
    """
    activated = sigmoid(x)
    return activated * (1 - activated)
def relu_derivative(x):
    """Derivative of ReLU, evaluated element-wise at ``x``.

    Args:
        x: scalar or NumPy array of pre-activation values.

    Returns:
        1 where ``x > 0`` and 0 elsewhere (including at ``x == 0``),
        as integers.
    """
    is_positive = x > 0
    return 1 * is_positive
def tanh_derivative(x):
    """Derivative of tanh, evaluated element-wise at ``x``.

    Args:
        x: scalar or NumPy array of pre-activation values.

    Returns:
        ``1 - tanh(x)**2`` with the same shape as ``x``.
    """
    activated = np.tanh(x)
    return 1 - activated ** 2

#Initialisation of parameters
def initialize_parameters(input_layer, hidden_layer, output_layer,initilisation):
    """Create the weight matrices and bias vectors of a fully connected net.

    Args:
        input_layer: number of input features.
        hidden_layer: sequence with the width of every hidden layer
            (may be empty).
        output_layer: number of output units.
        initilisation: weight-initialisation scheme, one of
            ``'random'``: uniform samples from [0, 1) scaled by 0.01;
            ``'xavier'``: Glorot/Xavier normal, i.e. standard-normal samples
            scaled by ``sqrt(2 / (fan_in + fan_out))``.

    Returns:
        tuple: ``(weights, biases)``, two lists with one entry per layer
        transition. ``weights[i]`` has shape ``(layers[i+1], layers[i])`` and
        ``biases[i]`` has shape ``(layers[i+1], 1)`` (standard-normal samples).

    Raises:
        ValueError: if ``initilisation`` is not a supported scheme. Without
            this check an unknown name produced an empty weight list next to
            a populated bias list.
    """
    if initilisation not in ('random', 'xavier'):
        raise ValueError(
            "Unknown initialisation %r; expected 'random' or 'xavier'" % (initilisation,)
        )

    layers = [input_layer] + list(hidden_layer) + [output_layer]
    weights = []
    biases = []
    for fan_in, fan_out in zip(layers[:-1], layers[1:]):
        if initilisation == 'random':
            weights.append(np.random.rand(fan_out, fan_in) * 0.01)
        else:
            # Glorot normal: the variance depends on both fan-in and fan-out.
            scale = np.sqrt(2 / (fan_in + fan_out))
            weights.append(np.random.randn(fan_out, fan_in) * scale)
        biases.append(np.random.randn(fan_out, 1))
    return weights, biases


In [4]:
import numpy as np
from sklearn.metrics import mean_squared_error

class ANN:

    def __init__(self,n_inputs,n_hidden,n_outputs,initialisation, activation,loss):
           
        self.input_layer   = n_inputs
        self.output_layer  = n_outputs
        self.hidden_layer   = n_hidden
        self.activation = activation
        self.loss_function = loss
        self.weights    = []
        self.biases     = []
        self.initialisation = initialisation
        self.weights,self.biases = initialize_parameters(self.input_layer,self.hidden_layer,self.output_layer,self.initialisation)

    
    
    # Forward propagation
    def forward_propagation(self,input):
 
        self.A = []
        self.H  = []
        k=0
       
        self.A.append(np.dot(self.weights[k],input)+self.biases[k])
        if self.activation == 'sigmoid':
          self.H.append(sigmoid(self.A[k]))
        elif self.activation == 'tanh':
          self.H.append(tanh(self.A[k]))
        elif self.activation == 'relu':
          self.H.append(relu(self.A[k]))

        for k in range(1,len(self.hidden_layer)):
            self.A.append(np.dot(self.weights[k],self.H[k-1])+self.biases[k])
            if self.activation == 'sigmoid':
              self.H.append(sigmoid(self.A[k]))
            elif self.activation == 'tanh':
              self.H.append(tanh(self.A[k]))
            elif self.activation == 'relu':
              self.H.append(relu(self.A[k]))
 
        k=len(self.hidden_layer)
        self.A.append(np.matmul(self.weights[k],self.H[k-1])+self.biases[k])
        self.H.append(softmax(self.A[k]))

        return self.H[-1]


    # Backpropagation
    def back_propagation(self,X_train,y_train):

        g_a  = [0]*(len(self.hidden_layer)+1)
        g_h  = [0]*(len(self.hidden_layer)+1)
        g_w  = [0]*(len(self.weights))
        g_b  = [0]*(len(self.biases))

        n_samples = X_train.shape[1] 

        for k in reversed(range(len(self.hidden_layer)+1)):
            if k == len(self.hidden_layer):
              if self.loss_function == 'cross_entropy':
                  g_a[k] = self.H[k]  - y_train  
              elif self.loss_function == 'square_loss': 
                  g_a[k] = (self.H[k] - y_train) * self.H[k] * (1 - self.H[k]) 
                
            else:
                g_h[k] = (1/n_samples)*np.matmul(self.weights[k+1].T,g_a[k+1])
                if self.activation == 'sigmoid':
                  g_a[k] = (1/n_samples)*np.multiply(g_h[k],sigmoid_derivative(self.A[k]))
                elif self.activation == 'tanh':
                  g_a[k] = (1/n_samples)*np.multiply(g_h[k],tanh_derivative(self.A[k]))
                elif self.activation == 'relu':
                  g_a[k] = (1/n_samples)*np.multiply(g_h[k],relu_derivative(self.A[k]))
                

            if k == 0:
                g_w[k] = (1/n_samples)*np.matmul(g_a[k],X_train.T) 
            else:
                g_w[k] = (1/n_samples)*np.matmul(g_a[k],self.H[k-1].T)

            g_b[k]  = (1/n_samples)*np.sum(g_a[k], axis=1, keepdims = True)
        return g_w,g_b
