In [3]:
import wandb
import numpy  as np
import pandas as pd
import tensorflow as tf
from keras.datasets import fashion_mnist
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math
import seaborn as sns
from sklearn.metrics import confusion_matrix
import copy

In [4]:
# Load the Fashion-MNIST train and test splits (images and their integer labels).
(X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()

In [5]:
# Report the array shapes of the raw images (X_*) and labels (Y_*) for both splits.
print(f'Shape of input train data is : {X_train.shape}')
print(f'Shape of input test data is : {X_test.shape}')
print(f'Shape of output train data is : {Y_train.shape}')
print(f'Shape of output test data is : {Y_test.shape}')

Shape of input train data is : (60000, 28, 28)
Shape of input test data is : (10000, 28, 28)
Shape of output train data is : (60000,)
Shape of output test data is : (10000,)


In [6]:
# Collect one sample image for every class label so each category can be shown.
# Label 3 of Fashion-MNIST is "Dress" (it was previously mislabelled as "Skirt").
attributes={0:'T-shirt',1:'Trouser',2:'Pullover',3:'Dress',4:'Overcoat',5:'Sandal',
            6:'Shirt',7:'Sneakers',8:'Bag',9:'Ankle boot'}
title=list(attributes.values())
print(f'The various categorical variables in the data are : {title}')

categories = list(attributes)  # class labels 0..9, in display order
images_list = []
for label in categories:
    # Look each class up independently: the result no longer depends on the
    # order in which the labels happen to appear inside Y_train.
    matches = np.flatnonzero(Y_train == label)
    if matches.size == 0:
        # Fail loudly instead of silently producing fewer images than titles.
        raise ValueError(f'No training image found for class label {label}')
    images_list.append(X_train[matches[0]])


The various categorical variables in the data are : ['T-shirt', 'Trouser', 'Pullover', 'Skirt', 'Overcoat', 'Sandal', 'Shirt', 'Sneakers', 'Bag', 'Ankle boot']


In [8]:
# Flatten every image into a single row vector: (samples, h, w) -> (samples, h*w).
# The sizes are taken from the arrays themselves rather than being hard-coded, and
# the result is float64, matching what the previous np.zeros-based buffers held.
x_train = X_train.reshape(X_train.shape[0], -1).astype(np.float64)
print('Train data',x_train.shape)
x_test = X_test.reshape(X_test.shape[0], -1).astype(np.float64)
print('Test data',x_test.shape)

Train data (60000, 784)
Test data (10000, 784)


In [9]:
# Convert the integer class labels into one-hot rows of shape (samples, classes).
# The number of classes is derived from the training labels and reused for the
# test labels so that both encodings have the same width.
num_classes = int(np.max(Y_train)) + 1
a = num_classes  # legacy name kept in case later cells still refer to `a`

y_train = np.zeros((Y_train.shape[0], num_classes))
# Set column Y_train[i] of row i to 1 in a single vectorised assignment.
y_train[np.arange(Y_train.shape[0]), Y_train] = 1
print('Train data',y_train.shape)

y_test = np.zeros((Y_test.shape[0], num_classes))
y_test[np.arange(Y_test.shape[0]), Y_test] = 1
print('Test data',y_test.shape)

Train data (60000, 10)
Test data (10000, 10)


In [10]:
# Create the validation set: 10% of the training samples are held out
# (fixed random_state so the split is reproducible).
# NOTE: this cell rebinds x_train / y_train / x_test / y_test in place, so it must
# be run exactly once after the flattening and one-hot cells.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.10, random_state=10
)

# Divide the pixel values by 255 and transpose so that every column is one sample.
x_train, x_val, x_test = x_train.T / 255, x_val.T / 255, x_test.T / 255
y_train, y_val, y_test = y_train.T, y_val.T, y_test.T

print(f'The shape of train data is:{x_train.shape}')
print(f'The shape of test data is:{x_test.shape}')
print(f'The shape of validation data is:{x_val.shape}')

The shape of train data is:(784, 54000)
The shape of test data is:(784, 10000)
The shape of validation data is:(784, 6000)


In [11]:
'''ACTIVATION FUNCTIONS'''

'''Input: z(l) = w(l)*a(l-1) + b(l), where l is the index of the layer. The activation functions implemented here are
sigmoid, tanh, ReLU and identity.'''


#SIGMOID FUNCTION
def sigmoid_function(z):
    """Element-wise logistic sigmoid 1 / (1 + exp(-z)), evaluated without overflow.

    z: pre-activation values (scalar or array).
    Returns an array with the same shape as ``z`` holding values in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow, whereas a plain
    # exp(-z) overflows to inf for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same function.
    return np.where(z >= 0, 1. / (1. + e), e / (1. + e))

#TANH FUNCTION
def tanh_function(z):
    """Hyperbolic tangent activation, applied element-wise to ``z``."""
    return np.tanh(z)

#RELU FUNCTION
def relu_function(z):
    """Rectified linear unit: element-wise maximum of ``z`` and 0."""
    return np.maximum(z, 0)
    

#IDENTITY FUNCTION
def identity_function(z):
    """Linear (identity) activation: hands ``z`` back unchanged."""
    return z


In [12]:
#OUTPUT LAYER FUNCTION
'''The given problem is a multi-class classification problem, so the softmax function is used for the output layer (L):
    Z(L) = W(L)*A(L-1) + B(L), where the Lth layer is the output layer.'''


#SOFTMAX FUNCTION
'''OUTPUT LAYER FUNCTION'''

def softmax_function(z):
    """Column-wise softmax of the output-layer pre-activations.

    z: array whose axis 0 runs over the classes (one column per sample).
    Returns an array of the same shape in which every column sums to 1.
    """
    # Subtracting the per-column maximum leaves the softmax value unchanged but
    # keeps exp() from overflowing, which would otherwise yield inf/inf = NaN.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

In [13]:
#CROSS ENTROPY FUNCTION (LOSS COMPUTED ON THE OUTPUT LAYER)
def cross_entropy_function(y,ycap,w,lambd):
    '''Categorical cross-entropy cost with an L2 regularisation term.

       input: y: actual (one-hot) output, one column per sample
              ycap: predicted output, same shape as y
              w: list of weight matrices included in the L2 penalty
              lambd: regularisation parameter (L2 regularisation is used here)
       returns: the mean cross-entropy over the columns of y plus
                (lambd / (2*m)) * sum of the squared weights, with m = y.shape[1].'''

    # Keep the predictions strictly inside (0, 1): log(0) is -inf and
    # 0 * -inf is NaN, which would otherwise poison the whole cost.
    ycap = np.clip(ycap, 1e-12, 1.0 - 1e-12)
    m = y.shape[1]
    cost = -(1/m)*np.sum(y*np.log(ycap))
    regularization_cost = (lambd/(2*m))*sum(np.sum(np.square(w_l)) for w_l in w)

    return cost+regularization_cost
     
        
#MEAN SQUARED ERROR FUNCTION
def mean_squared_error_function(y,ycap,w,lambd):
    '''Squared-error cost with an L2 regularisation term.

       input: y: actual value of output, one column per sample
              ycap: predicted value of output (clipped to [1e-12, 1 - 1e-12] before use)
              w: list of weight matrices included in the L2 penalty
              lambd: regularisation parameter (L2 regularisation is used here)
       returns: (1/m) * sum of squared errors + (lambd / (2*m)) * sum of squared
                weights, with m = y.shape[1].'''
    n_samples = y.shape[1]
    clipped = np.clip(ycap, 1e-12, 1.0 - 1e-12)
    data_term = (1/n_samples)*np.sum(np.square(y-clipped))
    # Add one penalty term per weight matrix, in list order.
    penalty = sum((lambd/(2*n_samples))*np.sum(np.square(weights)) for weights in w)
    return data_term + penalty


In [14]:
#INITIALISE PARAMETERS

'''Input: layer_attributes is a list containing the number of
    neurons in each layer. The input layer is considered the 0th layer, the output layer the Lth layer,
    and layers 1 to (L-1) are the hidden layers. Therefore, layer_attributes holds (L+1)
    values. The methods used here to initialise the parameters are random and Xavier initialisation.'''

def random_initialization(layer_attributes):
    """Draw every weight uniformly from [-1, 1) and set every bias to zero.

    layer_attributes: layer sizes, input layer first and output layer last.
    Returns (W, B): W[k] has shape (layer_attributes[k+1], layer_attributes[k])
    and B[k] has shape (layer_attributes[k+1], 1).

    Note: the global NumPy seed is reset to 10 on every call, so repeated calls
    with the same sizes return the same weights.
    """
    np.random.seed(10)
    # (units in this layer, units in the previous layer) for layers 1..L
    layer_shapes = list(zip(layer_attributes[1:], layer_attributes[:-1]))
    W = [np.random.uniform(-1, 1, (n_out, n_in)) for n_out, n_in in layer_shapes]
    # Zero biases; a small positive constant such as 0.01 is an alternative
    # sometimes preferred together with ReLU.
    B = [np.zeros((n_out, 1)) for n_out, _ in layer_shapes]
    return W, B

def xavier_initialization(layer_attributes):
    """Xavier (Glorot) uniform initialisation of the weights, zero biases.

    layer_attributes: layer sizes, input layer first and output layer last.
    Returns (W, B): W[k] has shape (layer_attributes[k+1], layer_attributes[k])
    with entries drawn uniformly from [-lim, lim], lim = sqrt(6 / (fan_in + fan_out)),
    and B[k] is a zero column of shape (layer_attributes[k+1], 1).
    """
    W=[]
    B=[]
    for i in range(1,len(layer_attributes)):
        # The limit depends on the layer SIZES (units feeding in / out),
        # not on the position of the layer in the network.
        fan_in = layer_attributes[i-1]
        fan_out = layer_attributes[i]
        lim = np.sqrt(6/(fan_in+fan_out))
        weight_i = np.random.uniform(-lim,lim,(fan_out,fan_in))
        bias_i=np.zeros((fan_out,1))
        W.append(weight_i)
        B.append(bias_i)

    return W,B
        

In [15]:
#FORWARD PROPAGATION
def forward_propagation(x,w,b,activation='sigmoid_function'):

    '''Run one forward pass through the network.

    For every hidden layer the pre-activation z = w*a_prev + b and the activation
    a = g(z) are computed; the output layer always uses the softmax function.

    input:  x: input data, one column per sample
            w, b: lists of weight matrices and bias vectors, one entry per layer
            activation: name of the hidden-layer activation; one of 'sigmoid_function',
                        'tanh_function', 'relu_function', 'identity_function'
    returns: Z, A where Z holds the pre-activations of every layer and A holds x
             followed by the activations of every layer (A[-1] is the softmax output).
    raises: ValueError if the activation name is not recognised.'''
    # Resolve the activation once. An unknown name used to be skipped silently,
    # which left the next layer reading the previous layer's stale input.
    if activation == 'sigmoid_function':
        activate = sigmoid_function
    elif activation == 'tanh_function':
        activate = tanh_function
    elif activation == 'relu_function':
        activate = relu_function
    elif activation == 'identity_function':
        activate = identity_function
    else:
        raise ValueError(f'Unknown activation function: {activation!r}')

    A=[x]
    Z=[]
    #Hidden layers
    for i in range(len(w)-1):
        z_i=np.dot(w[i],A[-1])+b[i]
        Z.append(z_i)
        A.append(activate(z_i))
    #output layer
    z_l = np.dot(w[-1],A[-1]) + b[-1]
    Z.append(z_l)
    A.append(softmax_function(z_l))

    return Z,A


In [16]:
def accuracy(y,yout):
    '''Percentage of samples whose predicted class equals the true class.

    y and yout hold one column per sample; the class of a column is the index of
    its largest entry. Returns a value between 0 and 100.'''
    true_labels = np.argmax(y, axis=0)
    predicted_labels = np.argmax(yout, axis=0)
    return np.mean(true_labels == predicted_labels) * 100

In [23]:
 #PREDICT FUNCTION
def predict(x,y,w,b,lambd,activation):
    '''Evaluate the network on a data set with a single forward pass.

       input :  x(input data, one column per sample)
                y(one-hot output for x)
                w,b(weights and biases)
                lambd(regularisation parameter)
                activation(name of the hidden-layer activation function)
       returns: (accuracy in percent, cross-entropy cost) for the given x and y.'''

    # Use the data that was passed in: the previous version always evaluated the
    # global x_train / y_train regardless of the x and y arguments.
    z,a = forward_propagation(x,w,b,activation)
    acc = accuracy(y,a[-1])
    cost = cross_entropy_function(y,a[-1],w,lambd)

    return acc,cost

In [20]:
def neural_network(neurons,num_hidden_layers,weight_init='random_initialization'):
    '''Build the layer configuration and initialise the weights and biases.

    The input and output sizes are read from the module-level x_train and y_train
    (their first dimension); every hidden layer gets the same number of neurons.

    input:  neurons: number of neurons in each hidden layer
            num_hidden_layers: number of hidden layers
            weight_init: 'random_initialization' or 'xavier_initialization'
    returns: w, b as produced by the selected initialisation function.
    raises: ValueError if weight_init is not one of the supported names.'''
    # Validate first: an unknown name used to fall through both branches and end
    # in an UnboundLocalError on the return statement.
    if weight_init=='random_initialization':
        initializer=random_initialization
    elif weight_init=='xavier_initialization':
        initializer=xavier_initialization
    else:
        raise ValueError(f'Unknown weight initialisation: {weight_init!r}')

    layer=[x_train.shape[0]]+[neurons]*num_hidden_layers+[y_train.shape[0]]
    print(f'neuron configuration: {layer}')
    return initializer(layer)
             

In [21]:
# Two hidden layers with 8 neurons each, using the default weight initialisation.
w, b = neural_network(neurons=8, num_hidden_layers=2)

neuron configuration: [784, 8, 8, 10]


In [24]:
# Baseline: accuracy and cost of the freshly initialised network on the training data.
acc, cost_train = predict(
    x_train,
    y_train,
    w,
    b,
    lambd=0,
    activation='sigmoid_function',
)

In [28]:
# Accuracy (in percent) returned by predict() for the freshly initialised weights.
print(f'Current accuracy of train data using random weights:{acc}')

Current accuracy of train data using random weights:9.96111111111111


In [29]:
# Cross-entropy cost returned by predict() for the freshly initialised weights.
print(f'Current cost of train data using random weights:{cost_train}')

Current cost of train data using random weights:2.7509770074850888
