In [64]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

In [65]:
class NeuralNetwork():
    def __init__(self, N, hiddenLayer, lr, activation, weightInitializationFunction, epochs, batchSize):
        self.N = N
        self.hiddenLayer = hiddenLayer
        self.lr = lr
        self.activation = activation
        self.weightInitializationFunction = weightInitializationFunction
        self.epochs = epochs
        self.batchSize = batchSize

    def softmax(self, Z):
        return np.exp(Z)/sum(np.exp(Z))
    
    def forward_propagation(self, X):
        self.Z=[]
        self.A=[]
        for i in range(0, len(self.W)):
            w = self.W[i]
            b = self.B[i]
            if i == 0:
                self.Z.append(np.dot(w, X) + b)
            else:
                self.Z.append(np.dot(w, self.A[i-1]) + b)
            
            if i == len(self.W)-1:
                self.A.append(self.softmax(self.Z[i]))
            else:
                self.A.append(self.activation(self.Z[i]))
    
    def one_hot(self, Y):
        one_hot_Y = np.zeros((Y.size, Y.max() + 1))
        one_hot_Y[np.arange(Y.size), Y] = 1
        one_hot_Y = one_hot_Y.T
        return one_hot_Y

    def backward(self, X, Y):
        one_hot_Y = self.one_hot(Y)
        self.dW = [0]*len(self.W)
        self.dB = [0]*len(self.W)
        self.dZ = [0]*len(self.W)
        for i in range(len(self.W)-1, -1, -1):
            if i == len(self.W)-1:
                self.dZ[i] = self.A[i] - one_hot_Y
                self.dW[i] = np.dot(self.dZ[i], self.A[i-1].T)/len(Y)
                self.dB[i] = np.sum(self.dZ[i], axis=1, keepdims=True)/len(Y)
            else:
                self.dZ[i] = np.dot(self.W[i+1].T, self.dZ[i+1]) * self.activation(self.A[i], derivative=True)
                if i == 0:
                    self.dW[i] = np.dot(self.dZ[i], X.T)/len(Y)
                else:
                    self.dW[i] = np.dot(self.dZ[i], self.A[i-1].T)/len(Y)
                self.dB[i] = np.sum(self.dZ[i], axis=1, keepdims=True)/len(Y)
    
    def update_parameters(self):
        for i in range(len(self.W)):
            self.W[i] = self.W[i] - self.lr*self.dW[i]
            self.B[i] = self.B[i] - self.lr*self.dB[i]
    
    def fit(self, X, Y):
        self.W , self.B = self.weightInitializationFunction(self.hiddenLayer,X)
        for i in range(self.epochs):
            self.forward_propagation(X)
            # print(self.A[-1])
            # print(self.dB)
            self.backward(X, Y)
            print(self.B)
            self.update_parameters()
            print(self.B)

    def predict(self, X):
        self.forward_propagation(X)
        return np.argmax(self.A[-1], axis=0)
    
    def predict_proba(self, X):
        self.forward_propagation(X)
        return self.A[-1]
    
    def score(self, X, Y):
        return np.count_nonzero(self.predict(X) == Y)

In [66]:
def relu(x, derivative=False):
    if derivative:
        return derivative_relu(x)
    return np.maximum(0, x)
def derivative_relu(x):
    return 1*(x>0)

def leaky_relu(x, derivative=False):
    if derivative:
        return derivative_leaky_relu(x)
    return max(0.01*x, x)
def derivative_leaky_relu(x):
    return 1*(x>0) + 0.01*(x<=0)

def tanh(x, derivative=False):
    if derivative:
        return derivative_tanh(x)
    return np.tanh(x)
def derivative_tanh(x):
    return 1.0-np.tanh(x)**2

def sigmoid(x, derivative=False):
    if derivative:
        return derivative_sigmoid(x)
    return 1/(1+np.exp(-x))
def derivative_sigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))

In [67]:
def randomWeightInitializationFunction(hiddenLayer, X_train):
    W = []
    B = []
    for i in range(0, len(hiddenLayer)+1):
        if i == 0:
            W.append(np.random.randn(hiddenLayer[i], X_train.shape[0]))
            B.append(np.zeros((hiddenLayer[i], 1)))
        elif i == len(hiddenLayer):
            W.append(np.random.randn(10, hiddenLayer[i-1]))
            B.append(np.zeros((10, 1)))
        else:
            W.append(np.random.randn(hiddenLayer[i], hiddenLayer[i-1]))
            B.append(np.zeros((hiddenLayer[i], 1)))
    return W, B

def zeroWeightInitializationFunction(hiddenLayer, X_train):
    W = []
    B = []
    for i in range(0, len(hiddenLayer)+1):
        if i == 0:
            W.append(np.zeros((hiddenLayer[i], X_train.shape[0])))
            B.append(np.zeros((hiddenLayer[i], 1)))
        elif i == len(hiddenLayer):
            W.append(np.zeros((10, hiddenLayer[i-1])))
            B.append(np.zeros((10, 1)))
        else:
            W.append(np.zeros((hiddenLayer[i], hiddenLayer[i-1])))
            B.append(np.zeros((hiddenLayer[i], 1)))
    return W, B

def normalWeightInitializationFunction(hiddenLayer, X_train):
    W = []
    B = []
    for i in range(0, len(hiddenLayer)+1):
        if i == 0:
            W.append(np.random.normal(0, 1/np.sqrt(X_train.shape[0]), (hiddenLayer[i], X_train.shape[0])))
            B.append(np.zeros((hiddenLayer[i], 1)))
        elif i == len(hiddenLayer):
            W.append(np.random.normal(0, 1/np.sqrt(hiddenLayer[i-1]), (10, hiddenLayer[i-1])))
            B.append(np.zeros((10, 1)))
        else:
            W.append(np.random.normal(0, 1/np.sqrt(hiddenLayer[i-1]), (hiddenLayer[i], hiddenLayer[i-1])))
            B.append(np.zeros((hiddenLayer[i], 1)))
    return W, B

In [None]:
training_data = pd.read_csv('fashion-mnist_test.csv')
testing_data = pd.read_csv('fashion-mnist_test.csv')
training_data = training_data[0:8000]
testing_data = testing_data[0:2000]
print(training_data.shape)
print(testing_data.shape)
#splitting the data into X and y
X_train = training_data.drop('label', axis=1)
y_train = training_data['label']
X_test = testing_data.drop('label', axis=1)
y_test = testing_data['label']
print(X_train.shape)
#normalizing
X_train = X_train/255.0
X_test = X_test/255.0

nn = NeuralNetwork(N=4, hiddenLayer=[256, 128, 64, 32], lr=2e-5, activation=tanh, weightInitializationFunction=zeroWeightInitializationFunction, epochs=10, batchSize=128)
nn.fit(X_train.T, y_train)
# print(nn.predict(X_test.T[:2])) 
# print(nn.score(X_test.T, y_test))

(8000, 785)
(2000, 785)
(8000, 784)
[array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
 

In [None]:
# print(nn.predict(X_test[:1000].T)) 
print(nn.W)

[array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), array([[0., 0., 0., 0., 0., 0.,