# Artificial Neural Networks

More often than not, we will use a deep learning library (Tensorflow, Pytorch, or the wrapper known as Keras) to implement our models. However, the abstraction afforded by those libraries can make it hard to troubleshoot issues if we don't understand what is going on under the hood. 

### In this section we will implement a fully-connected and a convolutional neural network from scratch.

##### The neural network will have the following architecture:

- Input layer
- Dense hidden layer with 512 neurons, using relu as the activation function
- Dropout with a value of 0.2
- Dense hidden layer with 512 neurons, using relu as the activation function
- Dropout with a value of 0.2
- Output layer, using softmax as the activation function

The model will use categorical crossentropy as its loss function. 
We will optimize the gradient descent using RMSProp, with a learning rate of 0.001 and a rho value of 0.9.
We will evaluate the model using accuracy.

In [5]:
import numpy as np


class NeuralNetwork(object):
    def __init__(self, epochs, drop, learning_rate, rho, mini_batch, ip_dimension, op_dimension):
        self.epochs=epochs
        self.learning_rate=learning_rate
        self.mini_batch=mini_batch
        self.ip_dimension=ip_dimension
        self.op_dimension=op_dimension
        self.drop=drop
        self.rho=rho
        self.neurons=512
        self.bias_1=np.zeros((1,self.neurons))
        self.weight_1=np.random.randn(self.ip_dimension,self.neurons)
        self.bias_2=np.zeros((1,self.neurons))
        self.weight_2=np.random.randn(self.neurons,self.neurons)
        self.bias_3=np.zeros((1,self.op_dimension))
        self.weight_3=np.random.randn(self.neurons,self.op_dimension)



    #ReLu Activation Function
    def relu(self,z):
        return np.maximum(0,z)

    #Derivative of ReLu Activation
    def relu_derivative(self,z):
        z[z<=0] = 0
        z[z>0] = 1
        return z

    #Softmax Activation Function
    def softmax(self,z):
        z=z-np.max(z,axis=1,keepdims=True)
        return np.exp(z)/np.sum(np.exp(z),axis=1,keepdims=True)

    #RMSProp to optimize gradient descent
    def rmsprop(self,x,del_x,rho=0.9):
        ep=1e-8
        w=np.zeros(x.shape, dtype=float)
        w=rho * w + (1-rho) * del_x ** 2
        x=x-self.learning_rate * (del_x / (np.sqrt(w)+ep))
        return x

    #Cross entropy loss
    def cross_entropy(self,out_3,y):
        entropy = -np.mean(y * np.log(out_3 + 1e-8))
        return entropy

    #Error function
    def error(self,predicted, real):
        samples = real.shape[0]
        res = predicted - real
        return res/samples


    def fit(self,x_train,y_train):

        print("Model Fit...")
        for i in range(self.epochs):
            #print("Epoch:",i+1)
            start=0
            end=self.mini_batch
            while end<=x_train.shape[0]:

                #FeedForward
                x=x_train[start:end:]
                y=y_train[start:end:]

                net_1=np.matmul(x,self.weight_1)+self.bias_1
                out_1=self.relu(net_1)

                drop_1 = np.random.binomial(1, (1-self.drop), size=out_1.shape)/(1-self.drop)
                out_1*=drop_1

                net_2=np.matmul(out_1,self.weight_2)+self.bias_2
                out_2=self.relu(net_2)

                drop_2 = np.random.binomial(1, (1-self.drop), size=out_2.shape)/(1-self.drop)
                out_2*=drop_2

                net_3=np.matmul(out_2,self.weight_3)+self.bias_3
                out_3=self.softmax(net_3)

                cross_entr = self.cross_entropy(out_3,y)
                #print("Cross Entropy Loss:",cross_entr)

                #Backprop

                del_a3=self.error(out_3,y)
                del_weight_3=np.matmul(out_2.T,del_a3)
                del_bias_3=del_a3

                del_inter2= np.matmul(del_a3,self.weight_3.T)
                del_a2= del_inter2 * self.relu_derivative(out_2)
                del_weight_2=np.matmul(out_1.T,del_a2)
                del_bias_2=del_a2

                del_inter1=np.matmul(del_a2,self.weight_2.T)
                del_a1= del_inter1 * self.relu_derivative(out_1)
                del_weight_1=np.matmul(x.T,del_a1)
                del_bias_1=del_a1

                self.weight_3=self.rmsprop(self.weight_3,del_weight_3,self.rho)
                self.bias_3=self.rmsprop(self.bias_3,del_bias_3,self.rho)

                self.weight_2=self.rmsprop(self.weight_2,del_weight_2,self.rho)
                self.bias_2=self.rmsprop(self.bias_2,del_bias_2,self.rho)

                self.weight_1=self.rmsprop(self.weight_1,del_weight_1,self.rho)
                self.bias_1=self.rmsprop(self.bias_1,del_bias_1,self.rho)

                start+=self.mini_batch
                end+=self.mini_batch

        return


    def evaluate(self,x_test,y_test):
        print("Model Evaluate...")
        res=[]
        for i in range(20):
            #print("Epoch:",i+1)
            res=[]
            start=0
            end=self.mini_batch
            while end<=x_test.shape[0]:

                x=x_test[start:end:]

                net_1=np.matmul(x,self.weight_1)+self.bias_1
                out_1=self.relu(net_1)

                drop_1 = np.random.binomial(1, (1-self.drop), size=out_1.shape)/(1-self.drop)
                out_1*=drop_1

                net_2=np.matmul(out_1,self.weight_2)+self.bias_2
                out_2=self.relu(net_2)

                drop_2 = np.random.binomial(1, (1-self.drop), size=out_2.shape)/(1-self.drop)
                out_2*=drop_2

                net_3=np.matmul(out_2,self.weight_3)+self.bias_3
                out_3=self.softmax(net_3)

                res.append(np.argmax(out_3,axis=1))

                start+=self.mini_batch
                end+=self.mini_batch


        flat_res=[]
        for i in res:
            flat_res.extend(i)

        flat_res=np.asarray(flat_res)
        actual=np.argmax(y_test,axis=1)

        accuracy = (actual == flat_res).mean()
        #print("Acccuracy:",accuracy * 100)
        return accuracy * 100

##### Let us train our fully-connected neural network on the Fashion-MNIST dataset using 5-fold cross validation and report accuracy on the folds, as well as on the test set.

In [7]:
# To simplify the usage of our dataset, we will be importing it from the Keras 
# library. Keras can be installed using pip: python -m pip install keras

# Original source for the dataset:
# https://github.com/zalandoresearch/fashion-mnist

# Reference to the Fashion-MNIST's Keras function: 
# https://keras.io/datasets/#fashion-mnist-database-of-fashion-articles


from sklearn import preprocessing
from sklearn.model_selection import KFold
import keras
from keras.datasets import fashion_mnist


# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255


# convert class vectors to binary class matrices
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train= preprocessing.normalize(x_train)
y_train=preprocessing.normalize(y_train)

x_test= preprocessing.normalize(x_test)
y_test= preprocessing.normalize(y_test)


#Initializing Neural Network
neuNet=NeuralNetwork(20,0.2,0.001,0.9,1000,x_train.shape[1],y_train.shape[1])

#5-Fold Cross Validation
kf=KFold(n_splits=5, random_state=None, shuffle=False)
fold_acc=[]
for train_index, test_index in kf.split(x_train):
    print("Train:", train_index, "Test:", test_index)
    x_train_fold, x_test_fold = x_train[train_index], x_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    neuNet.fit(x_train_fold,y_train_fold)
    accuracy=neuNet.evaluate(x_test_fold,y_test_fold)
    fold_acc.append(accuracy)


print("\nValidation Accuracies:")
for i in range(len(fold_acc)):
    print("Fold %d Accuracy: "%(i+1),fold_acc[i])

#Testing Model
accuracy=neuNet.evaluate(x_test,y_test)
print("\nTest Accuracy: ",accuracy)



Train: [12000 12001 12002 ... 59997 59998 59999] Test: [    0     1     2 ... 11997 11998 11999]
Model Fit...
Model Evaluate...
Train: [    0     1     2 ... 59997 59998 59999] Test: [12000 12001 12002 ... 23997 23998 23999]
Model Fit...
Model Evaluate...
Train: [    0     1     2 ... 59997 59998 59999] Test: [24000 24001 24002 ... 35997 35998 35999]
Model Fit...
Model Evaluate...
Train: [    0     1     2 ... 59997 59998 59999] Test: [36000 36001 36002 ... 47997 47998 47999]
Model Fit...
Model Evaluate...
Train: [    0     1     2 ... 47997 47998 47999] Test: [48000 48001 48002 ... 59997 59998 59999]
Model Fit...
Model Evaluate...

Validation Accuracies:
Fold 1 Accuracy:  77.80833333333334
Fold 2 Accuracy:  85.175
Fold 3 Accuracy:  88.94999999999999
Fold 4 Accuracy:  89.56666666666668
Fold 5 Accuracy:  89.86666666666666
Model Evaluate...
Test Accuracy:  81.37
