# Neural Network implementation (using only numpy)

## 1. Implement Activation Functions
> sigmoid, tanh, relu, leaky_relu

## 2. Design a class Neural Network
> constructor, addHiddenLayer, parameter initializer, forward propagation, backward propagation, predict

## 3. Implement Constructor

## 4. Implement ```addHiddenLayer```, ```initialize_parameters```,  ```compute_cost```

## 5. Implement ```forward_propagation```, ```backward_propagation```,  ```update_parameters```

## 6. Implement ```run_model```, ```predict```

In [0]:
# Pakage Imports
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model

%matplotlib inline

In [0]:
#Activation Functions
def sigmoid(z, derivative = False):
    """Sigmoid Function"""
    
    if derivative:
      a = sigmoid(z)
      return a * (1 - a)
    
    return 1 / (1 + np.exp(-z))

def tanh(z, derivative = False):
    """Hyperbolic Tangent Function"""
    
    if derivative:
      a = tanh(z)
      return 1 - np.power(a, 2)
    
    return np.tanh(z)

def relu(z, derivative = False):
    """Rectified Linear Unit Function"""
    
    if derivative:
        return z > 0
    
    return np.maximum(0, z)

def leaky_relu(z, scale = 0.01, derivative = False):
    """Leaky ReLU"""
    
    if derivative:
        return z > 0 + (z <= 0) * scale
      
    return np.maximum(scale * z, z)

class NeuralNetwork:    
    def __init__(self, activation_function = sigmoid):
        self.n = [-1, 1]
        self.actFunc = [0, activation_function]
    
    def addHiddenLayer(self, number_of_units:int, activation_function = relu):
        self.n.insert(len(self.n) - 1, number_of_units)
        self.actFunc.insert(len(self.actFunc) - 1, activation_function)

    def initialize_parameters(self, X, scale = 0.01):
        print('# of Layers =', len(self.n))
        number_of_layers = len(self.n)
        self.W = [0] * number_of_layers
        self.dW = [0] * number_of_layers
        self.b = [0] * number_of_layers
        self.db = [0] * number_of_layers
        self.A = [0] * number_of_layers
        self.Z = [0] * number_of_layers
        self.dZ = [0] * number_of_layers
        
        self.A[0] = X
        self.n[0] = X.shape[0]
        
        for i in range(1, len(self.n)):
            self.W[i] = np.random.randn(self.n[i], self.n[i - 1]) * scale
            self.b[i] = np.zeros((self.n[i], 1))
            
    def compute_cost(self, Y):
        Y_hat = self.A[-1]
        m = Y.shape[1] # number of example

        # Compute the cross-entropy cost
        logprobs = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)
        cost = - np.sum(logprobs) / m

        cost = np.squeeze(cost)
        assert(isinstance(cost, float))

        return cost
      
    def forward_propagation(self):
        for i in range(1, len(self.n)):
            self.Z[i] = self.W[i] @ self.A[i - 1] + self.b[i]
            self.A[i] = self.actFunc[i](self.Z[i])
            
    def backward_propagation(self, Y):
        """
          Implement the backward propagation using the instructions above.

          Arguments:
          X -- input data of shape (2, number of examples)
          Y -- "true" labels vector of shape (1, number of examples)

          Returns:
          grads -- python dictionary containing your gradients with respect to different parameters
        """
        
        m = self.A[0].shape[1]

        # Backward propagation: calculate dW, db
        
        self.dZ[-1] = self.A[-1] - Y
        self.dW[-1] = 1 / m * self.dZ[-1] @ self.A[-2].T
        self.db[-1] = 1 / m * np.sum(self.dZ[-1], axis = 1, keepdims = True)
        
        for i in range(len(self.A) - 2, 0, -1):
            self.dZ[i] = self.W[i + 1].T @ self.dZ[i + 1] * self.actFunc[i](self.A[i], derivative = True)
            self.dW[i] = 1 / m * self.dZ[i] @ self.A[i-1].T
            self.db[i] = 1 / m * np.sum(self.dZ[i], axis = 1, keepdims = True)

    def update_parameters(self, learning_rate):
        """
          Updates parameters using the gradient descent update rule
        """
        
        # Update rule for each parameter
        number_of_layers = len(self.n) - 1
        for i in range(1, number_of_layers + 1):
            self.W[i] = self.W[i] - learning_rate * self.dW[i]
            self.b[i] = self.b[i] - learning_rate * self.db[i]
    
    def run_model(self, X, Y, learning_rate = 0.01, num_iterations = 10000, print_cost = False):
        """
            Arguments:
            X -- dataset of shape (2, number of examples)
            Y -- labels of shape (1, number of examples)
            learning_rate -- Learning Rate of Gradient Descent
            num_iterations -- Number of iterations in gradient descent loop
            print_cost -- if True, print the cost every 1000 iterations
        """
        
        # Initialize parameters.
        self.initialize_parameters(X)        

        # Loop (gradient descent)
        for i in range(num_iterations):

            # Forward propagation.
            self.forward_propagation()

            # Cost function.
            cost = self.compute_cost(Y)

            # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
            self.backward_propagation(Y)

            # Gradient descent parameter update.
            self.update_parameters(learning_rate)

            # Print the cost every 1000 iterations
            if print_cost and i % 1000 == 0:
                print ("Cost after iteration %i: %f" %(i, cost))
                
    def predict(self, X, Y, threshold = 0.5):
        """
        Using the learned parameters, predicts a class for each example in X

        Arguments:
        X -- input data of size (n_x, m)
        Y -- output data of size (1, m)

        Returns
        predictions -- vector of predictions of our model
        """
        
        A = [0] * len(self.n)
        Z = [0] * len(self.n)
        A[0] = X

        # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
        for i in range(1, len(self.n)):
            Z[i] = self.W[i] @ A[i - 1] + self.b[i]
            A[i] = self.actFunc[i](Z[i])
            
        predictions = A[-1] > threshold

        return predictions

# Make a new neural network with two hidden layers

> ## First hidden layer has 5 hidden units.

> ## Second hidden layer has 4 hidden units.

In [0]:
network = NeuralNetwork()
network.addHiddenLayer(5)
network.addHiddenLayer(4)

# Make a training set and a test set with random numbers.

In [0]:
training_set_size, test_set_size = 10000, 1000
X = np.random.randn(5, training_set_size); Y = np.sum(X, axis = 0, keepdims = True) > 0; Y = Y.reshape(1, -1)
test_X = np.random.randn(5, test_set_size); test_Y = np.sum(test_X, axis = 0, keepdims = True) > 0; test_Y = test_Y.reshape(1, -1)

# Run the model.

In [5]:
network.run_model(X, Y, learning_rate = 0.0355, print_cost = True)

# of Layers = 4
Cost after iteration 0: 0.693149
Cost after iteration 1000: 0.693134
Cost after iteration 2000: 0.693134
Cost after iteration 3000: 0.693132
Cost after iteration 4000: 0.693129
Cost after iteration 5000: 0.693121
Cost after iteration 6000: 0.693077
Cost after iteration 7000: 0.692467
Cost after iteration 8000: 0.176914
Cost after iteration 9000: 0.040253


# Predict the results by a trained neural network.

In [0]:
predictions = network.predict(test_X, test_Y)

# Print the accuracy

In [7]:
print('Accuracy: %d%%' % (float(test_Y @ predictions.T + (1 - test_Y) @ (1 - predictions.T)) / float(test_Y.size) * 100))

Accuracy: 50%
