### Build Neural Network from scratch

1. Data sample (independent variables and dependent variables)
2. Define hyperparameters
3. Define the activation function and its derivative
4. Train the model
5. Make predictions


In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
import os
import numpy as np
np.random.seed(100)

### Building model



<img src="https://i.imgur.com/5EHSfO6.png" alt='cat-noncat' width='75%' />


### The general methodology to build a Neural Network is to:

1. Define the neural network structure (input layer, hidden layer, output layer)
2. Initialize the model's parameters
3. Loop
- Implement forward propagation
- Compute loss
- Implement backward propagation to get the gradients
- Update parameters (gradient descent)


### Forward Propagation
1. Input layer: X (each row is one object)
2. Initialize weights and biases: $W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]}$
3. $Z^{[1]} = W^{[1]}X^T + b^{[1]}$
4. $A^{[1]} = relu(Z^{[1]})$
5. $Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]}$
6. $A^{[2]} = sigmoid(Z^{[2]}) = \hat{y}$


- Loss function/ Cost Function/ Objective Function

- $J = y - \hat{y} = error$
- $J = -\frac1m\sum \bigg(Y \odot log(A^{[2]}) + (1-Y) \odot log(1-A^{[2]})
\bigg)$

> Note that $\odot$ denotes element wise multiplication

### Backward Propagation
1. $dZ^{[2]} = A^{[2]} - Y$
2. $dW^{[2]} = \frac{1}{m} dZ^{[2]}A^{[1]^T}$
3. $db^{[2]} = \frac{1}{m}np.sum(dZ^{[2]}, axis=1, keepdims=True)$
4. $dZ^{[1]} =   W^{[2]^T}dZ^{[2]} \odot g^{[1]'}(Z^{[1]})$
5. $dW^{[1]} = \frac1mdZ^{[1]}X$
6. $db^{[1]} = \frac{1}{m}np.sum(dZ^{[1]}, axis=1, keepdims=True)$

> Note that $\odot$ denotes elementwise multiplication.

> The notation you will use is common in deep learning coding:
>    - dW1 = $\frac{\partial \mathcal{J} }{ \partial W_1 }$
>    - db1 = $\frac{\partial \mathcal{J} }{ \partial b_1 }$
>    - dW2 = $\frac{\partial \mathcal{J} }{ \partial W_2 }$
>    - db2 = $\frac{\partial \mathcal{J} }{ \partial b_2 }$

> To compute dZ1 you'll need to compute $g^{[1]'}(Z^{[1]})$. Since $g^{[1]}(.)$ is the ReLU activation function here, $g^{[1]'}(z) = 1$ for $z > 0$ and $0$ for $z < 0$ (the code below uses $1$ at $z = 0$). So you can compute 
    $g^{[1]'}(Z^{[1]})$ using `relu_backward(Z1)`.


In [None]:
class Neural_Network():
    """Binary classifier with one ReLU hidden layer and a sigmoid output unit.

    The network is trained with full-batch gradient descent on the
    cross-entropy cost. The array layout follows the indexing the methods
    perform: ``X`` holds one sample per row, shape ``(m, n_x)``; ``Y`` holds
    one label per column, shape ``(n_y, m)``.

    Attributes populated while the model is used:
        parameters: dict with 'W1', 'W2', 'b1', 'b2' (set by ``initialize_params``).
        cache: dict with 'Z1', 'A1', 'Z2', 'A2' from the last ``forward_propagation``.
        grads: dict with 'dW1', 'dW2', 'db1', 'db2' from the last ``backward_propagation``.
        costs: list with the training cost of every iteration of the last ``fit``.
    """

    def __init__(self, n_h, learning_rate, n_iterations):
        """Store the hyperparameters.

        Args:
            n_h: number of hidden units.
            learning_rate: gradient-descent step size.
            n_iterations: number of full-batch updates performed by ``fit``.
        """
        self.n_h = n_h
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations

    def relu(self, Z):
        """Return the element-wise ReLU activation ``max(0, Z)``."""
        return np.maximum(0, Z)

    def relu_backward(self, Z):
        """Return the ReLU derivative at ``Z``: 0 where ``Z < 0``, 1 elsewhere."""
        result = np.ones(Z.shape)
        result[Z < 0] = 0
        return result

    def sigmoid(self, Z):
        """Return the element-wise logistic function ``1 / (1 + exp(-Z))``."""
        return 1. / (1 + np.exp(-Z))

    def layer_sizes(self, X, Y, n_h):
        """Return ``(n_x, n_h, n_y)``: input, hidden and output layer sizes.

        ``n_x`` is the number of columns of ``X`` and ``n_y`` the number of
        rows of ``Y``; ``n_h`` is passed through unchanged.
        """
        n_x = X.shape[1]
        n_y = Y.shape[0]
        return (n_x, n_h, n_y)

    def initialize_params(self, n_x, n_h, n_y):
        """Create ``self.parameters`` with scaled-normal weights and zero biases.

        The global NumPy seed is reset to 102 on every call, so the initial
        weights are identical across runs.
        """
        np.random.seed(102)
        W1 = np.random.randn(n_h, n_x) * np.sqrt(1/n_x)
        b1 = np.zeros((n_h, 1))
        W2 = np.random.randn(n_y, n_h) * np.sqrt(1/n_h)
        b2 = np.zeros((n_y, 1))

        assert (W1.shape == (n_h, n_x))
        assert (W2.shape == (n_y, n_h))
        assert (b1.shape == (n_h, 1))
        assert (b2.shape == (n_y, 1))

        self.parameters = {
            'W1': W1,
            'W2': W2,
            'b1': b1,
            'b2': b2
        }

    def forward(self, X, parameters):
        """Run one forward pass with the given ``parameters`` dict.

        Returns:
            Tuple ``(Z1, A1, Z2, A2)``; ``A2`` holds the predicted
            probabilities with shape ``(n_y, m)``.
        """
        W1 = parameters['W1']
        W2 = parameters['W2']
        b1 = parameters['b1']
        b2 = parameters['b2']

        # Samples are rows of X, hence the transpose before the first layer.
        Z1 = np.dot(W1, X.T) + b1
        A1 = self.relu(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = self.sigmoid(Z2)

        return (Z1, A1, Z2, A2)

    def forward_propagation(self, X):
        """Forward pass with the model's own parameters; stores ``self.cache`` and returns ``A2``."""
        Z1, A1, Z2, A2 = self.forward(X, self.parameters)

        self.cache = {
            'Z1': Z1,
            'A1': A1,
            'Z2': Z2,
            'A2': A2
        }
        return A2

    def compute_cross_entropy_cost(self, A2, Y):
        """Return the mean binary cross-entropy between predictions ``A2`` and labels ``Y``."""
        m = Y.shape[1]
        # Keep the probabilities strictly inside (0, 1) so that a saturated
        # sigmoid cannot produce log(0) and turn the cost into inf/NaN.
        eps = 1e-15
        A2 = np.clip(A2, eps, 1 - eps)
        loss = np.multiply(Y, np.log(A2)) + np.multiply(1 - Y, np.log(1 - A2))
        cost = -(1/m) * np.sum(loss)
        return cost

    def backward_propagation(self, X, Y):
        """Compute the gradients for the last forward pass.

        Reads ``self.parameters`` and ``self.cache``; stores the result in
        ``self.grads`` and also returns it.
        """
        W2 = self.parameters['W2']

        A1 = self.cache['A1']
        A2 = self.cache['A2']
        Z1 = self.cache['Z1']

        m = Y.shape[1]

        # Sigmoid output combined with cross-entropy gives this simple error term.
        dZ2 = A2 - Y
        dW2 = (1/m) * np.dot(dZ2, A1.T)
        db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
        dZ1 = np.multiply(np.dot(W2.T, dZ2), self.relu_backward(Z1))
        dW1 = (1/m) * np.dot(dZ1, X)
        db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

        self.grads = {
            'dW1': dW1,
            'dW2': dW2,
            'db1': db1,
            'db2': db2
        }
        return self.grads

    def update_parameters(self):
        """Apply one gradient-descent step to ``self.parameters`` using ``self.grads``."""
        step = self.learning_rate
        self.parameters = {
            'W1': self.parameters['W1'] - step * self.grads['dW1'],
            'W2': self.parameters['W2'] - step * self.grads['dW2'],
            'b1': self.parameters['b1'] - step * self.grads['db1'],
            'b2': self.parameters['b2'] - step * self.grads['db2']
        }

    def fit(self, X, Y, plot=True):
        """Train the network on ``(X, Y)`` for ``n_iterations`` full-batch updates.

        Args:
            X: training inputs, one sample per row.
            Y: training labels, one sample per column.
            plot: when True (default) draw the training-cost curve at the end.

        The per-iteration costs are kept in ``self.costs``. The cost is
        printed every 100 iterations.
        """
        n_x, n_h, n_y = self.layer_sizes(X, Y, self.n_h)

        self.initialize_params(n_x, n_h, n_y)

        costs = []
        for i in range(self.n_iterations):
            # Forward propagation
            A2 = self.forward_propagation(X)
            cost = self.compute_cross_entropy_cost(A2, Y)
            costs.append(cost)
            # Backward propagation
            self.backward_propagation(X, Y)
            # Update parameters
            self.update_parameters()

            if (i % 100) == 0:
                print(f'Iteration {i}, Cost: {cost}')

        self.costs = costs

        if plot:
            fig, ax = plt.subplots(figsize=(6, 8))
            ax.plot(costs)
            ax.set_ylabel('Cost')
            ax.set_xlabel('iterations')
            plt.show()

    def predict(self, X):
        """Return a boolean array ``(n_y, m)``: True where the predicted probability exceeds 0.5."""
        _, _, _, A2 = self.forward(X, self.parameters)
        predictions = A2 > 0.5
        return predictions

In [None]:
# Build the network (7 hidden units, learning rate 0.001, 500 iterations)
# before training it; without this line `model` is undefined on a fresh kernel.
# NOTE(review): X_train, y_train, X_test, y_test, accuracy_score and
# confusion_matrix are not defined in the visible part of this notebook --
# confirm that an earlier cell provides them.
model = Neural_Network(7, 0.001, 500)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Labels and predictions are stored as (1, m) rows, so row 0 holds all samples.
print("Accuracy Score : %f" % accuracy_score(y_test[0], predictions[0]))
print('Confusion Matrix:')
print(confusion_matrix(y_test[0], predictions[0]))


In [5]:
def sigmoid(Z):
    """Logistic function ``1 / (1 + exp(-Z))``, applied element-wise.

    Args:
        Z: a number or NumPy array of pre-activation values.

    Returns:
        Value(s) in the open interval (0, 1) with the same shape as ``Z``.
    """
    exp_neg = np.exp(-Z)
    return 1. / (1 + exp_neg)


In [6]:
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    activated = np.maximum(floor, Z)
    return activated


In [7]:
def relu_backward(Z):
    """Derivative of ReLU evaluated at ``Z``.

    Returns a float array shaped like ``Z`` holding 0 where ``Z < 0`` and
    1 everywhere else (including at exactly 0).
    """
    return np.where(Z < 0, 0.0, 1.0)


In [8]:
def layer_sizes(X, Y, hidden_neuron):
    """Return the layer widths as ``(input, hidden, output)``.

    The input width is the number of columns of ``X``, the output width is
    the number of rows of ``Y``; ``hidden_neuron`` is passed through as is.
    """
    sizes = (X.shape[1], hidden_neuron, Y.shape[0])
    return sizes



In [9]:
def initialize_params(in_n, hi_n, ou_n):
    """Build the initial parameter dict for a network with one hidden layer.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(1 / fan_in)``; biases start at zero. The global NumPy seed is
    reset to 102 on every call, so the result is always the same for the
    same sizes.

    Args:
        in_n: number of input features.
        hi_n: number of hidden units.
        ou_n: number of output units.

    Returns:
        dict with keys 'W1' (hi_n, in_n), 'W2' (ou_n, hi_n),
        'b1' (hi_n, 1) and 'b2' (ou_n, 1).
    """
    np.random.seed(102)

    # Dict entries are evaluated top to bottom, so W1 is drawn before W2.
    parameters = {
        'W1': np.random.randn(hi_n, in_n) * np.sqrt(1/in_n),
        'W2': np.random.randn(ou_n, hi_n) * np.sqrt(1/hi_n),
        'b1': np.zeros((hi_n, 1)),
        'b2': np.zeros((ou_n, 1)),
    }

    expected_shapes = {
        'W1': (hi_n, in_n),
        'W2': (ou_n, hi_n),
        'b1': (hi_n, 1),
        'b2': (ou_n, 1),
    }
    for name, shape in expected_shapes.items():
        assert parameters[name].shape == shape

    return parameters

In [10]:
def forward_propagation(X, parameters):
    """Forward pass through the ReLU hidden layer and the sigmoid output layer.

    Args:
        X: input samples, one per row (transposed before the first layer).
        parameters: dict with 'W1', 'W2', 'b1', 'b2'.

    Returns:
        ``(A2, cache)`` where ``A2`` is the output activation and ``cache``
        holds 'Z1', 'A1', 'Z2', 'A2' for the backward pass.
    """
    W1, W2, b1, b2 = (parameters[key] for key in ('W1', 'W2', 'b1', 'b2'))

    cache = {}
    # Hidden layer
    cache['Z1'] = np.dot(W1, X.T) + b1
    cache['A1'] = relu(cache['Z1'])
    # Output layer
    cache['Z2'] = np.dot(W2, cache['A1']) + b2
    cache['A2'] = sigmoid(cache['Z2'])

    return cache['A2'], cache


In [11]:
def compute_cross_entropy(A2, Y):
    """Mean binary cross-entropy between predictions ``A2`` and labels ``Y``.

    The number of samples ``m`` is taken from the second axis of ``Y``.
    """
    m = Y.shape[1]
    positive_term = Y * np.log(A2)
    negative_term = (1 - Y) * np.log(1 - A2)
    return -(1/m) * np.sum(positive_term + negative_term)

In [12]:
def backward_propagation(X, Y, parameters, cache):
    """Gradients of the cross-entropy cost for the two-layer network.

    Args:
        X: input samples, one per row.
        Y: labels, one sample per column.
        parameters: dict with 'W1', 'W2', 'b1', 'b2'.
        cache: dict with 'A1', 'A2', 'Z1', 'Z2' from the forward pass.

    Returns:
        dict with 'dW1', 'dW2', 'db1', 'db2'.
    """
    # Every entry is unpacked up front so a missing key fails immediately;
    # only W2, A1, A2 and Z1 take part in the computation below.
    W1, W2, b1, b2 = (parameters[key] for key in ('W1', 'W2', 'b1', 'b2'))
    A1, A2, Z1, Z2 = (cache[key] for key in ('A1', 'A2', 'Z1', 'Z2'))

    m = Y.shape[1]

    # Output layer
    delta_out = A2 - Y
    scale = 1/m
    grad_W2 = scale * np.dot(delta_out, A1.T)
    grad_b2 = scale * np.sum(delta_out, axis=1, keepdims=True)

    # Hidden layer: error pushed back through W2, gated by the ReLU derivative
    delta_hidden = np.dot(W2.T, delta_out) * relu_backward(Z1)
    grad_W1 = scale * np.dot(delta_hidden, X)
    grad_b1 = scale * np.sum(delta_hidden, axis=1, keepdims=True)

    return {
        'dW1': grad_W1,
        'dW2': grad_W2,
        'db1': grad_b1,
        'db2': grad_b2,
    }

In [13]:
def update_parameters(parameters, grads, learning_rate):
    """Return new parameters after one gradient-descent step.

    Each entry ``P`` of ``parameters`` is replaced by
    ``P - learning_rate * grads['d' + name]``. A new dict is returned; the
    input dicts are left untouched.
    """
    names = ('W1', 'W2', 'b1', 'b2')
    return {
        name: parameters[name] - learning_rate * grads['d' + name]
        for name in names
    }

In [15]:
def nn_model(X, Y, hi_n, iterations, learning_rate, parameters=None,
             X_test=None, y_test=None):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: training inputs, one sample per row.
        Y: training labels, one sample per column.
        hi_n: number of hidden units.
        iterations: number of gradient-descent updates.
        learning_rate: step size of each update.
        parameters: optional starting parameters; freshly initialised when None.
        X_test, y_test: optional held-out data in the same layout as ``X`` and
            ``Y``. When both are given, the test cost is evaluated after every
            update and drawn next to the training cost. They are explicit
            arguments so the function does not depend on notebook globals.

    Returns:
        The trained parameter dict ('W1', 'W2', 'b1', 'b2').
    """
    in_n, hi_n, ou_n = layer_sizes(X, Y, hi_n)

    if parameters is None:
        parameters = initialize_params(in_n, hi_n, ou_n)

    track_test = X_test is not None and y_test is not None
    costs = []
    test_errors = []

    for i in range(iterations):

        # Forward propagation
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cross_entropy(A2, Y)
        costs.append(cost)

        # Backward propagation
        grads = backward_propagation(X, Y, parameters, cache)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Test error, measured with the freshly updated parameters
        if track_test:
            A2_test, _ = forward_propagation(X_test, parameters)
            test_errors.append(compute_cross_entropy(A2_test, y_test))

        if i % 100 == 0:
            print(f"Iteration {i}, Cost: {cost}")

    fig, ax = plt.subplots(figsize=(6, 8))
    ax.plot(costs, label='train')
    if track_test:
        ax.plot(test_errors, label='test')
    ax.set_ylabel('Cost')
    ax.set_xlabel('iterations')
    ax.legend()
    plt.show()

    return parameters

In [16]:
def predict(X, parameters):
    """Classify ``X`` with the trained ``parameters``.

    Returns a boolean array that is True wherever the network's output
    probability is greater than 0.5.
    """
    probabilities, _ = forward_propagation(X, parameters)
    return probabilities > 0.5


In [None]:
# parameters = nn_model(X_train, y_train, hi_n=16, iterations=500, 
#                       learning_rate=0.0001)
# predictions = predict(X_test, parameters)

In [3]:
# Training data, independent variables (X): 7 samples (rows) x 3 features (columns)
training_set = np.array([
    [0, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
    [1, 1, 0],
    [1, 1, 1],
    [0, 1, 1],
    [0, 1, 0],
])

# Training data, dependent variable (y): one label per sample,
# stored as a (7, 1) column so it lines up with the rows of training_set
labels = np.array([1, 0, 0, 1, 1, 0, 1]).reshape(7, 1)


In [4]:
# Initial model parameters (weights, bias) and the learning-rate hyperparameter

np.random.seed(42)  # fixed seed so the random initial values below are reproducible
weights = np.random.rand(3, 1)  # (3, 1) column of initial weights, uniform in [0, 1)
bias = np.random.rand(1)  # single bias term, stored as a length-1 array
learning_rate = 0.05  # step size used by the gradient-descent updates


In [5]:
# Show the randomly initialised (3, 1) weight column
weights

array([[0.37454012],
       [0.95071431],
       [0.73199394]])

In [6]:
# Show the randomly initialised bias (length-1 array)
print(bias)

[0.59865848]


In [7]:
# Activation function and its derivative

def sigmoid(x):
    """Logistic activation ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the logistic function evaluated at the pre-activation ``x``.

    ``x`` is the raw input of the sigmoid: the sigmoid is applied inside this
    function before forming ``s * (1 - s)``.
    """
    s = sigmoid(x)
    return s * (1 - s)


In [None]:
# Training model: full-batch gradient descent on a single sigmoid neuron
n_epochs = 30000
log_every = 3000  # report the summed error periodically instead of on every epoch

inputs = training_set
for epoch in range(n_epochs):
    # Forward pass: weighted sum plus bias, squashed by the sigmoid
    XW = np.dot(inputs, weights) + bias
    z = sigmoid(XW)

    # Signed prediction error per sample
    error = z - labels
    if epoch % log_every == 0:
        print(error.sum())

    # Chain rule: d(cost)/d(pred) * d(pred)/d(XW).
    # sigmoid_derivative applies the sigmoid itself, so it takes the
    # pre-activation XW rather than the already-activated z.
    dcost = error
    dpred = sigmoid_derivative(XW)
    z_del = dcost * dpred

    # Gradient-descent updates; the bias gradient is the sum over all samples
    weights = weights - learning_rate * np.dot(inputs.T, z_del)
    bias = bias - learning_rate * z_del.sum(axis=0)

In [None]:
# Model: one step of the feed-forward neural network, written out stage by stage

# Feed forward
XW = np.dot(inputs, weights) + bias
z = sigmoid(XW)

# Error
error = z - labels
print(error.sum())

# Chain-rule factors; sigmoid_derivative expects the pre-activation XW
dcost = error
dpred = sigmoid_derivative(XW)
z_del = dcost * dpred

# Determining slope: per-sample gradient contribution for every weight
slope = inputs * dcost * dpred

# Gradient-descent update; `inputs` is not reassigned, so the cell can be re-run
weights = weights - learning_rate * np.dot(inputs.T, z_del)
bias = bias - learning_rate * z_del.sum(axis=0)

In [None]:
# Predicting outcomes: probability produced by the trained neuron for single samples

single_pt = np.array([1, 0, 0])
result = sigmoid(np.dot(single_pt, weights) + bias)

print(result)

single_pt = np.array([0, 1, 0])

result = sigmoid(np.dot(single_pt, weights) + bias)
print(result)