# Neural Network

<img src='n11.jpeg' />

In [None]:
# flow of neural network

- input data into neural network
- this data flows from layer to layer to get the output
- we calculate the error(scalar)
- adjust parameters(weights)

interate the process again and again

### Forward Propagation

<img src='n1.png' />

In [None]:
input -> layer -> output

- the goal is to minimize the error by changing the paramters in the network.

### Gradient Descent

<img src='n2.png' />

In [1]:
#alpha -> learning rate(between -> [0,1])

0.12 * 2

0.24

### Backword Propagation

<img src='n3.png' />

In [None]:
- E is a scalar
- X and Y are matirces

<img src='n4.png' />

In [None]:
int a = 10 # 2 bytes   c-language

a = 10   # object of class int

## Layer Class

In [2]:
# base class

class Layer:
    
    def __init__(self):
        self.input = None
        self.output = None
        
    # forward pass - computes the output Y of a layer from a given intput X
    def forward_propagation(self, input):
        raise NotImplementedError
    
    # backward pass - dE/dX for a given dE/dY (updating the paramters)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError
    

### Fully Connected Layer

<img src='n5.png' />

### Foward Propagation

<img src='n6.png' />

In [None]:
the above equation is the value of each neuron

<img src='n7.png' />

In [None]:
- derivative of error with respect to the parameters (dE/dW, dE/dB)
- derivative of the error with respect to the input (dE/dX)

### Backward Propagation

<img src='n8.png' />

In [9]:
import numpy as np

class FClayer(Layer):
    
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
        
    
    # returns the output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        
        return self.output
    
    # computes dE/dW, dE/dB for a given output_error = dE/dY, return input_error = dE/dX
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error) # output_error = dBias
        
        # update the parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        
        return input_error

### Activation Layer

<img src='n9.png' />

In [3]:
# add non-linearity to the model -> applying non-linear functions

class ActivationLayer(Layer):
    
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
    
    # return the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        
        return self.output
    
    # return input_error= dE/dX for a given output_error = dE/dY
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

In [4]:
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x) ** 2

### Loss Function

<img src='n10.png' />

Mean Squared Error

In [11]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_true - y_pred)/y_true.size

### Netowrk Class

In [5]:
class Network:
    
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None
        
    # add the layers to our network
    def add(self, layer):
        self.layers.append(layer)
    
    # useLoss for loss
    def useLoss(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime
    
    
    # predict the output of a given input
    def predict(self, input_data):
        samples = len(input_data)
        results = []
        
        # run the network
        for i in range(samples):
            # forward propagation
            
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            
            results.append(output)
        
        return results
    
    # train our network
    
    def fit(self, xtrain, ytrain, epochs, learning_rate):
        samples = len(xtrain)
        
        for i in range(epochs):
            err = 0
            
            for j in range(samples):
                
                # forward propagation
                output = xtrain[j]
                
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                    
                # compute the loss
                err += self.loss(ytrain[j], output)
                
                
                # backward propagation
                
                error = self.loss_prime(ytrain[j], output)
                
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)
                
            err /= samples
            print('epoch %d/%d error=%f' % (i+1, epochs, err))

## Use XOR example to train the NN

In [None]:
0 xor 0 - 0
0 xor 1 - 1
1 xor 0 - 1
1 xor 1 - 0

In [7]:
import numpy as np

xtrain = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
ytrain = np.array([[[0]], [[1]], [[1]], [[0]]])

In [12]:
# create the network

net = Network()

net.add(FClayer(2,3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FClayer(3,1))
net.add(ActivationLayer(tanh, tanh_prime))

net.useLoss(mse, mse_prime)

net.fit(xtrain, ytrain, epochs=1000, learning_rate=0.1)

epoch 1/1000 error=0.315613
epoch 2/1000 error=1.108969
epoch 3/1000 error=2.105924
epoch 4/1000 error=2.326890
epoch 5/1000 error=2.393401
epoch 6/1000 error=2.424196
epoch 7/1000 error=2.441679
epoch 8/1000 error=2.452853
epoch 9/1000 error=2.460570
epoch 10/1000 error=2.466199
epoch 11/1000 error=2.470475
epoch 12/1000 error=2.473827
epoch 13/1000 error=2.476521
epoch 14/1000 error=2.478731
epoch 15/1000 error=2.480575
epoch 16/1000 error=2.482136
epoch 17/1000 error=2.483473
epoch 18/1000 error=2.484630
epoch 19/1000 error=2.485642
epoch 20/1000 error=2.486532
epoch 21/1000 error=2.487323
epoch 22/1000 error=2.488029
epoch 23/1000 error=2.488663
epoch 24/1000 error=2.489235
epoch 25/1000 error=2.489754
epoch 26/1000 error=2.490227
epoch 27/1000 error=2.490660
epoch 28/1000 error=2.491057
epoch 29/1000 error=2.491423
epoch 30/1000 error=2.491761
epoch 31/1000 error=2.492074
epoch 32/1000 error=2.492365
epoch 33/1000 error=2.492636
epoch 34/1000 error=2.492889
epoch 35/1000 error=2.4

epoch 390/1000 error=2.499509
epoch 391/1000 error=2.499511
epoch 392/1000 error=2.499512
epoch 393/1000 error=2.499513
epoch 394/1000 error=2.499514
epoch 395/1000 error=2.499516
epoch 396/1000 error=2.499517
epoch 397/1000 error=2.499518
epoch 398/1000 error=2.499520
epoch 399/1000 error=2.499521
epoch 400/1000 error=2.499522
epoch 401/1000 error=2.499523
epoch 402/1000 error=2.499525
epoch 403/1000 error=2.499526
epoch 404/1000 error=2.499527
epoch 405/1000 error=2.499528
epoch 406/1000 error=2.499530
epoch 407/1000 error=2.499531
epoch 408/1000 error=2.499532
epoch 409/1000 error=2.499533
epoch 410/1000 error=2.499534
epoch 411/1000 error=2.499536
epoch 412/1000 error=2.499537
epoch 413/1000 error=2.499538
epoch 414/1000 error=2.499539
epoch 415/1000 error=2.499540
epoch 416/1000 error=2.499541
epoch 417/1000 error=2.499543
epoch 418/1000 error=2.499544
epoch 419/1000 error=2.499545
epoch 420/1000 error=2.499546
epoch 421/1000 error=2.499547
epoch 422/1000 error=2.499548
epoch 423/

epoch 723/1000 error=2.499743
epoch 724/1000 error=2.499743
epoch 725/1000 error=2.499744
epoch 726/1000 error=2.499744
epoch 727/1000 error=2.499745
epoch 728/1000 error=2.499745
epoch 729/1000 error=2.499745
epoch 730/1000 error=2.499746
epoch 731/1000 error=2.499746
epoch 732/1000 error=2.499746
epoch 733/1000 error=2.499747
epoch 734/1000 error=2.499747
epoch 735/1000 error=2.499747
epoch 736/1000 error=2.499748
epoch 737/1000 error=2.499748
epoch 738/1000 error=2.499749
epoch 739/1000 error=2.499749
epoch 740/1000 error=2.499749
epoch 741/1000 error=2.499750
epoch 742/1000 error=2.499750
epoch 743/1000 error=2.499750
epoch 744/1000 error=2.499751
epoch 745/1000 error=2.499751
epoch 746/1000 error=2.499751
epoch 747/1000 error=2.499752
epoch 748/1000 error=2.499752
epoch 749/1000 error=2.499752
epoch 750/1000 error=2.499753
epoch 751/1000 error=2.499753
epoch 752/1000 error=2.499753
epoch 753/1000 error=2.499754
epoch 754/1000 error=2.499754
epoch 755/1000 error=2.499754
epoch 756/

In [15]:
net.predict(xtrain)

[array([[-0.99987665]]),
 array([[-0.99996048]]),
 array([[-0.99993275]]),
 array([[-0.9999706]])]

# Assignment

1. Use this NN on MNIST Dataset first

2. Pick a dataset from kaggle and train this NN