# Gradient Descent and Backpropagation
### Understanding the math behind gradient descent and how to implement backpropagation in Python
--------------------
**Author: Jay Mody**

**Required Knowledge:**
- Basic Python Skills
- Numpy
- Calculus (derivatives, gradients, chain rule)
- Linear Algebra (matrices, matrix multiplication)

In [None]:
#### Imports ####
import numpy as np

In [None]:
#### Neural Network Class ####
class NeuralNetwork:
    """Feed-forward network with one hidden layer.

    The hidden layer uses a ReLU activation and the output layer uses a
    sigmoid activation. There are no bias terms: each layer is a single
    matrix multiplication followed by its activation.
    """

    ##### Constructor ####
    def __init__(self, n_input_nodes, n_hidden_nodes, n_output_nodes, lr):
        """Set up layer sizes, random weights, learning rate and activations.

        Args:
            n_input_nodes: Number of input features.
            n_hidden_nodes: Number of units in the hidden layer.
            n_output_nodes: Number of units in the output layer.
            lr: Learning rate; stored on the instance as ``self.lr``.
        """
        ## Network ##
        self.n_input_nodes = n_input_nodes
        self.n_hidden_nodes = n_hidden_nodes
        self.n_output_nodes = n_output_nodes

        ## Weights ##
        # Weights are typically initialized from some kind of distribution
        # (random uniform, normal). Here a normal distribution with mean 0
        # and standard deviation 1 is used.
        # Shapes: (n_input_nodes, n_hidden_nodes) and
        #         (n_hidden_nodes, n_output_nodes).
        self.weights_i_h = np.random.normal(
            0.0, 1.0, (self.n_input_nodes, self.n_hidden_nodes)
        )
        self.weights_h_o = np.random.normal(
            0.0, 1.0, (self.n_hidden_nodes, self.n_output_nodes)
        )

        ## Learning Rate ##
        self.lr = lr

        ## Activation Functions ##
        # All activations are element-wise so they accept both scalars and
        # numpy arrays; feed_forward passes them the array from np.dot.

        # Relu Activation: max(x, 0), and its derivative (1 where x > 0, else 0)
        self.relu = lambda x: np.maximum(x, 0)
        self.d_relu = lambda x: np.where(x > 0, 1, 0)

        # Sigmoid Activation: 1 / (1 + e^-x), and its derivative s(x) * (1 - s(x))
        self.sigmoid = lambda x: 1 / (1 + np.exp(-x))
        self.d_sigmoid = lambda x: self.sigmoid(x) * (1 - self.sigmoid(x))

    def feed_forward(self, X):
        """Run a forward pass through the network.

        Args:
            X: Input data whose last axis has length ``n_input_nodes`` so it
                can be multiplied with ``weights_i_h``.

        Returns:
            A tuple ``(hidden_outputs, y_hat)``: the ReLU-activated hidden
            layer values and the sigmoid-activated network output.
        """
        # Hidden Layer
        hidden_inputs = np.dot(X, self.weights_i_h)
        hidden_outputs = self.relu(hidden_inputs)

        # Output Layer
        y_hat_raw = np.dot(hidden_outputs, self.weights_h_o)
        y_hat = self.sigmoid(y_hat_raw)

        return hidden_outputs, y_hat