## Project 3: Digit Recognition

Good programmers can use neural nets. Great programmers can make them. This section will guide you through the implementation of a simple neural net with an architecture as shown in the figure below. You will implement the net from scratch (you will probably never do this again, don't worry) so that you later feel confident about using libraries. We provide some skeleton code in neural_nets.py for you to fill in.

![neural_net](../Media/images_neuralnet.png)

In [2]:
import numpy as np
import matplotlib.pyplot as plt

### 3. Activation Functions

#### Rectified Linear Unit

In [3]:
def rectified_linear_unit(x):
    """Apply ReLU element-wise: values below 0 are clamped to 0.

    Works on scalars as well as NumPy arrays/matrices, since the clamp is
    delegated to ``np.maximum``.
    """
    lower_bound = 0
    clamped = np.maximum(x, lower_bound)
    return clamped

#### Taking the Derivative

In [62]:
def rectified_linear_unit_derivative(x):
    """Return the derivative of ReLU evaluated element-wise at x.

    The derivative is 1 for every strictly positive entry and 0 for every
    entry that is negative or exactly 0.

    Args:
        x: A scalar, ``np.ndarray`` or ``np.matrix`` of pre-activation values.

    Returns:
        A new value with the same shape and dtype as ``x`` (an ``np.matrix``
        stays an ``np.matrix``) holding 1 where ``x > 0`` and 0 elsewhere.
        The argument itself is never modified.
    """
    # Build the result from a boolean mask instead of assigning into x:
    # writing into x would overwrite the caller's data, and mask assignment
    # is not supported at all on plain Python scalars.
    values = np.asanyarray(x)  # asanyarray keeps np.matrix inputs as matrices
    return (values > 0).astype(values.dtype)

------
### 4. Training the Network

In [92]:
def output_layer_activation(x):
    """Identity activation for the output layer: hand back x unchanged."""
    activated = x
    return activated

def output_layer_activation_derivative(x):
    """Slope of the identity activation, which is the constant 1 for any x."""
    slope = 1
    return slope


class NeuralNetwork():
    """
        A small fully connected network: 2 inputs -> 3 ReLU hidden units -> 1 linear output.

        Contains the following functions:
            -train: tunes parameters of the neural network based on error obtained from forward propagation.
            -predict: predicts the label of a feature vector based on the class's parameters.
            -train_neural_network: trains a neural network over all the data points for the specified number of epochs during initialization of the class.
            -test_neural_network: uses the parameters specified at the time in order to test that the neural network classifies the points given in testing_points within a margin of error.
    """

    def __init__(self):

        # DO NOT CHANGE PARAMETERS (Initialized to floats instead of ints)
        self.input_to_hidden_weights = np.matrix('1. 1.; 1. 1.; 1. 1.')  # (3,2)
        self.hidden_to_output_weights = np.matrix('1. 1. 1.')            # (1,3)
        self.biases = np.matrix('0.; 0.; 0.')                            # (3,1): one bias per hidden unit
        self.learning_rate = .001
        self.epochs_to_train = 10
        self.training_points = [((2,1), 10), ((3,3), 21), ((4,5), 32), ((6, 6), 42)]
        self.testing_points = [(1,1), (2,2), (3,3), (5,5), (10,10)]

    def _forward(self, input_values):
        """Run one forward pass for a (2,1) input column.

        Returns:
            A tuple ``(hidden_layer_weighted_input, hidden_layer_activation,
            output, activated_output)`` with shapes (3,1), (3,1), (1,1), (1,1).
        """
        # The biases are (3,1), so they belong to the hidden layer. Adding them
        # to the (1,1) output instead would silently broadcast it to (3,1).
        hidden_layer_weighted_input = self.input_to_hidden_weights @ input_values + self.biases  # (3,2) @ (2,1) + (3,1) = (3,1)
        hidden_layer_activation = rectified_linear_unit(hidden_layer_weighted_input)              # (3,1)

        output = self.hidden_to_output_weights @ hidden_layer_activation                          # (1,3) @ (3,1) = (1,1)
        activated_output = output_layer_activation(output)                                        # (1,1)

        return hidden_layer_weighted_input, hidden_layer_activation, output, activated_output

    def train(self, x1: float, x2: float, y: float) -> None:
        """Perform one gradient-descent step on a single training example.

        The cost being minimised is C = 1/2 * (y - output)^2.

        Args:
            x1: First input feature.
            x2: Second input feature.
            y: Target value for this example.
        """

        ### Forward propagation ###
        input_values = np.matrix([[x1], [x2]])  # (2,1)
        (hidden_layer_weighted_input,
         hidden_layer_activation,
         output,
         activated_output) = self._forward(input_values)

        ### Backpropagation ###

        # dC/d(output) = (output - y). The sign matters: the parameters are
        # updated with "param - learning_rate * gradient", so using (y - output)
        # here would climb the cost surface instead of descending it.
        output_layer_error = np.multiply(
            activated_output - y,
            output_layer_activation_derivative(output),
        )                                                                                   # (1,1)

        # Push the output error back through the output weights, then gate it
        # by the ReLU slope of each hidden unit. A copy is passed because the
        # derivative helper may modify its argument in place.
        hidden_layer_error = np.multiply(
            self.hidden_to_output_weights.T @ output_layer_error,
            rectified_linear_unit_derivative(hidden_layer_weighted_input.copy()),
        )                                                                                   # (3,1)

        # d(z)/d(b) = 1, so the bias gradient is the hidden error itself.
        bias_gradients = hidden_layer_error                                                 # (3,1)
        # d(output)/d(W_out) = hidden activation; the result must match W_out's (1,3) shape.
        hidden_to_output_weight_gradients = output_layer_error @ hidden_layer_activation.T  # (1,1) @ (1,3) = (1,3)
        # d(z)/d(W_in) = input values; the result must match W_in's (3,2) shape.
        input_to_hidden_weight_gradients = hidden_layer_error @ input_values.T              # (3,1) @ (1,2) = (3,2)

        # Use gradients to adjust weights and biases using gradient descent
        self.biases = self.biases - self.learning_rate * bias_gradients
        self.input_to_hidden_weights = self.input_to_hidden_weights - self.learning_rate * input_to_hidden_weight_gradients
        self.hidden_to_output_weights = self.hidden_to_output_weights - self.learning_rate * hidden_to_output_weight_gradients

    def predict(self, x1, x2):
        """Return the network's scalar output for the input (x1, x2)."""

        input_values = np.matrix([[x1], [x2]])

        # Same forward propagation as in training; only the final value is needed.
        *_, activated_output = self._forward(input_values)

        return activated_output.item()

    # Run this to train your neural network once you complete the train method
    def train_neural_network(self):
        """Call train on every training point, repeated for epochs_to_train epochs."""

        for epoch in range(self.epochs_to_train):
            for x, y in self.training_points:
                self.train(x[0], x[1], y)

    # Run this to test your neural network implementation for correctness after it is trained
    def test_neural_network(self):
        """Print the prediction for each testing point and whether it is within 0.1 of 7 * x1.

        Stops at the first point that falls outside that margin.
        """

        for point in self.testing_points:
            prediction = self.predict(point[0], point[1])
            print("Point,", point, "Prediction,", prediction)
            if abs(prediction - 7*point[0]) < 0.1:
                print("Test Passed")
            else:
                print("Point ", point[0], point[1], " failed to be predicted correctly.")
                return

In [93]:
# Build a network with its fixed starting parameters and run the full training loop.
x = NeuralNetwork()
x.train_neural_network()
# Uncomment to print each testing point's prediction and its pass/fail result:
# x.test_neural_network()

Train: [[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]]
Train: [[18.74182256 18.74182256 18.74182256]
 [18.74182256 18.74182256 18.74182256]
 [18.74182256 18.74182256 18.74182256]]
Train: [[56.22961604 56.22961604 56.22961604]
 [56.22961604 56.22961604 56.22961604]
 [56.22961604 56.22961604 56.22961604]]
Train: [[134.98720902 134.98720902 134.98720902]
 [134.98720902 134.98720902 134.98720902]
 [134.98720902 134.98720902 134.98720902]]
Train: [[10.69980348 10.69980348 10.69980348]
 [10.69980348 10.69980348 10.69980348]
 [10.69980348 10.69980348 10.69980348]]
Train: [[46.8125754 46.8125754 46.8125754]
 [46.8125754 46.8125754 46.8125754]
 [46.8125754 46.8125754 46.8125754]]
Train: [[106.91963496 106.91963496 106.91963496]
 [106.91963496 106.91963496 106.91963496]
 [106.91963496 106.91963496 106.91963496]]
Train: [[147.84090212 147.84090212 147.84090212]
 [147.84090212 147.84090212 147.84090212]
 [147.84090212 147.84090212 147.84090212]]
Train: [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Train: [[0. 0. 0.]
