<a href="https://colab.research.google.com/github/avikumart/LLM-GenAI-Transformers-Notebooks/blob/main/DeepLearningFiles/nn1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""nn.ipynb

Automatically generated by Colaboratory.
"""

import math
from typing import List

import numpy as np
from numpy import exp

class SimpleNetwork:
    """A simple feedforward network where all units have sigmoid activation.

    The network is defined entirely by a list of weight matrices (no bias
    terms).  Weight matrix ``i`` has shape ``(units_in_layer_i,
    units_in_layer_i+1)`` and instances are stored as rows, so a layer is
    computed as ``sigmoid(activations × weights)``.
    """

    @classmethod
    def random(cls, *layer_units: int):
        """Creates a feedforward neural network with the given number of units
        for each layer.

        Each weight matrix is drawn uniformly from ``[-epsilon, +epsilon]``
        with ``epsilon = sqrt(6) / sqrt(n_in + n_out)``.

        :param layer_units: Number of units for each layer
        :return: the neural network
        """

        def uniform(n_in, n_out):
            epsilon = math.sqrt(6) / math.sqrt(n_in + n_out)
            return np.random.uniform(-epsilon, +epsilon, size=(n_in, n_out))

        # Consecutive layer sizes give the (rows, columns) of each matrix.
        pairs = zip(layer_units, layer_units[1:])
        return cls(*[uniform(i, o) for i, o in pairs])

    def __init__(self, *layer_weights: np.ndarray):
        """Creates a neural network from a list of weight matrices.
        The weights correspond to transformations from one layer to the next, so
        the number of layers is equal to one more than the number of weight
        matrices.

        :param layer_weights: A list of weight matrices
        :raises ValueError: if the number of columns of a weight matrix does
        not match the number of rows of the following weight matrix
        """
        self.weights = list(layer_weights)
        self.num_layers = len(layer_weights) + 1

        # Adjacent matrices must chain: the outputs of one layer are the
        # inputs of the next.
        adjacent = zip(self.weights, self.weights[1:])
        for i, (curr_weight, next_weight) in enumerate(adjacent):
            if curr_weight.shape[1] != next_weight.shape[0]:
                raise ValueError(f"incompatable dimentions between {i} and {i+1}")

    @staticmethod
    def _sigmoid(x):
        """Element-wise logistic sigmoid, ``1 / (1 + exp(-x))``."""
        # For very negative x, exp(-x) overflows to inf and the quotient
        # correctly saturates to 0.0; silence only that spurious warning.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def forward_propagation(self, input_matrix: np.ndarray) -> List[np.ndarray]:
        """Performs forward propagation over the neural network starting with
        the given input matrix and returns all intermediate activations.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance
        :return: A list with one entry per layer: the input matrix itself,
        followed by the sigmoid activations of every subsequent layer. The
        last entry is the network output.
        """
        activations = [input_matrix]
        current_activation = input_matrix

        for weight in self.weights:
            weighted_sum = np.dot(current_activation, weight)
            current_activation = self._sigmoid(weighted_sum)
            activations.append(current_activation)

        return activations

    def predict(self, input_matrix: np.ndarray) -> np.ndarray:
        """Performs forward propagation over the neural network starting with
        the given input matrix.

        Each unit's output should be calculated by taking a weighted sum of its
        inputs (using the appropriate weight matrix) and passing the result of
        that sum through a logistic sigmoid activation function.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :return: A matrix of predictions, where each row is the predicted
        outputs - each in the range (0, 1) - for the corresponding row in the
        input matrix.
        """
        return self.forward_propagation(input_matrix)[-1]

    def predict_zero_one(self, input_matrix: np.ndarray) -> np.ndarray:
        """Performs forward propagation over the neural network starting with
        the given input matrix, and converts the outputs to binary (0 or 1).

        Outputs will be converted to 0 if they are less than 0.5, and converted
        to 1 otherwise.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :return: A matrix of predictions, where each row is the predicted
        outputs - each either 0 or 1 - for the corresponding row in the input
        matrix.
        """
        return np.where(self.predict(input_matrix) < 0.5, 0, 1)

    def gradients(self,
                  input_matrix: np.ndarray,
                  output_matrix: np.ndarray) -> List[np.ndarray]:
        """Performs back-propagation to calculate the gradients for each of
        the weight matrices.

        This method first performs a pass of forward propagation through the
        network, then applies the following procedure to calculate the
        gradients. In the following description, × is matrix multiplication,
        ⊙ is element-wise product, and ⊤ is matrix transpose.

        First, calculate the error, error_L, between last layer's activations,
        h_L, and the output matrix, y:

        error_L = h_L - y

        Then, for each layer l in the network, starting with the layer before
        the output layer and working back to the first layer (the input matrix),
        calculate the gradient for the corresponding weight matrix as follows.
        First, calculate g_l as the element-wise product of the error for the
        next layer, error_{l+1}, and the sigmoid gradient of the next layer's
        weighted sum (before the activation function), a_{l+1}.

        g_l = (error_{l+1} ⊙ sigmoid'(a_{l+1}))⊤

        Then calculate the gradient matrix for layer l as the matrix
        multiplication of g_l and the layer's activations, h_l, divided by the
        number of input examples, N:

        grad_l = (g_l × h_l)⊤ / N

        Finally, calculate the error that should be backpropagated from layer l
        as the matrix multiplication of the weight matrix for layer l and g_l:

        error_l = (weights_l × g_l)⊤

        Once this procedure is complete for all layers, the grad_l matrices
        are the gradients that should be returned.

        The implementation keeps g_l un-transposed (instances as rows), so it
        computes the equivalent forms ``h_l⊤ × g_l`` and ``g_l × weights_l⊤``.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :param output_matrix: A matrix of expected outputs, where each row is
        the expected outputs - each either 0 or 1 - for the corresponding row in
        the input matrix.
        :return: the gradient matrix for each weight matrix
        """
        # A single forward pass yields every activation, including the output.
        activations = self.forward_propagation(input_matrix)
        num_examples = input_matrix.shape[0]

        # Error of the output layer.
        next_layer_error = activations[-1] - output_matrix

        # Filled by index because the layers are visited back to front.
        gradients = [None] * len(self.weights)

        for l in range(len(self.weights) - 1, -1, -1):
            next_activation = activations[l + 1]
            # Since h = sigmoid(a), sigmoid'(a) = h * (1 - h).
            sig_derivative = next_activation * (1 - next_activation)
            g_l = np.multiply(next_layer_error, sig_derivative)
            gradients[l] = np.dot(activations[l].T, g_l) / num_examples
            if l > 0:
                # No error needs to be propagated back into the input matrix.
                next_layer_error = np.dot(g_l, self.weights[l].T)
        return gradients

    def train(self,
              input_matrix: np.ndarray,
              output_matrix: np.ndarray,
              iterations: int = 10,
              learning_rate: float = 0.1) -> None:
        """Trains the neural network on an input matrix and an expected output
        matrix.

        Training should repeatedly (`iterations` times) calculate the gradients,
        and update the model by subtracting the learning rate times the
        gradients from the model weight matrices.

        Each update stores a new array in ``self.weights``; the arrays that
        were originally passed to the constructor are not modified.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :param output_matrix: A matrix of expected outputs, where each row is
        the expected outputs - each either 0 or 1 - for the corresponding row in
        the input matrix.
        :param iterations: The number of gradient descent steps to take.
        :param learning_rate: The size of gradient descent steps to take, a
        number that the gradients should be multiplied by before updating the
        model weights.
        """
        for _ in range(iterations):
            grads = self.gradients(input_matrix, output_matrix)
            for idx, grad in enumerate(grads):
                # Rebind rather than `-=`: an in-place update fails to cast
                # float gradients into integer-typed weight matrices and
                # would overwrite the caller's own arrays.
                self.weights[idx] = self.weights[idx] - learning_rate * grad