# Task-1:
Choose any one deep learning algorithm of your choice and try to
implement it from scratch.
Important points:
<ul>
● You can use modules like JAX, Numpy, etc. for your
implementations.<br>
● You should be able to explain the mathematical concept
behind your implementation.<br>
 </ul>
Judging Criteria:<br>
● Structure of your code.<br>
● Math and code implementation.<br>
<br>
Take the given Implementation as a reference: <a href="https://github.com/Math-behind-AI/ScratchAI/tree/main/traditional_ML_algorithms">Link </a><br>

Note: For your reference, the link attached herewith shows implementations
of a few ML algorithms. However, the task is to implement DL algorithms
from scratch.<br>

A few of the DL algorithms you can implement but are not limited
to are Multi-layer perceptron, Convolutional Neural Nets,
Recurrent Neural Nets, etc.

## What is Multi-Layer Perceptron (MLP)

## Working of MLP

The Basic steps are as follows:<br>
<ol>
    <li>Initialize the weights and bias with small-randomized values</li>
    <li>Propagate all values in the input layer until output layer(Forward Propagation)</li>
    <li>Update weight and bias in the inner layers(Backpropagation)</li>
    <li>Do it until that the stop criterion is satisfied !</li>
</ol>

In [26]:
import numpy as np
import pandas as pd
import math
import random
from sklearn.datasets import load_digits

In [127]:
class MultiLayerPerceptron:
     
    def __init__(self,hidden_layer, epoch, learning_rate, verbose=False):
        self.hidden_layer = hidden_layer
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.verbose = verbose
        
    # Initializing the weights    
    def initial_weights(self, X, y):
        n_sample, n_features = X.shape
        n_output = y.shape[1]
        
        limit_hidden = 1/math.sqrt(n_features)
        self.hiddenWeight = np.random.uniform(-limit_hidden,limit_hidden, (n_features, self.hidden_layer))        
        self.BiasHidden = np.zeros((1,self.hidden_layer))
        
        limit_out = 1/ math.sqrt(self.hidden_layer)
        self.outputWeight = np.random.uniform(-limit_out,limit_out, (self.hidden_layer, n_output))
        self.BiasOutput = np.zeros((1, n_output))
     
    #Sigmoid Function
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    #Sigmoid Derivative Function
    def sigmoid_derivative(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))
     
    #SoftMax Function (Output Layer)    
    def softmax(self, z):
        e_x = np.exp(z - np.max(z, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    
    #SoftMax Gradient Function
    def softmax_gradient(self, z):
        return self.softmax(z) * (1 - self.softmax(z))
    
    #Cross-Entropy Loss Function
    def loss(self, h, y):
        h = np.clip(h, 1e-15, 1 - 1e-15)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h))
    
    #Cross-Entropy Loss Gradient Function
    def loss_gradient(self, h, y):
        h = np.clip(h, 1e-15, 1 - 1e-15)
        return -(h/y) + (1-h)/(1-y)
    
    #Accuracy Score Function
    def accuracy_score(self, y_true, y_pred):
        accuracy = np.sum(y_true == y_pred, axis=0) / len(y_true)
        return accuracy
    
    #Prediction Function
    def predict(self, X):
        hidden_input = X.dot(self.hiddenWeight) + self.BiasHidden
        hidden_output = self.sigmoid(hidden_input)
        output_layer_input = hidden_output.dot(self.outputWeight) + self.BiasOutput
        y_pred = self.softmax(output_layer_input)
        return y_pred
    
    #Fit Function
    def fit(self, X, y):
        self.initial_weights(X, y)
        n_epoch = 1
        
        while(n_epoch <= self.epoch):
            
            # Forward Propogation
            #hidden Layer
            hidden_input = X.dot(self.hiddenWeight) + self.BiasHidden
            hidden_output = self.sigmoid(hidden_input)
            #output layer
            output_layer_input = hidden_output.dot(self.outputWeight) + self.BiasOutput
            y_pred = self.softmax(output_layer_input)
            
            #Backward Propogation
            #Output Layer Gradient
            grad_out_input = self.loss_gradient(y, y_pred) * self.softmax_gradient(output_layer_input)
            grad_output = hidden_output.T.dot(grad_out_input)
            grad_biasoutput = np.sum(grad_out_input,axis=0,keepdims=True)
            #Hidden Layer Gradient
            grad_input_out = grad_out_input.dot(self.outputWeight.T) * self.sigmoid_derivative(hidden_input)
            grad_input = X.T.dot(grad_input_out)
            grad_biasinput = np.sum(grad_input_out, axis=0, keepdims=True)
            
            #Updating Weights
            self.outputWeight -= self.learning_rate * grad_output
            self.BiasOutput -= self.learning_rate *grad_biasoutput
            self.hiddenWeight -= self.learning_rate * grad_input
            self.BiasHidden -= self.learning_rate * grad_biasinput
                        
            
            n_epoch += 1
            
       

In [128]:
def main():
    data = load_digits()
    X = data.data
    y = data.target
    
    #Normalize X
    X1 = X.copy()
    X1 = (X1 - X1.mean())/X1.std()
    
    #Categorize Y
    y1 = np.zeros((y.shape[0], (np.amax(y)+1)))
    y1[np.arange(y.shape[0]), y] = 1
    
    #Train-Test Split
    split_i = len(y1) - int(len(y1) // (1 / 0.2))
    X_train, X_test = X1[:split_i], X1[split_i:]
    y_train, y_test = y1[:split_i], y1[split_i:]
    
    
    clf = MultiLayerPerceptron(hidden_layer = 10, epoch=1000, learning_rate=0.01, verbose=True)

    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = clf.accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

In [129]:
if __name__ == "__main__":
    main()

ValueError: operands could not be broadcast together with shapes (359,) (64,10) 