In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Some Helper Functions

In [2]:
'''A simple one-hot encoder that turns each class label from 0 to 9 into a vector.
For example, if y = 3 then the one-hot vector is [0,0,0,1,0,0,0,0,0,0].'''

def one_hot_encode(y, num_classes=10):
    """Convert an array of integer class labels into one-hot rows.

    Args:
        y: array of integer labels; its first dimension is the number of
            samples.
        num_classes: width of each one-hot row (default 10).

    Returns:
        A float array of shape ``(len(y), num_classes)`` filled with zeros
        except for a 1 at column ``y[i]`` of row ``i``.
    """
    n_samples = y.shape[0]
    encoded = np.zeros((n_samples, num_classes))
    row_ids = np.arange(n_samples)
    # Fancy indexing pairs row i with column y[i] and sets that single cell.
    encoded[row_ids, y] = 1
    return encoded

In [3]:
'''ReLU (Rectified Linear Unit) is a simple way to introduce non-linearity: it returns y = x if x > 0 and y = 0 if x <= 0.'''

def relu(Z):
    """Apply the rectified linear unit element-wise.

    Negative entries of ``Z`` become 0; every other entry is returned
    unchanged.
    """
    lower_bound = 0
    activated = np.maximum(lower_bound, Z)  # element-wise max(0, z)
    return activated

In [11]:


def softmax(Z, axis=0):
    """Numerically stable softmax along ``axis``.

    Args:
        Z: array-like of logits. With the default ``axis=0`` each column of a
            2-D input (or the whole of a 1-D input) is normalised
            independently.
        axis: axis along which the probabilities should sum to 1. Defaults to
            0, the class axis used by the rest of this notebook; pass
            ``axis=1`` for row-per-sample data.

    Returns:
        Array with the same shape as ``Z`` whose entries are non-negative and
        sum to 1 along ``axis``.
    """
    Z = np.asarray(Z)
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large logits.
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_vals = np.exp(shifted)
    return exp_vals / np.sum(exp_vals, axis=axis, keepdims=True)


In [None]:
"""here i am trying to explain the softmax fucntion and its use: 

Suppose for one sample (a single column in Z2) the output is:

Z2 = [2.3,0.5,-1.2,0.0,1.8,-0.7,-0.2,0.3,-1.0,0.9] ---->logits

The logit for class 0 is 2.3, for class 1 is 0.5, for class 2 is -1.2, and so on.

These numbers indicate how strongly the network "favors" each class before normalization

After the softmax function is applied:

The softmax function transforms the raw logits into a meaningful output probability vector (non-negative entries that sum to 1):

A2 = [0.40766853, 0.06738715, 0.01231052, 0.04087238, 0.24726346,0.02029662, 0.03346347, 0.05517194, 0.01503611, 0.10052982]

A2[0]≈0.4076: The network estimates a 40.76% probability for class 0.

A2[4]≈0.2473: About 24.73% for class 4.

"""

'here i am trying to explain the softmax fucntion and its use'

In [12]:
# Logits for a single sample: one raw score per class, before normalisation.
Z2 = [
    2.3, 0.5, -1.2, 0.0, 1.8,
    -0.7, -0.2, 0.3, -1.0, 0.9,
]
# Normalise the scores into probabilities and display them as the cell output.
A2 = softmax(Z2)
A2

array([0.40766853, 0.06738715, 0.01231052, 0.04087238, 0.24726346,
       0.02029662, 0.03346347, 0.05517194, 0.01503611, 0.10052982])

In [4]:
def cross_entropy_loss(Y_pred, Y_true):
    """Mean categorical cross-entropy between predictions and one-hot targets.

    NOTE(review): the original body was an unimplemented ``pass``. This
    implementation assumes the column-per-sample layout that ``softmax``
    (axis=0) and ``Neural_Network.forward`` use, i.e. both arguments are
    ``(num_classes, m)``. Confirm against the caller; in particular, the
    ``(m, num_classes)`` output of ``one_hot_encode`` must be transposed
    before it is passed here.

    Args:
        Y_pred: predicted class probabilities, shape ``(num_classes, m)`` or
            ``(num_classes,)`` for a single sample.
        Y_true: one-hot targets with the same shape as ``Y_pred``.

    Returns:
        The cross-entropy summed over classes and averaged over the ``m``
        samples, as a float.

    Raises:
        ValueError: if the two arrays do not have the same shape.
    """
    Y_pred = np.asarray(Y_pred, dtype=float)
    Y_true = np.asarray(Y_true, dtype=float)
    if Y_pred.shape != Y_true.shape:
        raise ValueError(
            f"shape mismatch: Y_pred {Y_pred.shape} vs Y_true {Y_true.shape}"
        )

    # A 1-D input is treated as one sample; otherwise samples are columns.
    num_samples = Y_pred.shape[1] if Y_pred.ndim > 1 else 1

    # Clip away exact zeros so log() never returns -inf.
    safe_pred = np.clip(Y_pred, 1e-12, 1.0)
    return -np.sum(Y_true * np.log(safe_pred)) / num_samples

## Neural Network Class: Neural_Network

### This class encapsulates the network architecture, forward pass, backward propagation, parameter updates, training, and prediction.

In [None]:
"""
input_size = number of input neurons, 784 (28*28)

output_size = number of classes (10), labelled 0 to 9

hidden_size = number of neurons in the hidden layer

learning_rate = step size for gradient descent updates


"""

# Network dimensions and optimiser step size used to build the model.
input_size = 28 * 28   # 784 input neurons
hidden_size = 64       # neurons in the hidden layer
output_size = 10       # one output per class, 0 to 9
learning_rate = 0.1    # gradient-descent step size


In [None]:
class Neural_Network:
    '''Two-layer fully connected network: input -> ReLU hidden layer -> softmax output.

    Data layout: every sample is a column, so a batch ``X`` has shape
    ``(input_size, m)`` and the output has shape ``(output_size, m)``.

    Weight shapes: each hidden neuron needs one weight per input feature, so
    with x inputs and y hidden neurons there are x*y weights. They are stored
    as a 2-D matrix with one row per neuron and one column per input, i.e.
    ``W1`` is ``(hidden_size, input_size)``; ``W2`` is
    ``(output_size, hidden_size)`` for the same reason.

    Initialisation: weights are standard-normal draws scaled by
    ``sqrt(2 / fan_in)`` (He initialisation). The scaling factor is chosen
    because it pairs well with ReLU; it can be changed.
    '''

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, seed=None):
        """Create the weight matrices and zero biases.

        Args:
            input_size: number of input features.
            hidden_size: number of neurons in the hidden layer.
            output_size: number of output classes.
            learning_rate: step size stored on the instance for later updates.
            seed: optional integer. When given, the weights come from a
                private ``RandomState(seed)`` so the initialisation is
                reproducible; when None (default) they are drawn from NumPy's
                global random stream, exactly as before.
        """
        self.learning_rate = learning_rate

        # np.random (the module) and RandomState both expose randn, so the
        # two sources can be used interchangeably below.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Input layer -> hidden layer
        self.W1 = rng.randn(hidden_size, input_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((hidden_size, 1))

        # Hidden layer -> output layer
        self.W2 = rng.randn(output_size, hidden_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((output_size, 1))

    def forward(self, X):
        """Run one forward pass.

        Args:
            X: inputs of shape ``(input_size, m)`` with one sample per column.
                A 1-D array of length ``input_size`` is accepted and treated
                as a single-sample column.

        Returns:
            A tuple ``(A2, cache)``: ``A2`` is the softmax output of shape
            ``(output_size, m)``, and ``cache`` is ``(X, Z1, A1, Z2, A2)``,
            the intermediate values needed for backpropagation.
        """
        # Without this, a 1-D sample gives a (hidden_size,) product that
        # broadcasts against the (hidden_size, 1) bias into a square matrix
        # and silently yields a wrongly shaped output.
        if np.ndim(X) == 1:
            X = np.reshape(X, (-1, 1))

        # Hidden layer: Z1 = W1 . X + b1, followed by ReLU
        Z1 = np.dot(self.W1, X) + self.b1
        A1 = relu(Z1)

        # Output layer: Z2 = W2 . A1 + b2, followed by softmax
        Z2 = np.dot(self.W2, A1) + self.b2
        A2 = softmax(Z2)

        # cache: intermediate values saved for backpropagation
        cache = (X, Z1, A1, Z2, A2)

        return A2, cache


        



