In [10]:
import numpy as np



# activation function
def sigmoid(x):
    return 1/(1+np.exp(-x))
# derivative of activation function
def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

class NeuralNetwork:
    def __init__(self):
        self.parameters = None
        self.no_of_hidden_layers = 0
        self.epochs=0
        self.learning_rate = 0.1
    
    # ask the user for number of Hidden layers and neurons for each layer
    def get_neurons_per_layer(self):
        neurons_per_layer = []
        no_of_layers = 0
        while no_of_layers <= 0:
            no_of_layers = int(input("Enter number of hidden layers: "))
        
        # input layer
        neurons_per_layer.append(2)
        
        # for loop for hidden layers
        for i in range(no_of_layers):
            no_of_neurons = 0
            while no_of_neurons <= 0:
                no_of_neurons = int(input(f"Enter number of neurons for layer {i + 1}: "))
            neurons_per_layer.append(no_of_neurons)
        
        # output layer
        neurons_per_layer.append(1)
        
        return neurons_per_layer
    
    def initialize_parameters(self, neurons_per_layer, low=-10, high=10):
        parameters = {}
        no_of_layers = len(neurons_per_layer)
    
        for i in range(1, no_of_layers):
            neurons_in_current_layer = neurons_per_layer[i]
            neurons_in_previous_layer = neurons_per_layer[i - 1]
            parameters['W' + str(i)] = np.random.uniform(low, high, (neurons_in_current_layer, neurons_in_previous_layer))
            parameters['b' + str(i)] = np.ones((neurons_in_current_layer, 1))
        
    
        return parameters
    

    def initialize(self):
        neurons_per_layer = self.get_neurons_per_layer()
        self.parameters = self.initialize_parameters(neurons_per_layer)
        self.no_of_hidden_layers = len(self.parameters) // 2
        while True:
            self.epochs = input("Enter number of epochs: ")
            if self.epochs.isdigit() and int(self.epochs) > 0:
                self.epochs = int(self.epochs)
                break
            else:
                print("Please enter a valid number.")


    def forward(self,X):
        cache = {'A0':X}
        A=X
        # A is the activation of the neuron
        # Z is the sum output of the neuron
        # cache stores the activation and sum output of the neuron

        for i in range(1,self.no_of_hidden_layers+1):
            W=self.parameters['W'+str(i)]
            b=self.parameters['b'+str(i)]
            Z = np.dot(W,A)+b
            A = sigmoid(Z)
            cache['A'+str(i)]=A
            cache['Z'+str(i)]=Z
        
        return A,cache
    
    def backward(self, X, Y, cache):
        
        print(f"=============Gradients===================")
        for i in range(self.no_of_hidden_layers,0,-1):
            # error calculation
            if i==self.no_of_hidden_layers:
                # for output layer
                # Output - Y
                dZ = cache['A'+str(i)]-Y
            else:
                # for hidden layers
                # weights*error*sigmoid derivative
                dZ = np.dot(self.parameters['W'+str(i+1)].T,dZ)*sigmoid_derivative(cache['Z'+str(i)])
                          

            #activation
            A_prev = cache['A'+str(i-1)]

            # we take average because we take all four possibilities at once
            # activation*error
            dW = np.dot(dZ,A_prev.T)/X.shape[1]
            # axis=1 will sum the rows
            # keepdims=True will keep the dimensions of the array same
            
            db = np.sum(dZ,axis=1,keepdims=True)/X.shape[1]

            print(f"dW{i}:\n{dW}")
            print(f"db{i}:\n{db}")
            
            self.parameters['W'+str(i)] -= self.learning_rate*dW
            self.parameters['b'+str(i)] -= self.learning_rate*db
        print(f"================================")

    def display_weights(self):
        for i in range(1, self.no_of_hidden_layers + 1):
            print(f"Weights for  W{i}:")
            print(self.parameters['W' + str(i)])
            print(f"Bias for  b{i}:")
            print(self.parameters['b' + str(i)])
    
    def train(self, X, Y):
        i=0
        while i<self.epochs:
            print(f"============================Epoch {i}================================")
            self.display_weights()
            A,cache = self.forward(X)
            self.backward(X,Y,cache)
            print(f"Output: {A}")
            if np.mean(np.abs(A-Y))<0.01:
                break
            i+=1
        

        


nn = NeuralNetwork()
nn.initialize()
X = np.array([[0,0,1,1],[0,1,0,1]])
Y = np.array([[0,1,1,0]])
nn.train(X, Y)

 





        


Weights for  W1:
[[ 3.41275739 -5.79234878]
 [-7.42147405 -3.69143298]
 [-2.72578458  1.40393541]]
Bias for  b1:
[[1.]
 [1.]
 [1.]]
Weights for  W2:
[[-1.22796973  9.76747676 -7.95910379]]
Bias for  b2:
[[1.]]
dW2:
[[-0.04938799  0.13116905 -0.10418432]]
db2:
[[-0.23078217]]
dW1:
[[-0.00052159 -0.00093839]
 [-0.00316422 -0.14448111]
 [ 0.1707604   0.11677584]]
db1:
[[-0.04648987]
 [ 0.23879546]
 [ 0.00637613]]
Output: [[0.80601301 0.00336968 0.19777735 0.06971126]]
Weights for  W1:
[[ 3.41280955 -5.79225494]
 [-7.42115763 -3.67698487]
 [-2.74286062  1.39225782]]
Bias for  b1:
[[1.00464899]
 [0.97612045]
 [0.99936239]]
Weights for  W2:
[[-1.22303093  9.75435986 -7.94868535]]
Bias for  b2:
[[1.02307822]]
dW2:
[[-0.04767362  0.12975627 -0.10337024]]
db2:
[[-0.22828848]]
dW1:
[[-0.00084784 -0.00121134]
 [-0.00305682 -0.14308782]
 [ 0.16355598  0.11559459]]
db1:
[[-0.04632012]
 [ 0.24238572]
 [ 0.00214372]]
Output: [[0.80263671 0.00348521 0.20534481 0.07537937]]
Weights for  W1:
[[ 3.412894

<h1>Vanishing Gradient Problem</h1>
This occurs when we use sigmoid across multiple layers. This results in the value of sigmoid derivative very close to zero and hence converges to points called Saturation zones