base class for Gates 

In [609]:
import numpy as np
# Define the base class for Gates 
class Gate: 
    def forward(self): 
        raise NotImplementedError 
    def backward(self): 
        raise NotImplementedError  

AddGate

In [610]:
# Example of an AddGate class inheriting from the Gate class 
class AddGate(Gate): 
    def forward(self, x, y): 
        self.x = x 
        self.y = y 
        return x + y 
    def backward(self, dz): 
        dx = dz * np.ones_like(self.x) 
        dy = dz * np.ones_like(self.y) 
        return dx, dy 

 MultiplyGate

In [611]:
# Example of a MultiplyGate class inheriting from the Gate class
class MultiplyGate(Gate):
    def forward(self, x, y):
        self.x = x
        self.y = y
        return np.matmul(x,y)

    def backward(self, dz,ax):
        dx = np.matmul(dz,ax)
        dy = dz * self.x.T
        return dx, dy

Linear activation function

In [612]:

# Example of a Linear activation function
class LinearActivation(Gate):
    def forward(self, x):
        self.x = x
        return x

    def backward(self, dz):
        dx = dz
        return dx

ReLU activation function

In [613]:
# Example of a ReLU activation function
class ReLUActivation(Gate):
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, dz):
        dx = dz * np.where(self.x > 0, 1, 0)
        return dx

Sigmoid activation function

In [614]:
# import numpy as np

# class SigmoidActivation:
#     def __init__(self):
#         self.x = None

#     def forward(self, x):
#         self.x = x
#         return 1 / (1 + np.exp(-x))

#     def backward(self, dz):
#         sigmoid_x = 1 / (1 + np.exp(-self.x))
#         dx = dz * sigmoid_x * (1 - sigmoid_x)
#         return dx
class SigmoidActivation:
    def __init__(self):
        self.sigmoid_x = None

    def forward(self, x):
        self.sigmoid_x = 1 / (1 + np.exp(-x))
        return self.sigmoid_x

    def backward(self, dz):
        if self.sigmoid_x is None:
            raise ValueError("Forward method must be called before backward method.")
        dx = dz * self.sigmoid_x * (1 - self.sigmoid_x)
        return dx


Softmax activation function

In [615]:
# Example of a Softmax activation function
class SoftmaxActivation(Gate):
    def forward(self, x):
        self.x = x
        exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

    def backward(self, dz):
        softmax_x = self.forward(self.x)
        dx = dz * softmax_x * (1 - softmax_x)
        return dx

Tanh activation function

In [616]:
# Example of a Tanh activation function
class TanhActivation(Gate):
    def forward(self, x):
        self.x = x
        return np.tanh(x)

    def backward(self, dz):
        tanh_x = np.tanh(self.x)
        dx = dz * (1 - tanh_x ** 2)
        return dx

 Binary Cross-Entropy (BCE) loss function

In [617]:
# Example of Binary Cross-Entropy (BCE) loss function
class BinaryCrossEntropyLoss(Gate):
    def forward(self, y_pred, y_true):
        self.y_pred = y_pred
        self.y_true = y_true
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def backward(self):
        dx = (self.y_pred - self.y_true) / (self.y_pred * (1 - self.y_pred))
        return dx

L2 loss function

In [618]:
# Example of L2 loss function
class L2Loss(Gate):
    def forward(self, y_pred, y_true):
        self.y_pred = y_pred
        self.y_true = y_true
        return 0.5 * np.mean((y_pred - y_true) ** 2)

    def backward(self):
        dx = self.y_pred - self.y_true
        return dx

Implementing Computational Graph / Model 

In [619]:
class Model:
    def __init__(self, layers_dim, activation_func, loss):
        self.layers_dim = layers_dim
        self.activation_func = activation_func
        self.loss = loss
        self.parameters = {}
        self.activations = {}
        self.gradients = {}
        self.add = AddGate()
        self.multiply = MultiplyGate()

        # Initialize weights and biases using Xavier initialization
        for i in range(1, len(layers_dim)):
            prev_dim = layers_dim[i - 1]
            curr_dim = layers_dim[i]
            self.parameters["W" + str(i)] = np.random.randn(curr_dim, prev_dim) * np.sqrt(1 / prev_dim)
            self.parameters["b" + str(i)] = np.zeros((curr_dim, 1))  # Initialize biases as column vectors

    def predict(self, X):
        self.activations["A0"] = X

        for i in range(1, len(self.layers_dim)):
            prev_a = self.activations["A" + str(i - 1)]
            W = self.parameters["W" + str(i)]
            b = self.parameters["b" + str(i)]

            activation_func = self.activation_func()  # Create an instance of the activation function
            print(f"w:{W}")
            print(f"a:{prev_a}")
            Z = self.add.forward(self.multiply.forward(W, prev_a) , b)
            A = activation_func.forward(Z)

            self.activations["A" + str(i)] = A
            self.activations["Z" + str(i)] = Z

        return self.activations["A" + str(len(self.layers_dim) - 1)]

    def train(self, X, y, num_epochs, learning_rate):
        for epoch in range(num_epochs):
            # Forward propagation
            A = self.predict(X)

            # Compute loss
            loss = self.loss.forward(A, y)
            mse = np.mean((A - y) ** 2)

            # Backward propagation
            dA = self.loss.backward()
            self.gradients["dA" + str(len(self.layers_dim) - 1)] = dA

            for i in reversed(range(1, len(self.layers_dim))):
                activation_func = self.activation_func()  # Create an instance of the activation function
                A_prev = self.activations["A" + str(i - 1)]
                W = self.parameters["W" + str(i)]
                b = self.parameters["b" + str(i)]

                Z = self.activations["Z" + str(i)]  # Retrieve Z from stored activations
                activation_func.forward(Z)  # Call forward method to compute self.sigmoid_x
                dZ = activation_func.backward(self.gradients["dA" + str(i)])  # Pass Z to backward method
                dt,db = self.add.backward(dZ)
                dW,dx= self.multiply.backward(dt,A_prev.T)
                #dW = np.dot(dZ, A_prev.T)
                db = np.sum(db, axis=1, keepdims=True)

                self.gradients["dA" + str(i - 1)] = np.dot(W.T, dZ)
                self.gradients["dW" + str(i)] = dW
                self.gradients["db" + str(i)] = db

            # Update parameters
            for i in range(1, len(self.layers_dim)):
                self.parameters["W" + str(i)] -= learning_rate * self.gradients["dW" + str(i)]
                self.parameters["b" + str(i)] -= learning_rate * self.gradients["db" + str(i)]

            print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss}, MSE: {mse}")
            print("Weights:")
            for i in range(1, len(self.layers_dim)):
                print(f"Layer {i}:")
                print(f"W{i}:")
                print(self.parameters["W" + str(i)])
            print()


 Example usage
 

In [620]:
# Example usage:
layers_dim = [2, 3, 1]  # Example: 2 input units, 3 units in the hidden layer, 1 output unit
activation_func = SigmoidActivation  # Use Sigmoid activation function
loss = L2Loss()  # Use L2 loss function
model = Model(layers_dim, activation_func, loss)

# Assuming X and y are your input and output data
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

num_epochs = 100  # Specify the number of epochs
learning_rate = 0.1  # Specify the learning rate
model.train(X.T, y.T, num_epochs, learning_rate)

w:[[ 0.71174315 -0.28940041]
 [-1.57158592  0.79160392]
 [ 0.22955776 -1.01997075]]
a:[[0 0 1 1]
 [0 1 0 1]]
w:[[ 0.3141976  -0.66227644  0.90748887]]
a:[[0.5        0.42815066 0.67078622 0.60404371]
 [0.5        0.68817562 0.17199042 0.31432377]
 [0.5        0.2650331  0.55713874 0.31208   ]]
Epoch 1/100, Loss: 0.13002443846432957, MSE: 0.26004887692865913
Weights:
Layer 1:
W1:
[[ 0.71125976 -0.28944625]
 [-1.57036466  0.79174289]
 [ 0.22866064 -1.02038388]]
Layer 2:
W2:
[[ 0.30980432 -0.66330092  0.90411704]]

w:[[ 0.71125976 -0.28944625]
 [-1.57036466  0.79174289]
 [ 0.22866064 -1.02038388]]
a:[[0 0 1 1]
 [0 1 0 1]]
w:[[ 0.30980432 -0.66330092  0.90411704]]
a:[[0.49985466 0.42799711 0.67055105 0.60377806]
 [0.50042195 0.68856749 0.1724051  0.31498106]
 [0.49955758 0.26460812 0.55648065 0.31141916]]
Epoch 2/100, Loss: 0.1298111596818156, MSE: 0.2596223193636312
Weights:
Layer 1:
W1:
[[ 0.71079348 -0.28948329]
 [-1.56915715  0.79186949]
 [ 0.22780198 -1.02077391]]
Layer 2:
W2:
[[ 0.30