In [226]:
import numpy as np
import math 

In [227]:
# set random seed for reproducibility 
np.random.seed(42)

In [228]:
from activation_function import ActivationFunction
from loss_function import LossFunction

In [229]:
class Neuron:
    """
    A simple artificial neuron that computes a weighted sum of its inputs, applies an activation function, and produces an output.
    """

    def __init__(self, num_inputs, activation ='relu'):
        """
        Initialize the neuron with random weights and bias.
        """

        # xavier initialization for weights
        limit = 1/math.sqrt(num_inputs)
        self.weights = np.random.uniform(-limit,limit,num_inputs)

        # Bias initialization
        self.bias = np.random.uniform(-limit, limit)

        self.inputs = None
        self.output = None

        self.activation = ActivationFunction.get_activation(activation)
        self.activation_derivative = ActivationFunction.get_activation_derivative(activation)

    
    def forward(self, inputs):
        """
        Compute output of the neurons in the layer given the inputs.
        """

        # preserve copy of original inputs 
        self.inputs = np.array(inputs)
        # compute weighted sum
        weighted_sum = np.dot(self.weights, self.inputs) + self.bias

        # applying ReLU by default
        self.output = self.activation(weighted_sum)

        return self.output

    def update_weights(self, dl_dw, dl_db, learning_rate):
        """
        Update weights and bias using gradients and learning rate.
        """
        # Ensure dl_dw is a NumPy array
        dl_dw = np.array(dl_dw)

        # Update weights
        self.weights -= learning_rate * dl_dw

        # Update bias
        self.bias -= learning_rate * dl_db

    
    def backward(self, dL_dy, lenning_rate=0.01):
        """
        Backward pass to compute gradients and update weights.
        Args:
            dL_dy: Gradient of loss with respect to the neuron's output.
            learning_rate: Learning rate for weight updates.


        Returns:
            dL_dx: Gradient of loss with respect to the neuron's inputs.

        Info:
        Using chain rule:
        dL_dx = dL_dy * dy_dz * dz_dx

        where:
        - dL_dy: Gradient of loss with respect to the neuron's output (from next layer) 
        - dy_dz: Derivative of activation function at weighted sum
        - dz_dx: Weights of the neuron

        Gradients for weights and bias:
        - dl_dw = dL_dz * inputs
        - dl_db = dL_dz * 1
        """

        dy_dz = self.activation_derivative(self.output)
        dL_dz = dL_dy * dy_dz  

        dl_dw = dL_dz * self.inputs  # gradient for weights
        dl_dw = dl_dw
        dl_db = dL_dz * 1 # gradient for bias

        self.update_weights(dl_dw, dl_db, lenning_rate)

        dL_dx = dL_dz * self.weights  # gradient with respect to inputs to pass to previous layer

        return dL_dx




    def __str__(self):
        """String representation of the neuron"""
        return f"Neuron(weights={[round(w, 3) for w in self.weights]}, bias={round(self.bias, 3)})"



In [230]:
# Create a neuron with 3 inputs
neuron = Neuron(num_inputs=3, activation='relu')

# Print initial weights and bias
print("Initial Weights:", neuron.weights)
print("Initial Bias:", neuron.bias)

print("Shape of Weights:", neuron.weights.shape)
print("Shape of Bias:", np.array(neuron.bias).shape)

# Example batch of inputs and gradients
inputs = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])  # Batch of 3 input vectors
dL_dy = np.array([0.1, 0.2, 0.3])  # Gradients for each input vector
learning_rate = 0.01

# Perform forward pass
for single_input in inputs:
    output = neuron.forward(single_input)
    print("Output:", output)
print("Outputs:", output)

# Perform backward pass
neuron.backward(dL_dy, learning_rate)

# Print updated weights and bias
print("Updated Weights:", neuron.weights)
print("Updated Bias:", neuron.bias)

Initial Weights: [-0.14486859  0.52044005  0.26788353]
Initial Bias: 0.11392100481799905
Shape of Weights: (3,)
Shape of Bias: ()
Output: 0.2838872148932628
Output: 0.4769237117328132
Output: 0.6699602085723636
Outputs: 0.6699602085723636
Updated Weights: [-0.14556859  0.51884005  0.26518353]
Updated Bias: [0.112921 0.111921 0.110921]


In [231]:
class Layer:
    """
    A layer of neurons in a neural network.
    """

    def __init__(self, num_neurons, num_inputs_per_neuron=None, activation='relu', is_output=False):
        """
        Initialize the layer with given number of neurons, each with specified number of inputs.

        Args: 
            num_neurons (int): Number of neurons in the layer.
            num_inputs_per_neuron (int): Number of inputs each neuron receives.
            is_output (bool): Flag indicating if this layer is the output layer.
        """

        self.num_neurons = num_neurons
        self.num_inputs_per_neuron = num_inputs_per_neuron
        self.is_output = is_output

        # creating neurons for the layer
        self.neurons = [Neuron(num_inputs_per_neuron, activation) for _ in range(num_neurons)]
        self.inputs = None
        self.outputs = None

        
    def forward(self, inputs):
        """
        Forwards pass through layers sequentially by computing outputs of all neurons in the layer.
        """

        self.inputs = np.array(inputs)

        # get output from each neuron
        self.outputs = np.array([neuron.forward(inputs) for neuron in self.neurons])

        return self.outputs
    
    def backward(self, dL_dy, learning_rate):
        """
        Backward pass to compute gradients and update weights for all neurons in the layer.

        dL_dy: Gradient of loss with respect to the layer's outputs.
        dL_dx : Gradient of loss with respect to the layer's inputs.
        Returns dL_dx to propagate to previous layer.
        """

        dL_dy_current = np.zeros(self.num_inputs_per_neuron) # Gradient accumulator for inputs to this layer

        for i, neuron in enumerate(self.neurons):
            dL_dx = neuron.backward(dL_dy[i], learning_rate)  # get gradient w.r.t inputs from each neuron
            dL_dy_current += dL_dx # summation of gradients 

        return dL_dy_current   # propagate gradient to previous layer



    def __str__(self):
        """String representation of the layer"""
        layer_type = "Output" if self.is_output else "Hidden"
        return f"{layer_type} Layer ({self.num_neurons} neurons, {self.num_inputs_per_neuron} inputs each)"
        

In [232]:
class Network:
    """
        Neural Network consisting of multiple layers.
        Basic idea of Multi-Layer perceptron
    """

    def __init__(self, loss_name='mse'):


        self. layers = []
        self.loss_fn = LossFunction.get_loss_function(loss_name)
        self.loss_fn_derivative = LossFunction.get_loss_derivative(loss_name)

    def add_layer(self, num_neurons, num_inputs=None, activation = 'relu',is_output=False):
        """
        Initialize layers and add to the network.

        Args:
            num_neurons (int): Number of neurons in the layer.
            num_inputs (int): Number of inputs each neuron receives. Required for the first layer.
            activation (str): Activation function to be used in the layer.
            is_output (bool): Flag indicating if this layer is the output layer.
        """

        if not self.layers and num_inputs is None:
            raise ValueError("Number of inputs must be specified for the first layer.")
        
        num_inputs_per_neuron = num_inputs if not self.layers else self.layers[-1].num_neurons  # get from previous layer
        layer = Layer(num_neurons, num_inputs_per_neuron, activation, is_output)
        self.layers.append(layer)

    def forward(self, inputs):
        """
        Forward pass through the entire network.
        """

        current_input = np.array(inputs)
        for layer in self.layers: 
            current_input = layer.forward(current_input)
        return current_input

    def predict(self, X):
        """Predict outputs for given inputs X."""
        X = np.array(X)
        if X.ndim == 1:
            # Single input vector
            return self.forward(X)
        else:
            # Batch of input vectors
            return np.array([self.forward(x) for x in X])
    
    def backward(self, predictions, targets):
        """
        Backward pass to compute gradients and update weights.
        dL_dy: Gradient of loss with respect to the network's output.
        """

        dL_dy = self.loss_fn_derivative(predictions, targets)

        for layer in reversed(self.layers):
            dL_dy = layer.backward(dL_dy, self.learning_rate)
        
        
    
    # fit method for training 
    def fit(self, X,y, epochs =100, learning_rate =0.01):
        """
        Train the network using Gradient descent

        Args:
            X (array-like): Input data.
            y (array-like): Target labels.
            epochs (int): Number of training epochs.
            learning_rate (float): Learning rate for weight updates.

        Info:
            For each epoch, perform forward pass, compute loss, and backward pass to update weights.

        """

        self.learning_rate = learning_rate
        for epoch in range(epochs):
            total_loss = 0
            for inputs, targets in zip(X,y):
                targets = np.array(targets).reshape(-1) 
                # forward pass
                predictions = self.predict(inputs)

                # compute loss 
                loss = self.loss_fn(predictions, targets)
                total_loss += loss

                # backward pass 
                self.backward(predictions, targets)

            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {total_loss/len(X)}")

        
        # final outcome 
        print(f"Final Loss: {total_loss/len(X)}")

    

    def __str__(self):
        """String representation of the network"""
        return f"Network(layers={self.num_layers}, layer_sizes={self.layer_sizes})"


In [237]:
nw = Network(loss_name='huber')

nw.add_layer(num_neurons=2, num_inputs=2, activation='relu') 
nw.add_layer(num_neurons=3, activation='tanh')
nw.add_layer(num_neurons=1, activation='linear', is_output=True)


print("Network Architecture:")
for i, layer in enumerate(nw.layers):
    print(f"Layer {i+1}: {layer}")
    for j, neuron in enumerate(layer.neurons):
        print(f"  Neuron {j+1}: {neuron}")


Network Architecture:
Layer 1: Hidden Layer (2 neurons, 2 inputs each)
  Neuron 1: Neuron(weights=[-0.189, -0.062], bias=0.403)
  Neuron 2: Neuron(weights=[-0.425, 0.02], bias=0.131)
Layer 2: Hidden Layer (3 neurons, 2 inputs each)
  Neuron 1: Neuron(weights=[-0.641, 0.152], bias=-0.466)
  Neuron 2: Neuron(weights=[-0.615, 0.635], bias=0.659)
  Neuron 3: Neuron(weights=[0.436, -0.276], bias=-0.569)
Layer 3: Output Layer (1 neurons, 3 inputs each)
  Neuron 1: Neuron(weights=[0.213, -0.069, -0.436], bias=-0.006)


In [238]:
input_data = np.array([0.5, -1.5])
output = nw.predict(input_data)
print(f"Network output: {output}")

# You can also test layer by layer:
print("\nLayer by layer:")
layer1_output = nw.layers[0].forward(input_data)
print(f"Layer 1 output: {layer1_output}")

layer2_output = nw.layers[1].forward(layer1_output)
print(f"Layer 2 (final) output: {layer2_output}")


# shape of neurons weight and bias 
for i, layer in enumerate(nw.layers):
    for j, neuron in enumerate(layer.neurons):
        print(f"Layer {i+1}, Neuron {j+1} - Weights shape: {neuron.weights.shape}, Bias shape: {np.array(neuron.bias).shape}")

Network output: [-0.00077757]

Layer by layer:
Layer 1 output: [0.4019929 0.       ]
Layer 2 (final) output: [-0.61925385  0.38951937 -0.37450472]
Layer 1, Neuron 1 - Weights shape: (2,), Bias shape: ()
Layer 1, Neuron 2 - Weights shape: (2,), Bias shape: ()
Layer 2, Neuron 1 - Weights shape: (2,), Bias shape: ()
Layer 2, Neuron 2 - Weights shape: (2,), Bias shape: ()
Layer 2, Neuron 3 - Weights shape: (2,), Bias shape: ()
Layer 3, Neuron 1 - Weights shape: (3,), Bias shape: ()


In [239]:
X_train = np.array([[0.1, 0.2], [0.3, 0.4]])
y_train = np.array([0.3, 0.7])

nw.fit(X_train, y_train, epochs=50, learning_rate=0.01)



Epoch 0, Loss: 0.13513185374227343
Epoch 10, Loss: 0.07190008716524127
Epoch 20, Loss: 0.04317580745705396
Epoch 30, Loss: 0.03029933226451267
Epoch 40, Loss: 0.02459714894995591
Final Loss: 0.022263918997647927


In [240]:
nw.predict([[0.2, 0.4],[0.5,0.6]])

array([[0.44004726],
       [0.445667  ]])