In [None]:
## The chain rule is the backbone of the backpropagation algorithm

## how to optimize the weights and biases of a neuron so that the output is close to 0

# For a single neuron, we need to find how to best optimize weights and biases such that the loss is minimized
# i.e. we need to move in negative gradient direction - find the derivative of the loss w.r.t. weights and biases

# Coding the backpropagation algorithm
import numpy as np

def relu(input):
    """Apply ReLU element-wise: return the larger of 0 and each value of ``input``."""
    rectified = np.maximum(0, input)
    return rectified

def derivativeRelu(input):
    """Derivative of ReLU: 1 where ``input`` is strictly positive, 0 everywhere else."""
    is_positive = input > 0
    return np.where(is_positive, 1, 0)

inputs = np.array([1.0, -2.0, 3.0])
weights = np.array([-3.0, -1.0, 2.0])  # floats: updated in place inside the loop
bias = 1.0  # float as well
target = 0.0
learning_rate = 0.001

# Local derivatives of the linear step (output = inputs . weights + bias);
# neither depends on the current weights, so they are set once up front.
dOutput_dWeights = inputs
dOutput_dBias = 1

for i in range(1000000):
    # Forward pass: linear step -> ReLU -> squared error against the target
    output = np.dot(inputs, weights) + bias
    relu_output = relu(output)
    error = relu_output - target
    loss = error ** 2

    # Backward pass: multiply the local derivatives along the chain
    dLoss_dReluOutput = 2 * error
    dReluOut_dOutput = derivativeRelu(output)
    dloss_dlinear = dLoss_dReluOutput * dReluOut_dOutput
    dloss_dweights = dloss_dlinear * dOutput_dWeights
    dloss_dbias = dloss_dlinear * dOutput_dBias

    # Gradient-descent step: move each parameter against its gradient
    bias -= learning_rate * dloss_dbias
    weights -= learning_rate * dloss_dweights

    if not i % 1000:  # report when i is 0, 1000, 2000, ... (label is printed 1-based)
        print(f"Iteration {i + 1}, Loss: {loss}")

print(f"Final weights: {weights}")
print(f"Final Bias: {bias}")

In [None]:
import numpy as np

# Define the ReLU function and its derivative
def relu(input):
    """ReLU activation: element-wise maximum of 0 and ``input``."""
    activated = np.maximum(0, input)
    return activated

def relu_derivative(input):
    """Slope of ReLU at each value of ``input``: 1 for values > 0, otherwise 0."""
    active = input > 0
    return np.where(active, 1, 0)

# One input vector, a layer of three neurons with identical starting weights, and the step size
inputs = np.array([1.0, 2.0, 3.0, 4.0])
weights = np.array([[0.1, 0.2, 0.3, 0.4]] * 3)  # three identical rows -> shape (3, 4)
biases = np.array([1.0, 1.0, 1.0])
learning_rate = 0.001

# Gradient descent on Loss = (sum of the ReLU outputs) ** 2
for i in range(10000):
    # Forward pass: pre-activations -> ReLU -> sum -> square
    output = np.dot(inputs, weights.T) + biases
    activation_output = relu(output)
    y = np.sum(activation_output)
    Loss = y ** 2

    # Backward pass, one chain-rule factor at a time
    dloss_dy = 2 * y
    dy_dcombineact = np.ones_like(activation_output)  # d(sum)/d(each summand) = 1
    dcombineact_drelu = relu_derivative(output)
    dloss_dcombineact = dloss_dy * dy_dcombineact
    dloss_drelu = dloss_dcombineact * dcombineact_drelu

    # Parameter gradients: the bias gradient is the pre-activation gradient itself,
    # the weight gradient is its outer product with the inputs (same shape as weights)
    dloss_dbiases = dloss_drelu
    dloss_dweights = np.outer(dloss_drelu, inputs)

    # Step each parameter against its gradient
    biases -= learning_rate * dloss_dbiases
    weights -= learning_rate * dloss_dweights

    # Report the loss on every 20th iteration
    if not i % 20:
        print(f"Iteration {i}: Loss = {Loss}")

# Show the trained parameters
print(f"Final Weights: {weights}")
print(f"Final Biases: {biases}")


In [7]:
## Hand-worked example: gradient matrix of the loss w.r.t. the weights
import numpy as np

# dvalues: 3 x 3 matrix
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])
# inputs: 3 x 4 matrix
inputs = np.array([
    [1, 2, 3, 2.5],
    [2., 5., -1, 2],
    [-1.5, 2.7, 3.3, -0.8],
])

# (4 x 3) . (3 x 3) -> 4 x 3
dweights = inputs.T.dot(dvalues)
print(dweights)

[[ 0.5  0.5  0.5]
 [20.1 20.1 20.1]
 [10.9 10.9 10.9]
 [ 4.1  4.1  4.1]]


In [9]:
## Hand-worked example: gradient of the loss w.r.t. the biases
# dvalues: 3 x 3 matrix
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])

# Sum down the rows (axis 0); keepdims leaves the result as a 1 x 3 row
dbiases = dvalues.sum(axis=0, keepdims=True)
print(dbiases)

[[6. 6. 6.]]


In [19]:
## Hand-worked example: gradient matrix of the loss w.r.t. the inputs
# dvalues: 3 x 3 matrix
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])
# Written as 3 rows of 4 values, then transposed so weights is stored as 4 x 3
weights = np.array([
    [0.2, 0.8, -0.5, 1.],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# (3 x 3) . (3 x 4) -> 3 x 4
dinputs = dvalues.dot(weights.T)
print(dinputs)


[[ 0.44 -0.38 -0.07  1.37]
 [ 0.88 -0.76 -0.14  2.74]
 [ 1.32 -1.14 -0.21  4.11]]


In [33]:
class Layer_Dense:
    """Fully connected layer: ``output = inputs . weights + bias``.

    Attributes set by the methods below:
        weights:  (num_features, num_neurons) array, small random initial values.
        bias:     (1, num_neurons) array of zeros.
        inputs / output:  cached by ``forward``.
        dweights / dbiases / dinputs:  gradients computed by ``backward``.
    """

    def __init__(self, num_features, num_neurons):
        """Create the layer's parameters.

        Args:
            num_features: number of values in each input row.
            num_neurons: number of neurons (output columns).
        """
        # Scale the standard-normal draws down so the initial weights are small
        self.weights = 0.01 * np.random.randn(num_features, num_neurons)
        self.bias = np.zeros((1, num_neurons))

    def forward(self, inputs):
        """Compute the layer output and remember ``inputs`` for the backward pass.

        Args:
            inputs: array whose last dimension is ``num_features``.
        """
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.bias

    def backward(self, dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. this layer's output.

        Must be called after ``forward``, since it reads the cached ``self.inputs``.

        Sets:
            dweights: gradient w.r.t. ``weights`` (same shape as ``weights``).
            dbiases:  gradient w.r.t. ``bias``, summed over the rows of ``dvalues``.
            dinputs:  gradient w.r.t. the inputs, to hand to the previous layer.
        """
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Use this layer's own weights, not a module-level `weights` variable:
        # a global may be undefined or hold an unrelated array from another cell.
        self.dinputs = np.dot(dvalues, self.weights.T)

In [35]:
### RELU Class
class RELU:
    """ReLU activation with a cached forward pass and a matching backward pass."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.output`` to the element-wise max(0, inputs)."""
        self.inputs = inputs
        # Element-wise, so the output keeps the shape of the input
        rectified = np.maximum(0, inputs)
        self.output = rectified

    def backward(self, dvalues):
        """Set ``self.dinputs`` to ``dvalues`` with entries zeroed where the input was <= 0.

        Works on a copy, so the caller's ``dvalues`` array is left untouched.
        """
        blocked = self.inputs <= 0
        grad = dvalues.copy()
        grad[blocked] = 0
        self.dinputs = grad

In [37]:
class Loss:
    """Base class for losses; subclasses supply ``forward(output, y)``."""

    def __init__(self):
        """Nothing to initialise."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses produced by ``self.forward``.

        Args:
            output: model predictions, passed through to ``forward``.
            y: target values, passed through to ``forward``.
        """
        return np.mean(self.forward(output, y))

class Loss_CategoricalEntropy(Loss):
    """Categorical cross-entropy loss for integer or one-hot labels."""

    def forward(self, y_pred, y_true):
        """Return the negative log of the confidence assigned to each sample's true class.

        Args:
            y_pred: predicted values, one row per sample.
            y_true: 1-D array of class indices, or a 2-D array multiplied
                element-wise against the predictions (e.g. one-hot rows).
        """
        n_samples = len(y_pred)

        # Keep predictions strictly inside (0, 1) so log(0) cannot occur
        clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        label_rank = len(y_true.shape)
        if label_rank == 1:
            # Class indices: pick one column per row
            picked = clipped[range(n_samples), y_true]
        elif label_rank == 2:
            # 2-D labels: weight each row by its label row and sum
            picked = np.sum(clipped * y_true, axis=1)

        return -np.log(picked)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` to ``-y_true / dvalues`` divided by the number of samples.

        Args:
            dvalues: array with one row per sample and one column per label.
            y_true: 1-D class indices (expanded to one-hot rows here) or a 2-D array.
        """
        n_samples = len(dvalues)
        n_labels = len(dvalues[0])

        # Expand class indices into one-hot rows
        if len(y_true.shape) == 1:
            y_true = np.eye(n_labels)[y_true]

        per_sample = -y_true / dvalues
        self.dinputs = per_sample / n_samples

In [None]:
### Softmax Activation Function
import numpy as np
class Softmax:
    """Softmax activation, normalising along axis 1."""

    def forward(self, inputs):
        """Set ``self.output`` to the softmax of ``inputs`` taken along axis 1."""
        # Subtract each row's maximum before exponentiating
        row_peak = np.max(inputs, axis=1, keepdims=True)
        exponentials = np.exp(inputs - row_peak)
        # Divide by the row totals so every row sums to 1
        row_total = np.sum(exponentials, axis=1, keepdims=True)
        self.output = exponentials / row_total

In [39]:
class Activation_Softmax_Loss_CategoricalCrossentropy:
    """Softmax activation and categorical cross-entropy loss handled as a single step."""

    def __init__(self):
        """Create the Softmax activation and the cross-entropy loss this class wraps."""
        self.loss = Loss_CategoricalEntropy()
        self.activation = Softmax()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs``, keep the result in ``self.output``, return the loss."""
        self.activation.forward(inputs)
        probabilities = self.activation.output
        self.output = probabilities
        return self.loss.calculate(probabilities, y_true)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` to ``(dvalues - one_hot(y_true)) / number_of_samples``.

        Args:
            dvalues: the predicted output, one row per sample.
            y_true: 1-D class indices, or 2-D rows reduced to indices via argmax.
        """
        n_samples = len(dvalues)

        # Reduce 2-D labels to class indices
        class_idx = np.argmax(y_true, axis=1) if len(y_true.shape) == 2 else y_true

        # Work on a copy so the caller's array is not modified
        grad = dvalues.copy()
        # Subtract 1 at each sample's true-class position
        grad[range(n_samples), class_idx] -= 1
        # Average over the samples
        self.dinputs = grad / n_samples