# Neural Network from Scratch

This notebook implements forward propagation and backpropagation in a simple neural network with ReLU and softmax, trained using gradient descent.

- 6 layers
    - 1 input layer with 2 nodes
    - 4 hidden layer with 4 nodes
    - 1 output layer with 2 nodes
- Used ReLU and Softmax activation function
- For loss function - Mean Squared Error is being used

In [32]:
import random
import math

# Activation Functions

ReLU(z) = max(0, z)

In [33]:
def relu(x):
    return [max(0, elem) for elem in x]

Derivative:  
d(ReLU)/dz = {  
    1 if z > 0    
    0 if z <= 0  
}

In [34]:
def relu_derivative(x):
    return [1 if elem > 0 else 0 for elem in x]

softmax(zᵢ) = e^(zᵢ) / Σⱼ e^(zⱼ)

If combined with cross-entropy, simplifies gradient:\
dL/dzᵢ = y_predᵢ - y_trueᵢ

In [35]:
def softmax(x):
    exp_values = [math.exp(i) for i in x]
    sum_exp = sum(exp_values)
    if sum_exp == 0:
        # To avoid division by zero, add a small epsilon or handle gracefully
        sum_exp = 1e-9
    return [i / sum_exp for i in exp_values]

# Weight initialization

In [36]:
# Code to multiply 2x2 matrix 

a = [[1, 2],
     [3, 4]]

b = [[8, 10],
     [12, 14]]

result = [[0 for _ in range(len(b[0]))] for _ in range(len(a))]
     
for i in range(len(a)):
    for j in range(len(b[0])):
        for k in range(len(b)):
            # print(i,j,k)
            print(a[i][j], b[k][j])
            result[i][j] += a[i][k] * b[k][j]
            
print(result)

1 8
1 12
2 10
2 14
3 8
3 12
4 10
4 14
[[32, 38], [72, 86]]


In [37]:
def initialize_weights(sizes):
    weights = []
    biases = []
    for i in range(len(sizes)-1):
        # Random weights and zero biases
        layer_weights = [[random.uniform(-1, 1) for _ in range(sizes[i])] for _ in range(sizes[i+1])]
        layer_biases = [0 for _ in range(sizes[i+1])]
        weights.append(layer_weights)
        biases.append(layer_biases)
    return weights, biases

In [38]:
x = (2, 4, 4, 4, 4, 2)
initialize_weights(x)

([[[0.7909436473037181, 0.11450365738236146],
   [-0.1193163242275408, -0.6832460558832945],
   [-0.16114277645416109, 0.06970576655575966],
   [-0.28154998635479744, 0.19659055754753973]],
  [[0.8292025448370484,
    0.13616656774113634,
    0.39250745273014886,
    0.7859096761723436],
   [-0.857805933983323,
    -0.5873495581705399,
    0.7495955093550741,
    0.8405350828501246],
   [-0.6214864596754195,
    -0.5566016515862193,
    0.5510933504868629,
    -0.18895771636345216],
   [0.5743615469340049,
    -0.8523353327558694,
    -0.5049116560477478,
    -0.4138667515006571]],
  [[-0.6415352729087311,
    0.40324624742270987,
    0.4811426637732912,
    0.7463139273963548],
   [0.3560034450562801,
    0.7698927458096925,
    0.3979840602331959,
    -0.3820489171492851],
   [0.16017038409018913,
    -0.7297960598568896,
    0.2920237722941521,
    0.9747455635357556],
   [-0.4256159426289965,
    0.4622847516603863,
    -0.4287829373147065,
    0.9621058000851899]],
  [[0.397319038

In [39]:
def dot_product(weights, inputs, biases):
    output = []
    for w_row, b in zip(weights, biases):
        output.append(sum(w * i for w, i in zip(w_row, inputs)) + b)
    return output

## Forward Propagation

For one layer:
- z = W·x + b
- a = activation(z)

For multiple layers:
- z⁽ˡ⁾ = W⁽ˡ⁾·a⁽ˡ⁻¹⁾ + b⁽ˡ⁾
- a⁽ˡ⁾ = activation(z⁽ˡ⁾)

Here:\
l = index of the layer    

In [40]:
def forward_pass(x, weights, biases):
    activations = [x]
    inputs = x
    for i in range(len(weights)-1):
        z = dot_product(weights[i], inputs, biases[i])
        inputs = relu(z)
        activations.append(inputs)
        # ---
    # Final layer: softmax
    z = dot_product(weights[-1], inputs, biases[-1])
    output = softmax(z)
    activations.append(output)
    return activations

L = (1/m) * Σ (y_true - y_pred)²

dL/dy_pred = 2 * (y_pred - y_true)

In [41]:
def mse_loss(pred, target):
    n = len(pred)
    print([(p - t) ** 2 for p, t in zip(pred, target)])
    return sum((p - t) ** 2 for p, t in zip(pred, target)) / n

In [42]:
pred = [0.7, 0.3]   # predicted probabilities from model output
target = [1, 0]     # one-hot encoded true label

loss = mse_loss(pred, target)
print("MSE Loss:", loss)

[0.09000000000000002, 0.09]
MSE Loss: 0.09000000000000001


# Backward Propagation

dL/dW = dL/dA * dA/dZ * dZ/dW

Where:
- dA/dZ = activation'(Z)
- dZ/dW = A_prev.T


In [47]:
def backward_pass(activations, weights, biases, y_true, learning_rate):
    deltas = [None] * len(weights)

    # Output layer delta (softmax + cross-entropy)
    # for a, y in zip(activations[-1], y_true):
        # print(a, y)
    delta = [a - y for a, y in zip(activations[-1], y_true)]
    deltas[-1] = delta

    # Back propagate through hidden layers
    for l in range(len(weights)-2, -1, -1):
        layer_weights = weights[l+1]
        next_delta = deltas[l+1]
        z = activations[l+1]
        relu_deriv = relu_derivative(z)

        # Weighted sum of next layer deltas
        delta = []
        for i in range(len(weights[l])):
            error = sum(layer_weights[j][i] * next_delta[j] for j in range(len(next_delta)))
            delta.append(error * relu_deriv[i])
        deltas[l] = delta

    # Update weights and biases
    for l in range(len(weights)):
        for i in range(len(weights[l])):
            for j in range(len(weights[l][i])):
                weights[l][i][j] -= learning_rate * deltas[l][i] * activations[l][j]
            biases[l][i] -= learning_rate * deltas[l][i]


In [44]:
def train(X, Y, epochs=1000, learning_rate=0.01):
    sizes = [2, 4, 4, 4, 4, 2]
    weights, biases = initialize_weights(sizes)
    
    for epoch in range(epochs):
        total_loss = 0
        for x, y in zip(X, Y):
            activations = forward_pass(x, weights, biases)
            loss = mse_loss(activations[-1], y)
            total_loss += loss
            backward_pass(activations, weights, biases, y, learning_rate)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Loss = {total_loss:.4f}")
    
    return weights, biases


In [50]:
# XOR-like dummy dataset
X = [[0,0], [0,1], [1,0], [1,1]]
Y = [[1,0], [0,1], [0,1], [1,0]]

weights, biases = train(X, Y, epochs=1000, learning_rate=0.05)

# Test
for x in X:
    prediction = forward_pass(x, weights, biases)[-1]
    print(f"Input: {x} -> Prediction: {prediction}")


[0.25, 0.25]
[0.25589775836552753, 0.2558977583655274]
[0.21346130834832472, 0.21346130834832477]
[0.33662603850210354, 0.33662603850210365]
Epoch 0: Loss = 1.0560
[0.2775978113734148, 0.2775978113734148]
[0.24378896031599498, 0.24378896031599503]
[0.22452393375767118, 0.22452393375767113]
[0.32411653468901896, 0.32411653468901896]
[0.2674386185291159, 0.267438618529116]
[0.244503846799719, 0.24450384679971895]
[0.23514851053532299, 0.2351485105353231]
[0.31262884691312026, 0.31262884691312026]
[0.2586167171810766, 0.2586167171810765]
[0.2504130019718179, 0.2504130019718179]
[0.24053855852228653, 0.24053855852228653]
[0.30695558566829934, 0.30695558566829945]
[0.2539385013661587, 0.2539385013661587]
[0.25271446171946693, 0.2527144617194668]
[0.24650971278694486, 0.24650971278694486]
[0.3008135829017182, 0.3008135829017182]
[0.24910683872932277, 0.24910683872932265]
[0.25461105418029206, 0.25461105418029206]
[0.2508273009457386, 0.2508273009457386]
[0.29647103891782306, 0.29647103891782

In [None]:
class A:
    x = 10

a = A()
print(getattr(a, 'x'))  # 10
print(getattr(a, 'y', 'Not Found'))  # 'Not Found'

10
Not Found


In [None]:
setattr(a, 'y', 20)
print(a.y)  # 20

20


In [None]:
print(hasattr(a, 'x')) 

True


In [None]:
code = """
def hello():
    print("Hello from exec!")
"""

exec(code)
hello()

Hello from exec!


In [None]:
x = 10
y = 20
print(eval("x + y"))  # 30

30


In [None]:
# Original class
class A:
    def greet(self):
        print("Hello")

# Monkey patching greet method
def new_greet(self):
    print("Namaste!")

A.greet = new_greet

a = A()
a.greet()  # Namaste!

Namaste!
