In [8]:
import numpy as np

# Neural Network Architecture: Input(3) -> Hidden1(3) -> Hidden2(3) -> Output(1)

# Fixed initial weights and biases
# Input to Hidden Layer 1 (3x3)
w_input_hidden1 = [
    [0.9, 0.2, 0.3],
    [0.2, 0.7, 0.4],
    [0.3, 0.1, 0.5]
]

# Hidden Layer 1 to Hidden Layer 2 (3x3)
w_hidden1_hidden2 = [
    [0.1, 0.2, 0.3],
    [0.2, 0.3, 0.4],
    [0.3, 0.4, 0.5]
]

# Hidden Layer 2 to Output (3x1)
w_hidden2_output = [0.1, 0.2, 0.3]

# Biases
b_hidden1 = [0.1, 0.2, 0.3]
b_hidden2 = [0.1, 0.2, 0.3]
b_output = 0.5

# Input values
x = [0.5, 0.6, 0.7]
y_target = 0.8
learning_rate = 0.46

# ReLU functions
def relu(x):
    return max(0, x)

def relu_derivative(x):
    return 1 if x > 0 else 0

# Forward pass
print("=== FORWARD PASS ===")

# Hidden Layer 1
hidden1_in = [sum(w_input_hidden1[i][j] * x[i] for i in range(3)) + b_hidden1[j] for j in range(3)]
hidden1_out = [relu(h) for h in hidden1_in]
print(f"Hidden1 input: {[round(h, 3) for h in hidden1_in]}")
print(f"Hidden1 output: {[round(h, 3) for h in hidden1_out]}")

# Hidden Layer 2
hidden2_in = [sum(w_hidden1_hidden2[i][j] * hidden1_out[i] for i in range(3)) + b_hidden2[j] for j in range(3)]
hidden2_out = [relu(h) for h in hidden2_in]
print(f"Hidden2 input: {[round(h, 3) for h in hidden2_in]}")
print(f"Hidden2 output: {[round(h, 3) for h in hidden2_out]}")

# Output
output_in = sum(w_hidden2_output[j] * hidden2_out[j] for j in range(3)) + b_output
output_out = relu(output_in)
print(f"Output input: {round(output_in, 3)}")
print(f"Network output: {round(output_out, 3)}")
print(f"Target output: {y_target}")
print(f"Error: {round(y_target - output_out, 3)}")

# Backward pass
print("\n=== BACKWARD PASS ===")

# Output error
output_error = y_target - output_out
output_delta = output_error * relu_derivative(output_in)
print(f"Output delta: {round(output_delta, 3)}")

# Hidden Layer 2 error
hidden2_error = [output_delta * w_hidden2_output[j] for j in range(3)]
hidden2_delta = [hidden2_error[j] * relu_derivative(hidden2_in[j]) for j in range(3)]
print(f"Hidden2 deltas: {[round(h, 3) for h in hidden2_delta]}")

# Hidden Layer 1 error
hidden1_error = [sum(hidden2_delta[j] * w_hidden1_hidden2[j][i] for j in range(3)) for i in range(3)]
hidden1_delta = [hidden1_error[i] * relu_derivative(hidden1_in[i]) for i in range(3)]
print(f"Hidden1 deltas: {[round(h, 3) for h in hidden1_delta]}")

# Update weights and biases
print("\n=== WEIGHT UPDATES ===")

# Store original weights for comparison
original_w_input_hidden1 = [row[:] for row in w_input_hidden1]
original_w_hidden1_hidden2 = [row[:] for row in w_hidden1_hidden2]
original_w_hidden2_output = w_hidden2_output[:]
original_b_hidden1 = b_hidden1[:]
original_b_hidden2 = b_hidden2[:]
original_b_output = b_output

# Hidden Layer 2 to Output
w_hidden2_output = [w_hidden2_output[j] + learning_rate * output_delta * hidden2_out[j] for j in range(3)]
b_output += learning_rate * output_delta

# Hidden Layer 1 to Hidden Layer 2
w_hidden1_hidden2 = [
    [w_hidden1_hidden2[i][j] + learning_rate * hidden2_delta[j] * hidden1_out[i] for j in range(3)]
    for i in range(3)
]
b_hidden2 = [b_hidden2[j] + learning_rate * hidden2_delta[j] for j in range(3)]

# Input to Hidden Layer 1
w_input_hidden1 = [
    [w_input_hidden1[i][j] + learning_rate * hidden1_delta[j] * x[i] for j in range(3)]
    for i in range(3)
]
b_hidden1 = [b_hidden1[j] + learning_rate * hidden1_delta[j] for j in range(3)]

# Round to 2 decimal places
w_input_hidden1 = [[round(w, 2) for w in row] for row in w_input_hidden1]
w_hidden1_hidden2 = [[round(w, 2) for w in row] for row in w_hidden1_hidden2]
w_hidden2_output = [round(w, 2) for w in w_hidden2_output]
b_hidden1 = [round(b, 2) for b in b_hidden1]
b_hidden2 = [round(b, 2) for b in b_hidden2]
b_output = round(b_output, 2)

# Display results
print("\n=== FINAL RESULTS ===")
print("ORIGINAL WEIGHTS & BIASES:")
print(f"Input→Hidden1 weights: {original_w_input_hidden1}")
print(f"Hidden1→Hidden2 weights: {original_w_hidden1_hidden2}")
print(f"Hidden2→Output weights: {original_w_hidden2_output}")
print(f"Hidden1 biases: {original_b_hidden1}")
print(f"Hidden2 biases: {original_b_hidden2}")
print(f"Output bias: {original_b_output}")

print("\nUPDATED WEIGHTS & BIASES:")
print(f"Input→Hidden1 weights: {w_input_hidden1}")
print(f"Hidden1→Hidden2 weights: {w_hidden1_hidden2}")
print(f"Hidden2→Output weights: {w_hidden2_output}")
print(f"Hidden1 biases: {b_hidden1}")
print(f"Hidden2 biases: {b_hidden2}")
print(f"Output bias: {b_output}")

print(f"\nNetwork Output: {round(output_out, 3)}")
print(f"Target Output: {y_target}")
print(f"Final Error: {round(y_target - output_out, 3)}")

=== FORWARD PASS ===
Hidden1 input: [0.88, 0.79, 1.04]
Hidden1 output: [0.88, 0.79, 1.04]
Hidden2 input: [0.658, 1.029, 1.4]
Hidden2 output: [0.658, 1.029, 1.4]
Output input: 1.192
Network output: 1.192
Target output: 0.8
Error: -0.392

=== BACKWARD PASS ===
Output delta: -0.392
Hidden2 deltas: [-0.039, -0.078, -0.117]
Hidden1 deltas: [-0.055, -0.078, -0.102]

=== WEIGHT UPDATES ===

=== FINAL RESULTS ===
ORIGINAL WEIGHTS & BIASES:
Input→Hidden1 weights: [[0.9, 0.2, 0.3], [0.2, 0.7, 0.4], [0.3, 0.1, 0.5]]
Hidden1→Hidden2 weights: [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5]]
Hidden2→Output weights: [0.1, 0.2, 0.3]
Hidden1 biases: [0.1, 0.2, 0.3]
Hidden2 biases: [0.1, 0.2, 0.3]
Output bias: 0.5

UPDATED WEIGHTS & BIASES:
Input→Hidden1 weights: [[0.89, 0.18, 0.28], [0.18, 0.68, 0.37], [0.28, 0.07, 0.47]]
Hidden1→Hidden2 weights: [[0.08, 0.17, 0.25], [0.19, 0.27, 0.36], [0.28, 0.36, 0.44]]
Hidden2→Output weights: [-0.02, 0.01, 0.05]
Hidden1 biases: [0.07, 0.16, 0.25]
Hidden2 biases:

In [14]:
# Configuration
LR = 0.8 # Learning Rate
TARGET = 3.0  # The value we want the network to predict
X = [1.0, 0.5, 2.0]  # Inputs x1, x2, x3

# 1. Initialize Weights and Biases (No random lib)
# Weights from Input (3) to Hidden (5) - 15 total
w_input_hidden = [[0.5 for _ in range(5)] for _ in range(3)]
# Biases for Hidden Layer (5)
b_hidden = [0.1 for _ in range(5)]

# Weights from Hidden (5) to Output (1) - 5 total
w_hidden_output = [0.5 for _ in range(5)]
# Bias for Output Layer (1)
b_output = 0.1


def show_state(label):
    print(f"--- {label} ---")
    print(f"Hidden Layer Biases: {[round(b, 2) for b in b_hidden]}")
    print(f"Output Layer Bias: {round(b_output, 2)}")
    print("Sample Weights (Input[0] to Hidden):", [round(w_input_hidden[0][i], 2) for i in range(5)])
    print("Weights (Hidden to Output):", [round(w, 2) for w in w_hidden_output])
    print("\n")

# Show Initial State
show_state("INITIAL STATE (UNUPDATED)")

# --- FORWARD PASS ---
# Calculate Hidden Layer Neurons (x4 to x8)
# Note: Using a simple linear activation for clarity
hidden_outputs = []
for j in range(5):
    neuron_sum = b_hidden[j]
    for i in range(3):
        neuron_sum += X[i] * w_input_hidden[i][j]
    hidden_outputs.append(neuron_sum)

# Calculate Output Neuron (x10)
output = b_output
for j in range(5):
    output += hidden_outputs[j] * w_hidden_output[j]

# --- BACKWARD PASS (Manual Update) ---
# 1. Calculate Error (Target - Prediction)
error = TARGET - output

# 2. Update Output Weights and Bias
# Gradient for output weights = error * hidden_output
for j in range(5):
    delta_w_out = error * hidden_outputs[j]
    w_hidden_output[j] += LR * delta_w_out

b_output += LR * error

# 3. Update Input-to-Hidden Weights and Biases
# Gradient for hidden = error * weight_to_output * input
for j in range(5):
    # Backpropagate error through the output weight
    hidden_error = error * w_hidden_output[j]
    
    for i in range(3):
        w_input_hidden[i][j] += LR * hidden_error * X[i]
    
    b_hidden[j] += LR * hidden_error

# Show Final State
show_state("UPDATED STATE (AFTER 1 ITERATION)")
def show_state(label):
    print(f"--- {label} ---")
    print(f"Hidden Layer Biases: {[round(b, 2) for b in b_hidden]}")
    print(f"Output Layer Bias: {round(b_output, 2)}")
    print("Sample Weights (Input[0] to Hidden):", [round(w_input_hidden[0][i], 2) for i in range(5)])
    print("Weights (Hidden to Output):", [round(w, 2) for w in w_hidden_output])
    print("\n")
print(f"Final Prediction was: {round(output, 2)}")

--- INITIAL STATE (UNUPDATED) ---
Hidden Layer Biases: [0.1, 0.1, 0.1, 0.1, 0.1]
Output Layer Bias: 0.1
Sample Weights (Input[0] to Hidden): [0.5, 0.5, 0.5, 0.5, 0.5]
Weights (Hidden to Output): [0.5, 0.5, 0.5, 0.5, 0.5]


--- UPDATED STATE (AFTER 1 ITERATION) ---
Hidden Layer Biases: [2.93, 2.93, 2.93, 2.93, 2.93]
Output Layer Bias: -1.28
Sample Weights (Input[0] to Hidden): [3.33, 3.33, 3.33, 3.33, 3.33]
Weights (Hidden to Output): [-2.05, -2.05, -2.05, -2.05, -2.05]


Final Prediction was: 4.72


In [3]:
import numpy as np
import math

# Neural Network Architecture: Input(3) -> Hidden1(3) -> Hidden2(3) -> Output(1)

# Fixed initial weights and biases
# Input to Hidden Layer 1 (3x3)
w_input_hidden1 = [
    [0.9, 0.2, 0.3],
    [0.2, 0.7, 0.4],
    [0.3, 0.1, 0.5]
]

# Hidden Layer 1 to Hidden Layer 2 (3x3)
w_hidden1_hidden2 = [
    [0.1, 0.2, 0.3],
    [0.2, 0.3, 0.4],
    [0.3, 0.4, 0.5]
]

# Hidden Layer 2 to Output (3x1)
w_hidden2_output = [0.1, 0.2, 0.3]

# Biases
b_hidden1 = [0.1, 0.2, 0.3]
b_hidden2 = [0.1, 0.2, 0.3]
b_output = 0.5

# Input values
x = [0.5, 0.6, 0.7]
y_target = 0.8
learning_rate = 0.46

# Tanh functions
def tanh(x):
    return math.tanh(x)

def tanh_derivative(x):
    return 1 - math.tanh(x)**2

# Forward pass
print("=== FORWARD PASS ===")

# Hidden Layer 1
hidden1_in = [sum(w_input_hidden1[i][j] * x[i] for i in range(3)) + b_hidden1[j] for j in range(3)]
hidden1_out = [tanh(h) for h in hidden1_in]
print(f"Hidden1 input: {[round(h, 3) for h in hidden1_in]}")
print(f"Hidden1 output: {[round(h, 3) for h in hidden1_out]}")

# Hidden Layer 2
hidden2_in = [sum(w_hidden1_hidden2[i][j] * hidden1_out[i] for i in range(3)) + b_hidden2[j] for j in range(3)]
hidden2_out = [tanh(h) for h in hidden2_in]
print(f"Hidden2 input: {[round(h, 3) for h in hidden2_in]}")
print(f"Hidden2 output: {[round(h, 3) for h in hidden2_out]}")

# Output
output_in = sum(w_hidden2_output[j] * hidden2_out[j] for j in range(3)) + b_output
output_out = tanh(output_in)
print(f"Output input: {round(output_in, 3)}")
print(f"Network output: {round(output_out, 3)}")
print(f"Target output: {y_target}")
print(f"Error: {round(y_target - output_out, 3)}")

# Backward pass
print("\n=== BACKWARD PASS ===")

# Output error
output_error = y_target - output_out
output_delta = output_error * tanh_derivative(output_in)
print(f"Output delta: {round(output_delta, 3)}")

# Hidden Layer 2 error
hidden2_error = [output_delta * w_hidden2_output[j] for j in range(3)]
hidden2_delta = [hidden2_error[j] * tanh_derivative(hidden2_in[j]) for j in range(3)]
print(f"Hidden2 deltas: {[round(h, 3) for h in hidden2_delta]}")

# Hidden Layer 1 error
hidden1_error = [sum(hidden2_delta[j] * w_hidden1_hidden2[j][i] for j in range(3)) for i in range(3)]
hidden1_delta = [hidden1_error[i] * tanh_derivative(hidden1_in[i]) for i in range(3)]
print(f"Hidden1 deltas: {[round(h, 3) for h in hidden1_delta]}")

# Update weights and biases
print("\n=== WEIGHT UPDATES ===")

# Store original weights for comparison
original_w_input_hidden1 = [row[:] for row in w_input_hidden1]
original_w_hidden1_hidden2 = [row[:] for row in w_hidden1_hidden2]
original_w_hidden2_output = w_hidden2_output[:]
original_b_hidden1 = b_hidden1[:]
original_b_hidden2 = b_hidden2[:]
original_b_output = b_output

# Hidden Layer 2 to Output
w_hidden2_output = [w_hidden2_output[j] + learning_rate * output_delta * hidden2_out[j] for j in range(3)]
b_output += learning_rate * output_delta

# Hidden Layer 1 to Hidden Layer 2
w_hidden1_hidden2 = [
    [w_hidden1_hidden2[i][j] + learning_rate * hidden2_delta[j] * hidden1_out[i] for j in range(3)]
    for i in range(3)
]
b_hidden2 = [b_hidden2[j] + learning_rate * hidden2_delta[j] for j in range(3)]

# Input to Hidden Layer 1
w_input_hidden1 = [
    [w_input_hidden1[i][j] + learning_rate * hidden1_delta[j] * x[i] for j in range(3)]
    for i in range(3)
]
b_hidden1 = [b_hidden1[j] + learning_rate * hidden1_delta[j] for j in range(3)]

# Round to 2 decimal places
w_input_hidden1 = [[round(w, 2) for w in row] for row in w_input_hidden1]
w_hidden1_hidden2 = [[round(w, 2) for w in row] for row in w_hidden1_hidden2]
w_hidden2_output = [round(w, 2) for w in w_hidden2_output]
b_hidden1 = [round(b, 2) for b in b_hidden1]
b_hidden2 = [round(b, 2) for b in b_hidden2]
b_output = round(b_output, 2)

# Display results
print("\n=== FINAL RESULTS ===")
print("ORIGINAL WEIGHTS & BIASES:")
print(f"Input→Hidden1 weights: {original_w_input_hidden1}")
print(f"Hidden1→Hidden2 weights: {original_w_hidden1_hidden2}")
print(f"Hidden2→Output weights: {original_w_hidden2_output}")
print(f"Hidden1 biases: {original_b_hidden1}")
print(f"Hidden2 biases: {original_b_hidden2}")
print(f"Output bias: {original_b_output}")

print("\nUPDATED WEIGHTS & BIASES:")
print(f"Input→Hidden1 weights: {w_input_hidden1}")
print(f"Hidden1→Hidden2 weights: {w_hidden1_hidden2}")
print(f"Hidden2→Output weights: {w_hidden2_output}")
print(f"Hidden1 biases: {b_hidden1}")
print(f"Hidden2 biases: {b_hidden2}")
print(f"Output bias: {b_output}")

print(f"\nNetwork Output: {round(output_out, 3)}")
print(f"Target Output: {y_target}")
print(f"Final Error: {round(y_target - output_out, 3)}")

=== FORWARD PASS ===
Hidden1 input: [0.88, 0.79, 1.04]
Hidden1 output: [0.706, 0.658, 0.778]
Hidden2 input: [0.536, 0.85, 1.164]
Hidden2 output: [0.49, 0.691, 0.822]
Output input: 0.934
Network output: 0.732
Target output: 0.8
Error: 0.068

=== BACKWARD PASS ===
Output delta: 0.031
Hidden2 deltas: [0.002, 0.003, 0.003]
Hidden1 deltas: [0.001, 0.002, 0.001]

=== WEIGHT UPDATES ===

=== FINAL RESULTS ===
ORIGINAL WEIGHTS & BIASES:
Input→Hidden1 weights: [[0.9, 0.2, 0.3], [0.2, 0.7, 0.4], [0.3, 0.1, 0.5]]
Hidden1→Hidden2 weights: [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5]]
Hidden2→Output weights: [0.1, 0.2, 0.3]
Hidden1 biases: [0.1, 0.2, 0.3]
Hidden2 biases: [0.1, 0.2, 0.3]
Output bias: 0.5

UPDATED WEIGHTS & BIASES:
Input→Hidden1 weights: [[0.9, 0.2, 0.3], [0.2, 0.7, 0.4], [0.3, 0.1, 0.5]]
Hidden1→Hidden2 weights: [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5]]
Hidden2→Output weights: [0.11, 0.21, 0.31]
Hidden1 biases: [0.1, 0.2, 0.3]
Hidden2 biases: [0.1, 0.2, 0.3]
Output b