In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas as pd

In [2]:
iris = load_iris()
X = iris.data  #Sepal length, Sepal width, Petal length, Petal width
y = iris.target.reshape(-1, 1)  #Labels as column vector

In [3]:
print(np.unique(y))


[0 1 2]


In [4]:
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y)

In [5]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))  # σ(z) = 1 / (1 + e^(-z))

In [8]:
def sigmoid_derivative(a):
    return a * (1 - a)  # σ'(z) = σ(z) * (1 - σ(z)) its also y^

In [9]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # Numerical Stability trick
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [10]:
def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]  # number of samples
    return -np.sum(y_true * np.log(y_pred + 1e-9)) / m  # L = -(1/m) Σ y_true * log(y_pred)

In [16]:
def forward_pass(X, W1, b1, W2, b2):
    # Hidden layer
    Z1 = np.dot(X, W1) + b1           # Rule: Z1 = X · W1 + b1
    A1 = sigmoid(Z1)                  # Rule: A1 = σ(Z1)

    # Output layer
    Z2 = np.dot(A1, W2) + b2           # Rule: Z2 = A1 · W2 + b2
    A2 = softmax(Z2)                   # Rule: A2 = softmax(Z2)

    return Z1, A1, Z2, A2

In [17]:
def backward_pass(X, y, Z1, A1, A2, W2):
    m = X.shape[0]

    # Output layer error
    dZ2 = A2 - y                      # Rule: ∂L/∂Z2 = A2 - y
    dW2 = (1/m) * np.dot(A1.T, dZ2)   # Rule: ∂L/∂W2 = (1/m) · A1^T · dZ2
    db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)  # Rule: ∂L/∂b2 = (1/m) Σ dZ2

    # Hidden layer error
    dA1 = np.dot(dZ2, W2.T)           # Rule: ∂L/∂A1 = dZ2 · W2^T
    dZ1 = dA1 * sigmoid_derivative(A1)  # Rule: ∂L/∂Z1 = ∂L/∂A1 ⊙ σ'(Z1)
    dW1 = (1/m) * np.dot(X.T, dZ1)    # Rule: ∂L/∂W1 = (1/m) · X^T · dZ1
    db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)  # Rule: ∂L/∂b1 = (1/m) Σ dZ1

    return dW1, db1, dW2, db2

In [18]:
np.random.seed(42)
input_size = X_train.shape[1]  # 4 features
hidden_size = 8                # neurons in hidden layer
output_size = y_train.shape[1] # 3 classes

W1 = np.random.randn(input_size, hidden_size) * 0.01
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01
b2 = np.zeros((1, output_size))

In [19]:
lr = 0.1
epochs = 500

for epoch in range(epochs):
    # Forward pass
    Z1, A1, Z2, A2 = forward_pass(X_train, W1, b1, W2, b2)

    # Loss
    loss = cross_entropy_loss(y_train, A2)

    # Backward pass
    dW1, db1, dW2, db2 = backward_pass(X_train, y_train, Z1, A1, A2, W2)

    # Update parameters (Gradient Descent Rule)
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if (epoch + 1) % 50 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

Epoch 50, Loss: 1.0981
Epoch 100, Loss: 1.0952
Epoch 150, Loss: 1.0691
Epoch 200, Loss: 0.9253
Epoch 250, Loss: 0.6961
Epoch 300, Loss: 0.5580
Epoch 350, Loss: 0.4831
Epoch 400, Loss: 0.4336
Epoch 450, Loss: 0.3958
Epoch 500, Loss: 0.3645


In [20]:
_, A1_test, _, A2_test = forward_pass(X_test, W1, b1, W2, b2)
predictions = np.argmax(A2_test, axis=1)
true_labels = np.argmax(y_test, axis=1)
accuracy = np.mean(predictions == true_labels)

print(f"\nTest Accuracy: {accuracy*100:.2f}%")


Test Accuracy: 93.33%
