In [3]:
# Cell 1: Imports and NeuralNetwork class definition

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

class NeuralNetwork:
    """Fully connected classifier trained by full-batch gradient descent.

    Hidden layers use ReLU, the output layer uses softmax, and training
    minimises the mean categorical cross-entropy against one-hot targets.
    """

    def __init__(self, layer_sizes, learning_rate=0.01, seed=1):
        """
        layer_sizes: list, e.g. [n_features, 16, 3]; needs at least an input
            and an output size, all of them positive.
        learning_rate: step size applied to every gradient update.
        seed: seed for the weight initialisation.

        Raises ValueError if fewer than two sizes are given or any size is
        not positive.
        """
        layer_sizes = list(layer_sizes)
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes needs at least an input size and an output size"
            )
        if any(size < 1 for size in layer_sizes):
            raise ValueError("every entry of layer_sizes must be positive")

        # A private legacy generator yields the same stream that seeding the
        # global RNG would, but leaves np.random's global state untouched, so
        # building a model no longer reseeds unrelated code.
        rng = np.random.RandomState(seed)
        self.learning_rate = learning_rate
        self.sizes = layer_sizes
        # He-style initialization: Gaussian weights scaled by sqrt(2 / fan_in)
        self.W = [rng.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)
                  for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])]
        self.b = [np.zeros((1, width)) for width in layer_sizes[1:]]

    def _relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def _relu_deriv(self, x):
        """Derivative of ReLU: 1.0 where x > 0, otherwise 0.0."""
        return (x > 0).astype(float)

    def _softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first so that
        np.exp cannot overflow."""
        ex = np.exp(x - np.max(x, axis=1, keepdims=True))
        return ex / np.sum(ex, axis=1, keepdims=True)

    def _cross_entropy(self, y_true, y_pred):
        """Mean categorical cross-entropy over the rows of y_true.

        Probabilities are clipped away from 0 and 1 so the log stays finite.
        """
        m = y_true.shape[0]
        p = np.clip(y_pred, 1e-12, 1 - 1e-12)
        return -np.sum(y_true * np.log(p)) / m

    def forward(self, X):
        """Propagate a batch through the network.

        Returns (zs, activations): zs holds each layer's pre-activation and
        activations holds X followed by each layer's output, the last entry
        being the softmax probabilities.
        """
        a = X
        activations = [a]
        zs = []
        # Every layer except the last is affine + ReLU
        for W, b in zip(self.W[:-1], self.b[:-1]):
            z = a.dot(W) + b
            zs.append(z)
            a = self._relu(z)
            activations.append(a)
        # Output layer is affine + softmax
        z = a.dot(self.W[-1]) + self.b[-1]
        zs.append(z)
        a = self._softmax(z)
        activations.append(a)
        return zs, activations

    def backward(self, X, y_true, zs, activations):
        """Backpropagate one batch and apply a gradient-descent step in place.

        X is only used for its row count; zs and activations must be the
        values returned by forward(X). Returns None: the weights and biases
        are updated directly.
        """
        grads_W = [np.zeros_like(W) for W in self.W]
        grads_b = [np.zeros_like(b) for b in self.b]
        m = X.shape[0]

        # Output layer delta: for softmax combined with cross-entropy the
        # gradient w.r.t. the logits is (probabilities - targets)
        delta = activations[-1] - y_true
        grads_W[-1] = activations[-2].T.dot(delta) / m
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True) / m

        # Backprop through hidden layers, indexing from the end of the lists:
        # depth=2 is the last hidden layer, depth=3 the one before it, ...
        for depth in range(2, len(self.sizes)):
            z = zs[-depth]
            delta = delta.dot(self.W[-depth + 1].T) * self._relu_deriv(z)
            grads_W[-depth] = activations[-depth - 1].T.dot(delta) / m
            grads_b[-depth] = np.sum(delta, axis=0, keepdims=True) / m

        # Update parameters
        for i in range(len(self.W)):
            self.W[i] -= self.learning_rate * grads_W[i]
            self.b[i] -= self.learning_rate * grads_b[i]

    def train(self, X, y_true, epochs=500, print_every=100):
        """Run `epochs` full-batch gradient-descent steps.

        The loss reported for an epoch is measured before that epoch's
        update. It is printed on epoch 1 and on every multiple of
        print_every; pass 0 or None as print_every to train silently.
        """
        # Guard the modulo below: 0 would raise ZeroDivisionError and None
        # would raise TypeError.
        log_progress = bool(print_every) and print_every > 0
        for epoch in range(1, epochs + 1):
            zs, activations = self.forward(X)
            loss = self._cross_entropy(y_true, activations[-1])
            self.backward(X, y_true, zs, activations)
            if log_progress and (epoch % print_every == 0 or epoch == 1):
                print(f"Epoch {epoch}/{epochs}  Loss: {loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for each row of X."""
        _, activations = self.forward(X)
        return np.argmax(activations[-1], axis=1)

    def accuracy(self, X, y_true_labels):
        """Fraction of rows of X whose predicted class matches the label.

        y_true_labels may be a flat array of class indices, an (n, 1) column
        of class indices, or an (n, k) one-hot matrix with k > 1.
        """
        labels = np.asarray(y_true_labels)
        if labels.ndim == 2:
            # Without this, comparing (n,) predictions with an (n, 1) column
            # broadcasts to an (n, n) matrix and yields a meaningless mean.
            if labels.shape[1] == 1:
                labels = labels.ravel()
            else:
                labels = np.argmax(labels, axis=1)
        preds = self.predict(X)
        return np.mean(preds == labels)


In [4]:
# Cell 2: Load & preprocess the Iris dataset

# --- Raw data: feature matrix plus integer class ids as a column vector ---
iris = load_iris()
X, y = iris.data, iris.target.reshape(-1, 1)

# --- Targets: dense one-hot rows built from the class-id column ---
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(y)
Y = encoder.transform(y)

# --- Hold out 20% for testing; the flat integer labels are split alongside
#     the one-hot targets so both forms stay row-aligned with the features ---
(X_train, X_test,
 Y_train, Y_test,
 y_train_lbl, y_test_lbl) = train_test_split(
    X, Y, y.ravel(), test_size=0.2, random_state=42
)

# --- Standardize using statistics fitted on the training split only ---
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [5]:
# Cell 3: Initialize and train the network

# One hidden layer: 4 input features -> 8 ReLU units -> 3 softmax outputs
nn = NeuralNetwork(layer_sizes=[4, 8, 3], learning_rate=0.05)
nn.train(X=X_train, y_true=Y_train, epochs=1000, print_every=200)


Epoch 1/1000  Loss: 1.6090
Epoch 200/1000  Loss: 0.3149
Epoch 400/1000  Loss: 0.2150
Epoch 600/1000  Loss: 0.1550
Epoch 800/1000  Loss: 0.1222
Epoch 1000/1000  Loss: 0.1029


In [6]:
# Cell 4: Evaluate on test set

# Score the trained network on the held-out split, comparing predicted class
# indices against the integer (not one-hot) test labels.
acc = nn.accuracy(X=X_test, y_true_labels=y_test_lbl)
print(f"\nTest set accuracy: {acc:.2%}")



Test set accuracy: 96.67%
