## Prerequisites

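Ensure your `Value`, `Neuron`, `Layer`, and `MLP` classes are correctly implemented and saved in a file named `micrograd_engine.py` that is importable from this notebook (for example, placed next to it), since the code below runs `from micrograd_engine import MLP, Value`.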

In [4]:
# A single-file script demonstrating a micrograd-style MLP training 
# on both the non-linear 'make_moons' and the more separable 'load_iris' 
# datasets, using mini-batching for speed.

import numpy as np
import matplotlib.pyplot as plt
import math
import random
from sklearn.datasets import make_moons, load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from micrograd_engine import MLP, Value

In [None]:
def get_batches(X, y, batch_size):
    """Yield shuffled mini-batches that together cover the data exactly once.

    The sample order is shuffled with ``np.random.shuffle`` on every call, so
    seed NumPy's global RNG beforehand if reproducible batches are needed.

    Args:
        X: Samples indexed along the first axis. Anything ``np.asarray``
            accepts (NumPy array, nested list, ...).
        y: Targets aligned with ``X``; must have the same length.
        batch_size: Positive number of samples per batch. The final batch is
            smaller when ``len(X)`` is not a multiple of ``batch_size``.

    Yields:
        ``(X_batch, y_batch)`` tuples of NumPy arrays.

    Raises:
        ValueError: If ``batch_size`` is not positive, or ``X`` and ``y``
            differ in length. Because this is a generator, the error is
            raised when iteration starts, not when the function is called.
    """
    # A negative step would make range() empty and silently skip training;
    # a zero step fails with an unrelated-looking range() error.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Coerce so that plain lists support the fancy indexing used below.
    X = np.asarray(X)
    y = np.asarray(y)

    num_samples = len(X)
    if len(y) != num_samples:
        raise ValueError(
            f"X and y must have the same length, got {num_samples} and {len(y)}"
        )

    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    for start in range(0, num_samples, batch_size):
        # Slicing clamps at the end of the array, so the last batch may be short.
        batch_indices = indices[start:start + batch_size]
        yield X[batch_indices], y[batch_indices]


def _first_output(output):
    """Return the single output node whether the model gave a list or a scalar."""
    return output[0] if isinstance(output, list) else output


def _forward(model, X_data):
    """Run the model on every row of X_data and return one output node per row."""
    return [_first_output(model([Value(v) for v in row])) for row in X_data]


def _predict_raw(model, X_data):
    """Return the model's raw scalar outputs (``.data``) for X_data as an array."""
    return np.array([out.data for out in _forward(model, X_data)])


def _evaluate(model, X_data, y_data, alpha):
    """Return ``(total_loss, accuracy_percent)`` on X_data using plain floats.

    total_loss is the summed squared error plus ``alpha`` times the sum of
    squared parameters; accuracy compares ``sign(output)`` with ``y_data``.
    """
    outputs = _predict_raw(model, X_data)
    # No gradients are needed for reporting, so work on .data instead of
    # building an autograd graph for the loss.
    data_loss = sum(((o - t) ** 2 for o, t in zip(outputs, y_data)), 0.0)
    reg_loss = alpha * sum(p.data * p.data for p in model.parameters())
    accuracy = np.mean(np.sign(outputs) == y_data) * 100
    return data_loss + reg_loss, accuracy


def _plot_decision_surface(model, X, X_train, y_train, title, xlabel, ylabel,
                           fill_values=None):
    """Contour-plot the model output over the first two (scaled) features.

    ``fill_values``, when given, supplies constant values for every feature
    beyond the first two so the model still receives a full input vector.
    """
    step = 0.02
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

    grid = np.c_[xx.ravel(), yy.ravel()]
    if fill_values is not None:
        grid = np.c_[grid, np.tile(fill_values, (grid.shape[0], 1))]

    Z = _predict_raw(model, grid).reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, cmap='coolwarm', alpha=0.8)
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='coolwarm', edgecolors='k')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # NOTE(review): the label assumes the MLP's output activation is tanh;
    # the engine is not visible from this file -- confirm.
    plt.colorbar(label='Model Output (tanh)')
    plt.show()


def train_and_visualize(dataset_name, X_full, y_full, n_layers, n_neurons_per_layer, 
                        epochs=1000, lr=0.01, alpha=1e-4, batch_size=32):
    """
    Sets up, trains, and visualizes an MLP using mini-batch gradient descent.

    The data is split 80/20 (fixed ``random_state=42``), standardized with
    statistics from the training split only, and fitted with a summed
    squared-error loss plus an L2 penalty. Train loss/accuracy is printed
    every 100 epochs and after the last epoch, followed by the accuracy on
    the held-out split. A loss-history plot is always shown; a decision
    surface is shown when the data has 2 or 4 features.

    Args:
        dataset_name: Label used in printed output and plot titles.
        X_full: 2-D array of samples, shape ``(n_samples, n_features)``.
        y_full: Targets in ``{-1.0, +1.0}``; accuracy is computed by comparing
            them with the sign of the model output.
        n_layers: Number of hidden layers.
        n_neurons_per_layer: Width of every hidden layer.
        epochs: Number of passes over the training split (0 is allowed).
        lr: Gradient-descent step size.
        alpha: L2 regularization coefficient.
        batch_size: Samples per mini-batch.

    Returns:
        None. Results are reported through ``print`` and matplotlib figures.
    """
    banner = "=" * 70
    print(banner)
    print(f"STARTING TRAINING FOR: {dataset_name.upper()} DATASET (Batch Size: {batch_size})")
    print(banner)

    # Split BEFORE scaling: fitting the scaler on the full dataset would leak
    # test-set mean/variance into training. The split itself is unchanged
    # (same random_state, same sample count).
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_full, y_full, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # Whole dataset in the same scaled space; used only for plot extents.
    X = scaler.transform(X_full)

    # Model Initialization
    n_features = X_train.shape[1]
    n_output = 1
    layer_sizes = [n_neurons_per_layer] * n_layers + [n_output]
    model = MLP(n_features, layer_sizes)

    # Fetched once: the update below mutates these objects in place (p.data),
    # which the training loop already relies on.
    params = model.parameters()

    print(f"Architecture: {n_features} -> {' -> '.join(map(str, layer_sizes))}")
    print(f"Total parameters: {len(params)}")

    history = []
    final_total_loss = None

    # --- Training Loop (Mini-Batch Gradient Descent) ---
    for epoch in range(epochs):
        epoch_loss = 0.0

        for X_batch, y_batch in get_batches(X_train, y_train, batch_size):
            # Forward pass: the autograd graph covers the current batch only.
            outputs = _forward(model, X_batch)

            # Summed (not averaged) squared error over the batch.
            data_loss = Value(0.0)
            for target, out in zip(y_batch, outputs):
                data_loss += (out - Value(target)) ** 2

            # L2 penalty, added once per batch.
            reg_loss = alpha * sum((p * p) for p in params)
            total_loss = data_loss + reg_loss

            # Reset gradients, backpropagate, then take one SGD step.
            for p in params:
                p.grad = 0.0
            total_loss.backward()
            for p in params:
                p.data += -lr * p.grad

            epoch_loss += total_loss.data

        history.append(epoch_loss)

        if (epoch + 1) % 100 == 0 or epoch == epochs - 1:
            final_total_loss, accuracy = _evaluate(model, X_train, y_train, alpha)
            print(f"Epoch {epoch + 1:4d} | Total Loss: {final_total_loss:.4f} | Train Acc: {accuracy:.2f}%")

    # With epochs == 0 the loop never reports, so compute the value the loss
    # plot title needs instead of failing with a NameError.
    if final_total_loss is None:
        final_total_loss, _ = _evaluate(model, X_train, y_train, alpha)

    # The held-out split was previously created but never used.
    _, test_accuracy = _evaluate(model, X_test, y_test, alpha)
    print(f"Held-out Test Acc: {test_accuracy:.2f}%")

    # Plot Loss History
    plt.figure(figsize=(10, 4))
    plt.plot(history)
    plt.title(f"{dataset_name.upper()} Loss History (Final Loss: {final_total_loss:.4f})")
    plt.xlabel("Epoch")
    plt.ylabel("Epoch Total Loss (Sum of Batches)")
    plt.grid(True)
    plt.show()

    # Plot Decision Boundary
    if n_features == 2:
        # Plotting for 2D data (Make Moons)
        _plot_decision_surface(
            model, X, X_train, y_train,
            title=f"{dataset_name.upper()} Decision Boundary (Batch Size: {batch_size})",
            xlabel="Feature 1 (Scaled)",
            ylabel="Feature 2 (Scaled)",
        )
    elif n_features == 4:
        # Plotting for 4D data using the first 2 features; the remaining
        # features are pinned to their training-set mean.
        # NOTE(review): any 4-feature dataset is labelled with the Iris
        # feature names here -- confirm that is intended.
        feature_names = load_iris().feature_names
        _plot_decision_surface(
            model, X, X_train, y_train,
            title=f"{dataset_name.upper()} Decision Boundary (First 2 Features, Batch Size: {batch_size})",
            xlabel=f"Feature 1: {feature_names[0]} (Scaled)",
            ylabel=f"Feature 2: {feature_names[1]} (Scaled)",
            fill_values=X_train[:, 2:].mean(axis=0),
        )


In [None]:
# Experiment 1: make_moons (non-linear classification).
X_moons, y_moons_int = make_moons(n_samples=200, random_state=42, noise=0.15)

# Remap the integer labels {0, 1} to the float targets {-1.0, +1.0} that the
# sign-based accuracy check in train_and_visualize compares against.
y_moons = np.where(y_moons_int != 0, 1.0, -1.0)

train_and_visualize(
    dataset_name="make_moons",
    X_full=X_moons,
    y_full=y_moons,
    n_layers=2,
    n_neurons_per_layer=16,
    epochs=200,
    lr=0.05,
    batch_size=32,
)

In [None]:
# Experiment 2: Iris, reduced to a binary problem (class 0 vs. the rest).
iris = load_iris()
X_iris_full = iris.data

# Class 0 becomes the positive target (+1.0); every other class maps to -1.0.
y_iris = np.where(iris.target != 0, -1.0, 1.0)

train_and_visualize(
    dataset_name="iris",
    X_full=X_iris_full,
    y_full=y_iris,
    n_layers=1,
    n_neurons_per_layer=8,
    epochs=100,
    lr=0.01,
    batch_size=16,
)