# In [4]:
import ssl
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

# --- MAC SSL FIX ---
# WARNING: this swaps the ssl module's default HTTPS context factory for the
# unverified one, so certificate checks are skipped for every HTTPS request
# that relies on that default, for the rest of the process.
# NOTE(review): presumably only needed so the MNIST download succeeds on
# macOS installs that lack root certificates - confirm, and prefer installing
# the certificates over keeping this override.
ssl._create_default_https_context = ssl._create_unverified_context
# -------------------
# ============================================
# 1. RAW MATH HELPER FUNCTIONS
# ============================================

def one_hot_encode(y, classes=10):
    """Converts (N,) labels to (N, C) one-hot matrix.

    Args:
        y: Array-like of N class labels (list, tuple or ndarray). Values are
            cast to int and flattened to one dimension.
        classes: Number of columns C in the result.

    Returns:
        Float array of shape (N, classes) holding a single 1 per row, in the
        column named by that row's label.

    Raises:
        IndexError: If a label does not fit in ``classes`` columns.
    """
    labels = np.asarray(y).astype(int).reshape(-1)
    m = labels.shape[0]
    one_hot = np.zeros((m, classes))
    # Integer-array indexing sets every (row, label) cell in one step
    # instead of looping over the rows in Python.
    one_hot[np.arange(m), labels] = 1
    return one_hot

def softmax(z):
    """Row-wise softmax of a 2-D score matrix, computed stably.

    Each row is shifted by its own maximum before exponentiation so the
    largest exponent is exp(0); the shift cancels out in the normalisation.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_peak)
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of ``y_true``.

    ``y_true`` and ``y_pred`` are multiplied element-wise, so they must have
    compatible shapes; the sum is divided by the number of rows in ``y_true``.
    """
    sample_count = y_true.shape[0]
    # The small offset keeps log() finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-15)
    weighted_total = np.sum(y_true * log_probs)
    return -weighted_total / sample_count

def accuracy_score(y_true_labels, y_pred_labels):
    """Returns the mean of the element-wise equality of the two label sets."""
    matches = y_true_labels == y_pred_labels
    return np.mean(matches)

# ============================================
# 2. LOGISTIC REGRESSION CLASS
# ============================================

class LogisticRegression:
    """Multinomial (softmax) logistic regression trained by gradient descent.

    Args:
        learning_rate: Step size applied to every gradient update.
        iterations: Number of passes of the outer training loop (epochs).
        batch_size: Rows per mini-batch. A falsy value (``None`` or ``0``)
            selects full-batch gradient descent.
        verbose: When true, print the recorded cost on epochs divisible by 50.

    Attributes set by ``fit``: ``W`` with shape (features, classes), ``b``
    with shape (1, classes), and ``cost_history``, the list of recorded
    training costs.
    """

    def __init__(self, learning_rate=0.1, iterations=1000, batch_size=None, verbose=False):
        self.lr = learning_rate
        self.epochs = iterations
        self.batch_size = batch_size
        self.verbose = verbose
        self.W = None
        self.b = None
        self.cost_history = []

    def fit(self, X, y):
        """Train the model from scratch.

        Args:
            X: Feature matrix of shape (m, n).
            y: One-hot target matrix of shape (m, c).

        Every call re-initialises the parameters and the cost log, so the
        recorded history always describes the most recent run only.
        """
        # Dimensions
        m, n = X.shape
        c = y.shape[1]  # Number of classes (10 for MNIST)

        # Small random weights (fixed 0.01 scale) and a zero bias.
        self.W = np.random.randn(n, c) * 0.01
        self.b = np.zeros((1, c))
        # The parameters restart here, so costs from an earlier fit() are
        # stale and must not be mixed into this run's curve.
        self.cost_history = []

        # Gradient Descent Loop
        for i in range(self.epochs):

            # --- Batch Handling (SGD vs GD) ---
            if self.batch_size:
                # Mini-batch gradient descent over a fresh shuffle per epoch
                indices = np.random.permutation(m)
                X_shuffled = X[indices]
                y_shuffled = y[indices]

                for j in range(0, m, self.batch_size):
                    X_batch = X_shuffled[j:j+self.batch_size]
                    y_batch = y_shuffled[j:j+self.batch_size]
                    self._update_weights(X_batch, y_batch)
            else:
                # Full Batch Gradient Descent
                self._update_weights(X, y)

            # Record the cost on the full training set every 10th epoch and
            # on the final epoch.
            if i % 10 == 0 or i == self.epochs - 1:
                cost = cross_entropy_loss(y, self.predict_proba(X))
                self.cost_history.append(cost)

                if self.verbose and i % 50 == 0:
                    print(f"Epoch {i}: Cost {cost:.4f}")

    def _update_weights(self, X, y):
        """Apply one gradient step computed from the batch (X, y)."""
        m = X.shape[0]

        # Forward
        A = self.predict_proba(X)

        # Backward: gradient of the loss w.r.t. the pre-softmax scores
        dz = A - y
        dw = (1/m) * np.dot(X.T, dz)
        db = (1/m) * np.sum(dz, axis=0, keepdims=True)

        # Update
        self.W -= self.lr * dw
        self.b -= self.lr * db

    def predict_proba(self, X):
        """Return the softmax of ``X @ W + b``, one row per sample."""
        Z = np.dot(X, self.W) + self.b
        return softmax(Z)

    def predict(self, X):
        """Return the index of the highest-probability class for each row."""
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

    def score(self, X, y_true_labels):
        """Return the accuracy of ``predict(X)`` against ``y_true_labels``.

        ``y_true_labels`` may be raw labels or a one-hot matrix; any input
        with more than one dimension is reduced with argmax first.
        """
        pred = self.predict(X)
        # Handle if y_true is one-hot or raw labels
        if y_true_labels.ndim > 1:
            y_true_labels = np.argmax(y_true_labels, axis=1)
        return accuracy_score(y_true_labels, pred)

# ============================================
# 3. UTILITY FUNCTIONS
# ============================================

def plot_cost_history(history, title):
    """Draws ``history`` as a line on a new 10x6 figure titled ``title``.

    The figure is left open as the current pyplot figure; nothing is saved
    or shown here.
    """
    plt.figure(figsize=(10, 6))

    # Curve first so the legend has an entry to pick up.
    plt.plot(history, label="Training Cost")
    plt.legend()

    # Labelling and cosmetics.
    plt.xlabel("Iterations (x10)")
    plt.ylabel("Cost")
    plt.title(title)
    plt.grid(True)

def load_and_prep_mnist():
    """Loads MNIST through Keras and prepares it for the classifier.

    Returns a 6-tuple:
        (X_train, y_train_one_hot, y_train_raw,
         X_test, y_test_one_hot, y_test_raw)
    where the X arrays are flattened per sample, cast to float32 and divided
    by 255, and the raw label arrays are returned exactly as loaded.
    """
    print("   [System] Loading MNIST from Keras...")
    train_split, test_split = mnist.load_data()
    train_images, train_digits = train_split
    test_images, test_digits = test_split

    def _flatten_and_scale(images):
        # One row per sample (e.g. (N, 28, 28) -> (N, 784)), then 0-255 -> 0-1.
        rows = images.reshape(images.shape[0], -1)
        return rows.astype('float32') / 255.0

    return (
        _flatten_and_scale(train_images),
        one_hot_encode(train_digits),
        train_digits,
        _flatten_and_scale(test_images),
        one_hot_encode(test_digits),
        test_digits,
    )

# ============================================
# 4. MAIN EXECUTION
# ============================================

def main():
    """Runs the demo: load MNIST, train two models, evaluate, plot.

    Trains one full-batch model and one mini-batch model on a random
    10000-sample subset of the training data, prints their test accuracy,
    saves the full-batch cost curve to ``mnist_cost_history.png`` and prints
    ten example predictions.

    Returns:
        Tuple ``(model_gd, model_sgd)`` of the two trained models.
    """
    rule = "=" * 60

    def banner(heading, trailer=""):
        # Blank line, rule, heading, rule (optionally followed by a newline).
        print("\n" + rule)
        print(heading)
        print(rule + trailer)

    banner("LOGISTIC REGRESSION (MNIST) - FROM SCRATCH", trailer="\n")

    # 1. Load Data
    (X_train, y_train, y_train_labels,
     X_test, y_test, y_test_labels) = load_and_prep_mnist()

    # Subsample for speed (Optional: Comment out to use full dataset)
    subset = np.random.choice(X_train.shape[0], 10000, replace=False)
    X_train, y_train, y_train_labels = (
        X_train[subset],
        y_train[subset],
        y_train_labels[subset],
    )

    print(f"Training Matrix: {X_train.shape}")
    print(f"Testing Matrix:  {X_test.shape}")

    # 2. Train Models
    banner("TRAINING MODELS")

    # A. Gradient Descent
    print("\n--- 1. Batch Gradient Descent ---")
    model_gd = LogisticRegression(learning_rate=0.1, iterations=200, verbose=True)
    model_gd.fit(X_train, y_train)

    # B. Stochastic Gradient Descent (Mini-Batch)
    print("\n--- 2. Mini-Batch SGD (Batch Size=64) ---")
    model_sgd = LogisticRegression(learning_rate=0.1, iterations=10, batch_size=64, verbose=True)
    model_sgd.fit(X_train, y_train)

    # 3. Evaluation
    banner("EVALUATION")

    trained = {"Gradient Descent": model_gd, "Mini-Batch SGD": model_sgd}
    for name, model in trained.items():
        acc = model.score(X_test, y_test_labels)
        print(f"{name} Test Accuracy: {acc*100:.2f}%")

    # 4. Visualization
    print("\nGenerating Cost History Plot...")
    plot_cost_history(model_gd.cost_history, "Cost History (Batch GD)")
    plt.savefig("mnist_cost_history.png")
    print("Saved: mnist_cost_history.png")

    # Show predictions for the first ten test samples
    print("\nExample Predictions:")
    first_ten = slice(None, 10)
    preds = model_gd.predict(X_test[first_ten])
    actual = y_test_labels[first_ten]
    print(f"Predicted: {preds}")
    print(f"Actual:    {actual}")

    return model_gd, model_sgd

# Script entry point: run the full demo only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
    # Display the open matplotlib figure(s), i.e. the cost plot built in main().
    plt.show()

# Output: ModuleNotFoundError: No module named 'tensorflow'