<a href="https://colab.research.google.com/github/asheta66/Machine-Learning-2024/blob/main/ELM/FFNN_ELM_Classification_Updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# diabetes_ffnn_elm_fixed.py
# ------------------------------------------------------------
# Binary classification for Diabetes/Heart dataset with:
#   - Feedforward Neural Network (FFNN, sklearn MLPClassifier in a Pipeline)
#   - Extreme Learning Machine (ELM)
#   - Proper ROC/Confusion plots saved to ./figs
# ------------------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, RocCurveDisplay
)
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import Ridge

# ----------------------------
# Repro & I/O
# ----------------------------
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
os.makedirs("figs", exist_ok=True)

# ============================================================
# 1) Load data
# ============================================================
CSV_PATH = "diabetes.csv"   # or "heart.csv"
df = pd.read_csv(CSV_PATH)
df.columns = [c.strip() for c in df.columns]

# Auto-detect target column
if "Outcome" in df.columns:
    target = "Outcome"
else:
    raise ValueError("Target column 'Outcome' not found in the dataset.")

feature_cols = [c for c in df.columns if c != target]
X = df[feature_cols].copy()
y = df[target].astype(int).copy()

numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = [c for c in X.columns if c not in numeric_cols]

# ============================================================
# 2) Preprocessing
# ============================================================
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

preprocess = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_cols),
        ("cat", categorical_transformer, categorical_cols)
    ],
    remainder="drop"
)

# ============================================================
# 3) ELM Classifier
# ============================================================
class ELMClassifier(BaseEstimator, ClassifierMixin):
    """
    Extreme Learning Machine (binary classification)
      - Random input → hidden weights
      - Supports multiple activations
      - Output weights via ridge regression
    """
    def __init__(self, n_hidden=500, activation="relu", alpha=1e-2, random_state=RANDOM_STATE):
        self.n_hidden = n_hidden
        self.activation = activation
        self.alpha = alpha
        self.random_state = random_state

    def _act(self, Z):
        if self.activation == "relu":
            return np.maximum(0.0, Z)
        elif self.activation == "sigmoid":
            return 1.0 / (1.0 + np.exp(-Z))
        elif self.activation == "tanh":
            return np.tanh(Z)
        elif self.activation == "linear":
            return Z
        else:
            raise ValueError(f"Unknown activation: {self.activation}")

    def fit(self, X, y):
        rng = np.random.default_rng(self.random_state)
        X = np.asarray(X)
        y = np.asarray(y).astype(float).reshape(-1, 1)
        n_features = X.shape[1]

        # Random projection
        self.W_ = rng.normal(loc=0.0, scale=1.0, size=(n_features, self.n_hidden))
        self.b_ = rng.normal(loc=0.0, scale=1.0, size=(self.n_hidden,))
        H = self._act(X @ self.W_ + self.b_)

        # Ridge regression on hidden outputs
        self.ridge_ = Ridge(alpha=self.alpha, fit_intercept=False, random_state=self.random_state)
        self.ridge_.fit(H, y)
        return self

    def decision_function(self, X):
        X = np.asarray(X)
        H = self._act(X @ self.W_ + self.b_)
        return self.ridge_.predict(H).ravel()

    def predict_proba(self, X):
        scores = self.decision_function(X)
        probs1 = 1.0 / (1.0 + np.exp(-np.clip(scores, -20, 20)))
        probs0 = 1.0 - probs1
        return np.vstack([probs0, probs1]).T

    def predict(self, X):
        return (self.predict_proba(X)[:, 1] >= 0.5).astype(int)

# ============================================================
# 4) Metrics Helper
# ============================================================
def compute_metrics(model_name, y_true, y_proba, y_pred):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred)  # sensitivity
    f1 = f1_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_proba[:, 1])
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    spec = tn / (tn + fp) if (tn + fp) > 0 else float("nan")
    return {
        "model": model_name, "accuracy": acc, "precision": prec,
        "recall": rec, "specificity": spec, "f1": f1, "roc_auc": auc
    }

# ============================================================
# 5) Train/Test Split
# ============================================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
)

# Pre-fit a transformer for ELM (we’ll reuse same preprocessing)
preprocess_fit = preprocess.fit(X_train, y_train)
Xtr_enc = preprocess_fit.transform(X_train)
Xte_enc = preprocess_fit.transform(X_test)

# ============================================================
# 6) Define Models
# ============================================================
ffnn = Pipeline(steps=[
    ("preprocess", preprocess),
    ("clf", MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation="relu",
        solver="adam",
        alpha=1e-3,
        max_iter=500,
        random_state=RANDOM_STATE,
        early_stopping=True,
        n_iter_no_change=15,
        validation_fraction=0.15,
        verbose=False
    ))
])

elm_configs = [
    {"n_hidden": 256,  "activation": "relu",    "alpha": 1e-2},
    {"n_hidden": 500,  "activation": "sigmoid", "alpha": 1e-2},
    {"n_hidden": 1000, "activation": "tanh",    "alpha": 1e-3},
    {"n_hidden": 2000, "activation": "relu",    "alpha": 1e-1},
]

# ============================================================
# 7) Train and Evaluate
# ============================================================
results = []

# ---- FFNN (Pipeline) ----
ffnn.fit(X_train, y_train)
ytr_ffnn_proba = ffnn.predict_proba(X_train)
yte_ffnn_proba = ffnn.predict_proba(X_test)
ytr_ffnn_pred  = ffnn.predict(X_train)
yte_ffnn_pred  = ffnn.predict(X_test)
results.append(compute_metrics("FFNN", y_test, yte_ffnn_proba, yte_ffnn_pred))

# ---- ELM: train multiple configs, keep BEST by test ROC AUC ----
best_elm = None
best_auc = -np.inf
best_name = None

for cfg in elm_configs:
    elm = ELMClassifier(**cfg, random_state=RANDOM_STATE)
    elm.fit(Xtr_enc, y_train)

    # train & test probabilities/preds
    ytr_elm_proba = elm.predict_proba(Xtr_enc)
    yte_elm_proba = elm.predict_proba(Xte_enc)
    ytr_elm_pred  = elm.predict(Xtr_enc)
    yte_elm_pred  = elm.predict(Xte_enc)

    name = f"ELM (h={cfg['n_hidden']}, act={cfg['activation']}, α={cfg['alpha']})"
    mt = compute_metrics(name, y_test, yte_elm_proba, yte_elm_pred)
    results.append(mt)

    if mt["roc_auc"] > best_auc:
        best_auc   = mt["roc_auc"]
        best_elm   = elm
        best_name  = name
        best_ytr_proba = ytr_elm_proba
        best_yte_proba = yte_elm_proba
        best_ytr_pred  = ytr_elm_pred
        best_yte_pred  = yte_elm_pred

print("Best ELM:", best_name, "| Test ROC AUC:", round(best_auc, 4))

# ============================================================
# 8) Plots (Confusions + Combined ROC)
# ============================================================
def plot_confusions(y_train, y_train_pred, y_test, y_test_pred, model_name, class_names=None):
    """Draw and save train/test confusion matrices for a model."""
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    for ax, (y_true, y_pred, title) in zip(
        axes,
        [(y_train, y_train_pred, "Train"), (y_test, y_test_pred, "Test")]
    ):
        cm = confusion_matrix(y_true, y_pred)
        ax.imshow(cm, cmap=plt.cm.Blues, interpolation='nearest')
        ax.set_title(f"{model_name} - {title}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")

        n_classes = cm.shape[0]
        ax.set_xticks(np.arange(n_classes))
        ax.set_yticks(np.arange(n_classes))
        if class_names is None:
            ax.set_xticklabels([f"{i}" for i in range(n_classes)], rotation=45, ha="right")
            ax.set_yticklabels([f"{i}" for i in range(n_classes)])
        else:
            ax.set_xticklabels(class_names, rotation=45, ha="right")
            ax.set_yticklabels(class_names)

        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, str(cm[i, j]), ha='center', va='center',
                        color='green', fontsize=12, fontweight='bold')

    plt.tight_layout()
    out = f"figs/confusion_{model_name.lower().replace(' ', '_')}.png"
    plt.savefig(out, dpi=150)
    plt.close(fig)
    print(f"Saved confusion matrix figure for {model_name} -> {out}")

def plot_combined_rocs(
    y_train_ffnn, y_train_proba_ffnn, y_test_ffnn, y_test_proba_ffnn,
    y_train_elm,  y_train_proba_elm,  y_test_elm,  y_test_proba_elm
):
    """Draw FFNN and ELM ROC curves (Train & Test) side by side (binary)."""
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # FFNN
    RocCurveDisplay.from_predictions(y_train_ffnn, y_train_proba_ffnn[:, 1],
                                     name="FFNN Train", ax=axes[0])
    RocCurveDisplay.from_predictions(y_test_ffnn,  y_test_proba_ffnn[:, 1],
                                     name="FFNN Test",  ax=axes[0])
    axes[0].set_title("FFNN ROC (Train & Test)", fontsize=12)
    axes[0].legend(fontsize=10)

    # ELM
    RocCurveDisplay.from_predictions(y_train_elm, y_train_proba_elm[:, 1],
                                     name="ELM Train", ax=axes[1])
    RocCurveDisplay.from_predictions(y_test_elm,  y_test_proba_elm[:, 1],
                                     name="ELM Test",  ax=axes[1])
    axes[1].set_title("ELM ROC (Train & Test)", fontsize=12)
    axes[1].legend(fontsize=10)

    fig.suptitle("ROC Curves — FFNN vs Best ELM", fontsize=14)
    plt.tight_layout()
    out = "figs/roc_ffnn_bestelm_comparison.png"
    plt.savefig(out, dpi=150)
    plt.close(fig)
    print(f"Saved combined ROC figure -> {out}")

# — Confusions —
plot_confusions(y_train, ytr_ffnn_pred, y_test, yte_ffnn_pred, model_name="FFNN")
plot_confusions(y_train, best_ytr_pred,  y_test, best_yte_pred,  model_name="Best_ELM")

# — Combined ROC —
plot_combined_rocs(
    y_train, ytr_ffnn_proba, y_test, yte_ffnn_proba,
    y_train, best_ytr_proba,  y_test, best_yte_proba
)




In [24]:
# ============================================================
# 9) Results Table
# ============================================================
res_df = pd.DataFrame(results).sort_values(by="roc_auc", ascending=False)
print("\nModel scoreboard (sorted by Test ROC AUC):")
print(res_df.to_string(index=False))
res_df.to_csv("figs/results_ffnn_elm.csv", index=False)
print("Saved CSV -> figs/results_ffnn_elm.csv")


Model scoreboard (sorted by Test ROC AUC):
                           model  accuracy  precision   recall  specificity       f1  roc_auc
                            FFNN  0.766234   0.687500 0.611111         0.85 0.647059 0.836296
   ELM (h=256, act=relu, α=0.01)  0.493506   0.396552 0.851852         0.30 0.541176 0.713519
   ELM (h=2000, act=relu, α=0.1)  0.474026   0.378378 0.777778         0.31 0.509091 0.640556
ELM (h=500, act=sigmoid, α=0.01)  0.493506   0.388889 0.777778         0.34 0.518519 0.601296
 ELM (h=1000, act=tanh, α=0.001)  0.506494   0.390000 0.722222         0.39 0.506494 0.595926
Saved CSV -> figs/results_ffnn_elm.csv
