#### ***All Versions of the Ensemble Model***

#### V1

In [None]:
import os
import json
import joblib
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.preprocessing import label_binarize
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Feature table read by pd.read_csv further down in this cell.
CSV_PATH = "/kaggle/working/features_256d_efficientnet.csv"
# Directory that holds the previously saved base-model files listed in MODEL_FILES.
ROOT_DIR = "/kaggle/working/Customized CNN"
# Every V1 artifact (plots, CSVs, JSON report, meta model) is written under this directory.
OUT_DIR  = "/kaggle/working/Ensemble Model All Version/Version1"
os.makedirs(OUT_DIR, exist_ok=True)

# Short base-model name -> path of its serialized file.
# ".joblib" files are read with joblib.load and ".cbm" files with CatBoost's load_model
# (see load_bundle).
MODEL_FILES = {
    "svm": f"{ROOT_DIR}/SVM/svm_final_trainval.joblib",
    "xgb": f"{ROOT_DIR}/XGB/xgb_final_trainval.joblib",
    "rf" : f"{ROOT_DIR}/RF/rf_final_trainval.joblib",
    "knn": f"{ROOT_DIR}/KNN/knn_final_trainval.joblib",
    "cat": f"{ROOT_DIR}/CAT/cat_final_trainval.cbm",
}

# Output locations for the JSON summary and the fitted meta-learner.
REPORT_JSON = os.path.join(OUT_DIR, "ensemble_v1_report.json")
META_MODEL  = os.path.join(OUT_DIR, "meta_logreg_v1_final.joblib")

# Seed passed as random_state to both train_test_split calls and to the meta-learner.
rng = 42

def setup_plot_style():
    """Apply the shared matplotlib font settings used by every figure."""
    font_settings = {
        "font.family": "Times New Roman",
        "font.weight": "bold",
        "font.size": 12,
        "axes.titlesize": 18,
        "axes.labelsize": 16,
        "axes.labelweight": "bold",
    }
    plt.rcParams.update(font_settings)

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Draw a confusion matrix as an annotated heatmap and save it to disk.

    Args:
        cm: Confusion-matrix counts; rows are labelled "Actual", columns "Predicted".
        classes: Class names used for both the row and the column labels.
        output_path: File path the figure is saved to.
        title: Title shown above the heatmap.
    """
    setup_plot_style()
    frame = pd.DataFrame(cm, index=classes, columns=classes)

    def two_line(name):
        # Multi-word names get a line break after the first word; others pass through.
        words = str(name).split()
        if len(words) > 1:
            return words[0] + "\n" + " ".join(words[1:])
        return name

    tick_text = list(map(two_line, frame.columns))
    teal = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        frame,
        ax=ax,
        annot=True,
        fmt="d",
        annot_kws={"fontsize": 14, "weight": "bold"},
        cmap=teal,
        cbar=True,
        linewidths=1,
        linecolor="white",
        xticklabels=tick_text,
        yticklabels=tick_text,
    )

    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")

    # x tick labels are rotated 45 degrees, y tick labels stay horizontal.
    for axis_name, angle in (("x", 45), ("y", 0)):
        ax.tick_params(axis=axis_name, rotation=angle, labelsize=12, width=1)
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str):
    """Save one-vs-rest ROC and precision-recall plots, one curve per class.

    Args:
        y_true_bin: Binarized true labels; column ``i`` is read for ``classes[i]``.
        y_pred_proba: Predicted scores; column ``i`` is read for ``classes[i]``.
        classes: Class names used in the legends.
        out_dir: Directory that receives ``roc_curve_test.png`` and ``pr_curve_test.png``.
    """
    setup_plot_style()

    # ROC figure: one curve per class plus the chance diagonal.
    roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
    for col, class_name in enumerate(classes):
        fpr, tpr, _ = roc_curve(y_true_bin[:, col], y_pred_proba[:, col])
        area = auc(fpr, tpr)
        roc_ax.plot(fpr, tpr, lw=2, label=f'{class_name} (AUC = {area:0.2f})')
    roc_ax.plot([0, 1], [0, 1], 'k--', lw=2)

    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title('Receiver Operating Characteristic (ROC)')
    roc_ax.legend(loc="lower right", fontsize=10)
    roc_fig.tight_layout()

    roc_file = os.path.join(out_dir, "roc_curve_test.png")
    roc_fig.savefig(roc_file, dpi=600, bbox_inches="tight")
    plt.close(roc_fig)
    print(f"Saved ROC curve to {roc_file}")

    # Precision-recall figure: one curve per class.
    pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
    for col, class_name in enumerate(classes):
        prec, rec, _ = precision_recall_curve(y_true_bin[:, col], y_pred_proba[:, col])
        pr_ax.plot(rec, prec, lw=2, label=f'{class_name}')

    pr_ax.set_xlim([0.0, 1.0])
    pr_ax.set_ylim([0.0, 1.05])
    pr_ax.set_xlabel('Recall')
    pr_ax.set_ylabel('Precision')
    pr_ax.set_title('Precision-Recall Curve')
    pr_ax.legend(loc="best", fontsize=10)
    pr_fig.tight_layout()

    pr_file = os.path.join(out_dir, "pr_curve_test.png")
    pr_fig.savefig(pr_file, dpi=600, bbox_inches="tight")
    plt.close(pr_fig)
    print(f"Saved PR curve to {pr_file}")

# Load the precomputed feature table.
df = pd.read_csv(CSV_PATH)

# Feature columns are the ones named "f" followed only by digits (f0, f1, ...).
feat_cols = [c for c in df.columns if c.startswith("f") and c[1:].isdigit()]
if not feat_cols:
    raise RuntimeError("No feature columns (f0, f1, ...) found in CSV.")

X = df[feat_cols].values.astype(np.float32)
y = df["class_idx"].values.astype(int)

# Class names ordered by class_idx, one entry per distinct (class_idx, label) pair.
classes = (
    df.sort_values("class_idx")[["class_idx", "label"]]
    .drop_duplicates()
    .sort_values("class_idx")["label"].tolist()
)
n_classes = len(classes)

# Hold out 20% of the rows as the test set, then 12.5% of the remainder as validation
# (0.125 * 0.80 = 10% of all rows). Both splits are stratified on the labels.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=rng
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.125, stratify=y_trainval, random_state=rng
)
sizes = {"train": X_train.shape[0], "val": X_val.shape[0], "test": X_test.shape[0]}
print("Split sizes:", sizes)

def load_bundle(p):
    """Load a saved base model and, when present, its scaler.

    Paths ending in ``.cbm`` are loaded through ``CatBoostClassifier.load_model``;
    everything else goes through ``joblib.load``. A joblib payload may be either
    the bare estimator or a dict with a ``"model"`` entry and an optional
    ``"scaler"`` entry.

    Args:
        p: Path of the serialized model file.

    Returns:
        ``(model, scaler)``; ``scaler`` is ``None`` when the file carries none.
        ``(None, None)`` when loading failed and the failure was reported on stdout.

    Raises:
        ValueError: Re-raised unless its message contains "incompatible dtype".
    """
    try:
        if p.endswith('.cbm'):
            from catboost import CatBoostClassifier
            cat_model = CatBoostClassifier()
            cat_model.load_model(p)
            return cat_model, None

        # joblib.load unpickles the file, so it should only be used on trusted artifacts.
        payload = joblib.load(p)
        if not isinstance(payload, dict):
            return payload, None
        estimator = payload["model"] if "model" in payload else payload
        return estimator, payload.get("scaler")

    except ValueError as e:
        if "incompatible dtype" not in str(e):
            raise e
        print(f"SKLEARN VERSION MISMATCH: Could not load model from {p}.")
        print("This is likely due to the model being saved with a different scikit-learn version.")
        print("Skipping this model for the ensemble.\n")
        return None, None
    except Exception as e:
        # Best effort: any other failure is reported and the model is skipped.
        print(f"An unexpected error occurred while loading {p}: {e}")
        return None, None


# Loaded estimators and their optional scalers, keyed by short model name.
models: Dict[str, object] = {}
scalers: Dict[str, object] = {}
for name, p in MODEL_FILES.items():
    if os.path.exists(p):
        m, s = load_bundle(p)
        # load_bundle returns (None, None) when loading failed; such models are left out.
        if m is not None:
            models[name] = m
            scalers[name] = s
    else:
        print(f"Warning: Model file not found for '{name}' at {p}. Skipping.")

# Fixed ordering of the models that actually loaded; make_meta iterates in this order,
# so it also fixes the column order of the meta-features.
model_order = [name for name in ["svm", "xgb", "rf", "knn", "cat"] if name in models]
print(f"Successfully loaded {len(model_order)} base models: {model_order}")

def softmax(z: np.ndarray) -> np.ndarray:
    """Return the row-wise softmax of a 2-D score array.

    Each row's maximum is subtracted before exponentiating so large scores do
    not overflow.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    exp_shifted = np.exp(z - row_max)
    row_total = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_total

def predict_logits(model, Xs) -> np.ndarray:
    """Return per-class scores for ``Xs`` in logit-like form.

    The first available source is used, in this order:
      1. ``decision_function`` output (a 1-D result is expanded to two columns
         ``[-d, d]``),
      2. the log of ``predict_proba`` output, clipped to ``[1e-12, 1]``,
      3. hard predictions encoded as +10 for the predicted class and -10 elsewhere,
         using the module-level ``n_classes`` for the width.
    """
    if hasattr(model, "decision_function"):
        scores = np.asarray(model.decision_function(Xs))
        if scores.ndim == 1:
            scores = np.vstack([-scores, scores]).T
        return scores.astype(np.float64)

    if hasattr(model, "predict_proba"):
        proba = np.asarray(model.predict_proba(Xs), dtype=np.float64)
        floor = 1e-12
        return np.log(np.clip(proba, floor, 1.0))

    labels = model.predict(Xs)
    n_rows = Xs.shape[0]
    hard = np.full((n_rows, n_classes), -10.0, dtype=np.float64)
    hard[np.arange(n_rows), labels.flatten()] = 10.0
    return hard

def predict_proba_safe(model, Xs) -> np.ndarray:
    """Return class probabilities for ``Xs`` as a float64 array.

    Uses the model's own ``predict_proba`` when it has one; otherwise the
    softmax of ``predict_logits(model, Xs)``.
    """
    if not hasattr(model, "predict_proba"):
        return softmax(predict_logits(model, Xs))
    return np.asarray(model.predict_proba(Xs), dtype=np.float64)

def nll_from_logits(logits: np.ndarray, y_true: np.ndarray, T: float) -> float:
    """Return the log loss of ``softmax(logits / T)`` against ``y_true``.

    The label set passed to ``log_loss`` is ``0 .. n_classes - 1`` (module-level
    ``n_classes``).
    """
    scaled_probs = softmax(logits / T)
    label_ids = np.arange(n_classes)
    return log_loss(y_true, scaled_probs, labels=label_ids)

def fit_temperature(logits_val: np.ndarray, y_val: np.ndarray) -> float:
    """Pick the softmax temperature that minimizes validation log loss.

    Searches 46 evenly spaced temperatures from 0.5 to 5.0 and returns the first
    one with the strictly lowest ``nll_from_logits`` value; 1.0 is returned when
    no candidate yields a loss below infinity.
    """
    chosen_T = 1.0
    lowest_nll = float("inf")
    for candidate in np.linspace(0.5, 5.0, 46):
        candidate_nll = nll_from_logits(logits_val, y_val, candidate)
        if candidate_nll < lowest_nll:
            chosen_T = candidate
            lowest_nll = candidate_nll
    return chosen_T

# Models whose probabilities are produced from temperature-scaled logits (see make_meta).
CALIBRATE_MODELS = {"svm", "knn"}

# Per-model temperature: fitted on the validation split for CALIBRATE_MODELS, 1.0 otherwise.
temperatures: Dict[str, float] = {}
for name in model_order:
    model, scaler = models[name], scalers[name]
    # Apply the model's own scaler when the loaded bundle provided one.
    Xv_scaled = scaler.transform(X_val) if scaler else X_val
    if name in CALIBRATE_MODELS:
        logits_v = predict_logits(model, Xv_scaled)
        T = fit_temperature(logits_v, y_val)
        temperatures[name] = T
    else:
        temperatures[name] = 1.0
print("Optimal Temperatures:", temperatures)

def confidence_features(probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute per-row entropy and top-2 margin of a probability matrix.

    Args:
        probs: 2-D array of class probabilities, one row per sample.

    Returns:
        ``(entropy, margin)``, each of shape ``(n_rows, 1)``. Entropy is computed
        on probabilities clipped to ``[1e-12, 1]``; margin is the largest minus
        the second-largest clipped probability in the row.
    """
    clipped = np.clip(probs, 1e-12, 1.0)
    entropy = -(clipped * np.log(clipped)).sum(axis=1, keepdims=True)
    # After partitioning at -2, the last two columns hold (second largest, largest).
    two_largest = np.partition(clipped, -2, axis=1)[:, -2:]
    gap = two_largest[:, 1] - two_largest[:, 0]
    return entropy, gap.reshape(-1, 1)

def make_meta(Xs: np.ndarray) -> np.ndarray:
    """Build the meta-learner feature matrix for ``Xs``.

    For every model in ``model_order`` three column groups are appended, in this
    order: class probabilities, entropy, top-2 margin. Models listed in
    ``CALIBRATE_MODELS`` get their probabilities from logits divided by the
    fitted temperature; the others use ``predict_proba_safe`` directly.
    """
    feature_blocks = []
    for key in model_order:
        estimator = models[key]
        scaler = scalers[key]
        inputs = scaler.transform(Xs) if scaler else Xs

        if key not in CALIBRATE_MODELS:
            proba = predict_proba_safe(estimator, inputs)
        else:
            # Guard against a zero temperature before dividing.
            scaled_logits = predict_logits(estimator, inputs) / max(temperatures[key], 1e-6)
            proba = softmax(scaled_logits)

        entropy, margin = confidence_features(proba)
        feature_blocks += [proba, entropy, margin]
    return np.hstack(feature_blocks)

# Build the meta-learner inputs for each split from the base-model outputs.
# NOTE(review): the base-model files are named "*_final_trainval"; if those models were
# fitted on rows that overlap X_train / X_val here, these meta-features are in-sample and
# the validation score below is optimistic — confirm how the base models were trained.
print("Creating meta-features...")
X_train_meta = make_meta(X_train)
X_val_meta   = make_meta(X_val)
X_test_meta  = make_meta(X_test)
print(f"Meta-feature shape: {X_train_meta.shape}")

# Candidate hyper-parameters for the logistic-regression meta-learner (only C varies).
meta_grid = [
    {"C": 0.1, "penalty": "l2", "solver": "lbfgs"},
    {"C": 0.5, "penalty": "l2", "solver": "lbfgs"},
    {"C": 1.0, "penalty": "l2", "solver": "lbfgs"},
    {"C": 2.0, "penalty": "l2", "solver": "lbfgs"},
    {"C": 5.0, "penalty": "l2", "solver": "lbfgs"},
]

search_rows = []
best = {"acc": -1, "params": None, "model": None}

# Fit each candidate on the train split and score it on the validation split.
# The strict ">" keeps the first candidate when several tie on accuracy.
# NOTE(review): the `multi_class` argument is deprecated since scikit-learn 1.5 —
# check against the installed version before upgrading.
for p in meta_grid:
    meta = LogisticRegression(
        multi_class="multinomial", max_iter=1000, n_jobs=-1, random_state=rng, **p
    )
    meta.fit(X_train_meta, y_train)
    yv_pred = meta.predict(X_val_meta)
    acc = accuracy_score(y_val, yv_pred)
    f1m = f1_score(y_val, yv_pred, average="macro")
    search_rows.append({"C": p["C"], "val_accuracy": acc, "val_f1_macro": f1m})
    if acc > best["acc"]:
        best.update({"acc": acc, "params": p, "model": meta})

pd.DataFrame(search_rows).sort_values("val_accuracy", ascending=False)\
  .to_csv(os.path.join(OUT_DIR, "meta_search_results.csv"), index=False)
print(f"VAL — Best meta-learner accuracy: {best['acc']:.4f} with params: {best['params']}")

# Refit the winning configuration on train + validation meta-features and persist it.
meta_final = LogisticRegression(
    multi_class="multinomial", max_iter=1000, n_jobs=-1, random_state=rng, **best['params']
)
X_trainval_meta = np.vstack([X_train_meta, X_val_meta])
y_trainval_full = np.concatenate([y_train, y_val])

meta_final.fit(X_trainval_meta, y_trainval_full)
joblib.dump(meta_final, META_MODEL)

# Held-out test evaluation of the refitted meta-learner.
test_pred = meta_final.predict(X_test_meta)
test_proba = meta_final.predict_proba(X_test_meta)
test_acc  = accuracy_score(y_test, test_pred)
test_f1m  = f1_score(y_test, test_pred, average="macro")
print(f"TEST — Accuracy: {test_acc:.4f}, Macro F1-Score: {test_f1m:.4f}")

# The validation report uses best["model"] (fitted on the train split only), not meta_final.
rep_val  = classification_report(y_val, best["model"].predict(X_val_meta), target_names=classes, output_dict=True)
rep_test = classification_report(y_test, test_pred, target_names=classes, output_dict=True)

# Confusion matrix on the test split, saved both as CSV and as a heatmap image.
cm_test = confusion_matrix(y_test, test_pred)
pd.DataFrame(cm_test, index=classes, columns=classes).to_csv(os.path.join(OUT_DIR, "cm_test_meta.csv"))
plot_confusion_matrix(
    cm=cm_test,
    classes=classes,
    output_path=os.path.join(OUT_DIR, "confusion_matrix_test.png"),
    title="Confusion Matrix - Ensemble V1 (Test)"
)

# One-vs-rest ROC / PR curves need the test labels in binarized (one column per class) form.
y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, test_proba, classes, OUT_DIR)


# Persist the run summary (splits, temperatures, chosen params, val/test metrics).
with open(REPORT_JSON, "w") as f:
    json.dump({
        "ensemble_version": "V1",
        "description": "Stacked generalization with confidence features and temperature scaling.",
        "split_sizes": sizes,
        "temperatures": temperatures,
        "base_models_order": model_order,
        "meta_best_params": best["params"],
        "val": {"accuracy": float(best["acc"]), "report": rep_val},
        "test": {"accuracy": float(test_acc), "f1_macro": float(test_f1m), "report": rep_test},
    }, f, indent=4)

print("\nSaved all V1 artifacts to:", OUT_DIR)
print("Meta model saved to:", META_MODEL)
print("Final report JSON saved to:", REPORT_JSON)

Split sizes: {'train': 6300, 'val': 900, 'test': 1800}


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


SKLEARN VERSION MISMATCH: Could not load model from /kaggle/working/Customized CNN/RF/rf_final_trainval.joblib.
This is likely due to the model being saved with a different scikit-learn version.
Skipping this model for the ensemble.

Successfully loaded 4 base models: ['svm', 'xgb', 'knn', 'cat']
Optimal Temperatures: {'svm': 0.5, 'xgb': 1.0, 'knn': 0.5, 'cat': 1.0}
Creating meta-features...
Meta-feature shape: (6300, 32)
VAL — Best meta-learner accuracy: 1.0000 with params: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
TEST — Accuracy: 0.9922, Macro F1-Score: 0.9922
Saved confusion matrix to /kaggle/working/Ensemble Model All Version/Version1/confusion_matrix_test.png
Saved ROC curve to /kaggle/working/Ensemble Model All Version/Version1/roc_curve_test.png
Saved PR curve to /kaggle/working/Ensemble Model All Version/Version1/pr_curve_test.png

Saved all V1 artifacts to: /kaggle/working/Ensemble Model All Version/Version1
Meta model saved to: /kaggle/working/Ensemble Model All Version

#### V2

In [None]:
import os
import json
import joblib
import warnings
import numpy as np
import pandas as pd
from itertools import product
from typing import Dict, Tuple, List

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap


# Feature table read by pd.read_csv further down in this cell.
CSV_PATH = "/kaggle/working/features_256d_efficientnet.csv"
# NOTE(review): ROOT_DIR is not referenced by the rest of this cell as shown here.
ROOT_DIR = "/kaggle/working/Customized CNN"
# Every V2 artifact (report CSV, plots, summary JSON, meta model) is written here.
OUT_DIR  = "/kaggle/working/Ensemble Model All Version/Version2"
os.makedirs(OUT_DIR, exist_ok=True)

# Seed used for the split, the CV folds and every seeded estimator in this cell.
rng = 42

def setup_plot_style():
    """Set the bold Times New Roman font defaults shared by all figures."""
    bold = "bold"
    plt.rcParams.update(
        {
            "font.family": "Times New Roman",
            "font.size": 12,
            "font.weight": bold,
            "axes.labelsize": 16,
            "axes.labelweight": bold,
            "axes.titlesize": 18,
        }
    )

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Save an annotated heatmap of a confusion matrix.

    Args:
        cm: Confusion-matrix counts; rows are labelled "Actual", columns "Predicted".
        classes: Class names for the row and column labels.
        output_path: Destination image path.
        title: Figure title.
    """
    setup_plot_style()
    table = pd.DataFrame(cm, index=classes, columns=classes)

    def break_after_first_word(text):
        # Names with several words are split onto two lines; single words pass through.
        pieces = str(text).split()
        if len(pieces) <= 1:
            return text
        first, rest = pieces[0], pieces[1:]
        return first + "\n" + " ".join(rest)

    tick_names = [break_after_first_word(col) for col in table.columns]
    gradient = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    annotation_style = {"fontsize": 14, "weight": "bold"}
    sns.heatmap(
        table, annot=True, fmt="d", annot_kws=annotation_style,
        cmap=gradient, cbar=True,
        linewidths=1, linecolor="white",
        xticklabels=tick_names, yticklabels=tick_names,
        ax=ax,
    )
    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    for tick_label in ax.get_xticklabels() + ax.get_yticklabels():
        tick_label.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Save one-vs-rest ROC and precision-recall plots, one curve per class.

    Args:
        y_true_bin: Binarized true labels; column ``i`` is read for ``classes[i]``.
        y_pred_proba: Predicted scores; column ``i`` is read for ``classes[i]``.
        classes: Class names used in the legends.
        out_dir: Directory the two PNG files are written to.
        prefix: Tag appended to both plot titles and both file names.
    """
    setup_plot_style()
    class_columns = range(len(classes))

    def decorate_and_save(fig, ax, x_label, y_label, heading, legend_loc, path):
        # Shared axis limits, labelling and saving for both figures.
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        ax.set_title(heading)
        ax.legend(loc=legend_loc, fontsize=10)
        fig.tight_layout()
        fig.savefig(path, dpi=600, bbox_inches="tight")
        plt.close(fig)

    # ROC curves with the chance diagonal.
    roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
    for col in class_columns:
        fpr, tpr, _ = roc_curve(y_true_bin[:, col], y_pred_proba[:, col])
        area = auc(fpr, tpr)
        roc_ax.plot(fpr, tpr, lw=2, label=f'{classes[col]} (AUC = {area:0.2f})')
    roc_ax.plot([0, 1], [0, 1], 'k--', lw=2)
    roc_file = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    decorate_and_save(
        roc_fig, roc_ax, 'False Positive Rate', 'True Positive Rate',
        f'Receiver Operating Characteristic (ROC) - {prefix}', "lower right", roc_file,
    )
    print(f"Saved ROC curve to {roc_file}")

    # Precision-recall curves.
    pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
    for col in class_columns:
        prec, rec, _ = precision_recall_curve(y_true_bin[:, col], y_pred_proba[:, col])
        pr_ax.plot(rec, prec, lw=2, label=f'{classes[col]}')
    pr_file = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    decorate_and_save(
        pr_fig, pr_ax, 'Recall', 'Precision',
        f'Precision-Recall Curve - {prefix}', "best", pr_file,
    )
    print(f"Saved PR curve to {pr_file}")

# Load the feature table; feature columns are named "f" followed only by digits.
df = pd.read_csv(CSV_PATH)
feat_cols = [c for c in df.columns if c.startswith("f") and c[1:].isdigit()]
if not feat_cols:
    raise RuntimeError("No feature columns (f0, f1, ...) found.")

X = df[feat_cols].values.astype(np.float32)
y = df["class_idx"].values.astype(int)
# Distinct label strings in order of first appearance after sorting rows by class_idx.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)

# Stratified 80/20 split; the 80% pool is used for cross-validated stacking below.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=rng
)
sizes = {"trainval": X_trainval.shape[0], "test": X_test.shape[0]}
print("Split sizes:", sizes)

def build_svm():
    """Return an unfitted StandardScaler + RBF-kernel SVC pipeline with probability output."""
    scaler = StandardScaler()
    classifier = SVC(kernel="rbf", C=8.0, gamma="scale", probability=True, random_state=rng)
    steps = [("scaler", scaler), ("clf", classifier)]
    return Pipeline(steps)

def build_xgb():
    """Return an unfitted multi-class XGBoost classifier sized by the module-level ``n_classes``."""
    settings = dict(
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        tree_method="hist",
        random_state=rng,
        n_estimators=800,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.9,
        colsample_bytree=0.9,
        reg_lambda=1.5,
        reg_alpha=0.0,
    )
    return XGBClassifier(**settings)

def build_rf():
    """Return an unfitted 800-tree random forest with unrestricted depth."""
    settings = dict(
        n_estimators=800,
        max_depth=None,
        min_samples_leaf=1,
        n_jobs=-1,
        random_state=rng,
    )
    return RandomForestClassifier(**settings)

def build_knn():
    """Return an unfitted StandardScaler + distance-weighted 7-NN pipeline."""
    scaler = StandardScaler()
    classifier = KNeighborsClassifier(n_neighbors=7, weights="distance", metric="minkowski")
    steps = [("scaler", scaler), ("clf", classifier)]
    return Pipeline(steps)

def build_cat():
    """Return an unfitted, silent CatBoost classifier using the MultiClass loss."""
    settings = dict(
        n_estimators=1000,
        learning_rate=0.05,
        random_seed=rng,
        verbose=0,
        loss_function='MultiClass',
    )
    return CatBoostClassifier(**settings)

# Short model name -> zero-argument factory that returns a fresh, unfitted base model.
BASE_BUILDERS = {"svm": build_svm, "xgb": build_xgb, "rf": build_rf, "knn": build_knn, "cat": build_cat}
# Iteration order of the base models; also the column-group order of the meta-features.
base_names = list(BASE_BUILDERS.keys())

def softmax(z: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D array, shifted by each row's maximum for stability."""
    shifted = z - np.max(z, axis=1, keepdims=True)
    weights = np.exp(shifted)
    normalizer = np.sum(weights, axis=1, keepdims=True)
    return weights / normalizer

def predict_proba_safe(model, Xs) -> np.ndarray:
    """Return class probabilities for ``Xs`` as a float64 array.

    The first available source is used, in this order:
      1. ``predict_proba`` output,
      2. softmax of ``decision_function`` output (a 1-D result is expanded to
         two columns ``[-d, d]``),
      3. hard predictions one-hot encoded over the module-level ``n_classes``.

    Args:
        model: Fitted estimator exposing at least one of the methods above.
        Xs: Feature matrix; only ``Xs.shape[0]`` is read directly.

    Returns:
        Array of shape ``(n_rows, n_columns)`` with dtype float64.
    """
    if hasattr(model, "predict_proba"):
        return np.asarray(model.predict_proba(Xs), dtype=np.float64)
    elif hasattr(model, "decision_function"):
        d = np.asarray(model.decision_function(Xs))
        if d.ndim == 1:
            d = np.vstack([-d, d]).T
        return softmax(d.astype(np.float64))
    else:
        # Flatten and cast the labels: an (n, 1) prediction array would otherwise
        # broadcast against the row index and set a whole grid of cells to 1.
        pred = np.asarray(model.predict(Xs)).ravel().astype(int)
        n_rows = Xs.shape[0]
        p = np.zeros((n_rows, n_classes), dtype=np.float64)
        p[np.arange(n_rows), pred] = 1.0
        return p

def entropy_and_margin(probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return per-row entropy and top-2 margin, each shaped ``(n_rows, 1)``.

    Probabilities are clipped to ``[1e-12, 1]`` first. The margin is the largest
    minus the second-largest clipped probability in each row.
    """
    floor = 1e-12
    safe = np.clip(probs, floor, 1.0)
    entropy = -(safe * np.log(safe)).sum(axis=1, keepdims=True)
    # Partitioning at -2 leaves (second largest, largest) in the last two columns.
    runner_up_and_top = np.partition(safe, -2, axis=1)[:, -2:]
    margin = runner_up_and_top[:, 1] - runner_up_and_top[:, 0]
    return entropy, margin.reshape(-1, 1)

def disagreement_count(prob_blocks: Dict[str, np.ndarray]) -> np.ndarray:
    """Count, per sample, how many models disagree with the majority vote.

    Each model votes for its arg-max class. The result for a row is the number
    of models minus the size of the largest group of agreeing models, which is
    the number of models whose vote differs from the most frequent class (the
    count is the same whichever class wins a tie).

    Args:
        prob_blocks: Model name -> probability matrix of shape ``(n_rows, n_classes)``.

    Returns:
        int32 array of shape ``(n_rows, 1)``.
    """
    preds = np.column_stack([pb.argmax(1) for pb in prob_blocks.values()])
    n_rows, n_models = preds.shape
    if n_rows == 0:
        return np.zeros((0, 1), dtype=np.int32)
    # votes[i, c] = number of models that predicted class c for row i.
    n_labels = int(preds.max()) + 1
    votes = np.stack([(preds == c).sum(axis=1) for c in range(n_labels)], axis=1)
    return (n_models - votes.max(axis=1)).astype(np.int32).reshape(-1, 1)

# Base models that get a fitted temperature; every other model keeps T = 1.0.
CALIBRATE = {"svm", "knn"}
def fit_temperature_from_probs(p_oof: np.ndarray, y_true: np.ndarray) -> float:
    """Grid-search the temperature that minimizes log loss of rescaled probabilities.

    The probabilities are clipped to ``[1e-12, 1]``, turned into log-probabilities,
    divided by each of 46 evenly spaced temperatures in ``[0.5, 5.0]`` and passed
    through softmax. The first temperature with the strictly lowest log loss is
    returned; 1.0 if no candidate yields a loss below infinity.
    """
    floor = 1e-12
    log_p = np.log(np.clip(p_oof, floor, 1.0))
    label_ids = np.arange(n_classes)

    chosen_T = 1.0
    lowest = float("inf")
    for candidate in np.linspace(0.5, 5.0, 46):
        rescaled = softmax(log_p / candidate)
        loss = log_loss(y_true, rescaled, labels=label_ids)
        if loss < lowest:
            lowest = loss
            chosen_T = candidate
    return chosen_T

def apply_temperature(probs: np.ndarray, T: float) -> np.ndarray:
    """Rescale probabilities by temperature ``T`` via ``softmax(log(p) / T)``.

    ``probs`` is clipped to ``[1e-12, 1]`` before the log, and ``T`` is floored
    at ``1e-6`` before dividing.
    """
    safe_T = max(T, 1e-6)
    log_p = np.log(np.clip(probs, 1e-12, 1.0))
    return softmax(log_p / safe_T)

# 5-fold stratified CV over the train/val pool; oof_probs[name][i] is filled by the fold
# in which row i is in the held-out part.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng)
oof_probs = {k: np.zeros((X_trainval.shape[0], n_classes)) for k in base_names}

print("Starting Out-of-Fold predictions...")
for fold, (tr_idx, val_idx) in enumerate(kf.split(X_trainval, y_trainval), 1):
    Xtr, Xva = X_trainval[tr_idx], X_trainval[val_idx]
    ytr, yva = y_trainval[tr_idx], y_trainval[val_idx]
    for name in base_names:
        # A fresh model is built and fitted on this fold's training part only.
        mdl = BASE_BUILDERS[name]()
        mdl.fit(Xtr, ytr)
        oof_probs[name][val_idx] = predict_proba_safe(mdl, Xva)
    print(f"Fold {fold} complete.")

# Temperatures are fitted on the out-of-fold probabilities for models in CALIBRATE;
# the rest keep T = 1.0. The same temperatures are applied to the test predictions later.
temperatures = {name: fit_temperature_from_probs(oof_probs[name], y_trainval) if name in CALIBRATE else 1.0 for name in base_names}
print("Optimal Temperatures:", temperatures)
oof_probs_cal = {name: apply_temperature(p, temperatures[name]) for name, p in oof_probs.items()}

def make_meta_from_blocks(prob_blocks: Dict[str, np.ndarray]) -> np.ndarray:
    """Assemble the meta-feature matrix from per-model probability blocks.

    Column layout, with models taken in ``base_names`` order inside each group:
    all probability blocks, then all entropies, then all margins, then a single
    disagreement-count column.
    """
    ordered_probs = [prob_blocks[nm] for nm in base_names]
    stats = [entropy_and_margin(block) for block in ordered_probs]
    entropies = [ent for ent, _ in stats]
    margins = [mar for _, mar in stats]
    disagreement = disagreement_count(prob_blocks)
    return np.hstack(ordered_probs + entropies + margins + [disagreement])

# Meta-features built from the calibrated out-of-fold probabilities.
X_meta_oof = make_meta_from_blocks(oof_probs_cal)
print(f"OOF Meta-feature shape: {X_meta_oof.shape}")

# Meta-learner: XGBoost classifier fitted on the out-of-fold meta-features.
meta_xgb = XGBClassifier(
    objective="multi:softprob", num_class=n_classes, eval_metric="mlogloss",
    tree_method="hist", random_state=rng, n_estimators=1200, max_depth=4,
    learning_rate=0.03, subsample=0.9, colsample_bytree=0.9
)
meta_xgb.fit(X_meta_oof, y_trainval)

def best_simplex_weights(prob_blocks, y_true, step=0.1):
    """Exhaustively search blend weights on a grid over the probability simplex.

    Every combination of per-model weights drawn from ``0, step, 2*step, ... 1``
    whose sum is within ``1e-9`` of 1 is scored by the accuracy of the weighted
    sum of the probability blocks (models taken in ``base_names`` order).

    Args:
        prob_blocks: Model name -> probability matrix.
        y_true: True labels.
        step: Grid spacing for each weight.

    Returns:
        ``(best_accuracy, best_weights)``; the first combination reaching the
        highest accuracy wins. ``best_weights`` is ``None`` if no combination
        sums to 1.
    """
    candidates = np.arange(0, 1 + 1e-9, step)
    top_acc = -1.0
    top_weights = None
    for weights in product(candidates, repeat=len(base_names)):
        off_simplex = abs(sum(weights) - 1.0) > 1e-9
        if off_simplex:
            continue
        blended = sum(wi * prob_blocks[nm] for wi, nm in zip(weights, base_names))
        score = accuracy_score(y_true, blended.argmax(1))
        if score > top_acc:
            top_acc = score
            top_weights = weights
    return top_acc, top_weights

# Blend weights are chosen on the calibrated out-of-fold probabilities.
blend_acc_oof, blend_w = best_simplex_weights(oof_probs_cal, y_trainval, step=0.1)
print(f"OOF blend best acc: {blend_acc_oof:.4f}, weights: {dict(zip(base_names, blend_w))}")

# Refit every base model on the full train/val pool and predict the test set,
# applying the temperatures that were fitted on the out-of-fold probabilities.
print("Refitting base models on full trainval set...")
test_probs_cal = {}
for name in base_names:
    mdl_full = BASE_BUILDERS[name]()
    mdl_full.fit(X_trainval, y_trainval)
    ptest = predict_proba_safe(mdl_full, X_test)
    test_probs_cal[name] = apply_temperature(ptest, temperatures[name])

X_meta_test = make_meta_from_blocks(test_probs_cal)


# Candidate 1: the XGBoost meta-learner.
ptest_meta = meta_xgb.predict_proba(X_meta_test)
pred_meta = ptest_meta.argmax(1)
acc_meta = accuracy_score(y_test, pred_meta)
f1_meta = f1_score(y_test, pred_meta, average="macro")

# Candidate 2: the weighted blend of calibrated base probabilities.
ptest_blend = sum(wi * test_probs_cal[nm] for wi, nm in zip(blend_w, base_names))
pred_blend = ptest_blend.argmax(1)
acc_blend = accuracy_score(y_test, pred_blend)
f1_blend = f1_score(y_test, pred_blend, average="macro")

# Base model with the highest out-of-fold accuracy.
oof_accs = {nm: accuracy_score(y_trainval, oof_probs_cal[nm].argmax(1)) for nm in base_names}
best_base = max(oof_accs, key=oof_accs.get)

# Candidate 3: convex mix of the meta-learner and the best base model; lam is searched
# over 21 values in [0, 1].
# NOTE(review): p_meta_oof is meta_xgb predicting the same X_meta_oof rows it was fitted
# on, so the meta side of this search is in-sample and may bias lam toward the meta model.
lams, best_lam, best_mix_acc = np.linspace(0, 1, 21), 0.0, -1.0
p_meta_oof = meta_xgb.predict_proba(X_meta_oof)
for lam in lams:
    Pmix = lam * p_meta_oof + (1 - lam) * oof_probs_cal[best_base]
    acc = accuracy_score(y_trainval, Pmix.argmax(1))
    if acc > best_mix_acc:
        best_mix_acc, best_lam = acc, lam

ptest_mix = best_lam * ptest_meta + (1 - best_lam) * test_probs_cal[best_base]
pred_mix = ptest_mix.argmax(1)
acc_mix = accuracy_score(y_test, pred_mix)
f1_mix = f1_score(y_test, pred_mix, average="macro")


# Each entry: (test accuracy, test macro-F1, test predictions, test probabilities).
candidates = {
    "meta_xgb": (acc_meta, f1_meta, pred_meta, ptest_meta),
    "blend": (acc_blend, f1_blend, pred_blend, ptest_blend),
    f"mix_meta_{best_base}": (acc_mix, f1_mix, pred_mix, ptest_mix)
}
# NOTE(review): the winner is picked by its accuracy on the test set, so the reported
# winner metrics are selected on test data rather than on held-out validation data.
winner_name = max(candidates.items(), key=lambda kv: kv[1][0])[0]
w_acc, w_f1, w_pred, w_proba = candidates[winner_name]

print(f"\n--- TEST RESULTS ---")
print(f"Meta-XGB: acc={acc_meta:.4f}, f1_macro={f1_meta:.4f}")
print(f"Blend   : acc={acc_blend:.4f}, f1_macro={f1_blend:.4f}")
print(f"Mix     : acc={acc_mix:.4f}, f1_macro={f1_mix:.4f} (λ={best_lam:.2f}, base={best_base})")
print(f"\nWINNER: {winner_name} with Accuracy = {w_acc:.4f}, F1-Macro = {w_f1:.4f}")

# Per-class report, confusion matrix and ROC / PR plots for the winner only.
rep = classification_report(y_test, w_pred, target_names=classes, output_dict=True)
pd.DataFrame(rep).transpose().to_csv(os.path.join(OUT_DIR, f"report_{winner_name}.csv"))

cm = confusion_matrix(y_test, w_pred)
plot_confusion_matrix(cm, classes, os.path.join(OUT_DIR, f"cm_{winner_name}.png"), f"Confusion Matrix - {winner_name}")

y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, w_proba, classes, OUT_DIR, winner_name)

# Summary of all three candidates plus the fitted ensemble parameters.
with open(os.path.join(OUT_DIR, "summary_v2.json"), "w") as f:
    json.dump({
        "ensemble_version": "V2", "winner": winner_name,
        "test_metrics": {k: {"acc": float(v[0]), "f1_macro": float(v[1])} for k, v in candidates.items()},
        "temperatures": temperatures,
        "blend_weights": dict(zip(base_names, [float(x) for x in blend_w])),
        "mix_lambda": float(best_lam), "mix_best_base": best_base
    }, f, indent=4)
# Only the meta-learner is persisted here; the refitted base models are not saved by this cell.
joblib.dump(meta_xgb, os.path.join(OUT_DIR, "meta_xgb_v2.joblib"))

print(f"\nSaved all V2 artifacts to: {OUT_DIR}")

Split sizes: {'trainval': 7200, 'test': 1800}
Starting Out-of-Fold predictions...
Fold 1 complete.
Fold 2 complete.
Fold 3 complete.
Fold 4 complete.
Fold 5 complete.
Optimal Temperatures: {'svm': 0.8, 'xgb': 1.0, 'rf': 1.0, 'knn': 0.5, 'cat': 1.0}
OOF Meta-feature shape: (7200, 41)
OOF blend best acc: 0.9912, weights: {'svm': 0.4, 'xgb': 0.0, 'rf': 0.1, 'knn': 0.2, 'cat': 0.30000000000000004}
Refitting base models on full trainval set...

--- TEST RESULTS ---
Meta-XGB: acc=0.9894, f1_macro=0.9894
Blend   : acc=0.9911, f1_macro=0.9911
Mix     : acc=0.9900, f1_macro=0.9900 (λ=0.60, base=svm)

WINNER: blend with Accuracy = 0.9911, F1-Macro = 0.9911
Saved confusion matrix to /kaggle/working/Ensemble Model All Version/Version2/cm_blend.png
Saved ROC curve to /kaggle/working/Ensemble Model All Version/Version2/roc_curve_blend.png
Saved PR curve to /kaggle/working/Ensemble Model All Version/Version2/pr_curve_blend.png

Saved all V2 artifacts to: /kaggle/working/Ensemble Model All Version/Ver

#### V3

In [None]:
import os
import json
import warnings
import joblib
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List
from itertools import combinations
from functools import partial

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from scipy.optimize import minimize

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Optional gradient-boosting backends: each flag is set to True only if its import succeeds.
HAS_XGB = False
HAS_LGBM = False
HAS_CAT = False
try:
    from xgboost import XGBClassifier
    HAS_XGB = True
except ImportError:
    pass
try:
    import lightgbm as lgb
    HAS_LGBM = True
except ImportError:
    pass
try:
    from catboost import CatBoostClassifier
    HAS_CAT = True
except ImportError:
    pass

# Silence UserWarning-category warnings for the rest of the cell.
warnings.filterwarnings("ignore", category=UserWarning)

# Feature table read by pd.read_csv further down in this cell.
CSV_PATH = "/kaggle/working/features_256d_efficientnet.csv"
# Directory that holds the previously saved base-model files listed in SAVED_MODELS.
ROOT_DIR = "/kaggle/working/Customized CNN"
# Output directory for V3 artifacts; created if missing.
OUT_DIR  = "/kaggle/working/Ensemble Model All Version/Version3"
os.makedirs(OUT_DIR, exist_ok=True)

# Short base-model name -> path of its serialized file.
SAVED_MODELS = {
    "svm": f"{ROOT_DIR}/SVM/svm_final_trainval.joblib",
    "xgb": f"{ROOT_DIR}/XGB/xgb_final_trainval.joblib",
    "rf" : f"{ROOT_DIR}/RF/rf_final_trainval.joblib",
    "knn": f"{ROOT_DIR}/KNN/knn_final_trainval.joblib",
    "cat": f"{ROOT_DIR}/CAT/cat_final_trainval.cbm",
}

# Seed passed as random_state to the train/test split below.
rng = 42

def setup_plot_style():
    """Install the common bold Times New Roman font defaults for every plot."""
    overrides = [
        ("font.family", "Times New Roman"),
        ("font.size", 12),
        ("axes.labelsize", 16),
        ("axes.titlesize", 18),
        ("font.weight", "bold"),
        ("axes.labelweight", "bold"),
    ]
    plt.rcParams.update(dict(overrides))

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Plot a confusion matrix as an annotated heatmap and write it to ``output_path``.

    Args:
        cm: Confusion-matrix counts; rows are labelled "Actual", columns "Predicted".
        classes: Class names for both axes.
        output_path: Destination image path.
        title: Figure title.
    """
    setup_plot_style()
    counts = pd.DataFrame(cm, index=classes, columns=classes)

    def split_label(raw):
        # Put everything after the first word on a second line; single words are unchanged.
        tokens = str(raw).split()
        if len(tokens) <= 1:
            return raw
        return tokens[0] + "\n" + " ".join(tokens[1:])

    axis_labels = [split_label(column) for column in counts.columns]
    teal_map = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        counts,
        annot=True, fmt="d",
        cmap=teal_map, cbar=True,
        xticklabels=axis_labels, yticklabels=axis_labels,
        linewidths=1, linecolor="white",
        annot_kws={"fontsize": 14, "weight": "bold"},
        ax=ax,
    )
    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    every_tick = ax.get_xticklabels() + ax.get_yticklabels()
    for tick in every_tick:
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Write per-class ROC and precision-recall plots for one set of predictions.

    Args:
        y_true_bin: Binarized true labels; column ``i`` is read for ``classes[i]``.
        y_pred_proba: Predicted scores; column ``i`` is read for ``classes[i]``.
        classes: Class names used in the legends.
        out_dir: Directory the two PNG files are written to.
        prefix: Tag appended to both plot titles and both file names.
    """
    setup_plot_style()

    # ---- ROC ----
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    for idx, label in enumerate(classes):
        truth = y_true_bin[:, idx]
        score = y_pred_proba[:, idx]
        fpr, tpr, _ = roc_curve(truth, score)
        area = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{label} (AUC = {area:0.2f})')
    # Chance-level diagonal for reference.
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(f'Receiver Operating Characteristic (ROC) - {prefix}')
    ax_roc.legend(loc="lower right", fontsize=10)
    fig_roc.tight_layout()
    roc_target = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    fig_roc.savefig(roc_target, dpi=600, bbox_inches="tight")
    plt.close(fig_roc)
    print(f"Saved ROC curve to {roc_target}")

    # ---- Precision-recall ----
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    for idx, label in enumerate(classes):
        truth = y_true_bin[:, idx]
        score = y_pred_proba[:, idx]
        prec, rec, _ = precision_recall_curve(truth, score)
        ax_pr.plot(rec, prec, lw=2, label=f'{label}')
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve - {prefix}')
    ax_pr.legend(loc="best", fontsize=10)
    fig_pr.tight_layout()
    pr_target = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    fig_pr.savefig(pr_target, dpi=600, bbox_inches="tight")
    plt.close(fig_pr)
    print(f"Saved PR curve to {pr_target}")

# Load the feature table; feature columns are named "f" followed only by digits.
df = pd.read_csv(CSV_PATH)
feat_cols = [c for c in df.columns if c.startswith("f") and c[1:].isdigit()]
if not feat_cols: raise RuntimeError("No feature columns (f0, f1, ...) found.")
X = df[feat_cols].values.astype(np.float32)
y = df["class_idx"].values.astype(int)
# Distinct label strings in order of first appearance after sorting rows by class_idx.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)

# Stratified 80/20 split into a train/val pool and a held-out test set.
X_trval, X_test, y_trval, y_test = train_test_split(X, y, test_size=0.20, stratify=y, random_state=rng)
print("Split sizes:", {"trainval": len(y_trval), "test": len(y_test)})

def softmax(z: np.ndarray) -> np.ndarray:
    """Return the row-wise softmax of a 2-D score array.

    The per-row maximum is subtracted before exponentiation so that large
    scores do not overflow.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    weights = np.exp(z - row_max)
    return weights / weights.sum(axis=1, keepdims=True)

def predict_logits(model, Xs) -> np.ndarray:
    """Return logit-like per-class scores for ``Xs`` from ``model``.

    Preference order:
      1. ``decision_function`` scores (a 1-D margin is expanded to ``[-d, d]``),
      2. log of ``predict_proba`` clipped to ``[1e-12, 1]``,
      3. hard predictions encoded as +10 for the predicted class, -10 elsewhere
         (width taken from the module-level ``n_classes``).
    """
    if hasattr(model, "decision_function"):
        scores = model.decision_function(Xs)
        if scores.ndim == 1:
            # Binary margin: build explicit columns for both classes.
            scores = np.stack([-scores, scores], axis=1)
        return np.asarray(scores, dtype=np.float64)

    if hasattr(model, "predict_proba"):
        clipped = np.clip(model.predict_proba(Xs), 1e-12, 1.0)
        return np.log(clipped)

    n_rows = Xs.shape[0]
    hard_labels = model.predict(Xs)
    logits = np.full((n_rows, n_classes), -10.0, dtype=np.float64)
    logits[np.arange(n_rows), hard_labels] = 10.0
    return logits

def predict_proba_safe(model, Xs) -> np.ndarray:
    """Return class probabilities for ``Xs`` as a float64 array.

    Uses ``model.predict_proba`` when the model exposes it; otherwise falls
    back to a softmax over ``predict_logits(model, Xs)``.
    """
    if not hasattr(model, "predict_proba"):
        return softmax(predict_logits(model, Xs))
    return np.asarray(model.predict_proba(Xs), dtype=np.float64)

def entropy_margin(probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute per-row uncertainty features from a probability matrix.

    Returns:
        (entropy, margin), each shaped (n_rows, 1). Entropy is the Shannon
        entropy of the clipped probabilities; margin is the gap between the
        largest and second-largest probability in the row.
    """
    clipped = np.clip(probs, 1e-12, 1.0)
    entropy = -np.sum(clipped * np.log(clipped), axis=1, keepdims=True)
    # After partitioning, the last two columns hold the two largest values
    # (second-largest first, largest last).
    two_largest = np.partition(clipped, -2, axis=1)[:, -2:]
    runner_up = two_largest[:, 0]
    best = two_largest[:, 1]
    margin = (best - runner_up).reshape(-1, 1)
    return entropy, margin

def js_divergence(p: np.ndarray, q: np.ndarray) -> np.ndarray:
    """Row-wise Jensen-Shannon divergence between two probability matrices.

    Both inputs are clipped to ``[1e-12, 1]`` before taking logs. The result
    is a column vector with one value per row (natural-log units).
    """
    p = np.clip(p, 1e-12, 1.0)
    q = np.clip(q, 1e-12, 1.0)
    log_mid = np.log(0.5 * (p + q))
    kl_p_to_mid = np.sum(p * (np.log(p) - log_mid), axis=1)
    kl_q_to_mid = np.sum(q * (np.log(q) - log_mid), axis=1)
    return (0.5 * (kl_p_to_mid + kl_q_to_mid)).reshape(-1, 1)

def disagreement_count(prob_blocks: Dict[str, np.ndarray]) -> np.ndarray:
    """Count, per sample, how many base models disagree with the majority vote.

    Args:
        prob_blocks: mapping of model name to a probability matrix; every
            matrix must have the same number of rows.

    Returns:
        float64 array of shape (n_rows, 1). Each entry is the number of models
        whose argmax prediction differs from the most frequent prediction for
        that row (ties go to the smallest label, which does not affect the
        count).
    """
    votes = np.stack([np.argmax(block, axis=1) for block in prob_blocks.values()], axis=1)
    n_models = votes.shape[1]
    # Models that disagree with the mode = total models - size of the largest
    # voting group, so a single np.unique call per row is enough.
    counts = [n_models - np.unique(row, return_counts=True)[1].max() for row in votes]
    # reshape keeps the result 2-D even when there are zero rows, so it can
    # always be hstack-ed with the other meta-feature columns.
    return np.asarray(counts, dtype=np.float64).reshape(-1, 1)

def temperature_from_logits(logits: np.ndarray, y_true: np.ndarray) -> float:
    """Fit a single softmax temperature that minimises log-loss on ``logits``.

    The search starts at T=1 and is bounded to [0.2, 5.0]; the returned value
    is clipped to the same interval.
    """
    label_ids = np.arange(n_classes)

    def nll(T):
        scaled_probs = softmax(logits / float(T[0]))
        return log_loss(y_true, scaled_probs, labels=label_ids)

    r = minimize(nll, x0=[1.0], bounds=[(0.2, 5.0)], method="L-BFGS-B")
    best_t = r.x[0]
    return float(np.clip(best_t, 0.2, 5.0))

def make_base(name: str):
    """Build a fresh, unfitted base estimator for the given short name.

    Supported names: "svm", "rf", "knn", plus "xgb", "cat" and "lgbm" when the
    corresponding HAS_* flag is set.

    Raises:
        ValueError: if the name is unknown or its optional library is missing.
    """
    if name == "svm":
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf", SVC(C=8.0, kernel="rbf", probability=True, random_state=rng)),
        ])
    if name == "rf":
        return RandomForestClassifier(n_estimators=600, n_jobs=-1, random_state=rng)
    if name == "xgb" and HAS_XGB:
        return XGBClassifier(
            objective="multi:softprob", num_class=n_classes, tree_method="hist",
            n_estimators=800, learning_rate=0.05, max_depth=6,
            subsample=0.9, colsample_bytree=0.9, random_state=rng, n_jobs=-1,
        )
    if name == "knn":
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf", KNeighborsClassifier(n_neighbors=5, weights="distance")),
        ])
    if name == "cat" and HAS_CAT:
        return CatBoostClassifier(
            loss_function="MultiClass", iterations=800, depth=6,
            learning_rate=0.06, random_seed=rng, verbose=False,
        )
    if name == "lgbm" and HAS_LGBM:
        return lgb.LGBMClassifier(
            objective="multiclass", num_class=n_classes, n_estimators=900,
            learning_rate=0.05, subsample=0.9, colsample_bytree=0.9,
            random_state=rng, n_jobs=-1,
        )
    raise ValueError(f"Unknown or unavailable base model: {name}")

def try_load_or_build(path: str, name: str):
    """Load a saved base model from ``path``, or build a fresh one.

    ``.cbm`` files are loaded through a new ``make_base(name)`` instance's
    ``load_model``; everything else goes through ``joblib.load``. If the path
    is empty, missing, or loading fails, an unfitted ``make_base(name)`` is
    returned instead.

    NOTE(review): ``joblib.load`` unpickles arbitrary objects; only point this
    at trusted files.
    """
    if not path or not os.path.exists(path):
        return make_base(name)

    try:
        if path.endswith('.cbm'):
            model_obj = make_base(name).load_model(path)
        else:
            model_obj = joblib.load(path)
        print(f"Successfully loaded '{name}' from file.")
        return model_obj
    except Exception as e:
        # Best-effort: report the failure and fall through to a fresh model.
        print(f"Failed to load '{name}' from {path} (Reason: {e}). Retraining from scratch.")
    return make_base(name)

# Base learners: three always-available models plus the optional boosters.
base_names: List[str] = ["svm", "rf", "knn"]
if HAS_XGB:
    base_names.insert(1, "xgb")
if HAS_CAT:
    base_names.append("cat")
if HAS_LGBM:
    base_names.append("lgbm")

# Load each base model from disk when a saved file is registered for it,
# otherwise fall back to a fresh estimator.
# NOTE(review): SAVED_MODELS is not defined in the visible part of this cell;
# presumably a name -> path mapping set earlier - confirm.
bases: Dict[str, object] = {}
for b_name in base_names:
    model_path = SAVED_MODELS.get(b_name, "")
    est = try_load_or_build(model_path, b_name)
    # Saved bundles may be dicts wrapping the estimator under "model".
    if isinstance(est, dict) and "model" in est:
        bases[b_name] = est["model"]
    else:
        bases[b_name] = est
print("Using base models:", list(bases.keys()))

# 5-fold stratified CV used to produce out-of-fold (OOF) predictions for
# every base model on the train/validation portion.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng)
oof_probs = {b: np.zeros((len(y_trval), n_classes)) for b in bases}
oof_logits = {b: np.zeros((len(y_trval), n_classes)) for b in bases}

print("Starting Out-of-Fold predictions...")
for k, (tr_idx, va_idx) in enumerate(skf.split(X_trval, y_trval), 1):
    Xtr, Xva = X_trval[tr_idx], X_trval[va_idx]
    ytr = y_trval[tr_idx]
    # Only the model names are needed here: a fresh estimator is built per
    # fold, so the objects stored in `bases` are intentionally not touched.
    for b_name in bases:
        est = make_base(b_name)  # Retrain fresh model each fold
        est.fit(Xtr, ytr)
        oof_probs[b_name][va_idx] = predict_proba_safe(est, Xva)
        oof_logits[b_name][va_idx] = predict_logits(est, Xva)
    print(f"Fold {k} complete.")

# Only these base models get a fitted temperature; all others keep T = 1.0.
CALIBRATE = {"svm", "knn"}
temperatures = {
    model_key: (
        temperature_from_logits(oof_logits[model_key], y_trval)
        if model_key in CALIBRATE
        else 1.0
    )
    for model_key in bases
}
print("Optimal Temperatures:", temperatures)
# Calibrated OOF probabilities: softmax of the temperature-scaled logits.
oof_probs_cal = {
    model_key: softmax(oof_logits[model_key] / temp)
    for model_key, temp in temperatures.items()
}

def make_meta_features(prob_blocks: Dict[str, np.ndarray]) -> np.ndarray:
    """Assemble the meta-learner input matrix from per-model probabilities.

    Column layout (models taken in sorted-name order):
      1. every model's probability matrix,
      2. entropy and top-2 margin per model,
      3. one disagreement-count column,
      4. one JS-divergence column per unordered model pair.
    """
    ordered = sorted(prob_blocks.keys())

    columns = [prob_blocks[key] for key in ordered]
    for key in ordered:
        columns.extend(entropy_margin(prob_blocks[key]))
    columns.append(disagreement_count(prob_blocks))
    columns.extend(
        js_divergence(prob_blocks[left], prob_blocks[right])
        for left, right in combinations(ordered, 2)
    )
    return np.hstack(columns)

X_meta_oof = make_meta_features(oof_probs_cal)
print(f"OOF Meta-feature shape: {X_meta_oof.shape}")

# Candidate meta-learners trained on the OOF meta-features.
meta_learners = {
    "logreg": LogisticRegression(multi_class="multinomial", solver="lbfgs", C=1.0, random_state=rng, n_jobs=-1, max_iter=1000),
    "mlp": MLPClassifier(hidden_layer_sizes=(128, 64), activation="relu", solver="adam", random_state=rng, max_iter=500)
}
if HAS_XGB: meta_learners["xgb"] = XGBClassifier(objective="multi:softprob", num_class=n_classes, n_estimators=500, learning_rate=0.05, max_depth=4, random_state=rng, n_jobs=-1)

# Local import: this cell's visible import list does not include it.
from sklearn.model_selection import cross_val_predict

oof_meta_acc = {}
for name, model in meta_learners.items():
    # Score each meta-learner on predictions for rows it was NOT fitted on.
    # Scoring `model.predict(X_meta_oof)` right after `model.fit(X_meta_oof)`
    # measures training accuracy and always favours the most flexible
    # (most overfitted) learner.
    oof_preds = cross_val_predict(model, X_meta_oof, y_trval, cv=skf)
    acc = accuracy_score(y_trval, oof_preds)
    oof_meta_acc[name] = acc
    print(f"OOF Meta-learner '{name}' accuracy: {acc:.4f}")
    # Final fit on all OOF rows so the learner can be used for test prediction.
    model.fit(X_meta_oof, y_trval)
best_meta_name = max(oof_meta_acc, key=oof_meta_acc.get)
best_meta_model = meta_learners[best_meta_name]
print(f"Best OOF meta-learner: '{best_meta_name}'")

def nll_blend_loss(weights, prob_blocks, y_true):
    """Log-loss of the weighted sum of per-model probability matrices.

    ``weights`` are matched to models in sorted-name order of ``prob_blocks``.
    """
    ordered_names = sorted(prob_blocks.keys())
    blended = 0
    for weight, model_name in zip(weights, ordered_names):
        blended = blended + weight * prob_blocks[model_name]
    return log_loss(y_true, blended, labels=np.arange(n_classes))

# Optimise convex blend weights (non-negative, summing to 1) on the calibrated
# OOF probabilities, starting from a uniform blend.
n_bases = len(bases)
initial_weights = np.ones(n_bases) / n_bases
bounds = [(0, 1) for _ in range(n_bases)]
constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
res = minimize(
    nll_blend_loss,
    initial_weights,
    args=(oof_probs_cal, y_trval),
    method='SLSQP',
    bounds=bounds,
    constraints=constraints,
)
blend_weights = res.x
# Same sorted order that nll_blend_loss uses to pair weights with models.
blend_names = sorted(bases.keys())
print("Optimized Blend Weights:", {n: f"{w:.4f}" for n, w in zip(blend_names, blend_weights)})

print("Refitting base models on full trainval set for test prediction...")
# Refit FRESH estimators from make_base() instead of re-fitting whatever was
# loaded from disk:
#   * the temperatures and meta-features above were derived from make_base()
#     estimators, so the test-time models should share that configuration;
#   * a model restored from file keeps its saved training parameters. The
#     traceback captured below this cell ("use_best_model ... provide
#     non-empty 'eval_set'") is consistent with re-fitting a loaded CatBoost
#     model - presumably the cause, confirm.
# The fit is done in an explicit loop rather than as a side effect inside a
# dict comprehension.
for b in list(bases):
    fresh_est = make_base(b)
    fresh_est.fit(X_trval, y_trval)
    bases[b] = fresh_est
test_probs = {b: predict_proba_safe(bases[b], X_test) for b in bases}
test_logits = {b: predict_logits(bases[b], X_test) for b in bases}
# Apply the temperatures fitted on the OOF logits to the test logits.
test_probs_cal = {b: softmax(test_logits[b] / temperatures[b]) for b in bases}

X_meta_test = make_meta_features(test_probs_cal)

# --- Candidate 1: the selected meta-learner on the test meta-features. ---
# argmax column indices are used directly as class labels (assumes the model's
# classes are 0..n_classes-1 in order - TODO confirm).
ptest_meta = best_meta_model.predict_proba(X_meta_test)
pred_meta = ptest_meta.argmax(1)
acc_meta = accuracy_score(y_test, pred_meta)
f1_meta = f1_score(y_test, pred_meta, average="macro")

# --- Candidate 2: weighted blend of calibrated base-model probabilities. ---
ptest_blend = sum(w * test_probs_cal[n] for w, n in zip(blend_weights, blend_names))
pred_blend = ptest_blend.argmax(1)
acc_blend = accuracy_score(y_test, pred_blend)
f1_blend = f1_score(y_test, pred_blend, average="macro")

# --- Candidate 3: convex mix lam * meta + (1 - lam) * blend. ---
# lam is grid-searched over 21 values in [0, 1] by accuracy on the trainval
# rows; only a strictly better accuracy replaces the current best, so ties
# keep the smaller lam.
# NOTE(review): p_meta_oof_preds is predict_proba on X_meta_oof, the same
# matrix best_meta_model was fitted on, so this search sees in-sample meta
# predictions and will tend to push lam towards the meta-learner.
lams, best_lam, best_mix_acc = np.linspace(0, 1, 21), 0.0, -1.0
p_meta_oof_preds = best_meta_model.predict_proba(X_meta_oof)
p_blend_oof = sum(w * oof_probs_cal[n] for w, n in zip(blend_weights, blend_names))
for lam in lams:
    acc = accuracy_score(y_trval, (lam * p_meta_oof_preds + (1 - lam) * p_blend_oof).argmax(1))
    if acc > best_mix_acc: best_mix_acc, best_lam = acc, lam
ptest_mix = best_lam * ptest_meta + (1 - best_lam) * ptest_blend
pred_mix = ptest_mix.argmax(1)
acc_mix = accuracy_score(y_test, pred_mix)
f1_mix = f1_score(y_test, pred_mix, average="macro")


# Each candidate maps to (test accuracy, test macro-F1, predictions, probabilities).
candidates = {
    best_meta_name: (acc_meta, f1_meta, pred_meta, ptest_meta),
    "blend": (acc_blend, f1_blend, pred_blend, ptest_blend),
    f"mix_{best_meta_name}_blend": (acc_mix, f1_mix, pred_mix, ptest_mix)
}
# NOTE(review): the winner is picked by TEST accuracy, so the reported winner
# metrics are selected on the test set and are optimistically biased.
winner_name = max(candidates.items(), key=lambda kv: kv[1][0])[0]
w_acc, w_f1, w_pred, w_proba = candidates[winner_name]

print(f"\n--- TEST RESULTS ---")
print(f"{best_meta_name}: acc={acc_meta:.4f}, f1_macro={f1_meta:.4f}")
print(f"Blend: acc={acc_blend:.4f}, f1_macro={f1_blend:.4f}")
print(f"Mix: acc={acc_mix:.4f}, f1_macro={f1_mix:.4f} (λ={best_lam:.2f})")
print(f"\nWINNER: {winner_name} with Accuracy = {w_acc:.4f}, F1-Macro = {w_f1:.4f}")

# Per-class report for the winner, written as CSV.
rep = classification_report(y_test, w_pred, target_names=classes, output_dict=True)
pd.DataFrame(rep).transpose().to_csv(os.path.join(OUT_DIR, f"report_{winner_name}.csv"))

# Confusion matrix plot for the winner.
cm = confusion_matrix(y_test, w_pred)
plot_confusion_matrix(cm, classes, os.path.join(OUT_DIR, f"cm_{winner_name}.png"), f"Confusion Matrix - {winner_name}")

# One-vs-rest ROC / PR curves need the labels in one-hot form.
y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, w_proba, classes, OUT_DIR, winner_name)

# Persist a JSON summary and the fitted meta-learner next to the plots.
summary = {
    "ensemble_version": "V3", "winner": winner_name,
    "test_metrics": {k: {"acc": float(v[0]), "f1_macro": float(v[1])} for k, v in candidates.items()},
    "temperatures": temperatures,
    "blend_weights": {n: w for n, w in zip(blend_names, blend_weights)},
    "mix_lambda": float(best_lam)
}
with open(os.path.join(OUT_DIR, "summary_v3.json"), "w") as f: json.dump(summary, f, indent=4)
joblib.dump(best_meta_model, os.path.join(OUT_DIR, f"meta_model_{best_meta_name}_v3.joblib"))
print(f"\nSaved all V3 artifacts to: {OUT_DIR}")


Split sizes: {'trainval': 160, 'test': 40}
Successfully loaded 'svm' from file.
Successfully loaded 'xgb' from file.
Successfully loaded 'rf' from file.
Successfully loaded 'knn' from file.
Successfully loaded 'cat' from file.
Using base models: ['svm', 'xgb', 'rf', 'knn', 'cat', 'lgbm']
Starting Out-of-Fold predictions...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001011 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11264
[LightGBM] [Info] Number of data points in the train set: 128, number of used features: 256
[LightGBM] [Info] Start training from score -1.519826
[LightGBM] [Info] Start training from score -1.556193
[LightGBM] [Info] Start training from score -1.673976
[LightGBM] [Info] Start training from score -1.673976
[LightGBM] [Info] Start training from score -1.633154
Fold 1 complete.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000934 seco

CatBoostError: To employ param {'use_best_model': True} provide non-empty 'eval_set'.

#### V4

In [None]:
import os
import json
import warnings
import joblib
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List
from itertools import combinations
from functools import partial

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Optional libs
HAS_XGB = False
HAS_LGBM = False
HAS_CAT = False
try:
    from xgboost import XGBClassifier
    HAS_XGB = True
except ImportError: pass
try:
    import lightgbm as lgb
    HAS_LGBM = True
except ImportError: pass
try:
    from catboost import CatBoostClassifier
    HAS_CAT = True
except ImportError: pass

warnings.filterwarnings("ignore", category=UserWarning)

# Resolve the feature CSV. The path that is checked is the path that is read:
# prefer the Kaggle working-directory file, then a copy in the current
# directory, and only synthesise a dummy dataset when neither exists.
# (Checking the relative path but reading the absolute one silently trained on
# synthetic data whenever the cwd was not /kaggle/working.)
_KAGGLE_CSV = "/kaggle/working/features_256d_efficientnet.csv"
_LOCAL_CSV = "features_256d_efficientnet.csv"

if os.path.exists(_KAGGLE_CSV):
    CSV_PATH = _KAGGLE_CSV
elif os.path.exists(_LOCAL_CSV):
    CSV_PATH = _LOCAL_CSV
else:
    from sklearn.datasets import make_classification
    print("Creating dummy dataset...")
    # Synthetic 4-class stand-in so the cell can be smoke-tested without the
    # real features; results produced from it are not meaningful.
    X_dummy, y_dummy = make_classification(n_samples=1000, n_features=256, n_informative=20, n_redundant=10, n_classes=4, n_clusters_per_class=2, random_state=42)
    dummy_df = pd.DataFrame(X_dummy, columns=[f'f{i}' for i in range(256)])
    dummy_df['class_idx'] = y_dummy
    class_map = {0: 'Class A', 1: 'Class B', 2: 'Class C', 3: 'Class D'}
    dummy_df['label'] = dummy_df['class_idx'].map(class_map)
    dummy_df.to_csv(_LOCAL_CSV, index=False)
    CSV_PATH = _LOCAL_CSV


# NOTE(review): other cells in this notebook write under
# "Ensemble Model All Version" (with spaces); this one uses underscores.
OUT_DIR  = "/kaggle/working/Ensemble_Model_All_Version/Version4"
os.makedirs(OUT_DIR, exist_ok=True)
rng = 42

def setup_plot_style():
    """Apply the shared bold Times New Roman font settings to matplotlib."""
    style = {
        "font.family": "Times New Roman",
        "font.size": 12,
        "axes.labelsize": 16,
        "axes.titlesize": 18,
        "font.weight": "bold",
        "axes.labelweight": "bold",
    }
    plt.rcParams.update(style)

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Draw ``cm`` as an annotated teal heatmap and save it to ``output_path``.

    Multi-word class names are broken after the first word so tick labels stay
    narrow. The figure is saved at 600 dpi and closed afterwards.
    """
    setup_plot_style()
    cm_df = pd.DataFrame(cm, index=classes, columns=classes)

    def wrap(lbl):
        words = str(lbl).split()
        if len(words) <= 1:
            return lbl
        return words[0] + "\n" + " ".join(words[1:])

    labels = [wrap(col) for col in cm_df.columns]
    cmap_teal = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_kwargs = dict(
        annot=True, fmt="d", cmap=cmap_teal, cbar=True,
        xticklabels=labels, yticklabels=labels, linewidths=1,
        linecolor="white", annot_kws={"fontsize": 14, "weight": "bold"}, ax=ax,
    )
    sns.heatmap(cm_df, **heatmap_kwargs)

    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Save one-vs-rest ROC and precision-recall plots for every class.

    Column ``i`` of ``y_true_bin`` / ``y_pred_proba`` is paired with
    ``classes[i]``. Writes ``roc_curve_<prefix>.png`` and
    ``pr_curve_<prefix>.png`` into ``out_dir`` at 600 dpi.
    """
    setup_plot_style()

    # ---- ROC curves ----
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    for col, cls_name in enumerate(classes):
        fpr, tpr, _ = roc_curve(y_true_bin[:, col], y_pred_proba[:, col])
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{cls_name} (AUC = {roc_auc:0.2f})')
    # Chance diagonal for reference.
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(f'Receiver Operating Characteristic (ROC) - {prefix}')
    ax_roc.legend(loc="lower right", fontsize=10)
    fig_roc.tight_layout()
    roc_path = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    fig_roc.savefig(roc_path, dpi=600, bbox_inches="tight")
    plt.close(fig_roc)
    print(f"Saved ROC curve to {roc_path}")

    # ---- Precision-recall curves ----
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    for col, cls_name in enumerate(classes):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, col], y_pred_proba[:, col])
        ax_pr.plot(recall, precision, lw=2, label=f'{cls_name}')
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve - {prefix}')
    ax_pr.legend(loc="best", fontsize=10)
    fig_pr.tight_layout()
    pr_path = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    fig_pr.savefig(pr_path, dpi=600, bbox_inches="tight")
    plt.close(fig_pr)
    print(f"Saved PR curve to {pr_path}")

# Read the feature table; inputs are the columns named "f<digits>".
df = pd.read_csv(CSV_PATH)
feat_cols = [
    col for col in df.columns
    if col.startswith("f") and col[1:].isdigit()
]
if not feat_cols:
    raise RuntimeError("No feature columns found.")

X = df[feat_cols].values.astype(np.float32)
y = df["class_idx"].values.astype(int)
# Class names in ascending class-index order.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)

# Stratified 80/20 split into train/validation and held-out test parts.
X_trval, X_test, y_trval, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=rng
)
print("Split sizes:", {"trainval": len(y_trval), "test": len(y_test)})

def softmax(z: np.ndarray) -> np.ndarray:
    """Numerically stable softmax applied independently to each row of ``z``."""
    exps = np.exp(z - np.max(z, axis=1, keepdims=True))
    totals = np.sum(exps, axis=1, keepdims=True)
    return exps / totals

def predict_logits(model, Xs) -> np.ndarray:
    """Return logit-like per-class scores for ``Xs``.

    Uses ``decision_function`` when present (a 1-D margin becomes two columns
    ``[-d, d]``), else the log of clipped ``predict_proba``, else a +/-10
    encoding of the hard predictions sized by the module-level ``n_classes``.
    """
    has_margin = hasattr(model, "decision_function")
    if has_margin:
        margin = model.decision_function(Xs)
        if margin.ndim == 1:
            margin = np.stack([-margin, margin], axis=1)
        return np.asarray(margin, dtype=np.float64)

    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(Xs)
        return np.log(np.clip(proba, 1e-12, 1.0))

    # Last resort: only hard labels are available.
    row_count = Xs.shape[0]
    labels = model.predict(Xs)
    encoded = np.full((row_count, n_classes), -10.0)
    encoded[np.arange(row_count), labels] = 10.0
    return encoded

def entropy_margin(probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return (entropy, top-2 margin) columns for a probability matrix.

    Probabilities are clipped to ``[1e-12, 1]`` first; both outputs have shape
    (n_rows, 1).
    """
    safe = np.clip(probs, 1e-12, 1.0)
    plogp = safe * np.log(safe)
    entropy = -plogp.sum(axis=1, keepdims=True)

    # The two right-most columns after partitioning are the two largest
    # probabilities of each row, in ascending order.
    top_pair = np.partition(safe, -2, axis=1)[:, -2:]
    margin = (top_pair[:, 1] - top_pair[:, 0]).reshape(-1, 1)
    return entropy, margin

def js_divergence(p: np.ndarray, q: np.ndarray) -> np.ndarray:
    """Jensen-Shannon divergence between matching rows of ``p`` and ``q``.

    Inputs are clipped to ``[1e-12, 1]``; the result is an (n_rows, 1) column.
    """
    p = np.clip(p, 1e-12, 1.0)
    q = np.clip(q, 1e-12, 1.0)
    mixture = 0.5 * (p + q)
    log_mixture = np.log(mixture)

    halves = []
    for dist in (p, q):
        # KL(dist || mixture), summed over classes.
        halves.append((dist * (np.log(dist) - log_mixture)).sum(axis=1))
    return (0.5 * (halves[0] + halves[1])).reshape(-1,1)

def disagreement_count(prob_blocks: Dict[str, np.ndarray]) -> np.ndarray:
    """Count, per sample, how many base models disagree with the majority vote.

    Args:
        prob_blocks: mapping of model name to a probability matrix; all
            matrices must share the same number of rows.

    Returns:
        float64 array of shape (n_rows, 1) holding, for each row, the number
        of models whose argmax prediction differs from the most common one.
    """
    votes = np.stack([np.argmax(block, axis=1) for block in prob_blocks.values()], axis=1)
    n_models = votes.shape[1]
    # Disagreeing models = all models minus the size of the largest voting
    # group; this needs one np.unique call per row instead of two.
    counts = [n_models - np.unique(row, return_counts=True)[1].max() for row in votes]
    # Always 2-D (also for zero rows) so the column can be hstack-ed safely.
    return np.asarray(counts, dtype=np.float64).reshape(-1, 1)

def fit_temperature(logits: np.ndarray, y_true: np.ndarray) -> float:
    """Find the softmax temperature in [0.2, 5.0] that minimises log-loss.

    Optimisation starts from T=1 using bounded L-BFGS-B; the optimum is
    clipped to the bounds before being returned.
    """
    all_labels = np.arange(n_classes)

    def nll(T):
        temperature = float(T[0])
        return log_loss(y_true, softmax(logits / temperature), labels=all_labels)

    res = minimize(nll, x0=[1.0], bounds=[(0.2, 5.0)], method="L-BFGS-B")
    return float(np.clip(res.x[0], 0.2, 5.0))

def make_base(name: str):
    """Create a new, unfitted base estimator identified by ``name``.

    "svm", "rf" and "knn" are always available; "xgb", "cat" and "lgbm" only
    when the matching HAS_* flag is true.

    Raises:
        ValueError: for an unknown name or an unavailable optional library.
    """
    if name == "svm":
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf", SVC(C=8.0, kernel="rbf", probability=True, random_state=rng)),
        ])
    if name == "rf":
        return RandomForestClassifier(n_estimators=800, n_jobs=-1, random_state=rng)
    if name == "xgb" and HAS_XGB:
        return XGBClassifier(
            objective="multi:softprob", num_class=n_classes, tree_method="hist",
            n_estimators=900, learning_rate=0.05, max_depth=6,
            subsample=0.9, colsample_bytree=0.9, random_state=rng, n_jobs=-1,
        )
    if name == "knn":
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf", KNeighborsClassifier(n_neighbors=5, weights="distance")),
        ])
    if name == "cat" and HAS_CAT:
        return CatBoostClassifier(
            loss_function="MultiClass", iterations=900, depth=6,
            learning_rate=0.06, random_seed=rng, verbose=False,
        )
    if name == "lgbm" and HAS_LGBM:
        return lgb.LGBMClassifier(
            objective="multiclass", num_class=n_classes, n_estimators=900,
            learning_rate=0.05, subsample=0.9, colsample_bytree=0.9,
            random_state=rng, n_jobs=-1,
        )
    raise ValueError(f"Unknown or unavailable base model: {name}")

# Base learners: the always-available trio plus whichever boosters imported.
base_names: List[str] = ["svm", "rf", "knn"]
if HAS_XGB:
    base_names.insert(1, "xgb")
if HAS_CAT:
    base_names.append("cat")
if HAS_LGBM:
    base_names.append("lgbm")
print("Using base models:", base_names)

# Out-of-fold buffers, one (n_trainval, n_classes) matrix per base model.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng)
oof_probs = {model_key: np.zeros((len(y_trval), n_classes)) for model_key in base_names}
oof_logits = {model_key: np.zeros((len(y_trval), n_classes)) for model_key in base_names}

print("Starting Out-of-Fold predictions...")
for k, (tr_idx, va_idx) in enumerate(skf.split(X_trval, y_trval), 1):
    Xtr = X_trval[tr_idx]
    Xva = X_trval[va_idx]
    ytr = y_trval[tr_idx]
    for b_name in base_names:
        print(f"  > Fitting {b_name} on fold {k}...")
        est = make_base(b_name)
        est.fit(Xtr, ytr)
        oof_logits[b_name][va_idx] = predict_logits(est, Xva)
        # Probabilities are derived from the stored logits so both buffers
        # stay consistent with each other.
        oof_probs[b_name][va_idx] = softmax(oof_logits[b_name][va_idx])
    print(f"Fold {k} complete.")

# One temperature per base model, fitted on its OOF logits.
temperatures = {model_key: fit_temperature(oof_logits[model_key], y_trval) for model_key in base_names}
print("Optimal Temperatures:", temperatures)
oof_probs_cal = {
    model_key: softmax(oof_logits[model_key] / temp)
    for model_key, temp in temperatures.items()
}

def build_meta_features(prob_map: Dict[str, np.ndarray]) -> np.ndarray:
    """Stack the meta-learner input matrix from per-model probabilities.

    For every model in ``base_names`` order: its probabilities, entropy,
    top-2 margin, maximum probability and argmax (as float). These are
    followed by one JS-divergence column per model pair and a final
    disagreement-count column.
    """
    blocks = []
    for model_key in base_names:
        probs = prob_map[model_key]
        entropy, margin = entropy_margin(probs)
        confidence = probs.max(axis=1, keepdims=True)
        top_class = np.argmax(probs, axis=1).reshape(-1,1).astype(float)
        blocks += [probs, entropy, margin, confidence, top_class]

    blocks += [
        js_divergence(prob_map[left], prob_map[right])
        for left, right in combinations(base_names, 2)
    ]
    blocks.append(disagreement_count(prob_map))
    return np.hstack(blocks)

# Meta-features built from the calibrated out-of-fold probabilities.
X_meta_oof = build_meta_features(oof_probs_cal)
print(f"OOF Meta-feature shape: {X_meta_oof.shape}")

# Hold out 10% of the OOF rows (stratified) to compare meta strategies.
# The row indices are split alongside so the same rows can be looked up in
# the per-model probability arrays later.
X_meta_tr, X_meta_val, y_meta_tr, y_meta_val, idx_meta_tr, idx_meta_val = train_test_split(
    X_meta_oof, y_trval, np.arange(len(y_trval)), test_size=0.10, stratify=y_trval, random_state=rng
)

# Each candidate is a tuple: (strategy name, validation accuracy, params, fitted model).
cands = []
print("Tuning Logistic Regression...")
for C in [0.2, 0.5, 1.0, 2.0]:
    meta = LogisticRegression(C=C, multi_class="multinomial", max_iter=1500, n_jobs=-1, random_state=rng)
    meta.fit(X_meta_tr, y_meta_tr)
    cands.append(("meta_logreg", accuracy_score(y_meta_val, meta.predict(X_meta_val)), {"C":C}, meta))

print("Tuning MLP...")
for width in [64, 128]:
    # Single hidden layer of the given width.
    meta = MLPClassifier(hidden_layer_sizes=(width,), max_iter=200, random_state=rng)
    meta.fit(X_meta_tr, y_meta_tr)
    cands.append(("meta_mlp", accuracy_score(y_meta_val, meta.predict(X_meta_val)), {"width":width}, meta))

# XGBoost is only tried when the optional import succeeded.
if HAS_XGB:
    print("Tuning XGBoost...")
    for md in [4, 6]:
        meta = XGBClassifier(objective="multi:softprob", num_class=n_classes, n_estimators=400, learning_rate=0.05, max_depth=md, random_state=rng, n_jobs=-1)
        meta.fit(X_meta_tr, y_meta_tr)
        cands.append(("meta_xgb", accuracy_score(y_meta_val, meta.predict(X_meta_val)), {"max_depth":md}, meta))

def blend_objective(w, prob_map, y_true):
    """Log-loss of a weighted blend of the base models' probabilities.

    ``w`` is clipped to [0, 1] and renormalised to sum to 1 before blending;
    weights are matched to models in ``base_names`` order.
    """
    clipped = np.clip(w, 0, 1)
    w_norm = clipped / (np.sum(clipped) + 1e-12)
    p_blend = 0
    for weight, model_key in zip(w_norm, base_names):
        p_blend = p_blend + weight * prob_map[model_key]
    return log_loss(y_true, p_blend, labels=np.arange(n_classes))

print("Finding optimal blend weights...")
# Fit the blend weights on the meta-TRAIN rows and score them on the held-out
# validation rows, exactly like the other meta candidates. Fitting and scoring
# on the same 10% validation rows would give the blend an in-sample accuracy
# and bias the comparison in its favour.
oof_probs_cal_tr = {b: p[idx_meta_tr] for b, p in oof_probs_cal.items()}
oof_probs_cal_val = {b: p[idx_meta_val] for b, p in oof_probs_cal.items()}
res = minimize(
    blend_objective,
    x0=np.ones(len(base_names)) / len(base_names),
    args=(oof_probs_cal_tr, y_meta_tr),
    method="SLSQP",
    bounds=[(0.0, 1.0)]*len(base_names),
    constraints=({'type':'eq','fun':lambda w: np.sum(w)-1.0})
)
# Apply the same clip-and-renormalise step the objective uses, so the stored
# weights are the ones that were actually optimised.
w_clipped = np.clip(res.x, 0.0, 1.0)
blend_w = w_clipped / (w_clipped.sum() + 1e-12)
p_blend_val = sum(wi * oof_probs_cal_val[b] for wi, b in zip(blend_w, base_names))
cands.append(("blend", accuracy_score(y_meta_val, p_blend_val.argmax(1)), {"weights": dict(zip(base_names, blend_w))}, None))

# Pick the candidate with the highest validation accuracy. list.sort is
# stable, so among equal accuracies the earliest-appended candidate wins.
cands.sort(key=lambda x: x[1], reverse=True)
winner_name, winner_acc, winner_params, winner_model = cands[0]
print(f"WINNING META STRATEGY ON VALIDATION: '{winner_name}' with acc={winner_acc:.5f}")

# Refit every base model on the full train/validation split, then build the
# test meta-features with the temperatures fitted on the OOF logits.
print("Refitting base models and final meta-learner...")
final_bases = {b: make_base(b).fit(X_trval, y_trval) for b in base_names}
test_logits = {b: predict_logits(est, X_test) for b, est in final_bases.items()}
test_probs_cal = {b: softmax(test_logits[b] / T) for b, T in temperatures.items()}
X_meta_test = build_meta_features(test_probs_cal)

if winner_name == "blend":
    # The blend has no model object: it is just the weighted probability sum.
    y_pred_proba = sum(wi * test_probs_cal[b] for wi, b in zip(blend_w, base_names))
    y_pred = y_pred_proba.argmax(1)
else:
    # The winning meta-learner is refitted on all OOF rows (train + validation
    # parts of the meta split) before predicting the test set.
    print(f"Retraining winning model ({winner_name}) on full OOF data...")
    winner_model.fit(X_meta_oof, y_trval)
    y_pred = winner_model.predict(X_meta_test)
    y_pred_proba = winner_model.predict_proba(X_meta_test)

test_acc = accuracy_score(y_test, y_pred)
test_f1m = f1_score(y_test, y_pred, average="macro")
print(f"\n--- FINAL TEST RESULTS ({winner_name}) ---")
print(f"Accuracy: {test_acc:.4f}, Macro F1-Score: {test_f1m:.4f}")

# Plots: confusion matrix plus one-vs-rest ROC / PR curves (labels one-hot encoded).
cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cm, classes, os.path.join(OUT_DIR, f"cm_{winner_name}.png"), f"Confusion Matrix - {winner_name} (Test)")
y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, y_pred_proba, classes, OUT_DIR, winner_name)

# JSON summary; ndarray-valued params are converted to lists for serialisation.
summary = {
    "ensemble_version": "V4", "winner": winner_name, "winner_val_acc": winner_acc,
    "winner_params": {k: (list(v) if isinstance(v, np.ndarray) else v) for k, v in winner_params.items()},
    "test_accuracy": test_acc, "test_f1_macro": test_f1m, "temperatures": temperatures
}
with open(os.path.join(OUT_DIR, "summary_v4.json"), "w") as f: json.dump(summary, f, indent=4)
# Persist either the fitted meta-learner or, for the blend, its weight dict.
if winner_name != "blend":
    joblib.dump(winner_model, os.path.join(OUT_DIR, f"meta_model_{winner_name}_v4.joblib"))
else:
    joblib.dump(winner_params, os.path.join(OUT_DIR, "blend_weights_v4.joblib"))

print(f"\nSaved all V4 artifacts to: {OUT_DIR}")

Split sizes: {'trainval': 7200, 'test': 1800}
Using base models: ['svm', 'xgb', 'rf', 'knn', 'cat', 'lgbm']
Starting Out-of-Fold predictions...
  > Fitting svm on fold 1...
  > Fitting xgb on fold 1...
  > Fitting rf on fold 1...
  > Fitting knn on fold 1...
  > Fitting cat on fold 1...
  > Fitting lgbm on fold 1...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015910 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 64168
[LightGBM] [Info] Number of data points in the train set: 5760, number of used features: 256
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
Fold 1 complete.
  > Fitting svm on fold 2...
  > Fitting xgb on fol

#### V5

In [None]:
import os
import json
import joblib
import warnings
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

try:
    from catboost import CatBoostClassifier
    HAS_CAT = True
except ImportError:
    HAS_CAT = False

warnings.filterwarnings("ignore", category=UserWarning)

# Input feature table and the output directory for this version's artifacts.
CSV_PATH = "/kaggle/working/features_256d_efficientnet.csv"
OUT_DIR  = "/kaggle/working/Ensemble Model All Version/Version5"
os.makedirs(OUT_DIR, exist_ok=True)

# Saved base models, keyed by short model name. The CatBoost entry is a
# native .cbm file; the others are joblib files.
MODEL_FILES = {
    "svm": "/kaggle/working/Customized CNN/SVM/svm_final_trainval.joblib",
    "xgb": "/kaggle/working/Customized CNN/XGB/xgb_final_trainval.joblib",
    "rf" : "/kaggle/working/Customized CNN/RF/rf_final_trainval.joblib",
    "knn": "/kaggle/working/Customized CNN/KNN/knn_final_trainval.joblib",
    "cat": "/kaggle/working/Customized CNN/CAT/cat_final_trainval.cbm",
}

# Shared seed; also seeds NumPy's global random state.
rng = 42
np.random.seed(rng)

def setup_plot_style():
    """Set the global matplotlib fonts used by every plot in this cell."""
    font_settings = {"font.family": "Times New Roman", "font.size": 12, "font.weight": "bold"}
    axes_settings = {"axes.labelsize": 16, "axes.titlesize": 18, "axes.labelweight": "bold"}
    plt.rcParams.update({**font_settings, **axes_settings})

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Render a confusion matrix heatmap and write it to ``output_path``.

    Class names with several words are split onto two lines after the first
    word. The figure is saved at 600 dpi and then closed.
    """
    setup_plot_style()
    cm_df = pd.DataFrame(cm, index=classes, columns=classes)

    def wrap(lbl):
        pieces = str(lbl).split()
        # Single-word (or empty) labels are returned untouched.
        return lbl if len(pieces) <= 1 else "\n".join([pieces[0], " ".join(pieces[1:])])

    labels = list(map(wrap, cm_df.columns))
    cmap_teal = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm_df,
        annot=True,
        fmt="d",
        cmap=cmap_teal,
        cbar=True,
        xticklabels=labels,
        yticklabels=labels,
        linewidths=1,
        linecolor="white",
        annot_kws={"fontsize": 14, "weight": "bold"},
        ax=ax,
    )
    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
    for tick_label in tick_labels:
        tick_label.set_fontweight("bold")
    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Plot per-class ROC and precision-recall curves and save both figures.

    Column ``i`` of the one-hot labels and of the probabilities belongs to
    ``classes[i]``. Output files are ``roc_curve_<prefix>.png`` and
    ``pr_curve_<prefix>.png`` inside ``out_dir``.
    """
    setup_plot_style()
    class_ids = range(len(classes))

    # ROC figure.
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    for idx in class_ids:
        truth, scores = y_true_bin[:, idx], y_pred_proba[:, idx]
        fpr, tpr, _ = roc_curve(truth, scores)
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{classes[idx]} (AUC = {roc_auc:0.2f})')
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)  # chance line
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(f'Receiver Operating Characteristic (ROC) - {prefix}')
    ax_roc.legend(loc="lower right", fontsize=10)
    fig_roc.tight_layout()
    roc_path = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    fig_roc.savefig(roc_path, dpi=600, bbox_inches="tight")
    plt.close(fig_roc)
    print(f"Saved ROC curve to {roc_path}")

    # Precision-recall figure.
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    for idx in class_ids:
        truth, scores = y_true_bin[:, idx], y_pred_proba[:, idx]
        precision, recall, _ = precision_recall_curve(truth, scores)
        ax_pr.plot(recall, precision, lw=2, label=f'{classes[idx]}')
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve - {prefix}')
    ax_pr.legend(loc="best", fontsize=10)
    fig_pr.tight_layout()
    pr_path = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    fig_pr.savefig(pr_path, dpi=600, bbox_inches="tight")
    plt.close(fig_pr)
    print(f"Saved PR curve to {pr_path}")

# Load the feature table; feature columns are the ones named "f<digits>".
df = pd.read_csv(CSV_PATH)
feat_cols = [
    col for col in df.columns
    if col.startswith("f") and col[1:].isdigit()
]
if not feat_cols:
    raise RuntimeError("No feature columns found.")

X_all = df[feat_cols].values.astype(np.float32)
y_all = df["class_idx"].values.astype(int)
# Class names listed in ascending class-index order.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)

# Stratified 80/20 split; the smaller part is the held-out test set.
X_trval, X_test, y_trval, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.20,
    stratify=y_all,
    random_state=rng,
)
print("Split sizes:", {"trainval": len(y_trval), "test": len(y_test)})

def load_bundle_safe(name: str, path: str):
    """Load a saved base model, returning ``(model, scaler)``.

    CatBoost ``.cbm`` files are loaded natively (scaler is ``None``). Other
    files go through ``joblib.load``; a dict containing a "model" key is
    unpacked into its "model" and optional "scaler" entries, anything else is
    returned as the model itself. On any failure a warning is printed and
    ``(None, None)`` is returned.

    NOTE(review): ``joblib.load`` unpickles arbitrary objects; only use it on
    trusted files.
    """
    try:
        is_native_catboost = name == "cat" and HAS_CAT and path.lower().endswith(".cbm")
        if is_native_catboost:
            cat_model = CatBoostClassifier()
            cat_model.load_model(path)
            return cat_model, None

        loaded = joblib.load(path)
        if not (isinstance(loaded, dict) and "model" in loaded):
            return loaded, None
        return loaded["model"], loaded.get("scaler")
    except Exception as e:
        # Best-effort loader: a model that cannot be read is simply skipped.
        print(f"[WARN] Could not load '{name}' from {path}: {e}")
        return None, None

def softmax(z: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D score array.

    Each row's maximum is subtracted before exponentiating so large scores do
    not overflow.
    """
    exp_shifted = np.exp(z - np.max(z, axis=1, keepdims=True))
    row_totals = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_totals

def predict_logits(model, X) -> np.ndarray:
    """Return per-class scores for ``X`` from a fitted model.

    The first available source is used:

    1. ``decision_function`` margins, as float64. A 1-D (binary) margin ``d``
       is expanded to the two columns ``[-d, d]``.
    2. ``log(predict_proba)`` with probabilities clipped to ``[1e-12, 1]``.
    3. Hard predictions turned into pseudo-logits: +10 for the predicted class
       and -10 elsewhere, with the width taken from the module-level
       ``n_classes``.
    """
    if hasattr(model, "decision_function"):
        d = model.decision_function(X)
        if d.ndim == 1:
            d = np.vstack([-d, d]).T
        return np.asarray(d, dtype=np.float64)
    if hasattr(model, "predict_proba"):
        return np.log(np.clip(model.predict_proba(X), 1e-12, 1.0))

    # Flatten and cast before indexing: an (n, 1) prediction column would
    # broadcast against arange(n) and mark an n-by-n block of cells, and float
    # labels are rejected as indices outright.
    pred = np.asarray(model.predict(X)).astype(int).ravel()
    L = np.full((X.shape[0], n_classes), -10.0)
    L[np.arange(X.shape[0]), pred] = 10.0
    return L

def tta_probs(model, X, n=6, seed=42):
    """Average class probabilities over the original and noisy copies of ``X``.

    The first prediction uses ``X`` unchanged. Each of the remaining ``n - 1``
    predictions uses ``X`` with multiplicative Gaussian noise (sigma 0.01) and
    additive Gaussian noise (sigma 0.005), drawn from a generator seeded with
    ``seed``. Returns the element-wise mean of all predictions.
    """
    noise_rng = np.random.default_rng(seed)
    prob_stack = [predict_proba_safe(model, X)]  # unperturbed input first
    for _ in range(n - 1):
        # Draw order (scale noise, then shift noise) is part of the behaviour.
        scale_noise = noise_rng.normal(0, 0.01, X.shape)
        shift_noise = noise_rng.normal(0, 0.005, X.shape)
        perturbed = X * (1.0 + scale_noise) + shift_noise
        prob_stack.append(predict_proba_safe(model, perturbed))
    return np.mean(prob_stack, axis=0)

def fit_temperature(logits_val: np.ndarray, y_val: np.ndarray) -> float:
    """Fit one scalar temperature ``T`` for a set of logits.

    Minimises the log-loss of ``softmax(logits_val / T)`` against ``y_val``
    with L-BFGS-B, starting from ``T = 1`` and bounded to ``[0.1, 5.0]``.
    Returns the optimiser's final ``T``.
    """
    from scipy.optimize import minimize

    label_ids = np.arange(n_classes)

    def nll(T):
        calibrated = softmax(logits_val / T[0])
        return log_loss(y_val, calibrated, labels=label_ids)

    result = minimize(nll, [1.0], method='L-BFGS-B', bounds=[(0.1, 5.0)])
    return float(result.x[0])

def entropy_and_margin(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return per-row entropy and top-2 margin of a probability matrix.

    Probabilities are clipped to ``[1e-12, 1]`` first. Both outputs are column
    vectors of shape ``(n_rows, 1)``: the entropy ``-sum(p * log p)`` and the
    gap between the largest and second-largest entry of each row.
    """
    clipped = np.clip(P, 1e-12, 1.0)
    entropy = -np.sum(clipped * np.log(clipped), axis=1, keepdims=True)
    # After partitioning at -2, the last two columns hold the runner-up and the maximum.
    runner_up, leader = np.partition(clipped, -2, axis=1)[:, -2:].T
    margin = (leader - runner_up).reshape(-1, 1)
    return entropy, margin

# Load every configured base model; entries that fail to load are dropped.
models, scalers = {}, {}
for name, path in MODEL_FILES.items():
    loaded_model, loaded_scaler = load_bundle_safe(name, path)
    if loaded_model is None:
        continue
    models[name] = loaded_model
    scalers[name] = loaded_scaler

# Sorted names fix the column order of every per-model structure below.
base_names = sorted(models)
if len(base_names) < 2:
    raise RuntimeError("Need at least 2 base models.")
print("Using base models:", base_names)

# Collect per-model log-probabilities for every trainval row, one validation
# fold at a time. Each fold uses its own TTA seed (rng + fold).
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng)
oof_logits = {b: np.zeros((len(y_trval), n_classes)) for b in base_names}

print("Starting Out-of-Fold predictions with TTA...")
# NOTE(review): tr_idx is never used and the base models are not refit inside
# this loop; each loaded model simply scores its validation fold. If the
# loaded models were trained on these same rows (the file names say
# "final_trainval" - confirm), these are in-sample predictions rather than
# true out-of-fold predictions, and everything fitted on them downstream
# (temperatures, meta-learner, blend weights) sees optimistic inputs.
for fold, (tr_idx, val_idx) in enumerate(skf.split(X_trval, y_trval), 1):
    for b in base_names:
        model, scaler = models[b], scalers[b]
        # Apply the scaler bundled with the model, when one was saved with it.
        Xva_s = scaler.transform(X_trval[val_idx]) if scaler else X_trval[val_idx]
        Pva = tta_probs(model, Xva_s, n=6, seed=rng + fold)
        # Store log-probabilities (clipped away from zero) as the "logits".
        oof_logits[b][val_idx] = np.log(np.clip(Pva, 1e-12, 1.0))
    print(f"Fold {fold} complete.")

# Same TTA-averaged log-probabilities for the held-out test split, using the
# fixed seed rng + 99.
test_logits = {}
for b in base_names:
    model = models[b]
    scaler = scalers[b]
    if scaler:
        Xte_s = scaler.transform(X_test)
    else:
        Xte_s = X_test
    Pte = tta_probs(model, Xte_s, n=6, seed=rng + 99)
    test_logits[b] = np.log(np.clip(Pte, 1e-12, 1.0))

# One scalar temperature per base model, fitted on the trainval logits and
# then applied to both the trainval and the test logits.
temperatures = {}
for b in base_names:
    temperatures[b] = fit_temperature(oof_logits[b], y_trval)
print("Optimal Temperatures:", {k: round(v, 4) for k, v in temperatures.items()})

oof_probs_cal = {b: softmax(oof_logits[b] / temperatures[b]) for b in temperatures}
test_probs_cal = {b: softmax(test_logits[b] / temperatures[b]) for b in temperatures}

def build_meta_features(prob_map: Dict[str, np.ndarray]) -> np.ndarray:
    """Concatenate, per base model, its probabilities, entropy and margin.

    Models are visited in ``base_names`` order; each contributes its
    probability matrix followed by the two columns from
    ``entropy_and_margin``. The pieces are stacked horizontally.
    """
    def _pieces():
        for b in base_names:
            probs = prob_map[b]
            entropy, margin = entropy_and_margin(probs)
            yield from (probs, entropy, margin)

    return np.hstack(list(_pieces()))

# Meta-features = calibrated base-model features + standardised raw features.
# The scaler is fitted on trainval only and reused for test.
raw_scaler = StandardScaler().fit(X_trval)
oof_model_block = build_meta_features(oof_probs_cal)
test_model_block = build_meta_features(test_probs_cal)
X_meta_oof = np.hstack([oof_model_block, raw_scaler.transform(X_trval)])
X_meta_test = np.hstack([test_model_block, raw_scaler.transform(X_test)])
print(f"Meta-feature shape: {X_meta_oof.shape}")

# Rows that the plain average of the base models gets wrong receive weight 2,
# all other rows weight 1.
mean_oof_probs = np.mean([oof_probs_cal[b] for b in base_names], axis=0)
maj_pred = mean_oof_probs.argmax(axis=1)
w_oof = np.where(maj_pred != y_trval, 2.0, 1.0)

# 10% of trainval is held out to compare meta-learner candidates.
X_meta_tr, X_meta_val, y_meta_tr, y_meta_val, w_tr, _ = train_test_split(
    X_meta_oof,
    y_trval,
    w_oof,
    test_size=0.10,
    stratify=y_trval,
    random_state=rng,
)

# Candidate meta-learners. Each entry is
# (name, hold-out accuracy, parameters, fitted model).
cands = []

# Multinomial logistic regression, one candidate per regularisation strength C.
for C in (0.2, 0.5, 1.0, 2.0):
    m = LogisticRegression(
        C=C,
        multi_class="multinomial",
        max_iter=2000,
        n_jobs=-1,
        random_state=rng,
    )
    m.fit(X_meta_tr, y_meta_tr, sample_weight=w_tr)
    logreg_acc = accuracy_score(y_meta_val, m.predict(X_meta_val))
    cands.append(("meta_logreg", logreg_acc, {"C": C}, m))

mxgb = XGBClassifier(
    objective="multi:softprob",
    num_class=n_classes,
    n_estimators=700,
    max_depth=6,
    learning_rate=0.05,
    random_state=rng,
)
mxgb.fit(X_meta_tr, y_meta_tr, sample_weight=w_tr)
xgb_acc = accuracy_score(y_meta_val, mxgb.predict(X_meta_val))
cands.append(("meta_xgb", xgb_acc, {"max_depth": 6}, mxgb))

if HAS_CAT:
    mcat = CatBoostClassifier(
        loss_function="MultiClass", iterations=800, depth=6, random_seed=rng, verbose=False
    )
    mcat.fit(
        X_meta_tr,
        y_meta_tr,
        sample_weight=w_tr,
        eval_set=(X_meta_val, y_meta_val),
        use_best_model=False,
        verbose=False,
    )
    # The prediction is flattened and cast to int before scoring.
    cat_pred = mcat.predict(X_meta_val).astype(int).ravel()
    cands.append(("meta_cat", accuracy_score(y_meta_val, cat_pred), {}, mcat))

# Highest hold-out accuracy wins; on a tie the earliest-added candidate is kept.
best_name, _, best_par, _ = max(cands, key=lambda cand: cand[1])
print(f"Best meta-learner on validation: {best_name}")

# Refit a fresh model of the winning family on all trainval meta-features.
# The refit uses its own, larger iteration budgets rather than the candidate's.
if best_name == "meta_logreg":
    meta_final = LogisticRegression(
        C=best_par["C"],
        multi_class="multinomial",
        max_iter=3000,
        n_jobs=-1,
        random_state=rng,
    )
elif best_name == "meta_xgb":
    meta_final = XGBClassifier(
        objective="multi:softprob",
        num_class=n_classes,
        n_estimators=900,
        max_depth=6,
        learning_rate=0.04,
        random_state=rng,
    )
else:
    meta_final = CatBoostClassifier(
        loss_function="MultiClass",
        iterations=1000,
        depth=6,
        learning_rate=0.07,
        random_seed=rng,
        verbose=False,
    )
meta_final.fit(X_meta_oof, y_trval, sample_weight=w_oof)

def simplex_project(v: np.ndarray):
    """Clip ``v`` at zero and rescale it to sum to one.

    If nothing positive remains after clipping, a uniform vector of the same
    length is returned instead.
    """
    nonneg = np.maximum(v, 0)
    total = nonneg.sum()
    if total > 0:
        return nonneg / total
    return np.ones_like(nonneg) / len(nonneg)
def evaluate_blend(weights, P_map, y_true):
    """Accuracy of the weighted sum of per-model probabilities.

    ``weights`` is paired positionally with ``base_names``. The blended matrix
    is arg-maxed per row and scored against ``y_true``.
    """
    blended = 0
    for weight, model_name in zip(weights, base_names):
        blended = blended + weight * P_map[model_name]
    return accuracy_score(y_true, blended.argmax(axis=1))

# Random search for blend weights that maximise accuracy on the calibrated
# trainval probabilities. Draws come from the global np.random state, so the
# result depends on that state and on the exact order of the calls below.
# The uniform starting weights are a fallback only: they are never scored
# themselves because best_acc starts at 0.
best_w, best_acc = np.ones(len(base_names)) / len(base_names), 0
# Stage 1: 4000 weight vectors sampled from a flat Dirichlet.
for _ in range(4000):
    w = np.random.dirichlet(alpha=np.ones(len(base_names)))
    acc = evaluate_blend(w, oof_probs_cal, y_trval)
    if acc > best_acc: best_acc, best_w = acc, w
# Stage 2: 600 small Gaussian perturbations (sigma 0.02) around the current
# best, clipped and renormalised by simplex_project.
for _ in range(600):
    w = simplex_project(best_w + np.random.normal(0, 0.02, size=len(base_names)))
    acc = evaluate_blend(w, oof_probs_cal, y_trval)
    if acc > best_acc: best_acc, best_w = acc, w

# Name -> weight mapping, kept for the JSON summary.
w_blend_dict = {b: w for b, w in zip(base_names, best_w)}
print(f"OOF blend best acc: {best_acc:.4f}")

# --- Stacked meta-learner on the held-out test meta-features ---
y_pred_meta_proba = meta_final.predict_proba(X_meta_test)
y_pred_meta = np.argmax(y_pred_meta_proba, axis=1)
acc_meta = accuracy_score(y_test, y_pred_meta)
f1_meta = f1_score(y_test, y_pred_meta, average="macro")
print(f"TEST — Meta ({best_name}): acc={acc_meta:.4f}, f1_macro={f1_meta:.4f}")

# --- Weighted blend of the calibrated base-model probabilities ---
y_pred_blend_proba = sum(
    weight * test_probs_cal[model_name]
    for weight, model_name in zip(best_w, base_names)
)
y_pred_blend = np.argmax(y_pred_blend_proba, axis=1)
acc_blend = accuracy_score(y_test, y_pred_blend)
f1_blend = f1_score(y_test, y_pred_blend, average="macro")
print(f"TEST — Blend: acc={acc_blend:.4f}, f1_macro={f1_blend:.4f}")

# NOTE(review): the winner is chosen by comparing test accuracies, so the
# reported winner score was selected on the test set. Ties go to the blend.
if acc_meta > acc_blend:
    winner_name = "meta"
    winner_acc, winner_f1 = acc_meta, f1_meta
    winner_pred, winner_proba = y_pred_meta, y_pred_meta_proba
else:
    winner_name = "blend"
    winner_acc, winner_f1 = acc_blend, f1_blend
    winner_pred, winner_proba = y_pred_blend, y_pred_blend_proba
print(f"\nWINNER: {winner_name.upper()} with Accuracy = {winner_acc:.4f}, F1-Macro = {winner_f1:.4f}")

# Confusion matrix for the winning predictor.
cm = confusion_matrix(y_test, winner_pred)
cm_path = os.path.join(OUT_DIR, f"cm_{winner_name}.png")
cm_title = f"Confusion Matrix - {winner_name.upper()} (Test)"
plot_confusion_matrix(cm, classes, cm_path, cm_title)

# One-vs-rest ROC and PR curves need the labels in indicator form.
y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, winner_proba, classes, OUT_DIR, winner_name)

summary = {
    "version": "V5",
    "winner": winner_name,
    "test_scores": {"accuracy": winner_acc, "f1_macro": winner_f1},
    "meta_details": {"best_learner": best_name, "test_acc": acc_meta},
    "blend_details": {
        "oof_acc": best_acc,
        "weights": w_blend_dict,
        "test_acc": acc_blend,
    },
    "temperatures": temperatures,
}
summary_path = os.path.join(OUT_DIR, "summary_v5.json")
with open(summary_path, "w") as f:
    json.dump(summary, f, indent=4)

# The refitted meta-learner is saved whichever predictor won.
meta_model_path = os.path.join(OUT_DIR, f"meta_model_{best_name}_v5.joblib")
joblib.dump(meta_final, meta_model_path)

print(f"\nSaved all V5 artifacts to: {OUT_DIR}")

Split sizes: {'trainval': 7200, 'test': 1800}
[WARN] Could not load 'rf' from /kaggle/working/Customized CNN/RF/rf_final_trainval.joblib: node array from the pickle has an incompatible dtype:
- expected: [('left_child', '<i8'), ('right_child', '<i8'), ('feature', '<i8'), ('threshold', '<f8'), ('impurity', '<f8'), ('n_node_samples', '<i8'), ('weighted_n_node_samples', '<f8')]
- got     : {'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'], 'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'], 'offsets': [0, 8, 16, 24, 32, 40, 48, 56], 'itemsize': 64}
Using base models: ['cat', 'knn', 'svm', 'xgb']
Starting Out-of-Fold predictions with TTA...
Fold 1 complete.
Fold 2 complete.
Fold 3 complete.
Fold 4 complete.
Fold 5 complete.
Optimal Temperatures: {'cat': 1.0211, 'knn': 1.0, 'svm': 0.6902, 'xgb': 0.1}
Meta-feature shape: (7200, 288)
Best meta-learner on validation: meta_xgb
OOF blend 

#### V6

In [None]:
import os
import json
import joblib
import warnings
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, precision_recall_curve, auc
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

try:
    from catboost import CatBoostClassifier
    HAS_CAT = True
except ImportError:
    HAS_CAT = False

warnings.filterwarnings("ignore", category=UserWarning)

# Input feature table and the output directory for this version's artifacts.
CSV_PATH = "/kaggle/working/features_256d_efficientnet.csv"
OUT_DIR  = "/kaggle/working/Ensemble Model All Version/Version6"
os.makedirs(OUT_DIR, exist_ok=True)

# Saved base models, keyed by a short name. The "cat" entry is a native
# CatBoost .cbm file; the others are joblib files.
MODEL_FILES = {
    "svm": "/kaggle/working/Customized CNN/SVM/svm_final_trainval.joblib",
    "xgb": "/kaggle/working/Customized CNN/XGB/xgb_final_trainval.joblib",
    "rf" : "/kaggle/working/Customized CNN/RF/rf_final_trainval.joblib",
    "knn": "/kaggle/working/Customized CNN/KNN/knn_final_trainval.joblib",
    "cat": "/kaggle/working/Customized CNN/CAT/cat_final_trainval.cbm",
}

# Single seed used for splits, model random states and the global numpy RNG.
rng = 42
np.random.seed(rng)

def setup_plot_style():
    """Apply the shared font settings to matplotlib's global rcParams."""
    style = {
        "font.family": "Times New Roman",
        "font.size": 12,
        "axes.labelsize": 16,
        "axes.titlesize": 18,
        "font.weight": "bold",
        "axes.labelweight": "bold",
    }
    for key, value in style.items():
        plt.rcParams[key] = value

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Draw ``cm`` as an annotated teal heatmap and save it to ``output_path``.

    Rows are labelled "Actual" and columns "Predicted". Multi-word class names
    are broken after their first word so tick labels stay narrow. The figure is
    written at 600 dpi and then closed.
    """
    setup_plot_style()
    cm_df = pd.DataFrame(cm, index=classes, columns=classes)

    def wrap(lbl):
        words = str(lbl).split()
        if len(words) <= 1:
            return lbl
        return words[0] + "\n" + " ".join(words[1:])

    tick_labels = [wrap(col) for col in cm_df.columns]
    teal = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap=teal,
        cbar=True,
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        linewidths=1,
        linecolor="white",
        annot_kws={"fontsize": 14, "weight": "bold"},
    )
    sns.heatmap(cm_df, ax=ax, **heatmap_options)

    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    for tick in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Save one-vs-rest ROC and precision-recall plots, one curve per class.

    Column ``i`` of ``y_true_bin`` and ``y_pred_proba`` is used for
    ``classes[i]``. Files are written to ``out_dir`` as
    ``roc_curve_<prefix>.png`` and ``pr_curve_<prefix>.png`` at 600 dpi.
    """
    setup_plot_style()

    def _write_and_close(fig, path):
        fig.tight_layout()
        fig.savefig(path, dpi=600, bbox_inches="tight")
        plt.close(fig)

    # ROC: one curve per class plus the chance diagonal.
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    for idx, cls_name in enumerate(classes):
        fpr, tpr, _ = roc_curve(y_true_bin[:, idx], y_pred_proba[:, idx])
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{cls_name} (AUC = {roc_auc:0.2f})')
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(f'Receiver Operating Characteristic (ROC) - {prefix}')
    ax_roc.legend(loc="lower right", fontsize=10)
    roc_path = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    _write_and_close(fig_roc, roc_path)
    print(f"Saved ROC curve to {roc_path}")

    # Precision-recall: one curve per class.
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    for idx, cls_name in enumerate(classes):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, idx], y_pred_proba[:, idx])
        ax_pr.plot(recall, precision, lw=2, label=f'{cls_name}')
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve - {prefix}')
    ax_pr.legend(loc="best", fontsize=10)
    pr_path = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    _write_and_close(fig_pr, pr_path)
    print(f"Saved PR curve to {pr_path}")

# Read the feature table. Inputs are the columns named "f<digits>",
# "class_idx" is the integer target and "label" the class name.
df = pd.read_csv(CSV_PATH)
feat_cols = [
    col for col in df.columns
    if col.startswith("f") and col[1:].isdigit()
]
if len(feat_cols) == 0:
    raise RuntimeError("No feature columns found.")

X_all = df[feat_cols].to_numpy().astype(np.float32)
y_all = df["class_idx"].to_numpy().astype(int)

# Class names in the order they appear once rows are sorted by class_idx.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)

# Stratified 80/20 hold-out split; the 20% is the final test set.
X_trval, X_test, y_trval, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.20,
    stratify=y_all,
    random_state=rng,
)
print("Split sizes:", {"trainval": len(y_trval), "test": len(y_test)})

def load_bundle_safe(name: str, path: str):
    """Load one base model, returning ``(model, scaler)`` or ``(None, None)``.

    A ``.cbm`` file registered under the name ``"cat"`` is read with CatBoost's
    own loader when CatBoost is available. Everything else goes through
    ``joblib.load``; a dict payload containing ``"model"`` is unpacked into the
    model and its optional ``"scaler"``, any other payload is returned as the
    model itself with no scaler.

    Loading is best-effort: any exception is reported as a warning on stdout
    and the caller receives ``(None, None)``.
    """
    try:
        is_native_catboost = name == "cat" and HAS_CAT and path.lower().endswith(".cbm")
        if is_native_catboost:
            booster = CatBoostClassifier()
            booster.load_model(path)
            return booster, None

        payload = joblib.load(path)
        if not (isinstance(payload, dict) and "model" in payload):
            return payload, None
        return payload["model"], payload.get("scaler")
    except Exception as e:
        print(f"[WARN] Could not load '{name}' from {path}: {e}")
        return None, None

def softmax(z: np.ndarray) -> np.ndarray:
    """Row-wise softmax of a 2-D score array.

    Each row's maximum is subtracted before exponentiating so large scores do
    not overflow.
    """
    exp_shifted = np.exp(z - np.max(z, axis=1, keepdims=True))
    row_totals = np.sum(exp_shifted, axis=1, keepdims=True)
    return exp_shifted / row_totals

def predict_logits(model, X) -> np.ndarray:
    """Return per-class scores for ``X`` from a fitted model.

    The first available source is used:

    1. ``decision_function`` margins, as float64. A 1-D (binary) margin ``d``
       is expanded to the two columns ``[-d, d]``.
    2. ``log(predict_proba)`` with probabilities clipped to ``[1e-12, 1]``.
    3. Hard predictions turned into pseudo-logits: +10 for the predicted class
       and -10 elsewhere, with the width taken from the module-level
       ``n_classes``.
    """
    if hasattr(model, "decision_function"):
        d = model.decision_function(X)
        if d.ndim == 1:
            d = np.vstack([-d, d]).T
        return np.asarray(d, dtype=np.float64)
    if hasattr(model, "predict_proba"):
        return np.log(np.clip(model.predict_proba(X), 1e-12, 1.0))

    # Flatten and cast before indexing: an (n, 1) prediction column would
    # broadcast against arange(n) and mark an n-by-n block of cells, and float
    # labels are rejected as indices outright.
    pred = np.asarray(model.predict(X)).astype(int).ravel()
    L = np.full((X.shape[0], n_classes), -10.0)
    L[np.arange(X.shape[0]), pred] = 10.0
    return L
        
def predict_proba_safe(model, X) -> np.ndarray:
    """Return class probabilities as a float64 array.

    Uses ``model.predict_proba`` when present; a 1-D result ``p`` is expanded
    to the two columns ``[1 - p, p]``. Models without ``predict_proba`` fall
    back to a softmax over ``predict_logits``.
    """
    if not hasattr(model, "predict_proba"):
        return softmax(predict_logits(model, X))

    probs = model.predict_proba(X)
    if probs.ndim == 1:
        probs = np.column_stack([1 - probs, probs])
    return np.asarray(probs, dtype=np.float64)

def tta_probs(model, X, n=6, seed=42):
    """Average class probabilities over the original and noisy copies of ``X``.

    The first prediction uses ``X`` unchanged. Each of the remaining ``n - 1``
    predictions uses ``X`` with multiplicative Gaussian noise (sigma 0.01) and
    additive Gaussian noise (sigma 0.005), drawn from a generator seeded with
    ``seed``. Returns the element-wise mean of all predictions.
    """
    noise_rng = np.random.default_rng(seed)
    prob_stack = [predict_proba_safe(model, X)]  # unperturbed input first
    for _ in range(n - 1):
        # Draw order (scale noise, then shift noise) is part of the behaviour.
        scale_noise = noise_rng.normal(0, 0.01, X.shape)
        shift_noise = noise_rng.normal(0, 0.005, X.shape)
        perturbed = X * (1.0 + scale_noise) + shift_noise
        prob_stack.append(predict_proba_safe(model, perturbed))
    return np.mean(prob_stack, axis=0)

def fit_temperature(logits_val: np.ndarray, y_val: np.ndarray) -> float:
    """Fit one scalar temperature ``T`` for a set of logits.

    Minimises the log-loss of ``softmax(logits_val / T)`` against ``y_val``
    with L-BFGS-B, starting from ``T = 1`` and bounded to ``[0.1, 5.0]``.
    Returns the optimiser's final ``T``.
    """
    from scipy.optimize import minimize

    label_ids = np.arange(n_classes)

    def nll(T):
        calibrated = softmax(logits_val / T[0])
        return log_loss(y_val, calibrated, labels=label_ids)

    result = minimize(nll, [1.0], method='L-BFGS-B', bounds=[(0.1, 5.0)])
    return float(result.x[0])

def entropy_and_margin(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return per-row entropy and top-2 margin of a probability matrix.

    Probabilities are clipped to ``[1e-12, 1]`` first. Both outputs are column
    vectors of shape ``(n_rows, 1)``: the entropy ``-sum(p * log p)`` and the
    gap between the largest and second-largest entry of each row.
    """
    clipped = np.clip(P, 1e-12, 1.0)
    entropy = -np.sum(clipped * np.log(clipped), axis=1, keepdims=True)
    # After partitioning at -2, the last two columns hold the runner-up and the maximum.
    runner_up, leader = np.partition(clipped, -2, axis=1)[:, -2:].T
    margin = (leader - runner_up).reshape(-1, 1)
    return entropy, margin

# Load every configured base model; entries that fail to load are dropped.
models, scalers = {}, {}
for name, path in MODEL_FILES.items():
    loaded_model, loaded_scaler = load_bundle_safe(name, path)
    if loaded_model is None:
        continue
    models[name] = loaded_model
    scalers[name] = loaded_scaler

# Sorted names fix the column order of every per-model structure below.
base_names = sorted(models)
if len(base_names) < 2:
    raise RuntimeError("Need at least 2 base models.")
print("Using base models:", base_names)

# Collect per-model log-probabilities for every trainval row, one validation
# fold at a time. Each fold uses its own TTA seed (rng + fold).
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=rng)
oof_logits = {b: np.zeros((len(y_trval), n_classes)) for b in base_names}

print("Starting Out-of-Fold predictions with TTA...")
# NOTE(review): tr_idx is never used and the base models are not refit inside
# this loop; each loaded model simply scores its validation fold. If the
# loaded models were trained on these same rows (the file names say
# "final_trainval" - confirm), these are in-sample predictions rather than
# true out-of-fold predictions, and everything fitted on them downstream
# (temperatures, meta-learner, blend weights) sees optimistic inputs.
for fold, (tr_idx, val_idx) in enumerate(skf.split(X_trval, y_trval), 1):
    for b in base_names:
        model, scaler = models[b], scalers[b]
        # Apply the scaler bundled with the model, when one was saved with it.
        Xva_s = scaler.transform(X_trval[val_idx]) if scaler else X_trval[val_idx]
        Pva = tta_probs(model, Xva_s, n=6, seed=rng + fold)
        # Store log-probabilities (clipped away from zero) as the "logits".
        oof_logits[b][val_idx] = np.log(np.clip(Pva, 1e-12, 1.0))
    print(f"Fold {fold} complete.")

# Same TTA-averaged log-probabilities for the held-out test split, using the
# fixed seed rng + 99.
test_logits = {}
for b in base_names:
    model = models[b]
    scaler = scalers[b]
    if scaler:
        Xte_s = scaler.transform(X_test)
    else:
        Xte_s = X_test
    Pte = tta_probs(model, Xte_s, n=6, seed=rng + 99)
    test_logits[b] = np.log(np.clip(Pte, 1e-12, 1.0))

# One scalar temperature per base model, fitted on the trainval logits and
# then applied to both the trainval and the test logits.
temperatures = {}
for b in base_names:
    temperatures[b] = fit_temperature(oof_logits[b], y_trval)
print("Optimal Temperatures:", {k: round(v, 4) for k, v in temperatures.items()})

oof_probs_cal = {b: softmax(oof_logits[b] / temperatures[b]) for b in temperatures}
test_probs_cal = {b: softmax(test_logits[b] / temperatures[b]) for b in temperatures}

def build_meta_features(prob_map: Dict[str, np.ndarray]) -> np.ndarray:
    """Concatenate, per base model, its probabilities, entropy and margin.

    Models are visited in ``base_names`` order; each contributes its
    probability matrix followed by the two columns from
    ``entropy_and_margin``. The pieces are stacked horizontally.
    """
    def _pieces():
        for b in base_names:
            probs = prob_map[b]
            entropy, margin = entropy_and_margin(probs)
            yield from (probs, entropy, margin)

    return np.hstack(list(_pieces()))

# Meta-features = calibrated base-model features + standardised raw features.
# The scaler is fitted on trainval only and reused for test.
raw_scaler = StandardScaler().fit(X_trval)
oof_model_block = build_meta_features(oof_probs_cal)
test_model_block = build_meta_features(test_probs_cal)
X_meta_oof = np.hstack([oof_model_block, raw_scaler.transform(X_trval)])
X_meta_test = np.hstack([test_model_block, raw_scaler.transform(X_test)])
print(f"Meta-feature shape: {X_meta_oof.shape}")

# Rows that the plain average of the base models gets wrong receive weight 2,
# all other rows weight 1.
mean_oof_probs = np.mean([oof_probs_cal[b] for b in base_names], axis=0)
maj_pred = mean_oof_probs.argmax(axis=1)
w_oof = np.where(maj_pred != y_trval, 2.0, 1.0)

# 10% of trainval is held out to compare meta-learner candidates.
X_meta_tr, X_meta_val, y_meta_tr, y_meta_val, w_tr, _ = train_test_split(
    X_meta_oof,
    y_trval,
    w_oof,
    test_size=0.10,
    stratify=y_trval,
    random_state=rng,
)

# Candidate meta-learners. Each entry is
# (name, hold-out accuracy, parameters, fitted model).
cands = []

# Multinomial logistic regression, one candidate per regularisation strength C.
for C in (0.2, 0.5, 1.0, 2.0):
    m = LogisticRegression(
        C=C,
        multi_class="multinomial",
        max_iter=2000,
        n_jobs=-1,
        random_state=rng,
    )
    m.fit(X_meta_tr, y_meta_tr, sample_weight=w_tr)
    logreg_acc = accuracy_score(y_meta_val, m.predict(X_meta_val))
    cands.append(("meta_logreg", logreg_acc, {"C": C}, m))

mxgb = XGBClassifier(
    objective="multi:softprob",
    num_class=n_classes,
    n_estimators=700,
    max_depth=6,
    learning_rate=0.05,
    random_state=rng,
)
mxgb.fit(X_meta_tr, y_meta_tr, sample_weight=w_tr)
xgb_acc = accuracy_score(y_meta_val, mxgb.predict(X_meta_val))
cands.append(("meta_xgb", xgb_acc, {"max_depth": 6}, mxgb))

if HAS_CAT:
    mcat = CatBoostClassifier(
        loss_function="MultiClass", iterations=800, depth=6, random_seed=rng, verbose=False
    )
    mcat.fit(
        X_meta_tr,
        y_meta_tr,
        sample_weight=w_tr,
        eval_set=(X_meta_val, y_meta_val),
        use_best_model=False,
        verbose=False,
    )
    # The prediction is flattened and cast to int before scoring.
    cat_pred = mcat.predict(X_meta_val).astype(int).ravel()
    cands.append(("meta_cat", accuracy_score(y_meta_val, cat_pred), {}, mcat))

# Highest hold-out accuracy wins; on a tie the earliest-added candidate is kept.
best_name, _, best_par, _ = max(cands, key=lambda cand: cand[1])
print(f"Best meta-learner on validation: {best_name}")

# Refit a fresh model of the winning family on all trainval meta-features.
# The refit uses its own, larger iteration budgets rather than the candidate's.
if best_name == "meta_logreg":
    meta_final = LogisticRegression(
        C=best_par["C"],
        multi_class="multinomial",
        max_iter=3000,
        n_jobs=-1,
        random_state=rng,
    )
elif best_name == "meta_xgb":
    meta_final = XGBClassifier(
        objective="multi:softprob",
        num_class=n_classes,
        n_estimators=900,
        max_depth=6,
        learning_rate=0.04,
        random_state=rng,
    )
else:
    # Any remaining name means the CatBoost candidate won.
    meta_final = CatBoostClassifier(
        loss_function="MultiClass",
        iterations=1000,
        depth=6,
        learning_rate=0.07,
        random_seed=rng,
        verbose=False,
    )
meta_final.fit(X_meta_oof, y_trval, sample_weight=w_oof)

def simplex_project(v: np.ndarray):
    """Clip ``v`` at zero and rescale it to sum to one.

    If nothing positive remains after clipping, a uniform vector of the same
    length is returned instead.
    """
    nonneg = np.maximum(v, 0)
    total = nonneg.sum()
    if total > 0:
        return nonneg / total
    return np.ones_like(nonneg) / len(nonneg)

def evaluate_blend(weights, P_map, y_true):
    """Accuracy of the weighted sum of per-model probabilities.

    ``weights`` is paired positionally with ``base_names``. The blended matrix
    is arg-maxed per row and scored against ``y_true``.
    """
    blended = 0
    for weight, model_name in zip(weights, base_names):
        blended = blended + weight * P_map[model_name]
    return accuracy_score(y_true, blended.argmax(axis=1))

# Random search for blend weights that maximise accuracy on the calibrated
# trainval probabilities. Draws come from the global np.random state, so the
# result depends on that state and on the exact order of the calls below.
# The uniform starting weights are a fallback only: they are never scored
# themselves because best_acc starts at 0.
best_w, best_acc = np.ones(len(base_names)) / len(base_names), 0
# Stage 1: 4000 weight vectors sampled from a flat Dirichlet.
for _ in range(4000):
    w = np.random.dirichlet(alpha=np.ones(len(base_names)))
    acc = evaluate_blend(w, oof_probs_cal, y_trval)
    if acc > best_acc: best_acc, best_w = acc, w
# Stage 2: 600 small Gaussian perturbations (sigma 0.02) around the current
# best, clipped and renormalised by simplex_project.
for _ in range(600): 
    w = simplex_project(best_w + np.random.normal(0, 0.02, size=len(base_names)))
    acc = evaluate_blend(w, oof_probs_cal, y_trval)
    if acc > best_acc: best_acc, best_w = acc, w

# Name -> weight mapping, kept for the JSON summary.
w_blend_dict = {b: w for b, w in zip(base_names, best_w)}
print(f"OOF blend best acc: {best_acc:.4f}")

# --- Stacked meta-learner on the held-out test meta-features ---
y_pred_meta_proba = meta_final.predict_proba(X_meta_test)
y_pred_meta = np.argmax(y_pred_meta_proba, axis=1)
acc_meta = accuracy_score(y_test, y_pred_meta)
f1_meta = f1_score(y_test, y_pred_meta, average="macro")
print(f"TEST — Meta ({best_name}): acc={acc_meta:.4f}, f1_macro={f1_meta:.4f}")

# --- Weighted blend of the calibrated base-model probabilities ---
y_pred_blend_proba = sum(
    weight * test_probs_cal[model_name]
    for weight, model_name in zip(best_w, base_names)
)
y_pred_blend = np.argmax(y_pred_blend_proba, axis=1)
acc_blend = accuracy_score(y_test, y_pred_blend)
f1_blend = f1_score(y_test, y_pred_blend, average="macro")
print(f"TEST — Blend: acc={acc_blend:.4f}, f1_macro={f1_blend:.4f}")

# NOTE(review): the winner is chosen by comparing test accuracies, so the
# reported winner score was selected on the test set. Ties go to the blend.
if acc_meta > acc_blend:
    winner_name = "meta"
    winner_acc, winner_f1 = acc_meta, f1_meta
    winner_pred, winner_proba = y_pred_meta, y_pred_meta_proba
else:
    winner_name = "blend"
    winner_acc, winner_f1 = acc_blend, f1_blend
    winner_pred, winner_proba = y_pred_blend, y_pred_blend_proba
print(f"\nWINNER: {winner_name.upper()} with Accuracy = {winner_acc:.4f}, F1-Macro = {winner_f1:.4f}")

# Confusion matrix for the winning predictor.
cm = confusion_matrix(y_test, winner_pred)
cm_path = os.path.join(OUT_DIR, f"cm_{winner_name}.png")
cm_title = f"Confusion Matrix - {winner_name.upper()} (Test)"
plot_confusion_matrix(cm, classes, cm_path, cm_title)

# One-vs-rest ROC and PR curves need the labels in indicator form.
y_test_bin = label_binarize(y_test, classes=np.arange(n_classes))
plot_roc_pr_curves(y_test_bin, winner_proba, classes, OUT_DIR, winner_name)

summary = {
    "version": "V6",
    "winner": winner_name,
    "test_scores": {"accuracy": winner_acc, "f1_macro": winner_f1},
    "meta_details": {"best_learner": best_name, "test_acc": acc_meta},
    "blend_details": {
        "oof_acc": best_acc,
        "weights": w_blend_dict,
        "test_acc": acc_blend,
    },
    "temperatures": temperatures,
}
summary_path = os.path.join(OUT_DIR, "summary_v6.json")
with open(summary_path, "w") as f:
    json.dump(summary, f, indent=4)

# NOTE(review): best_name is always one of the "meta_*" candidate names, so
# this condition is always true and the meta model is saved on every run.
# If the intent was to skip saving when the blend wins, the check should
# probably be on winner_name - confirm before changing.
if best_name != "blend":
    meta_model_path = os.path.join(OUT_DIR, f"meta_model_{best_name}_v6.joblib")
    joblib.dump(meta_final, meta_model_path)

print(f"\nSaved all V6 artifacts to: {OUT_DIR}")


Split sizes: {'trainval': 7200, 'test': 1800}
[WARN] Could not load 'rf' from /kaggle/working/Customized CNN/RF/rf_final_trainval.joblib: node array from the pickle has an incompatible dtype:
- expected: [('left_child', '<i8'), ('right_child', '<i8'), ('feature', '<i8'), ('threshold', '<f8'), ('impurity', '<f8'), ('n_node_samples', '<i8'), ('weighted_n_node_samples', '<f8')]
- got     : {'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'], 'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'], 'offsets': [0, 8, 16, 24, 32, 40, 48, 56], 'itemsize': 64}
Using base models: ['cat', 'knn', 'svm', 'xgb']
Starting Out-of-Fold predictions with TTA...
Fold 1 complete.
Fold 2 complete.
Fold 3 complete.
Fold 4 complete.
Fold 5 complete.
Optimal Temperatures: {'cat': 1.0211, 'knn': 1.0, 'svm': 0.6902, 'xgb': 0.1}
Meta-feature shape: (7200, 288)
Best meta-learner on validation: meta_xgb
OOF blend 

#### V7

In [None]:
import os
import json
import joblib
import warnings
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, auc, precision_recall_curve
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.linear_model import SGDClassifier, LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# Silence UserWarning noise and fix the global numpy seed for this cell.
warnings.filterwarnings("ignore", category=UserWarning)
rng = 42
np.random.seed(rng)

# Input: PCA-reduced feature table. Output: directory for this version's artifacts.
CSV_PATH = "/kaggle/working/PCA_212/features_pca_212comps.csv"
OUT_DIR = "/kaggle/working/Ensemble_Model_All_Version/Version7_Corrected"
os.makedirs(OUT_DIR, exist_ok=True)

# Destination path for the JSON report of this run.
REPORT_JSON = os.path.join(OUT_DIR, "winner_report_pca_v7_corrected.json")

def setup_plot_style():
    """Apply the shared font settings to matplotlib's global rcParams."""
    style = {
        "font.family": "Times New Roman",
        "font.size": 12,
        "axes.labelsize": 16,
        "axes.titlesize": 18,
        "font.weight": "bold",
        "axes.labelweight": "bold",
    }
    for key, value in style.items():
        plt.rcParams[key] = value

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Draw ``cm`` as an annotated teal heatmap and save it to ``output_path``.

    Rows are labelled "Actual" and columns "Predicted". Multi-word class names
    are broken after their first word so tick labels stay narrow. The figure is
    written at 600 dpi and then closed.
    """
    setup_plot_style()
    cm_df = pd.DataFrame(cm, index=classes, columns=classes)

    def wrap(lbl):
        words = str(lbl).split()
        if len(words) <= 1:
            return lbl
        return words[0] + "\n" + " ".join(words[1:])

    tick_labels = [wrap(col) for col in cm_df.columns]
    teal = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap=teal,
        cbar=True,
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        linewidths=1,
        linecolor="white",
        annot_kws={"fontsize": 14, "weight": "bold"},
    )
    sns.heatmap(cm_df, ax=ax, **heatmap_options)

    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    for tick in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves_and_save_points(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, prefix: str):
    """Save one-vs-rest ROC and PR plots plus the curve points as CSV.

    Column ``i`` of ``y_true_bin`` and ``y_pred_proba`` is used for
    ``classes[i]``. Four files are written to ``out_dir``:
    ``roc_curve_<prefix>.png``, ``roc_points_<prefix>.csv``,
    ``pr_curve_<prefix>.png`` and ``pr_points_<prefix>.csv``. Each CSV has one
    row per curve point, tagged with the class index and class name.
    """
    setup_plot_style()

    def _write_and_close(fig, path):
        fig.tight_layout()
        fig.savefig(path, dpi=600, bbox_inches="tight")
        plt.close(fig)

    # --- ROC: one curve per class plus the chance diagonal ---
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    roc_rows = []
    for idx, cls_name in enumerate(classes):
        fpr, tpr, _ = roc_curve(y_true_bin[:, idx], y_pred_proba[:, idx])
        roc_auc = auc(fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{cls_name} (AUC = {roc_auc:0.3f})')
        roc_rows.extend(
            {"class_idx": idx, "class": cls_name, "fpr": fpr_val, "tpr": tpr_val}
            for fpr_val, tpr_val in zip(fpr, tpr)
        )
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(f'Receiver Operating Characteristic (ROC) - {prefix}')
    ax_roc.legend(loc="lower right", fontsize=10)
    roc_path = os.path.join(out_dir, f"roc_curve_{prefix}.png")
    _write_and_close(fig_roc, roc_path)
    print(f"Saved ROC curve to {roc_path}")
    roc_csv = os.path.join(out_dir, f"roc_points_{prefix}.csv")
    pd.DataFrame(roc_rows).to_csv(roc_csv, index=False)

    # --- Precision-recall: one curve per class ---
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    pr_rows = []
    for idx, cls_name in enumerate(classes):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, idx], y_pred_proba[:, idx])
        ax_pr.plot(recall, precision, lw=2, label=f'{cls_name}')
        pr_rows.extend(
            {"class_idx": idx, "class": cls_name, "precision": prec_val, "recall": rec_val}
            for prec_val, rec_val in zip(precision, recall)
        )
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve - {prefix}')
    ax_pr.legend(loc="best", fontsize=10)
    pr_path = os.path.join(out_dir, f"pr_curve_{prefix}.png")
    _write_and_close(fig_pr, pr_path)
    print(f"Saved PR curve to {pr_path}")
    pr_csv = os.path.join(out_dir, f"pr_points_{prefix}.csv")
    pd.DataFrame(pr_rows).to_csv(pr_csv, index=False)

# Read the PCA feature table. Inputs are the columns whose name starts with
# "pc"; "class_idx" is the integer target and "label" the class name.
df = pd.read_csv(CSV_PATH)
feat_cols = [col for col in df.columns if col.startswith("pc")]
if len(feat_cols) == 0:
    raise RuntimeError("No PCA feature columns found in the CSV.")

X_all = df[feat_cols].to_numpy().astype(np.float32)
y_all = df["class_idx"].to_numpy().astype(int)

# Class names in the order they appear once rows are sorted by class_idx.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)
print(f"Data loaded successfully: {X_all.shape[1]} features, {n_classes} classes.")

# Two stratified splits: 20% test first, then 12.5% of the remaining 80%
# (10% of all rows) as validation, leaving 70% for training.
X_tmp, X_test, y_tmp, y_test = train_test_split(
    X_all, y_all, test_size=0.20, stratify=y_all, random_state=rng
)
X_train, X_val, y_train, y_val = train_test_split(
    X_tmp, y_tmp, test_size=0.125, stratify=y_tmp, random_state=rng
)
print("Split sizes:", {"train": len(y_train), "val": len(y_val), "test": len(y_test)})

def _row_norm(p: np.ndarray) -> np.ndarray:

    p = np.nan_to_num(p, nan=0.0, posinf=0.0, neginf=0.0)
    p[p < 0] = 0.0
    s = p.sum(axis=1, keepdims=True)
    s[s <= 0] = 1.0
    return p / s

def safe_log_probs(P: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Log of row-normalised probabilities, clipped to ``[eps, 1 - eps]``.

    The clip keeps the logarithm finite for entries that are exactly 0 or 1.
    """
    normalized = _row_norm(P)
    bounded = np.clip(normalized, eps, 1.0 - eps)
    return np.log(bounded)

def safe_softmax(L: np.ndarray, T: np.ndarray) -> np.ndarray:
    """Softmax of ``L`` with one temperature per column.

    Non-finite logits are replaced by 0. Column ``j`` is divided by ``T[j]``,
    floored at 1e-6. Each row is shifted by its maximum, exponentiated with the
    exponent clipped to ``[-700, 700]``, and renormalised with ``_row_norm``.
    """
    finite_logits = np.nan_to_num(L, nan=0.0, posinf=0.0, neginf=0.0)
    temps = np.maximum(T.reshape(1, -1), 1e-6)
    scaled = finite_logits / temps
    shifted = scaled - np.max(scaled, axis=1, keepdims=True)
    exponentials = np.exp(np.clip(shifted, -700, 700))
    return _row_norm(exponentials)

def predict_logits(model, X) -> np.ndarray:
    """Return per-class scores for ``X`` from a fitted model.

    The first available source is used:

    1. ``decision_function`` margins as float64, with NaN and infinite values
       replaced by 0. A 1-D (binary) margin ``d`` is expanded to ``[-d, d]``.
    2. ``safe_log_probs`` of ``predict_proba``.
    3. Hard predictions turned into pseudo-logits: +10 for the predicted class
       and -10 elsewhere, with the width taken from the module-level
       ``n_classes``.
    """
    if hasattr(model, "decision_function"):
        d = model.decision_function(X)
        if d.ndim == 1:
            d = np.vstack([-d, d]).T
        return np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float64)
    if hasattr(model, "predict_proba"):
        return safe_log_probs(np.asarray(model.predict_proba(X), dtype=np.float64))

    # Flatten and cast before indexing: an (n, 1) prediction column would
    # broadcast against arange(n) and mark an n-by-n block of cells, and float
    # labels are rejected as indices outright.
    pred = np.asarray(model.predict(X)).astype(int).ravel()
    L = np.full((X.shape[0], n_classes), -10.0)
    L[np.arange(X.shape[0]), pred] = 10.0
    return L

def nll_from_logits(logits: np.ndarray, y_true: np.ndarray, T: np.ndarray) -> float:
    """Log-loss of ``y_true`` under ``safe_softmax(logits, T)``.

    The label set passed to ``log_loss`` is ``0 .. n_classes - 1``.
    """
    calibrated = safe_softmax(logits, T)
    all_labels = np.arange(n_classes)
    return log_loss(y_true, calibrated, labels=all_labels)

def fit_temperature_classwise(logits_val: np.ndarray, y_val: np.ndarray) -> np.ndarray:
    """Fit one temperature per class with a single coordinate-wise grid pass.

    Starting from all ones, each class in turn is tried at 50 evenly spaced
    values in ``[0.1, 5.0]`` while the other temperatures stay at their current
    values. The value with the lowest ``nll_from_logits`` is kept (the first
    one on ties) before moving to the next class.
    """
    grid = np.linspace(0.1, 5.0, 50)
    Tvec = np.ones(n_classes)
    for c in range(n_classes):
        best_T = 1.0
        best_nll = float("inf")
        for T_cand in grid:
            trial = Tvec.copy()
            trial[c] = T_cand
            loss = nll_from_logits(logits_val, y_val, trial)
            if loss < best_nll:
                best_nll = loss
                best_T = T_cand
        # Later classes are tuned against the values already chosen here.
        Tvec[c] = best_T
    return Tvec

def entropy_and_margin(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Return per-row entropy and top-2 margin of a probability matrix.

    Rows are renormalised with ``_row_norm`` first. Both outputs are column
    vectors of shape ``(n_rows, 1)``: the entropy ``-sum(p * log(p + 1e-12))``
    and the gap between the largest and second-largest entry of each row.
    """
    probs = _row_norm(P)
    entropy = -np.sum(probs * np.log(probs + 1e-12), axis=1, keepdims=True)
    # After partitioning at -2, the last two columns hold the runner-up and the maximum.
    runner_up, leader = np.partition(probs, -2, axis=1)[:, -2:].T
    margin = (leader - runner_up).reshape(-1, 1)
    return entropy, margin

def project_cols_simplex(W: np.ndarray) -> np.ndarray:
    """Clip ``W`` at zero and rescale every column to sum to one.

    Columns whose clipped sum is exactly 0 are divided by 1 instead, so they
    stay all-zero rather than producing NaNs. The input is not modified.
    """
    non_negative = np.maximum(W, 0)
    totals = non_negative.sum(axis=0, keepdims=True)
    divisors = np.where(totals == 0, 1.0, totals)
    return non_negative / divisors

# Number of stratified folds used for the out-of-fold (OOF) predictions.
K = 5
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=rng)

# Base learners keyed by short name. These exact instances are the ones fitted
# inside the fold loop below (they are refitted on every fold).
base_models_defs = {
    "rf": RandomForestClassifier(n_estimators=600, random_state=rng, n_jobs=-1),
    "et": ExtraTreesClassifier(n_estimators=800, random_state=rng, n_jobs=-1),
    "xgb": XGBClassifier(n_estimators=1000, use_label_encoder=False, eval_metric='mlogloss', random_state=rng, n_jobs=-1),
    "lr": SGDClassifier(loss="log_loss", max_iter=3500, random_state=rng, n_jobs=-1),
    "knn5": KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
    "ridge": RidgeClassifier(random_state=rng),
    "linsvc_platt": CalibratedClassifierCV(estimator=LinearSVC(C=1.0, random_state=rng, dual=True), method="sigmoid", cv=3),
    "lda_shrink": LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto"),
}
# Sorted so that every later per-model loop uses one fixed column order.
all_base_names = sorted(base_models_defs.keys())
print("Base models to be trained:", all_base_names)

# Score buffers, one (n_samples, n_classes) matrix per base model and split.
oof_logits = {b: np.zeros((len(y_train), n_classes)) for b in all_base_names}
val_logits = {b: np.zeros((len(y_val), n_classes)) for b in all_base_names}
test_logits = {b: np.zeros((len(y_test), n_classes)) for b in all_base_names}

print("Starting Out-of-Fold training and prediction bagging...")
for fold, (tr_idx, oof_idx) in enumerate(skf.split(X_train, y_train), 1):
    # Xtr/ytr are the rows used for fitting; Xoo are the held-out rows of this fold.
    Xtr, Xoo, ytr = X_train[tr_idx], X_train[oof_idx], y_train[tr_idx]

    # The scaler is fitted on the fold's fitting rows only, then applied to the
    # held-out rows and to the validation and test splits.
    scaler_fold = StandardScaler().fit(Xtr)
    Xtr_s, Xoo_s = scaler_fold.transform(Xtr), scaler_fold.transform(Xoo)
    Xva_s, Xte_s = scaler_fold.transform(X_val), scaler_fold.transform(X_test)

    for name, model_template in base_models_defs.items():
        # The shared estimator object is refitted in place for this fold.
        model_template.fit(Xtr_s, ytr)
        oof_logits[name][oof_idx] = predict_logits(model_template, Xoo_s)
        # Adding score / K on each fold leaves the mean over the K fold models.
        val_logits[name] += predict_logits(model_template, Xva_s) / K
        test_logits[name] += predict_logits(model_template, Xte_s) / K
    print(f"Fold {fold}/{K} complete.")

# Per-model calibration: temperatures are fitted on the OOF scores of the
# training split and then reused unchanged for the validation and test scores.
temperatures, val_probs_cal, test_probs_cal = {}, {}, {}
oof_probs_cal = {}
for b in all_base_names:
    Tvec = fit_temperature_classwise(oof_logits[b], y_train)
    temperatures[b] = Tvec
    oof_probs_cal[b] = safe_softmax(oof_logits[b], Tvec)
    val_probs_cal[b] = safe_softmax(val_logits[b], Tvec)
    test_probs_cal[b] = safe_softmax(test_logits[b], Tvec)
print("Class-wise temperature scaling complete.")

# Standardise the raw features with statistics from the full training split.
raw_scaler_full = StandardScaler().fit(X_train)
Z_tr, Z_va, Z_te = raw_scaler_full.transform(X_train), raw_scaler_full.transform(X_val), raw_scaler_full.transform(X_test)
# One row per class: mean of the standardised training features of that class.
class_means = np.vstack([Z_tr[y_train==c].mean(0) for c in range(n_classes)])
# One row per class: reciprocal per-feature variance; the 1e-3 keeps the
# reciprocal finite when a feature has zero variance within a class.
class_invDiag = np.vstack([1.0 / (Z_tr[y_train==c].var(0) + 1e-3) for c in range(n_classes)])
def mahalanobis_diag(Z):
    """Distance of every row of ``Z`` to each class centre, one column per class.

    Uses the module-level ``class_means`` and ``class_invDiag``: the squared
    per-feature offsets are weighted by the class's inverse variances, summed
    over features, and the square root is returned.
    """
    per_class = []
    for centre, inv_var in zip(class_means, class_invDiag):
        offset = Z - centre
        per_class.append(np.sum(offset**2 * inv_var, axis=1))
    return np.sqrt(np.stack(per_class, axis=1))
# Per-class distance features for the train, validation and test splits.
D_tr_maha, D_va_maha, D_te_maha = mahalanobis_diag(Z_tr), mahalanobis_diag(Z_va), mahalanobis_diag(Z_te)


def build_meta_features(prob_map, raw_X, D_maha, scaler):
    """Assemble the meta-learner design matrix for one data split.

    Columns, left to right: the probability matrix of every model in
    ``all_base_names``; then, per model, its entropy and margin columns from
    ``entropy_and_margin``; then ``scaler.transform(raw_X)``; then ``D_maha``.
    """
    prob_blocks = [prob_map[name] for name in all_base_names]
    uncertainty_blocks = [
        column
        for name in all_base_names
        for column in entropy_and_margin(prob_map[name])
    ]
    tail_blocks = [scaler.transform(raw_X), D_maha]
    return np.hstack(prob_blocks + uncertainty_blocks + tail_blocks)

# Meta-learner inputs: training rows use OOF probabilities, validation and
# test rows use the fold-averaged probabilities.
X_meta_train = build_meta_features(oof_probs_cal, X_train, D_tr_maha, raw_scaler_full)
X_meta_val = build_meta_features(val_probs_cal, X_val, D_va_maha, raw_scaler_full)
X_meta_test = build_meta_features(test_probs_cal, X_test, D_te_maha, raw_scaler_full)

# Sample weights: 2.0 for training rows that the plain average of the base
# models misclassifies out-of-fold, 1.0 for all other rows.
oof_avg_preds = np.mean([oof_probs_cal[b] for b in all_base_names], axis=0).argmax(axis=1)
w_train = 1.0 + (oof_avg_preds != y_train).astype(float)


# Collected as (name, score, fitted_model, params) tuples.
meta_candidates = []
def eval_meta(model):
    """Fit ``model`` on the weighted meta-training set and score it on validation.

    Returns ``(score, model)`` where ``score`` is the negated validation log
    loss, so a larger score means a better candidate.
    """
    model.fit(X_meta_train, y_train, sample_weight=w_train)
    val_proba = _row_norm(model.predict_proba(X_meta_val))
    class_ids = np.arange(n_classes)
    score = -log_loss(y_val, val_proba, labels=class_ids)
    return score, model

# Candidate meta-learners; each tuple is (name, -val_log_loss, model, params).
# NOTE(review): `multi_class` (LogisticRegression) is flagged as deprecated in
# recent scikit-learn releases — confirm against the installed version.
for C in [0.5, 1.0, 2.0]:
    meta_candidates.append(("meta_logreg", *eval_meta(LogisticRegression(C=C, multi_class="multinomial", max_iter=5000, random_state=rng)), {"C": C}))
meta_candidates.append(("meta_xgb", *eval_meta(XGBClassifier(n_estimators=1200, max_depth=6, learning_rate=0.03, random_state=rng)), {}))

# Highest score first, i.e. the candidate with the lowest validation log loss.
best_name, _, best_model, best_par = sorted(meta_candidates, key=lambda t: t[1], reverse=True)[0]
print(f"Best meta-learner on validation: {best_name} with params {best_par}")

# Retrain best meta-learner on combined train+val sets
X_meta_train_val = np.vstack([X_meta_train, X_meta_val])
y_train_val = np.concatenate([y_train, y_val])
# Validation rows get unit weight; training rows keep their error-based weight.
w_train_val = np.concatenate([w_train, np.ones(len(y_val))])
best_model.fit(X_meta_train_val, y_train_val, sample_weight=w_train_val)

P_test_meta = _row_norm(best_model.predict_proba(X_meta_test))
y_test_pred_meta = P_test_meta.argmax(axis=1)
acc_meta, f1_meta = accuracy_score(y_test, y_test_pred_meta), f1_score(y_test, y_test_pred_meta, average="macro")


from scipy.optimize import minimize
# Calibrated probabilities in `all_base_names` order, used by the blending step.
P_val_list = [val_probs_cal[b] for b in all_base_names]
P_test_list = [test_probs_cal[b] for b in all_base_names]

def blend_logloss(W_flat, P_list_val, y_true_val):
    """Objective for per-class blending: log loss of the weighted model mix.

    ``W_flat`` is reshaped to (n_models, n_classes) and passed through
    ``project_cols_simplex``; row ``i`` then weights the class columns of
    ``P_list_val[i]``. The summed blend is row-normalised before scoring.
    """
    n_models = len(all_base_names)
    weights = project_cols_simplex(W_flat.reshape(n_models, n_classes))
    blended = 0
    for idx in range(n_models):
        blended = blended + P_list_val[idx] * weights[idx, :]
    class_ids = np.arange(n_classes)
    return log_loss(y_true_val, _row_norm(blended), labels=class_ids)

# Optimise the (n_models x n_classes) blending weights on the validation
# split, starting from equal weights.
res = minimize(
    blend_logloss,
    x0=np.ones(len(all_base_names) * n_classes),
    args=(P_val_list, y_val),
    method='L-BFGS-B'
)
# Apply the same projection the objective uses, so the weights are the ones scored.
W_opt = project_cols_simplex(res.x.reshape(len(all_base_names), n_classes))
# Unlike inside the objective, the test blend is not row-normalised here;
# it is used for argmax and passed on as the blend's probability matrix.
P_test_blend = sum(P_test_list[b] * W_opt[b, :] for b in range(len(all_base_names)))
y_test_pred_blend = P_test_blend.argmax(axis=1)
acc_blend, f1_blend = accuracy_score(y_test, y_test_pred_blend), f1_score(y_test, y_test_pred_blend, average="macro")


from datetime import datetime, timedelta, timezone

# Report whichever strategy scored higher on the test split; ties go to
# per-class blending.
# NOTE(review): the comparison uses test-set accuracy, so the winner's reported
# test metrics are chosen on the same data they are measured on.
if acc_meta > acc_blend:
    winner_name, winner_acc, winner_f1, winner_pred, winner_P = "meta-learner", acc_meta, f1_meta, y_test_pred_meta, P_test_meta
    print("Strategy: Meta-Learner had higher accuracy.")
else:
    winner_name, winner_acc, winner_f1, winner_pred, winner_P = "class-blending", acc_blend, f1_blend, y_test_pred_blend, P_test_blend
    print("Strategy: Per-Class Blending had higher accuracy.")

print(f"\nWINNER (PCA): {winner_name.upper()} with Accuracy = {winner_acc:.4f}, F1-Macro = {winner_f1:.4f}")

# Save artifacts
plot_confusion_matrix(confusion_matrix(y_test, winner_pred), classes, os.path.join(OUT_DIR, f"cm_{winner_name}.png"), f"CM - {winner_name.upper()} (Test, PCA)")
plot_roc_pr_curves_and_save_points(label_binarize(y_test, classes=np.arange(n_classes)), winner_P, classes, OUT_DIR, winner_name)

# Stamp the report with the actual generation time (UTC+6, Bangladesh) rather
# than a hard-coded string, which would be wrong on every later run.
report_timestamp = datetime.now(timezone(timedelta(hours=6))).strftime(
    "%A, %B %d, %Y at %I:%M %p (Bangladesh)"
)
with open(REPORT_JSON, "w") as f:
    json.dump({
        "timestamp": report_timestamp,
        "winner": {"name": winner_name, "accuracy": winner_acc, "f1_macro": winner_f1},
        "classification_report": classification_report(y_test, winner_pred, target_names=classes, output_dict=True)
    }, f, indent=4)
print(f"\nSaved all corrected V7 artifacts to: {OUT_DIR}")

Data loaded successfully: 212 features, 6 classes.
Split sizes: {'train': 6300, 'val': 900, 'test': 1800}
Base models to be trained: ['et', 'knn5', 'lda_shrink', 'linsvc_platt', 'lr', 'rf', 'ridge', 'xgb']
Starting Out-of-Fold training and prediction bagging...
Fold 1/5 complete.
Fold 2/5 complete.
Fold 3/5 complete.
Fold 4/5 complete.
Fold 5/5 complete.
Class-wise temperature scaling complete.
Best meta-learner on validation: meta_xgb with params {}
Strategy: Meta-Learner had higher accuracy.

WINNER (PCA): META-LEARNER with Accuracy = 0.9867, F1-Macro = 0.9867
Saved confusion matrix to /kaggle/working/Ensemble_Model_All_Version/Version7_Corrected/cm_meta-learner.png
Saved ROC curve to /kaggle/working/Ensemble_Model_All_Version/Version7_Corrected/roc_curve_meta-learner.png
Saved PR curve to /kaggle/working/Ensemble_Model_All_Version/Version7_Corrected/pr_curve_meta-learner.png

Saved all corrected V7 artifacts to: /kaggle/working/Ensemble_Model_All_Version/Version7_Corrected


#### V8

In [None]:
import os
import json
import joblib
import warnings
import numpy as np
import pandas as pd
from typing import Dict, Tuple, List

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix,
    log_loss, roc_curve, auc, precision_recall_curve
)
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.linear_model import SGDClassifier, LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

try:
    from catboost import CatBoostClassifier
    HAS_CAT = True
except ImportError:
    HAS_CAT = False

warnings.filterwarnings("ignore", category=UserWarning)
rng = 42
np.random.seed(rng)

os.makedirs("/kaggle/working/Ensemble Model All Version/FinalVersion", exist_ok=True)
os.makedirs("/kaggle/working/Customized CNN/RF", exist_ok=True)
os.makedirs("/kaggle/working/Customized CNN/SVM", exist_ok=True)

# Synthetic stand-in data so the cell can run without the real artifacts.
# The in-memory objects are always built (this keeps the global NumPy random
# stream identical to before), but they are written to disk ONLY when the
# target file is missing: previously every run silently overwrote the real
# feature CSV and the trained RF/SVM bundles with random data.
_dummy_csv_path = "/kaggle/working/features_256d_efficientnet.csv"
_dummy_rf_path = "/kaggle/working/Customized CNN/RF/rf_final_trainval.joblib"
_dummy_svm_path = "/kaggle/working/Customized CNN/SVM/svm_final_trainval.joblib"

dummy_features_5_class = np.random.rand(200, 256)
dummy_labels_5_class = np.random.randint(0, 5, 200)
dummy_df = pd.DataFrame(dummy_features_5_class, columns=[f'f{i}' for i in range(256)])
dummy_df['class_idx'] = dummy_labels_5_class
dummy_df['label'] = [f'class_{i}' for i in dummy_labels_5_class]
if not os.path.exists(_dummy_csv_path):
    dummy_df.to_csv(_dummy_csv_path, index=False)


# Stand-in model bundles in the {'model': ..., 'scaler': ...} layout that
# load_bundle_safe unpacks. They are fitted on 6-class labels while the dummy
# CSV above has 5 classes.
dummy_features_6_class = np.random.rand(100, 256)
dummy_labels_6_class = np.random.randint(0, 6, 100)
dummy_model_rf_6_class = {'model': RandomForestClassifier(random_state=rng).fit(dummy_features_6_class, dummy_labels_6_class), 'scaler': StandardScaler().fit(dummy_features_6_class)}
dummy_model_svm_6_class = {'model': CalibratedClassifierCV(LinearSVC(random_state=rng, dual=False)).fit(dummy_features_6_class, dummy_labels_6_class), 'scaler': StandardScaler().fit(dummy_features_6_class)}
if not os.path.exists(_dummy_rf_path):
    joblib.dump(dummy_model_rf_6_class, _dummy_rf_path)
if not os.path.exists(_dummy_svm_path):
    joblib.dump(dummy_model_svm_6_class, _dummy_svm_path)


# Input feature table and output directory for this version's artifacts.
CSV_PATH  = "/kaggle/working/features_256d_efficientnet.csv"
OUT_DIR   = "/kaggle/working/Ensemble Model All Version/FinalVersion"
os.makedirs(OUT_DIR, exist_ok=True)

# Pre-trained base models, keyed by short name. Paths that do not exist are
# skipped when the models are loaded.
MODEL_PATHS = {
    "rf":  "/kaggle/working/Customized CNN/RF/rf_final_trainval.joblib",
    "svm": "/kaggle/working/Customized CNN/SVM/svm_final_trainval.joblib",
    "xgb": "/kaggle/working/Customized CNN/XGB/xgb_final_trainval.joblib",
    "knn": "/kaggle/working/Customized CNN/KNN/knn_final_trainval.joblib",
    "cat": "/kaggle/working/Customized CNN/CAT/cat_final_trainval.cbm",
}
# JSON summary (winner metrics + classification report) written at the end.
REPORT_JSON = os.path.join(OUT_DIR, "winner_report_final.json")

def setup_plot_style():
    """Apply the shared bold serif (Times New Roman) look to matplotlib's rcParams."""
    style = {
        "font.family": "serif",
        "font.serif": "Times New Roman",
        "font.size": 12,
        "axes.labelsize": 16,
        "axes.titlesize": 18,
        "font.weight": "bold",
        "axes.labelweight": "bold",
    }
    plt.rcParams.update(style)

def plot_confusion_matrix(cm: np.ndarray, classes: List[str], output_path: str, title: str):
    """Draw ``cm`` as an annotated teal heatmap and save it to ``output_path``.

    Multi-word class labels are broken after their first word so tick labels
    stay narrow. The figure is saved at 600 dpi and then closed.
    """
    setup_plot_style()
    cm_df = pd.DataFrame(cm, index=classes, columns=classes)

    def wrap(lbl):
        # Single-word (or empty) labels are returned untouched.
        words = str(lbl).split()
        if len(words) <= 1:
            return lbl
        return words[0] + "\n" + " ".join(words[1:])

    tick_text = [wrap(col) for col in cm_df.columns]
    teal_map = LinearSegmentedColormap.from_list("tealgrad", ["#d9f0f3", "#007c7c"], N=256)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm_df,
        annot=True,
        fmt="d",
        cmap=teal_map,
        cbar=True,
        xticklabels=tick_text,
        yticklabels=tick_text,
        linewidths=1,
        linecolor="white",
        annot_kws={"fontsize": 14, "weight": "bold"},
        ax=ax,
    )
    ax.set_title(title, weight="bold")
    ax.set_xlabel("Predicted", weight="bold")
    ax.set_ylabel("Actual", weight="bold")
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontweight("bold")

    fig.tight_layout()
    fig.savefig(output_path, dpi=600, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved confusion matrix to {output_path}")

def plot_roc_pr_curves_and_save_points(y_true_bin: np.ndarray, y_pred_proba: np.ndarray, classes: List[str], out_dir: str, title_prefix: str):
    """Plot one-vs-rest ROC and precision-recall curves and export their points.

    For each class index ``k`` the curves are computed from column ``k`` of
    ``y_true_bin`` and ``y_pred_proba``. Four files are written to ``out_dir``:
    ``roc_curve_final.png``, ``roc_points_final.csv``, ``pr_curve_final.png``
    and ``pr_points_final.csv``. The PR title is ``title_prefix`` with "ROC"
    replaced by "Precision-Recall".
    """
    setup_plot_style()
    n_classes = len(classes)
    class_ids = range(n_classes)

    # ---- ROC curves ----
    fig_roc, ax_roc = plt.subplots(figsize=(10, 8))
    roc_points = {}
    for k in class_ids:
        fpr, tpr, _ = roc_curve(y_true_bin[:, k], y_pred_proba[:, k])
        roc_auc = auc(fpr, tpr)
        roc_points[k] = (fpr, tpr)
        ax_roc.plot(fpr, tpr, lw=2, label=f'{classes[k]} (AUC = {roc_auc:0.3f})')
    ax_roc.plot([0, 1], [0, 1], 'k--', lw=2)  # chance diagonal
    ax_roc.set_xlim([0.0, 1.0])
    ax_roc.set_ylim([0.0, 1.05])
    ax_roc.set_xlabel('False Positive Rate')
    ax_roc.set_ylabel('True Positive Rate')
    ax_roc.set_title(title_prefix)
    ax_roc.legend(loc="lower right", fontsize=10)
    fig_roc.tight_layout()
    roc_path = os.path.join(out_dir, "roc_curve_final.png")
    fig_roc.savefig(roc_path, dpi=600, bbox_inches="tight")
    plt.close(fig_roc)
    print(f"Saved ROC curve to {roc_path}")

    # One CSV row per curve point, grouped by class.
    roc_rows = [
        {"class_idx": k, "class": classes[k], "fpr": x, "tpr": y}
        for k in class_ids
        for x, y in zip(*roc_points[k])
    ]
    pd.DataFrame(roc_rows).to_csv(os.path.join(out_dir, "roc_points_final.csv"), index=False)

    # ---- Precision-recall curves ----
    fig_pr, ax_pr = plt.subplots(figsize=(10, 8))
    pr_points = {}
    for k in class_ids:
        precision, recall, _ = precision_recall_curve(y_true_bin[:, k], y_pred_proba[:, k])
        pr_points[k] = (precision, recall)
        ax_pr.plot(recall, precision, lw=2, label=f'{classes[k]}')
    ax_pr.set_xlim([0.0, 1.0])
    ax_pr.set_ylim([0.0, 1.05])
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(title_prefix.replace("ROC", "Precision-Recall"))
    ax_pr.legend(loc="best", fontsize=10)
    fig_pr.tight_layout()
    pr_path = os.path.join(out_dir, "pr_curve_final.png")
    fig_pr.savefig(pr_path, dpi=600, bbox_inches="tight")
    plt.close(fig_pr)
    print(f"Saved PR curve to {pr_path}")

    pr_rows = [
        {"class_idx": k, "class": classes[k], "precision": prec, "recall": rec}
        for k in class_ids
        for prec, rec in zip(*pr_points[k])
    ]
    pd.DataFrame(pr_rows).to_csv(os.path.join(out_dir, "pr_points_final.csv"), index=False)

df = pd.read_csv(CSV_PATH)
# Feature columns are those named "f" followed only by digits (f0, f1, ...).
feat_cols = [c for c in df.columns if c.startswith("f") and c[1:].isdigit()]
if not feat_cols: raise RuntimeError("No 256-D feature columns found.")
X_all = df[feat_cols].values.astype(np.float32)
y_all = df["class_idx"].values.astype(int)
# Unique label strings in order of first appearance after sorting by class_idx.
classes = df.sort_values("class_idx")["label"].unique().tolist()
n_classes = len(classes)
print(f"Dataset has {n_classes} classes.")

# 20% test; then 12.5% of the remaining 80% as validation (10% of all rows),
# leaving 70% for training. Both splits are stratified on the label.
X_tmp, X_test, y_tmp, y_test = train_test_split(X_all, y_all, test_size=0.20, stratify=y_all, random_state=rng)
X_train, X_val, y_train, y_val = train_test_split(X_tmp, y_tmp, test_size=0.125, stratify=y_tmp, random_state=rng)
print("Split sizes:", {"train": len(y_train), "val": len(y_val), "test": len(y_test)})


def _row_norm(p: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    p = np.nan_to_num(p, nan=0.0, posinf=0.0, neginf=0.0)
    p[p < 0] = 0.0
    s = p.sum(axis=1, keepdims=True); s[s <= 0] = 1.0
    p = p / s
    p = np.clip(p, eps, 1.0)
    return p / p.sum(axis=1, keepdims=True)

def safe_log_probs(P: np.ndarray) -> np.ndarray:
    """Return the element-wise natural log of ``_row_norm(P)``."""
    normalised = _row_norm(P)
    return np.log(normalised)

def safe_softmax(L: np.ndarray, T: float = 1.0) -> np.ndarray:
    """Temperature-scaled, numerically stable softmax over the rows of ``L``.

    Non-finite logits become 0 before dividing by ``T`` (floored at 1e-6).
    ``T`` may be a scalar or anything that broadcasts against ``L`` — callers
    in this file also pass a per-class vector. The result goes through
    ``_row_norm``.
    """
    temperature = np.maximum(T, 1e-6)
    scaled = np.nan_to_num(L, nan=0.0, posinf=0.0, neginf=0.0) / temperature
    shifted = scaled - np.max(scaled, axis=1, keepdims=True)
    exponentials = np.exp(np.clip(shifted, -700, 700))
    return _row_norm(exponentials)

def entropy_and_margin(P: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Per-row entropy and top-two margin of ``P`` after ``_row_norm``.

    Both results are column vectors: the entropy ``-sum(p * log(p + 1e-12))``
    and the gap between the largest and second-largest entry of each row.
    """
    probs = _row_norm(P)
    entropy = -np.sum(probs * np.log(probs + 1e-12), axis=1, keepdims=True)
    # After partitioning at -2 the last two columns are (second largest, largest).
    two_largest = np.partition(probs, -2, axis=1)[:, -2:]
    runner_up, best = two_largest[:, 0], two_largest[:, 1]
    margin = (best - runner_up).reshape(-1, 1)
    return entropy, margin

def load_bundle_safe(name: str, path: str):
    """Load a saved base model, returning ``(model, scaler)``.

    * ``name == "cat"`` with a ``.cbm`` path and CatBoost available: the file is
      loaded as a native CatBoost model and the scaler is ``None``.
    * Otherwise the file is read with ``joblib.load``. A dict containing a
      ``"model"`` key is unpacked into its ``"model"`` and optional ``"scaler"``
      entries; any other object is returned as the model with no scaler.

    Any exception is reported with a ``[WARN]`` line and yields ``(None, None)``.
    """
    try:
        is_catboost_file = name == "cat" and path.lower().endswith(".cbm")
        if is_catboost_file and HAS_CAT:
            booster = CatBoostClassifier()
            booster.load_model(path)
            return booster, None

        # joblib unpickles the file, so only trusted model files should be loaded.
        payload = joblib.load(path)
        if isinstance(payload, dict) and "model" in payload:
            return payload["model"], payload.get("scaler")
        return payload, None
    except Exception as e:
        print(f"[WARN] Load failed for {name} @ {path}: {e}")
        return None, None

def predict_logits(model, X) -> np.ndarray:
    """Return a logit-like score matrix for ``X`` from ``model``.

    Preference order:
      1. ``decision_function`` scores (a 1-D result is expanded to the two
         columns ``[-d, d]``), with non-finite values replaced by 0.
      2. ``safe_log_probs`` of ``predict_proba``.
      3. Hard predictions encoded as +10 for the predicted class and -10
         elsewhere, using the module-level ``n_classes``.
    """
    if hasattr(model, "decision_function"):
        scores = np.asarray(model.decision_function(X))
        if scores.ndim == 1:
            scores = np.column_stack((-scores, scores))
        cleaned = np.nan_to_num(scores, nan=0.0, posinf=0.0, neginf=0.0)
        return cleaned.astype(np.float64)

    if hasattr(model, "predict_proba"):
        proba = np.asarray(model.predict_proba(X), dtype=np.float64)
        return safe_log_probs(proba)

    n_rows = X.shape[0]
    hard_labels = model.predict(X)
    logits = np.full((n_rows, n_classes), -10.0)
    logits[np.arange(n_rows), hard_labels] = 10.0
    return logits

def predict_proba_safe(model, X) -> np.ndarray:
    """Row-normalised class probabilities for ``X``.

    Uses ``predict_proba`` when the model has it (a 1-D result is expanded to
    the two columns ``[1 - p, p]``); otherwise falls back to a temperature-1
    softmax of ``predict_logits``.
    """
    if not hasattr(model, "predict_proba"):
        return safe_softmax(predict_logits(model, X), 1.0)

    proba = model.predict_proba(X)
    if proba.ndim == 1:
        proba = np.column_stack((1 - proba, proba))
    return _row_norm(np.asarray(proba, dtype=np.float64))

def tta_probs(model, X, n=6, seed=42):
    """Average probabilities over the clean input and ``n - 1`` noisy copies.

    Each noisy copy is ``X * (1 + N(0, 0.01)) + N(0, 0.005)`` with element-wise
    Gaussian noise drawn from a generator seeded with ``seed``. The mean of
    all ``n`` probability matrices is row-normalised before returning.
    """
    generator = np.random.default_rng(seed)
    collected = [predict_proba_safe(model, X)]
    for _ in range(1, n):
        # Draw order matters for reproducibility: scale noise first, then shift.
        scale = 1.0 + generator.normal(0, 0.01, X.shape)
        shift = generator.normal(0, 0.005, X.shape)
        perturbed = X * scale + shift
        collected.append(_row_norm(predict_proba_safe(model, perturbed)))
    averaged = np.mean(collected, axis=0)
    return _row_norm(averaged)


# Load every pre-trained base model whose file exists; failed loads are skipped.
loaded_models, loaded_scalers = {}, {}
for name, path in MODEL_PATHS.items():
    if os.path.exists(path):
        m, s = load_bundle_safe(name, path)
        if m is not None:
            loaded_models[name], loaded_scalers[name] = m, s
print("Loaded bases:", list(loaded_models.keys()))

# Compatibility check: run each loaded model on one training row and drop any
# model whose probability output does not have n_classes columns, or that
# raises while predicting.
models_to_remove = []
if loaded_models: 
    X_dummy = X_train[0:1] 
    for name, model in loaded_models.items():
        scaler = loaded_scalers.get(name)
        # NOTE(review): this runs outside the try below, so an error raised by
        # scaler.transform is not caught here.
        X_dummy_scaled = scaler.transform(X_dummy) if scaler else X_dummy
        try:
            pred_dummy = predict_proba_safe(model, X_dummy_scaled)
            if pred_dummy.shape[1] != n_classes:
                print(f"[WARN] Shape mismatch for model '{name}'. Expected {n_classes} classes, but model produced {pred_dummy.shape[1]}. Removing from ensemble.")
                models_to_remove.append(name)
        except Exception as e:
            print(f"[WARN] Could not verify model '{name}' due to an error: {e}. Removing from ensemble.")
            models_to_remove.append(name)

# Removal happens after the loop so the dict is not modified while iterating it.
for name in models_to_remove:
    loaded_models.pop(name)
    loaded_scalers.pop(name)
print("Compatible loaded bases:", list(loaded_models.keys()))

# Number of stratified folds used for the out-of-fold (OOF) predictions.
K = 5
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=rng)
# (tag, preprocessing) pairs for the models trained inside the fold loop;
# "std" means the model is fed standardised features.
extra_fold_bases = [
    ("rf_in",  "std"), ("lr_in",  "std"), ("et_in",  "std"), ("knn5_euc_in", "std"),
    ("ridge_in", "std"), ("linsvc_platt_in", "std"), ("lda_shrink_in", "std"),
]

# Loaded models plus in-fold models, sorted to fix the meta-feature column order.
all_base_names = sorted(set(list(loaded_models.keys()) + [b for b,_ in extra_fold_bases]))
print("All base names for ensembling:", all_base_names)

# Score buffers, one (n_samples, n_classes) matrix per base model and split.
oof_logits = {b: np.zeros((len(y_train), n_classes)) for b in all_base_names}
val_logits = {b: np.zeros((len(y_val),   n_classes)) for b in all_base_names}
test_logits= {b: np.zeros((len(y_test),  n_classes)) for b in all_base_names}

# NOTE(review): this scaler is fitted on the whole training split and reused in
# every fold, so held-out fold rows contribute to the scaling statistics.
std_full = StandardScaler().fit(X_train)

def build_fold_model(tag: str):
    """Create a fresh, unfitted estimator for the in-fold base model ``tag``.

    Raises:
        ValueError: if ``tag`` is not one of the known model tags.
    """
    # Factories are lambdas so only the requested estimator is constructed.
    factories = {
        "rf_in": lambda: RandomForestClassifier(
            n_estimators=100, max_features="sqrt", bootstrap=True, random_state=rng, n_jobs=-1
        ),
        "et_in": lambda: ExtraTreesClassifier(
            n_estimators=100, max_features="sqrt", bootstrap=False, random_state=rng, n_jobs=-1
        ),
        "lr_in": lambda: SGDClassifier(
            loss="log_loss", penalty="l2", alpha=1e-4, max_iter=2000, random_state=rng, n_jobs=-1
        ),
        "knn5_euc_in": lambda: KNeighborsClassifier(
            n_neighbors=5, metric="euclidean", weights="distance", n_jobs=-1
        ),
        "ridge_in": lambda: RidgeClassifier(alpha=1.0, random_state=rng),
        "linsvc_platt_in": lambda: CalibratedClassifierCV(
            estimator=LinearSVC(C=1.0, random_state=rng, max_iter=2000, dual=False),
            method="sigmoid",
            cv=3,
        ),
        "lda_shrink_in": lambda: LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto"),
    }
    if tag not in factories:
        raise ValueError(tag)
    return factories[tag]()

print("Starting OOF and bagging...")
for fold, (tr_idx, oof_idx) in enumerate(skf.split(X_train, y_train), 1):
    # Xtr/ytr are the rows used for fitting; Xoo are the held-out rows of this fold.
    Xtr, Xoo, ytr = X_train[tr_idx], X_train[oof_idx], y_train[tr_idx]
    Xtr_s, Xoo_s = std_full.transform(Xtr), std_full.transform(Xoo)
    Xva_s, Xte_s = std_full.transform(X_val), std_full.transform(X_test)

    # Pre-trained models are not refitted: they only score the held-out rows
    # (with test-time augmentation), using their own scaler when one was saved.
    for b in loaded_models.keys():
        model, scaler = loaded_models[b], loaded_scalers[b]
        Xoo_u = scaler.transform(Xoo) if scaler else Xoo
        oof_logits[b][oof_idx] = safe_log_probs(_row_norm(tta_probs(model, Xoo_u, n=6, seed=rng+fold)))

    # In-fold models: a fresh estimator per fold; validation/test scores are
    # summed here and divided by K after the loop.
    for tag, prep in extra_fold_bases:
        clf = build_fold_model(tag)
        clf.fit(Xtr_s if prep=="std" else Xtr, ytr)
        oof_logits[tag][oof_idx] = predict_logits(clf, Xoo_s if prep=="std" else Xoo)
        val_logits[tag] += predict_logits(clf, Xva_s if prep=="std" else X_val)
        test_logits[tag] += predict_logits(clf, Xte_s if prep=="std" else X_test)
    print(f"Fold {fold} complete.")

# Turn the per-fold sums into means over the K fold models.
for tag, _ in extra_fold_bases: val_logits[tag] /= K; test_logits[tag] /= K
# Pre-trained models score validation and test once, with fixed TTA seeds.
for b in loaded_models.keys():
    scaler = loaded_scalers[b]
    val_logits[b] = safe_log_probs(_row_norm(tta_probs(loaded_models[b], scaler.transform(X_val) if scaler else X_val, n=6, seed=rng+77)))
    test_logits[b] = safe_log_probs(_row_norm(tta_probs(loaded_models[b], scaler.transform(X_test) if scaler else X_test, n=6, seed=rng+99)))

# NOTE(review): all_base_names always contains the extra_fold_bases tags, so
# this guard cannot trigger as the code stands.
if not all_base_names:
    raise RuntimeError("No compatible base models available for ensembling. Halting execution.")

from scipy.optimize import minimize
def fit_temperature_classwise(logits, y):
    """Fit one softmax temperature per class by coordinate-wise minimisation.

    Classes are visited in index order. For each class a bounded L-BFGS-B
    search over [0.1, 5.0], started at 1.0, minimises the log loss of
    ``safe_softmax(logits, T=...)`` while the other entries keep their
    current values.
    """
    temps = np.ones(n_classes)
    class_ids = np.arange(n_classes)
    for cls in range(n_classes):

        def objective(t, cls=cls):
            # Only the temperature of the class being tuned is varied.
            trial = temps.copy()
            trial[cls] = t[0]
            return log_loss(y, safe_softmax(logits, T=trial), labels=class_ids)

        result = minimize(objective, [1.0], method='L-BFGS-B', bounds=[(0.1, 5.0)])
        temps[cls] = result.x[0]
    return temps

# Per-model calibration: temperatures are fitted on the OOF scores of the
# training split and then reused unchanged for the validation and test scores.
temperatures, oof_probs_cal, val_probs_cal, test_probs_cal = {}, {}, {}, {}
for b in all_base_names:
    Tvec = fit_temperature_classwise(oof_logits[b], y_train)
    temperatures[b] = Tvec
    oof_probs_cal[b] = safe_softmax(oof_logits[b], T=Tvec)
    val_probs_cal[b] = safe_softmax(val_logits[b], T=Tvec)
    test_probs_cal[b] = safe_softmax(test_logits[b], T=Tvec)
print("Class-wise temperatures ready.")

# Standardise the raw features with statistics from the full training split.
raw_scaler = StandardScaler().fit(X_train)
Z_tr, Z_va, Z_te = raw_scaler.transform(X_train), raw_scaler.transform(X_val), raw_scaler.transform(X_test)
# One row per class: mean of the standardised training features of that class.
class_means = np.vstack([Z_tr[y_train==c].mean(0) for c in range(n_classes)])
# One row per class: reciprocal per-feature variance; the 1e-3 keeps the
# reciprocal finite when a feature has zero variance within a class.
class_invDiag = np.vstack([1.0 / (Z_tr[y_train==c].var(0) + 1e-3) for c in range(n_classes)])
def mahalanobis_diag(Z):
    """Distance of every row of ``Z`` to each class centre, one column per class.

    Uses the module-level ``class_means`` and ``class_invDiag``: for each class
    the row-wise dot product of the offset with the inverse-variance-weighted
    offset is taken, and the square root of the stacked result is returned.
    """
    squared = []
    for centre, inv_var in zip(class_means, class_invDiag):
        squared.append(np.einsum('ij,ij->i', Z - centre, (Z - centre) * inv_var))
    return np.sqrt(np.stack(squared, axis=1))
# Per-class distance features for the train, validation and test splits.
D_tr_maha, D_va_maha, D_te_maha = mahalanobis_diag(Z_tr), mahalanobis_diag(Z_va), mahalanobis_diag(Z_te)

def build_meta(prob_map, raw_X_unscaled, D_maha):
    """Assemble the meta-learner design matrix for one data split.

    Columns, left to right: the probability matrix of every model in
    ``all_base_names``; then, per model, the entropy and margin columns of its
    row-normalised probabilities; then ``raw_scaler.transform(raw_X_unscaled)``;
    then ``D_maha``.
    """
    prob_blocks = [prob_map[name] for name in all_base_names]
    uncertainty_blocks = [
        column
        for name in all_base_names
        for column in entropy_and_margin(_row_norm(prob_map[name]))
    ]
    tail_blocks = [raw_scaler.transform(raw_X_unscaled), D_maha]
    return np.hstack(prob_blocks + uncertainty_blocks + tail_blocks)

# Meta-learner inputs: training rows use OOF probabilities, validation and
# test rows use the probabilities computed for those splits.
X_meta_train, X_meta_val, X_meta_test = build_meta(oof_probs_cal, X_train, D_tr_maha), build_meta(val_probs_cal, X_val, D_va_maha), build_meta(test_probs_cal, X_test, D_te_maha)
# Sample weights: 2.0 for training rows that the plain average of the base
# models misclassifies out-of-fold, 1.0 for all other rows.
w_train = 1.0 + (np.mean([oof_probs_cal[b] for b in all_base_names], axis=0).argmax(axis=1) != y_train).astype(float)

# Collected as (name, score, fitted_model, params) tuples.
cands = []
def eval_meta(model):
    """Fit ``model`` on the weighted meta-training set and score it on validation.

    Returns ``(score, model)`` where ``score`` is the negated validation log
    loss, so a larger score means a better candidate.
    """
    model.fit(X_meta_train, y_train, sample_weight=w_train)
    val_proba = _row_norm(model.predict_proba(X_meta_val))
    class_ids = np.arange(n_classes)
    score = -log_loss(y_val, val_proba, labels=class_ids)
    return score, model

# Candidate meta-learners; each tuple is (name, -val_log_loss, model, params).
# NOTE(review): `multi_class` (LogisticRegression) and `use_label_encoder`
# (XGBClassifier) are flagged as deprecated in recent releases of those
# libraries — confirm against the installed versions.
for C in [0.5, 1.0, 2.0]: cands.append(("meta_logreg", *eval_meta(LogisticRegression(C=C, multi_class="multinomial", max_iter=5000, random_state=rng)), {"C": C}))
cands.append(("meta_xgb", *eval_meta(XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=rng, use_label_encoder=False, eval_metric='mlogloss')), {}))
if HAS_CAT: cands.append(("meta_cat", *eval_meta(CatBoostClassifier(iterations=200, depth=4, random_seed=rng, verbose=False)), {}))

# NOTE(review): the appends above are unconditional, so this list is never
# empty when execution reaches this point.
if not cands:
    raise RuntimeError("No meta-learner candidates could be trained. Halting execution.")

# Highest score first, i.e. the candidate with the lowest validation log loss.
best_name, _, best_model, _ = sorted(cands, key=lambda t: t[1], reverse=True)[0]
print(f"Best meta-learner on validation: {best_name}")
# Refit the selected model on train+val; validation rows get unit weight.
best_model.fit(np.vstack([X_meta_train, X_meta_val]), np.concatenate([y_train, y_val]), sample_weight=np.concatenate([w_train, np.ones(len(y_val))]))
P_test_meta = _row_norm(best_model.predict_proba(X_meta_test))
acc_meta, f1_meta = accuracy_score(y_test, P_test_meta.argmax(1)), f1_score(y_test, P_test_meta.argmax(1), average="macro")

def blend_logloss(W_flat, P_list, y_true):
    """Objective for per-class blending: log loss of the weighted model mix.

    ``W_flat`` is reshaped to (n_models, n_classes), clipped at zero and
    scaled so each class column sums to (almost exactly) one; row ``i`` then
    weights the class columns of ``P_list[i]``. The summed blend is
    row-normalised before scoring.
    """
    n_models = len(all_base_names)
    weights = np.maximum(W_flat.reshape(n_models, n_classes), 0)
    # The 1e-12 avoids division by zero when a whole column was clipped to 0.
    weights /= (weights.sum(axis=0, keepdims=True) + 1e-12)
    blended = 0
    for idx in range(n_models):
        blended = blended + P_list[idx] * weights[idx, :]
    class_ids = np.arange(n_classes)
    return log_loss(y_true, _row_norm(blended), labels=class_ids)

# Optimise the (n_models x n_classes) blending weights on the validation
# split, starting from equal weights.
res = minimize(blend_logloss, x0=np.ones(len(all_base_names) * n_classes), args=([val_probs_cal[b] for b in all_base_names], y_val), method='L-BFGS-B')
# Same clip-and-normalise step as inside blend_logloss, applied to the optimum.
W_opt = np.maximum(res.x.reshape(len(all_base_names), n_classes), 0)
W_opt /= (W_opt.sum(axis=0, keepdims=True) + 1e-12)

test_probs_list = [test_probs_cal[b] for b in all_base_names]
# Unlike inside the objective, the test blend is not row-normalised here;
# it is used for argmax and passed on as the blend's probability matrix.
P_test_blend = sum(test_probs_list[b] * W_opt[b, :] for b in range(len(all_base_names)))
acc_blend, f1_blend = accuracy_score(y_test, P_test_blend.argmax(1)), f1_score(y_test, P_test_blend.argmax(1), average="macro")


# Report whichever strategy scored higher on the test split; ties go to blending.
# NOTE(review): the comparison uses test-set accuracy, so the winner's reported
# test metrics are chosen on the same data they are measured on.
if acc_meta > acc_blend:
    winner_name, winner_acc, winner_f1, winner_pred, winner_P = "meta", acc_meta, f1_meta, P_test_meta.argmax(1), P_test_meta
else:
    winner_name, winner_acc, winner_f1, winner_pred, winner_P = "blend", acc_blend, f1_blend, P_test_blend.argmax(1), P_test_blend
print(f"\nWINNER: {winner_name.upper()} with Accuracy = {winner_acc:.4f}, F1-Macro = {winner_f1:.4f}")

# Figures and curve points for the winning strategy.
plot_confusion_matrix(confusion_matrix(y_test, winner_pred), classes, os.path.join(OUT_DIR, "cm_final.png"), "Confusion Matrix - Ensemble Final Version (Test)")
plot_roc_pr_curves_and_save_points(label_binarize(y_test, classes=np.arange(n_classes)), winner_P, classes, OUT_DIR, "ROC - Ensemble Final Version (Test)")

# JSON summary: winner metrics plus the per-class classification report.
with open(REPORT_JSON, "w") as f:
    json.dump({
        "winner": {"name": winner_name, "accuracy": winner_acc, "f1_macro": winner_f1},
        "classification_report": classification_report(y_test, winner_pred, target_names=classes, output_dict=True)
    }, f, indent=4)
print(f"\nSaved all final artifacts to: {OUT_DIR}")

Dataset has 5 classes.
Split sizes: {'train': 140, 'val': 20, 'test': 40}
Loaded bases: ['rf', 'svm', 'xgb', 'knn', 'cat']
[WARN] Shape mismatch for model 'rf'. Expected 5 classes, but model produced 6. Removing from ensemble.
[WARN] Shape mismatch for model 'svm'. Expected 5 classes, but model produced 6. Removing from ensemble.
[WARN] Shape mismatch for model 'xgb'. Expected 5 classes, but model produced 6. Removing from ensemble.
[WARN] Shape mismatch for model 'knn'. Expected 5 classes, but model produced 6. Removing from ensemble.
[WARN] Shape mismatch for model 'cat'. Expected 5 classes, but model produced 6. Removing from ensemble.
Compatible loaded bases: []
All base names for ensembling: ['et_in', 'knn5_euc_in', 'lda_shrink_in', 'linsvc_platt_in', 'lr_in', 'rf_in', 'ridge_in']
Starting OOF and bagging...
Fold 1 complete.
Fold 2 complete.
Fold 3 complete.
Fold 4 complete.
Fold 5 complete.
Class-wise temperatures ready.
Best meta-learner on validation: meta_cat

WINNER: META wit