In [1]:
%%writefile src/backend/ML/mlops/config.py
"""Central MLflow configuration for consistent experiment tracking."""
import os
from pathlib import Path

# Constants
RANDOM_STATE = 42
TEST_SIZE = 0.2

# MLflow settings
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "file:./mlruns")
MLFLOW_EXPERIMENT_NAME = "iris_classification"

# Ensure MLflow directory exists
mlruns_dir = Path("./mlruns")
mlruns_dir.mkdir(exist_ok=True)

# Set MLflow tracking URI
os.environ["MLFLOW_TRACKING_URI"] = MLFLOW_TRACKING_URI

# ─── MLflow configuration ──────────────────────────────────────────────────
# Use Docker service-name so this works inside the compose network
# Falls back to local file store for standalone usage
TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000")
EXPERIMENT_NAME = "iris_classification"
ARTIFACT_ROOT = os.getenv("MLFLOW_ARTIFACT_ROOT", "./mlruns")

# ─── Model registry ────────────────────────────────────────────────────────
MODEL_NAME = "iris_classifier"
MODEL_STAGE_PRODUCTION = "Production"
MODEL_STAGE_STAGING = "Staging"


Overwriting src/backend/ML/mlops/config.py


In [2]:
%%writefile src/backend/ML/mlops/logging.py
"""
Extended MLflow logging helpers.
"""
from __future__ import annotations
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Any, Sequence, Optional
from matplotlib.figure import Figure
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    log_loss,
    matthews_corrcoef,
)


def _log_fig(fig: Figure, name: str) -> None:
    """Log a Matplotlib figure directly without temp files."""
    mlflow.log_figure(fig, artifact_file=name)
    plt.close(fig)


def log_full_metrics(
    y_true, y_pred, *, label_list: Optional[Sequence[int]] = None, prefix: str = ""
) -> Dict[str, float]:
    """
    Compute & log *all* useful classification metrics.

    Returns a flat dict so callers can unit-test easily.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        label_list: Optional list of label integers (for compatibility)
        prefix: Optional prefix for metric names

    Returns:
        Dictionary of all calculated metrics
    """
    # (1) macro metrics ------------------------------------------------------
    macro = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division="warn"
    )
    metrics: Dict[str, float] = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision_macro": float(macro[0]),
        "recall_macro": float(macro[1]),
        "f1_macro": float(macro[2]),
    }

    # (2) per-class ----------------------------------------------------------
    report = classification_report(y_true, y_pred, output_dict=True, zero_division="warn")
    if isinstance(report, dict):
        for klass, d in report.items():
            if isinstance(klass, str) and klass.isdigit():  # skip 'accuracy', 'macro avg', …
                k = int(klass)
                if isinstance(d, dict):
                    precision_val = d.get("precision", 0.0)
                    recall_val = d.get("recall", 0.0)
                    f1_val = d.get("f1-score", 0.0)
                    support_val = d.get("support", 0.0)

                    metrics[f"precision_{k}"] = float(precision_val) if precision_val is not None else 0.0
                    metrics[f"recall_{k}"] = float(recall_val) if recall_val is not None else 0.0
                    metrics[f"f1_{k}"] = float(f1_val) if f1_val is not None else 0.0
                    metrics[f"support_{k}"] = float(support_val) if support_val is not None else 0.0

    # (3) derived – try/except so we never crash ----------------------------
    try:
        metrics["roc_auc_ovr_weighted"] = roc_auc_score(
            y_true, pd.get_dummies(y_pred), multi_class="ovr", average="weighted"
        )
    except Exception:
        pass
    try:
        metrics["log_loss"] = log_loss(y_true, pd.get_dummies(y_pred))
    except Exception:
        pass
    try:
        metrics["mcc"] = matthews_corrcoef(y_true, y_pred)
    except Exception:
        pass

    # (4) optional prefix for nested CV, etc. -------------------------------
    if prefix:
        metrics = {f"{prefix}_{k}": v for k, v in metrics.items()}

    mlflow.log_metrics(metrics)
    return metrics


def log_confusion_matrix(
    y_true, y_pred, *, class_names: Optional[Sequence[str]] = None, artifact_name: str = "confusion_matrix.png"
) -> None:
    """Create + log confusion matrix using mlflow.log_figure."""
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names if class_names is not None else "auto",
        yticklabels=class_names if class_names is not None else "auto",
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Confusion Matrix")
    _log_fig(fig, artifact_name)


def log_feature_importance(
    feature_names: list, importances: list, artifact_name: str = "feature_importance.png"
):
    """Bar plot logged via mlflow.log_figure (no disk I/O)."""
    imp_df = (
        pd.DataFrame({"feature": feature_names, "importance": importances})
        .sort_values("importance")
    )
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(data=imp_df, x="importance", y="feature", ax=ax)
    ax.set_title("Feature Importances")
    _log_fig(fig, artifact_name)


def log_parameters(params: Dict[str, Any]) -> None:
    """
    Log parameters to MLflow.

    Args:
        params: Dictionary of parameter names and values
    """
    mlflow.log_params(params)


def log_dataset_info(X_train, X_test, y_train, y_test) -> None:
    """
    Log dataset information as parameters.

    Args:
        X_train: Training features
        X_test: Test features
        y_train: Training labels
        y_test: Test labels
    """
    dataset_params = {
        "train_size": len(X_train),
        "test_size": len(X_test),
        "n_features": (X_train.shape[1] if hasattr(X_train, "shape") else len(X_train[0])),
        "n_classes": (len(set(y_train)) if hasattr(y_train, "__iter__") else 1),
    }

    log_parameters(dataset_params)


# Legacy compatibility - keep old function name as alias
log_model_metrics = log_full_metrics

Overwriting src/backend/ML/mlops/logging.py


In [None]:
%%writefile src/backend/ML/mlops/experiment_utils.py
"""MLflow experiment utilities."""
import os
import pathlib
import mlflow
import mlflow.tracking
from typing import Optional, Dict, Any
import requests

from src.backend.ML.mlops.config import EXPERIMENT_NAME, TRACKING_URI

import re, shutil, logging
from src.backend.ML.mlops.config import ARTIFACT_ROOT

# ─── PATCH: src/backend/ML/mlops/experiment_utils.py ──────────────────────────
def _patch_yaml_for_mlflow() -> None:
    """Ensure MLflow entity objects are YAML-serialisable.

    MLflow serialises *Metric*, *Param*, and *RunTag* objects to `meta.yaml` via
    ``yaml.safe_dump``.  Older MLflow versions ship their own dumper that
    already knows these classes, but many environments mix package versions
    (e.g. Conda ↔︎ pip) which means the representers are **missing** and YAML
    falls back to the *object* representer.  Our previous fix attempted to call
    ``obj.to_dict()`` – unfortunately :class:`mlflow.entities.Metric` does **NOT**
    implement that method, leading to the runtime error you just saw:

        AttributeError: 'Metric' object has no attribute 'to_dict'

    The new implementation maps the public attributes explicitly instead of
    assuming a convenience helper exists.  It is fully compatible with both
    MLflow ≤2.11 and PyYAML ≥6.
    """

    import yaml

    try:
        from mlflow.entities import Metric, Param, RunTag
        from mlflow.utils.yaml_utils import YamlSafeDumper as _ML_DUMPER
    except Exception:
        # If MLflow isn't imported yet (unit-tests), just return.
        return

    # ------------------------------------------------------------------
    # Helper → guaranteed, stable mapping for every entity type ----------
    # ------------------------------------------------------------------
    def _metric_to_dict(m: Metric) -> dict[str, int | float | str]:
        return {
            "key": m.key,
            "value": m.value,
            "timestamp": m.timestamp,
            "step": m.step,
        }

    def _param_to_dict(p: Param) -> dict[str, str]:
        return {"key": p.key, "value": p.value}

    def _runtag_to_dict(t: RunTag) -> dict[str, str]:
        return {"key": t.key, "value": t.value}

    _DICT_MAPPERS = {
        Metric: _metric_to_dict,
        Param: _param_to_dict,
        RunTag: _runtag_to_dict,
    }

    def _represent_entity(dumper: yaml.Dumper, obj):  # noqa: D401
        """Generic PyYAML representer for MLflow entities."""
        return dumper.represent_dict(_DICT_MAPPERS[type(obj)](obj))

    # Target both the stock SafeDumper **and** MLflow's own dumper so we cover
    # all call-sites.  Registering twice is idempotent; we guard with `in`.
    for dumper_cls in (yaml.SafeDumper, _ML_DUMPER):
        for cls in _DICT_MAPPERS:
            if cls not in dumper_cls.yaml_representers:
                yaml.add_representer(cls, _represent_entity, Dumper=dumper_cls)
# ──────────────────────────────────────────────────────────────────────────────


def _is_http_uri(uri: str) -> bool:
    return uri.startswith("http")

def _local_path_from_uri(uri: str) -> pathlib.Path:
    return pathlib.Path(uri.replace("file:", "", 1)) if uri.startswith("file:") else pathlib.Path(uri)

_HEALTH_ENDPOINTS = ("/health", "/version")
_hex32 = re.compile(r"^[0-9a-f]{32}$", re.I)
logger = logging.getLogger(__name__)

def _ping_tracking_server(uri: str, timeout: float = 2.0) -> bool:
    """Return True iff an HTTP MLflow server is reachable at *uri*."""
    if not uri.startswith("http"):
        return False                        # file store – nothing to ping
    try:
        # Use new health endpoints
        for ep in _HEALTH_ENDPOINTS:
            response = requests.get(uri.rstrip("/") + ep, timeout=timeout)
            response.raise_for_status()
        return True
    except Exception:
        return False


# ── experiment_utils.py ───────────────────────────────────────────────────
import pathlib, shutil, logging

logger = logging.getLogger(__name__)

def _ensure_trash(root: pathlib.Path) -> pathlib.Path:
    """Return the path to <root>/.trash, creating it if needed."""
    trash_path = root / ".trash"
    trash_path.mkdir(exist_ok=True)
    return trash_path


def _sanitize_mlruns_dir(root: pathlib.Path) -> None:
    """
    Remove **only** invalid directories while preserving MLflow's mandatory
    '.trash' folder.  Idempotent and safe to call repeatedly.
    """
    trash_root = _ensure_trash(root)

    # 1️⃣  Clean *inside* .trash (broken experiments that never got meta.yaml)
    for p in list(trash_root.iterdir()):
        if p.is_dir() and (not p.name.isdigit() or not (p / "meta.yaml").exists()):
            logger.warning("🧹 Purging corrupt trash dir %s", p)
            shutil.rmtree(p, ignore_errors=True)

    # 2️⃣  Sweep root level (skip .trash itself)
    for p in list(root.iterdir()):
        # Preserve critical MLflow folders (models registry & artefacts)
        if p in {trash_root, root / "models", root / "artifacts"}:
            continue  # KEEP – required by MLflow

        remove = (
            p.is_dir()
            and (
                not p.name.isdigit()                 # junk (non-experiment dirs)
                or not (p / "meta.yaml").exists()    # corrupt experiment missing meta
            )
        )
        if remove:
            logger.warning("🧹 Removing stray MLflow dir %s", p)
            shutil.rmtree(p, ignore_errors=True)


def _fallback_uri() -> str:
    """Local file store outside default ./mlruns to avoid collisions."""
    local = pathlib.Path.cwd() / "mlruns_local"
    local.mkdir(exist_ok=True)
    _sanitize_mlruns_dir(local)
    _ensure_trash(local)          # make doubly sure
    return f"file:{local}"


# ── src/backend/ML/mlops/experiment_utils.py ──
_resolved_uri: str | None = None         # module-level cache



def setup_mlflow_experiment(experiment_name: str | None = None) -> None:
    """
    Initialise MLflow tracking & experiment **safely**, no matter whether a
    remote server is reachable or we fall back to the local file store.
    """
    _patch_yaml_for_mlflow()
    global _resolved_uri
    exp_name = experiment_name or EXPERIMENT_NAME

    if _resolved_uri is None:
        uri = TRACKING_URI
        if not _ping_tracking_server(uri):
            uri = _fallback_uri()
            logger.warning("⚠️  MLflow server unreachable – using local store %s", uri)
        mlflow.set_tracking_uri(uri)
        _resolved_uri = uri
    else:
        mlflow.set_tracking_uri(_resolved_uri)

    # --- NEW: always clean the store when it's file-based ------------------
    if not _is_http_uri(_resolved_uri):
        root_path = _local_path_from_uri(_resolved_uri)
        _sanitize_mlruns_dir(root_path)
    # ----------------------------------------------------------------------

    # decide on artifact root only if explicitly configured
    artifact_loc = ARTIFACT_ROOT.strip() or None

    if mlflow.get_experiment_by_name(exp_name) is None:
        mlflow.create_experiment(exp_name, artifact_location=artifact_loc)
    mlflow.set_experiment(exp_name)
    logger.info("🗂 Experiment '%s' @ %s", exp_name, _resolved_uri)




def get_best_run(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> Dict[str, Any]:
    """
    Return a *shallow* dict with run_id, metrics.*, and params.* keys
    so downstream code can use predictable dotted paths.
    """
    exp_name = experiment_name or EXPERIMENT_NAME
    setup_mlflow_experiment(exp_name)

    client = mlflow.tracking.MlflowClient()
    exp = mlflow.get_experiment_by_name(exp_name)
    if exp is None:
        raise ValueError(f"Experiment '{exp_name}' not found")

    order = "DESC" if maximize else "ASC"
    run = client.search_runs(
        [exp.experiment_id],
        order_by=[f"metrics.{metric_key} {order}"],
        max_results=1,
    )[0]

    # Build a *flat* mapping -------------------------------------------------
    flat: Dict[str, Any] = {"run_id": run.info.run_id}

    # Metrics
    for k, v in run.data.metrics.items():
        flat[f"metrics.{k}"] = v

    # Params
    for k, v in run.data.params.items():
        flat[f"params.{k}"] = v

    # Tags (optional but handy)
    for k, v in run.data.tags.items():
        flat[f"tags.{k}"] = v

    return flat


Overwriting src/backend/ML/mlops/experiment_utils.py


In [4]:
%%writefile src/backend/ML/mlops/experiment.py

"""Training utilities with MLflow integration."""
import mlflow
import optuna
from typing import Optional
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from .config import RANDOM_STATE, TEST_SIZE
from .experiment_utils import setup_mlflow_experiment

# Re-export for convenience
__all__ = ['setup_mlflow_experiment', 'load_and_prepare_iris_data',
           'train_logistic_regression', 'train_random_forest_with_optimization']
from .logging import (
    log_model_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)


def load_and_prepare_iris_data(
    test_size: float = TEST_SIZE,
    random_state: int = RANDOM_STATE
) -> DatasetTuple:
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    # ✅ re-wrap as DataFrame so feature names propagate downstream
    import pandas as pd
    feat_names = iris.feature_names
    X_train_df = pd.DataFrame(X_train_scaled, columns=feat_names)
    X_test_df  = pd.DataFrame(X_test_scaled,  columns=feat_names)

    return (X_train_df, X_test_df, y_train, y_test,
            feat_names, list(iris.target_names), scaler)


Overwriting src/backend/ML/mlops/experiment.py


In [5]:
%%writefile src/backend/ML/mlops/model_registry.py
"""MLflow model registry utilities."""
import mlflow
from typing import Optional, Dict, Any
from .config import MODEL_NAME, MODEL_STAGE_PRODUCTION


def register_model(model_uri: str,
                   model_name: Optional[str] = None,
                   description: Optional[str] = None) -> str:
    """
    Register a model in the MLflow model registry using the fluent client API.

    Args:
        model_uri: URI of the model to register
        model_name: Name for the registered model
        description: Optional description

    Returns:
        Model version
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()

    try:
        # Create registered model if it doesn't exist
        if not client.get_registered_model(name, silent=True):
            client.create_registered_model(name)
            print(f"Created new registered model: {name}")

        # Create new version
        mv = client.create_model_version(
            name=name,
            source=model_uri,
            description=description
        )
        print(f"Created version {mv.version} of model {name}")
        return mv.version

    except Exception as e:
        print(f"Failed to register model: {e}")
        raise


def promote_model_to_stage(model_name: Optional[str] = None,
                           version: Optional[str] = None,
                           stage: str = MODEL_STAGE_PRODUCTION) -> None:
    """
    Promote a model version to a specific stage using the fluent client.

    Args:
        model_name: Name of the registered model
        version: Version to promote (if None, promotes latest)
        stage: Target stage
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()

    try:
        # Get latest version if not specified
        if version is None:
            latest = client.get_latest_versions(name, stages=["None"])
            if not latest:
                raise ValueError(f"No versions found for model {name}")
            version = latest[0].version

        # Transition to stage
        client.transition_model_version_stage(
            name=name,
            version=version,
            stage=stage
        )
        print(f"Promoted model {name} version {version} to {stage}")

    except Exception as e:
        print(f"Failed to promote model: {e}")
        raise


def load_model_from_registry(model_name: Optional[str] = None,
                             stage: str = MODEL_STAGE_PRODUCTION):
    """
    Load a model from the registry by name and stage.

    Args:
        model_name: Name of the registered model
        stage: Stage to load from

    Returns:
        Loaded model
    """
    name = model_name or MODEL_NAME
    model_uri = f"models:/{name}/{stage}"

    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model {name} from {stage} stage")
        return model
    except Exception as e:
        print(f"Failed to load model from registry: {e}")
        raise


def load_model_from_run(run_id: str, artifact_path: str = "model"):
    """
    Load a model from a specific run.

    Args:
        run_id: MLflow run ID
        artifact_path: Path to the model artifact

    Returns:
        Loaded model
    """
    model_uri = f"runs:/{run_id}/{artifact_path}"

    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model from run {run_id}")
        return model
    except Exception as e:
        print(f"Failed to load model from run: {e}")
        raise


def get_model_info(model_name: Optional[str] = None,
                   stage: str = MODEL_STAGE_PRODUCTION) -> Dict[str, Any]:
    """
    Get information about a registered model using the fluent client.

    Args:
        model_name: Name of the registered model
        stage: Stage to get info for

    Returns:
        Model information dictionary
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()

    try:
        model_version = client.get_latest_versions(name, stages=[stage])[0]

        return {
            "name": model_version.name,
            "version": model_version.version,
            "stage": model_version.current_stage,
            "description": model_version.description,
            "creation_timestamp": model_version.creation_timestamp,
            "last_updated_timestamp": model_version.last_updated_timestamp,
            "run_id": model_version.run_id
        }
    except Exception as e:
        print(f"Failed to get model info: {e}")
        raise


Overwriting src/backend/ML/mlops/model_registry.py


In [6]:
%%writefile src/backend/ML/mlops/training.py
"""Training utilities with MLflow integration."""
import mlflow
from mlflow import sklearn  # type: ignore
from mlflow import models  # type: ignore
import optuna
from optuna.integration.mlflow import MLflowCallback
import numpy as np
import pandas as pd
from typing import Optional, Tuple, List, Callable, cast, Any, Dict, TypeAlias
from numpy.typing import NDArray
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import Bunch
from sklearn.pipeline import Pipeline  # NEW

from src.backend.ML.mlops.config import RANDOM_STATE, TEST_SIZE
from src.backend.ML.mlops.experiment_utils import setup_mlflow_experiment
from src.backend.ML.mlops.logging import (
    log_full_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)
from src.backend.ML.mlops.shapiq_utils import log_shapiq_interactions

# Type aliases for complex types
FloatArray: TypeAlias = NDArray[np.float64]
IntArray: TypeAlias = NDArray[np.int64]
DatasetTuple: TypeAlias = Tuple[FloatArray, FloatArray, IntArray, IntArray, List[str], List[str], StandardScaler]


def load_and_prepare_iris_data(
    test_size: float = TEST_SIZE,
    random_state: int = RANDOM_STATE
) -> DatasetTuple:
    """
    Load and prepare the Iris dataset.

    Args:
        test_size: Fraction of data to use for testing
        random_state: Random state for reproducibility

    Returns:
        Tuple of (X_train_scaled, X_test_scaled, y_train, y_test,
                 feature_names, target_names, scaler)
    """
    # Load dataset
    iris: Any = load_iris()
    X: NDArray[np.float64] = cast(NDArray[np.float64], iris.data)
    y: NDArray[np.int64] = cast(NDArray[np.int64], iris.target)
    feature_names: List[str] = list(iris.feature_names)
    target_names: List[str] = list(iris.target_names)

    # Split data
    X_train: NDArray[np.float64]
    X_test: NDArray[np.float64]
    y_train: NDArray[np.int64]
    y_test: NDArray[np.int64]
    X_train, X_test, y_train, y_test = cast(
        Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int64], NDArray[np.int64]],
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )

    # Scale features
    scaler = StandardScaler()
    X_train_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.fit_transform(X_train))
    X_test_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.transform(X_test))

    return (X_train_scaled, X_test_scaled, y_train, y_test,
            feature_names, target_names, scaler)


# === (A) LOGISTIC REGRESSION (training only, NO dashboard) ================
def train_logistic_regression(
    X_train, y_train, X_test, y_test, feature_names, target_names,
    *, run_name: str = "lr_baseline", register: bool = True
) -> str:
    """Train logistic regression model without dashboard integration."""
    setup_mlflow_experiment()
    # Disable automatic *model* logging so we can register exactly **one**
    # version via the explicit ``mlflow.sklearn.log_model`` call below. This
    # avoids the duplicate-artifact collision (autolog logs to `model/` by
    # default, which then prevents our registered-model call from re-using
    # the same artifact path).
    mlflow.sklearn.autolog(log_models=False)

    with mlflow.start_run(run_name=run_name) as run:
        log_dataset_info(X_train, X_test, y_train, y_test)
        model = LogisticRegression(random_state=RANDOM_STATE, max_iter=1_000).fit(
            X_train, y_train
        )

        y_pred = model.predict(X_test)
        log_full_metrics(y_test, y_pred)
        log_confusion_matrix(y_test, y_pred, class_names=target_names)

        signature = mlflow.models.infer_signature(X_train, model.predict(X_train))
        # Log *one* artefact folder (`model/`) and register → iris_logreg
        sklearn.log_model(
            model,
            name="iris_logreg",                          # NEW – explicit name param (MLflow ≥2.9)
            registered_model_name="iris_logreg" if register else None,
            signature=signature,
            input_example=X_test[:5],
        )

        # SHAP-IQ: compute & log feature interaction values
        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        log_shapiq_interactions(model, X_test_df, feature_names, max_order=2)

        return run.info.run_id


def _create_rf_objective(X_train, y_train, X_test, y_test) -> Callable[[optuna.trial.Trial], float]:
    """Create Optuna objective function for Random Forest optimization."""
    def objective(trial: optuna.trial.Trial) -> float:
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 200),
            "max_depth": trial.suggest_int("max_depth", 2, 20),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "random_state": RANDOM_STATE,
        }
        m = RandomForestClassifier(**params).fit(X_train, y_train)
        return float(accuracy_score(y_test, m.predict(X_test)))
    return objective


# === (B) RANDOM-FOREST + Optuna (training only) ===========================
def train_random_forest_optimized(
    X_train, y_train, X_test, y_test, feature_names, target_names,
    *, n_trials: int = 50, run_name: str = "rf_optimized", register: bool = True
) -> str:
    """
    Train an Optuna-tuned Random-Forest inside a Pipeline(StandardScaler→RF)
    and log the entire pipeline to MLflow so scaling is reproduced at inference.
    """
    setup_mlflow_experiment()
    mlflow.sklearn.autolog(disable=True)

    with mlflow.start_run(run_name=run_name) as run:
        log_dataset_info(X_train, X_test, y_train, y_test)

        study = optuna.create_study(direction="maximize")
        study.optimize(
            _create_rf_objective(X_train, y_train, X_test, y_test),
            n_trials=n_trials,
            callbacks=[MLflowCallback(
                tracking_uri=mlflow.get_tracking_uri(),
                metric_name="accuracy", mlflow_kwargs={"nested": True}
            )],
        )

        # 🟢 Pipeline with scaler
        best_rf = RandomForestClassifier(**study.best_params, random_state=RANDOM_STATE)
        pipeline = Pipeline([("scaler", StandardScaler()), ("rf", best_rf)])
        pipeline.fit(X_train, y_train)

        y_pred = pipeline.predict(X_test)
        log_full_metrics(y_test, y_pred)
        log_confusion_matrix(y_test, y_pred, class_names=target_names)
        log_feature_importance(feature_names, best_rf.feature_importances_)
        mlflow.log_metric("best_accuracy", study.best_value)

        signature = mlflow.models.infer_signature(X_train, pipeline.predict(X_train))
        sklearn.log_model(
            pipeline,
            name="iris_random_forest",
            registered_model_name="iris_random_forest" if register else None,
            signature=signature, input_example=X_test[:5],
        )

        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        log_shapiq_interactions(best_rf, X_test_df, feature_names, max_order=2)

        return run.info.run_id


# === (C) ONE-STOP helper: train both models ===============================
def run_all_trainings(*,
    test_size: float = TEST_SIZE, random_state: int = RANDOM_STATE, n_trials: int = 50) -> None:
    """Train both logistic regression and random forest models."""
    X_tr, X_te, y_tr, y_te, feats, tgts, _ = load_and_prepare_iris_data(
        test_size, random_state
    )
    train_logistic_regression(
        X_tr, y_tr, X_te, y_te, feats, tgts, run_name="lr_baseline"
    )
    train_random_forest_optimized(
        X_tr, y_tr, X_te, y_te, feats, tgts,
        n_trials=n_trials, run_name="rf_optimized"
    )


# === (D) Robust comparator ===============================================
def compare_models(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> None:
    """
    Print the best run according to *metric_key* while gracefully
    falling-back to common alternates when the preferred key is missing.
    """
    from .experiment_utils import get_best_run

    fallback_keys = ["accuracy_score", "best_accuracy"]
    try:
        best = get_best_run(experiment_name, metric_key, maximize)
        rid = best["run_id"]

        # choose first key that exists
        score = best.get(f"metrics.{metric_key}")
        if score is None:
            for alt in fallback_keys:
                score = best.get(f"metrics.{alt}")
                if score is not None:
                    metric_key = alt
                    break

        model_type = best.get("params.model_type", "unknown")
        print(f"🏆 Best run: {rid}")
        print(f"📈 {metric_key}: {score if score is not None else 'N/A'}")
        print(f"🔖 Model type: {model_type}")
    except Exception as err:
        print(f"❌ Error comparing models: {err}")


# Legacy compatibility functions (with dashboard support)
train_logistic_regression_autolog = train_logistic_regression
train_random_forest_with_optimization = train_random_forest_optimized


if __name__ == "__main__":
    run_all_trainings()


Overwriting src/backend/ML/mlops/training.py


In [None]:
%%writefile src/backend/ML/mlops/utils_training_guards.py
# src/backend/ML/mlops/utils_training_guards.py

import logging
import shutil

try:
    # PyTensor exposes its config flags in the configdefaults module
    from pytensor.configdefaults import config as pconfig
except ImportError as exc:
    # If PyTensor isn’t installed or the API changed, disable C-ops fallback
    logging.getLogger(__name__).warning(
        "Could not import pytensor.configdefaults.config; "
        "skipping compiler checks."
    )
    pconfig = None

logger = logging.getLogger(__name__)

def ensure_compiler() -> None:
    """
    Check for the configured C++ compiler (config.cxx).
    If it's set but not on PATH, disable C-ops to force Python fallback.
    """
    if not pconfig:
        # Nothing to do if config isn't available
        return

    cxx = getattr(pconfig, "cxx", "")
    if cxx and shutil.which(cxx) is None:
        logger.warning(
            f"Configured compiler '{cxx}' not found in PATH. "
            "Disabling PyTensor C-ops (falling back to pure-Python)."
        )
        # Disable C++ compilation
        pconfig.cxx = ""


Overwriting src/backend/ML/mlops/utils_training_guards.py


In [None]:
%%writefile src/backend/ML/mlops/training_bayes.py
from __future__ import annotations
import os
import shutil
import logging

try:
    import pymc as pm
    import arviz as az
except ImportError as exc:
    raise ImportError("PyMC and ArviZ are required for Bayesian model training") from exc

from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd
from src.backend.ML.mlops.utils_training_guards import ensure_compiler

logger = logging.getLogger(__name__)

_BAYES_MODEL_NAME = "breast_cancer_bayes"

def train_bayes_logreg(
    debug: bool = False,
    *,
    draws: int = 1000,
    tune: int = 500,
    run_name: str = "bayes_logreg",
    register: bool = True,
) -> tuple[pm.Model, "az.InferenceData", str]:
    """
    Bayesian logistic regression with full MLflow logging **including accuracy**.
    """
    from src.backend.ML.mlops.experiment_utils import setup_mlflow_experiment
    from src.backend.ML.mlops.logging import log_full_metrics, log_confusion_matrix  # NEW
    import mlflow

    ensure_compiler()
    setup_mlflow_experiment()

    data = load_breast_cancer()
    X_raw, y_raw = data.data.astype(float), data.target.astype(int)
    X_std = (X_raw - X_raw.mean(0)) / X_raw.std(0)
    n_feat = X_std.shape[1]

    p0, p1 = (y_raw == 0).mean(), (y_raw == 1).mean()
    intercept_prior = float("-inf") if p0 == 0 else np.log(p1 / p0)

    with mlflow.start_run(run_name=run_name) as run:
        with pm.Model() as model:
            pm.Data("X_shared", X_std)
            pm.Data("y_obs", y_raw)
            alpha = pm.Normal("alpha", mu=intercept_prior, sigma=2)
            beta = pm.Normal("beta", mu=0, sigma=1, shape=n_feat)
            logits = alpha + pm.math.dot(X_std, beta)
            pm.Bernoulli("y", p=pm.math.sigmoid(logits), observed=y_raw)

            idata = pm.sample(
                draws=draws, tune=tune, chains=2, cores=1,
                target_accept=0.9, progressbar=not debug
            )

        # ── Metrics --------------------------------------------------------
        posterior = idata.posterior
        alpha_s = posterior["alpha"].values.reshape(-1)
        beta_s  = posterior["beta"].values.reshape(-1, n_feat)

        probs = 1 / (1 + np.exp(-(alpha_s.mean() + X_std @ beta_s.mean(0))))
        y_pred = (probs > 0.5).astype(int)

        log_full_metrics(y_raw, y_pred)                     # NEW
        log_confusion_matrix(y_raw, y_pred,                 # NEW
                             class_names=["malignant", "benign"])

        mlflow.log_metric("n_draws", int(draws))

        # ── Model logging (unchanged) -------------------------------------
        signature = mlflow.models.infer_signature(X_std, y_raw)

        class _BayesLogRegPyFunc(mlflow.pyfunc.PythonModel):
            def __init__(self, a, b):
                self._a, self._b = a.astype("float64"), b.astype("float64")
            def predict(self, _, model_input):
                X = np.asarray(model_input, dtype="float64")
                logits = self._a[:, None] + self._b @ X.T
                return (1 / (1 + np.exp(-logits))).mean(0)

        mlflow.pyfunc.log_model(
            name="breast_cancer_bayes",
            python_model=_BayesLogRegPyFunc(alpha_s, beta_s),
            input_example=X_std[:5],
            signature=signature,
            registered_model_name=_BAYES_MODEL_NAME if register else None,
        )

        if register:
            client = mlflow.tracking.MlflowClient()
            mv = client.get_latest_versions(_BAYES_MODEL_NAME, stages=["None"])[0]
            client.transition_model_version_stage(
                name=_BAYES_MODEL_NAME, version=mv.version,
                stage="Production", archive_existing_versions=True,
            )

        return model, idata, run.info.run_id



if __name__ == "__main__":
    if os.path.isdir("mlruns"):
        logger.info("Removing existing mlruns directory...")
        shutil.rmtree("mlruns")

    model, idata, run_id = train_bayes_logreg(debug=True,
                                              draws=50,
                                              tune=25,
                                              run_name="breast_cancer_bayes_logreg",
                                              register=True)
    summary_df = az.summary(idata, var_names=["alpha", "beta"])
    print(summary_df)


Overwriting src/backend/ML/mlops/training_bayes.py


In [9]:
%%writefile src/backend/ML/mlops/explainer.py
from __future__ import annotations
import os
import socket
import logging
from pathlib import Path
from typing import Any, Sequence, Optional
from contextlib import closing

import mlflow
import psutil  # lightweight; already added to pyproject deps
from sklearn.utils.multiclass import type_of_target
from explainerdashboard import (
    ClassifierExplainer,
    RegressionExplainer,
    ExplainerDashboard,
)

logging.basicConfig(level=logging.INFO)

__all__ = ["build_and_log_dashboard", "load_dashboard_yaml", "dashboard_best_run", "_first_free_port", "_port_details"]


# ---------------------------------------------------------------------------
def _port_details(port: int) -> str:
    """
    Return a one-line string with PID & cmdline of the process
    listening on *port*, or '' if none / not discoverable.
    """
    for c in psutil.net_connections(kind="tcp"):
        if c.status == psutil.CONN_LISTEN and c.laddr and c.laddr.port == port:
            try:
                p = psutil.Process(c.pid)
                return f"[PID {p.pid} – {p.name()}] cmd={p.cmdline()}"
            except psutil.Error:
                return f"[PID {c.pid}] (no detail)"
    return ""

def _first_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return first free TCP port ≥ *start* on localhost."""
    for port in range(start, start + tries):
        try:
            with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
                s.settimeout(0.05)
                s.bind(("127.0.0.1", port))
                return port
        except OSError:
            # Port is in use, try next one
            continue
    raise RuntimeError("⚠️  No free ports found in range")

def _next_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return the first free TCP port ≥ *start*. (Alias for backward compatibility)"""
    return _first_free_port(start, tries)

def _port_in_use(port: int) -> bool:
    """Check if a port is already in use on any interface."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(0.05)
        # Check both localhost and 0.0.0.0 to be thorough
        try:
            # First check localhost (127.0.0.1)
            if s.connect_ex(("127.0.0.1", port)) == 0:
                return True
            # Also check if anything is bound to all interfaces
            if s.connect_ex(("0.0.0.0", port)) == 0:
                return True
        except (socket.gaierror, OSError):
            # If we can't connect, assume port is free
            pass
        return False


# ---------------------------------------------------------------------------
# -------------------------------------------------------------- #
#  src/mlops/explainer.py (only this function changed)           #
# -------------------------------------------------------------- #
def build_and_log_dashboard(
    model: Any,
    X_test,
    y_test,
    *,
    # ---- explainer kwargs (unchanged) -------------------------
    cats: Optional[Sequence[str]] = None,
    idxs: Optional[Sequence[Any]] = None,
    descriptions: Optional[dict[str, str]] = None,
    target: Optional[str] = None,
    labels: Optional[Sequence[str]] = None,
    X_background=None,
    model_output: str = "probability",
    shap: str = "guess",
    shap_interaction: bool = True,
    simple: bool = False,
    mode: str = "dash",         # 🆕 safest default for docker
    title: str = "Model Explainer",
    # ---- infra -----------------------------------------------
    run: mlflow.ActiveRun | None = None,
    port: int | None = None,
    serve: bool = False,
    server_backend: str = "waitress",   # 🆕 waitress|gunicorn|jupyterdash
    conflict_strategy: str = "next",
    max_tries: int = 20,
    save_yaml: bool = True,
    output_dir: os.PathLike | str | None = None,
) -> Path:
    """
    Build + (optionally) serve the dashboard.

    server_backend
        'waitress'    – production WSGI server (binds 0.0.0.0)
        'gunicorn'    – spawn via subprocess (needs gunicorn installed)
        'jupyterdash' – fallback; use only for notebook demos
    """
    # ------------ build explainer (unchanged) ------------------
    problem = type_of_target(y_test)
    ExplainerCls = RegressionExplainer if problem.startswith("continuous") else ClassifierExplainer
    expl_kwargs = dict(
        cats=cats, idxs=idxs, descriptions=descriptions, target=target,
        labels=labels, X_background=X_background, model_output=model_output, shap=shap,
    )
    expl_kwargs = {k: v for k, v in expl_kwargs.items() if v is not None}
    explainer = ExplainerCls(model, X_test, y_test, **expl_kwargs)

    dash = ExplainerDashboard(
        explainer, title=title, shap_interaction=shap_interaction,
        simple=simple, mode=mode,
    )

    out_dir = Path(output_dir or "."); out_dir.mkdir(parents=True, exist_ok=True)
    html_path = out_dir / "explainer_dashboard.html"; dash.save_html(html_path); mlflow.log_artifact(str(html_path))
    if save_yaml:
        yaml = out_dir / "dashboard.yaml"; dash.to_yaml(yaml); mlflow.log_artifact(str(yaml))

    # ------------ serve ----------------------------------------
    if not serve:
        return html_path

    chosen = port or _first_free_port()
    attempts = 0
    while _port_in_use(chosen):
        if conflict_strategy == "raise":
            raise RuntimeError(f"Port {chosen} in use {_port_details(chosen)}")
        if conflict_strategy == "kill":
            pid = int((_port_details(chosen) or "PID 0").split()[1]); psutil.Process(pid).terminate()
            break
        attempts += 1
        if attempts >= max_tries:
            raise RuntimeError(f"No free port after {max_tries} tries")
        chosen += 1

    logging.info("🌐 Dashboard on http://0.0.0.0:%s via %s", chosen, server_backend)

    if server_backend == "waitress":
        dash.run(chosen, host="0.0.0.0", use_waitress=True, mode="dash")
    elif server_backend == "gunicorn":
        import subprocess, shlex
        cmd = f"gunicorn -w 3 -b 0.0.0.0:{chosen} dashboard:app"
        subprocess.Popen(shlex.split(cmd), cwd=str(out_dir))
    else:  # jupyterdash
        dash.run(chosen, host="0.0.0.0")

    return html_path




# ---------------------------------------------------------------------------
def load_dashboard_yaml(path: os.PathLike | str) -> ExplainerDashboard:
    """Reload a YAML config – unchanged but kept for public API."""
    return ExplainerDashboard.from_config(path)


# ────────────────────────────────────────────────────────────────────────────
def dashboard_best_run(metric: str = "accuracy",
                       maximize: bool = True,
                       *, port: int | None = None) -> None:
    """
    Load the *best* run (by `metric`) from the active experiment and
    launch an ExplainerDashboard **once** for that model.

    Example
    -------
    >>> from mlops.explainer import dashboard_best_run
    >>> dashboard_best_run("accuracy")      # opens http://0.0.0.0:8050
    """
    from .experiment_utils import get_best_run
    from .model_registry  import load_model_from_run
    from sklearn.datasets import load_iris
    import pandas as pd

    best = get_best_run(metric_key=metric, maximize=maximize)
    run_id = best["run_id"]
    model  = load_model_from_run(run_id)

    iris = load_iris()
    X_df  = pd.DataFrame(iris.data, columns=iris.feature_names)
    build_and_log_dashboard(
        model, X_df, iris.target,
        labels=list(iris.target_names),
        run=None, serve=True, port=port or 8050
    )


Overwriting src/backend/ML/mlops/explainer.py


In [10]:
%%writefile src/backend/ML/mlops/utils.py
import os
# Add near the top of utils.py
import sys
from pathlib import Path
import inspect

def add_project_root_to_sys_path(levels_up: int = 2) -> Path:
    """
    Ensure the repository root (default: two directories up) is on sys.path.

    Returns
    -------
    Path
        The absolute Path object pointing to the directory inserted.
    """
    try:
        here = Path(__file__).resolve()
    except NameError:           # running in Jupyter / IPython
        # Use the file of the *caller* if possible,
        # otherwise fall back to the current working directory.
        caller = inspect.stack()[1].filename
        here = Path(caller).resolve() if caller != "<stdin>" else Path.cwd()

    root = here.parents[levels_up]
    sys.path.insert(0, str(root))
    return root


_added_src_flag: bool = False          # module-level cache

def project_root() -> Path:
    """
    Return the absolute path to the repo root *without* relying on __file__.

    • If running from a .py file, use that file's parent/parent (…/src/..)
    • If running interactively (no __file__), fall back to CWD.
    """
    if "__file__" in globals():
        return Path(__file__).resolve().parent.parent
    return Path.cwd()

def ensure_src_on_path(verbose: bool = True) -> None:
    """
    Ensure <repo-root>/src is the *first* entry in sys.path exactly once.
    The verbose flag prints the helper line the first time only.
    """
    import sys
    global _added_src_flag
    root = project_root()
    src_path = root / "src"

    if str(src_path) not in sys.path:
        sys.path.insert(0, str(src_path))
        if verbose and not _added_src_flag:
            print(f"🔧 Added {src_path} to sys.path")
        _added_src_flag = True


Overwriting src/backend/ML/mlops/utils.py


In [11]:
%%writefile src/backend/ML/mlops/shapiq_utils.py
"""
SHAP-IQ (Shapley Interaction) utilities for MLflow integration.

This module provides functions to compute and log Shapley interaction values
for machine learning models. Shapley interactions help understand how features
work together to influence model predictions.
"""

from __future__ import annotations
import os
import pandas as pd
import numpy as np
import mlflow
from shapiq import TabularExplainer
from typing import Optional, Sequence, Union
import logging

logger = logging.getLogger(__name__)

import warnings
warnings.filterwarnings("ignore",
                        message="Not all budget is required due to the border-trick",
                        category=UserWarning,
                        module=r"^shapiq\.")


def compute_shapiq_interactions(
    model,
    X: pd.DataFrame,
    feature_names: Sequence[str],
    max_order: int = 2,
    budget: int = 256,
    n_samples: Optional[int] = None,
) -> pd.DataFrame:
    """
    Robust wrapper around shapiq.TabularExplainer to return a tidy DataFrame
    with Shapley-interaction values.  Handles the two public APIs:
      •  .dict_values   (mapping)
      •  .values        (np.ndarray)  →  use  .to_dict()
    """
    logger.info(
        "Computing SHAP-IQ (max_order=%s, budget=%s, n_samples=%s)",
        max_order,
        budget,
        n_samples,
    )

    X_sample = (
        X.sample(n=n_samples, random_state=42) if n_samples and len(X) > n_samples else X
    )

    explainer = TabularExplainer(
        model=model,
        data=X_sample.values,
        index="k-SII",
        max_order=max_order,
    )

    rows: list[dict[str, Any]] = []
    for i, vec in enumerate(X_sample.values):
        try:
            iv = explainer.explain(vec, budget=budget)

            # --- unify both APIs ------------------------------------------------
            if hasattr(iv, "dict_values"):                    # shapiq ≥ 0.4
                items = iv.dict_values.items()
            elif hasattr(iv, "to_dict"):                      # fallback
                items = iv.to_dict().items()
            else:
                # last resort – try attribute access
                items = dict(iv.values).items()

            for combo, val in items:
                rows.append(
                    {
                        "sample_idx": i,
                        "combination": combo,
                        "value": float(val),
                        "order": len(combo),
                        "feature_names": tuple(feature_names[j] for j in combo)
                        if combo
                        else (),
                    }
                )
        except Exception as exc:  # noqa: BLE001
            logger.warning("SHAP-IQ failed on sample %s: %s", i, exc)

    df = pd.DataFrame(rows)
    logger.info("✓ %s interaction rows computed", len(df))
    return df



def log_shapiq_interactions(
    model,
    X: pd.DataFrame,
    feature_names: Sequence[str],
    max_order: int = 2,
    top_n: int = 10,
    budget: int = 256,
    n_samples: Optional[int] = None,
    output_path: Optional[str] = None
) -> None:
    """
    Compute Shapley interaction values and log them to MLflow.

    This function:
    1. Computes interactions using compute_shapiq_interactions
    2. Logs the top N interactions as MLflow metrics
    3. Saves the full interaction table as CSV and logs as artifact

    Args:
        model: Trained sklearn-like model.
        X: DataFrame of features.
        feature_names: List of feature column names.
        max_order: Maximum interaction order (default: 2).
        top_n: Number of top interactions to log as metrics (default: 10).
        budget: Evaluation budget for interaction approximation (default: 256).
        n_samples: If provided, sample this many rows for computation.
        output_path: Optional path for CSV output (default: "shapiq_interactions.csv").
    """
    logger.info("Starting SHAP-IQ interaction logging")

    # Compute interactions
    df = compute_shapiq_interactions(
        model, X, feature_names, max_order, budget, n_samples
    )

    if df.empty:
        logger.warning("No interactions computed - skipping logging")
        return

    # Aggregate: mean absolute value per combination across all samples
    agg = (
        df.groupby(['combination', 'feature_names', 'order'])['value']
          .apply(lambda x: x.abs().mean())
          .reset_index()
          .sort_values('value', ascending=False)
    )

    # Log summary statistics
    mlflow.log_metric("shapiq_total_interactions", len(df))
    mlflow.log_metric("shapiq_unique_combinations", len(agg))
    mlflow.log_metric("shapiq_max_order", max_order)
    mlflow.log_metric("shapiq_samples_analyzed", len(X) if n_samples is None else min(n_samples, len(X)))

    # Log top N interactions as metrics
    logger.info(f"Logging top {top_n} interactions as MLflow metrics")
    for idx, row in agg.head(top_n).iterrows():
        combo = row['combination']
        feature_combo = row['feature_names']
        value = row['value']
        order = row['order']

        # Create metric name from feature names or indices
        if feature_combo:
            name = f"shapiq_order{order}_{'_x_'.join(feature_combo)}"
        else:
            name = f"shapiq_order{order}_{'_'.join(map(str, combo))}"

        # Sanitize metric name (MLflow has restrictions)
        name = name.replace(' ', '_').replace('(', '').replace(')', '').replace(',', '_')[:250]

        mlflow.log_metric(name, float(value))

    # Log order-specific summaries
    order_summary = df.groupby('order')['value'].agg(['count', 'mean', 'std']).fillna(0)
    for order_val in order_summary.index:
        mlflow.log_metric(f"shapiq_order{order_val}_count", order_summary.loc[order_val, 'count'])
        mlflow.log_metric(f"shapiq_order{order_val}_mean_abs", abs(order_summary.loc[order_val, 'mean']))
        if order_summary.loc[order_val, 'std'] > 0:
            mlflow.log_metric(f"shapiq_order{order_val}_std", order_summary.loc[order_val, 'std'])

    # Save and log full DataFrame as artifact
    output_file = output_path or "shapiq_interactions.csv"

    try:
        # Add readable feature names to the full DataFrame
        df_export = df.copy()
        df_export['feature_names_str'] = df_export['feature_names'].apply(lambda x: ' x '.join(x) if x else 'baseline')

        df_export.to_csv(output_file, index=False)
        mlflow.log_artifact(output_file)
        logger.info(f"Logged SHAP-IQ interactions artifact: {output_file}")

        # Also create and log a summary file
        summary_file = output_path.replace('.csv', '_summary.csv') if output_path else "shapiq_interactions_summary.csv"
        agg_export = agg.copy()
        agg_export['feature_names_str'] = agg_export['feature_names'].apply(lambda x: ' x '.join(x) if x else 'baseline')
        agg_export.to_csv(summary_file, index=False)
        mlflow.log_artifact(summary_file)
        logger.info(f"Logged SHAP-IQ summary artifact: {summary_file}")

    except Exception as e:
        logger.error(f"Error saving SHAP-IQ artifacts: {e}")

    logger.info("SHAP-IQ interaction logging completed")


def get_top_interactions(
    shapiq_df: pd.DataFrame,
    top_n: int = 10,
    order: Optional[int] = None
) -> pd.DataFrame:
    """
    Extract top interactions from a SHAP-IQ DataFrame.

    Args:
        shapiq_df: DataFrame returned by compute_shapiq_interactions.
        top_n: Number of top interactions to return.
        order: If provided, filter to interactions of this order only.

    Returns:
        DataFrame with top interactions, aggregated across samples.
    """
    df = shapiq_df.copy()

    if order is not None:
        df = df[df['order'] == order]

    if df.empty:
        return df

    # Aggregate and sort by absolute mean value
    agg = (
        df.groupby(['combination', 'feature_names', 'order'])['value']
          .agg(['mean', 'std', 'count'])
          .reset_index()
    )
    agg['abs_mean'] = agg['mean'].abs()
    agg = agg.sort_values('abs_mean', ascending=False)

    return agg.head(top_n)


Overwriting src/backend/ML/mlops/shapiq_utils.py


In [12]:
%%writefile src/backend/ML/examples/shapiq_demo.py
#!/usr/bin/env python3
"""
SHAP-IQ Integration Demo

This script demonstrates the new SHAP-IQ (Shapley Interaction) functionality
integrated into the MLOps pipeline. It shows how Shapley interaction values
are computed and logged alongside regular model metrics.

Usage:
    python src/examples/shapiq_demo.py
"""

from __future__ import annotations
import logging

# ─── Path setup ─────────────────────────────────────────────────────────────
from src.backend.ML.mlops.utils import add_project_root_to_sys_path
PROJECT_ROOT = add_project_root_to_sys_path(levels_up=2)  # safe in both .py and interactive :contentReference[oaicite:8]{index=8}

# ─── Imports ────────────────────────────────────────────────────────────────
from src.backend.ML.mlops.training import (
    load_and_prepare_iris_data,
    train_logistic_regression,
    train_random_forest_optimized
)
from src.backend.ML.mlops.shapiq_utils import (
    compute_shapiq_interactions,
    log_shapiq_interactions,
    get_top_interactions
)
from src.backend.ML.mlops.experiment_utils import setup_mlflow_experiment, get_best_run
import mlflow
import pandas as pd

# ─── Logging Setup ─────────────────────────────────────────────────────────
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def demo_standalone_shapiq():
    """Demonstrate standalone SHAP-IQ computation without MLflow logging."""
    print("🔬 SHAP-IQ Standalone Demo")
    print("=" * 50)

    # Load data and train a simple model
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()

    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=20, random_state=42)
    model.fit(X_train, y_train)

    print(f"✓ Trained RandomForest on {len(X_train)} samples")
    print(f"✓ Test accuracy: {model.score(X_test, y_test):.3f}")

    # Compute SHAP-IQ interactions
    X_test_df = pd.DataFrame(X_test, columns=feature_names)
    print(f"\n🧮 Computing SHAP-IQ interactions...")

    shapiq_df = compute_shapiq_interactions(
        model,
        X_test_df.head(10),  # Use subset for demo
        feature_names,
        max_order=2,
        budget=128
    )

    if not shapiq_df.empty:
        print(f"✓ Computed {len(shapiq_df)} interaction values")

        # Show top interactions
        top_interactions = get_top_interactions(shapiq_df, top_n=5)
        print(f"\n🏆 Top 5 Feature Interactions:")
        print("-" * 60)

        for idx, row in top_interactions.iterrows():
            feature_combo = ' × '.join(row['feature_names'])
            if not feature_combo:
                feature_combo = "baseline"
            print(f"  {feature_combo:30} | Order {row['order']} | {row['abs_mean']:.4f}")

        # Show order breakdown
        order_counts = shapiq_df['order'].value_counts().sort_index()
        print(f"\n📊 Interaction Order Breakdown:")
        for order, count in order_counts.items():
            if order == 0:
                print(f"  Order {order} (main effects):     {count:4d} values")
            elif order == 1:
                print(f"  Order {order} (individual):       {count:4d} values")
            elif order == 2:
                print(f"  Order {order} (pairwise):         {count:4d} values")
            else:
                print(f"  Order {order} (higher-order):     {count:4d} values")
    else:
        print("⚠️  No interactions computed (this can happen with simple models/data)")


def demo_integrated_training():
    """Demonstrate SHAP-IQ integration in the training pipeline."""
    print("\n\n🚀 SHAP-IQ Integrated Training Demo")
    print("=" * 50)

    # Setup MLflow experiment
    setup_mlflow_experiment("shapiq_demo")

    # Load data
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()
    print(f"✓ Loaded Iris dataset: {len(X_train)} train, {len(X_test)} test samples")

    # Train model with SHAP-IQ integration
    print(f"\n🤖 Training Logistic Regression with SHAP-IQ...")
    lr_run_id = train_logistic_regression(
        X_train, y_train, X_test, y_test,
        feature_names, target_names,
        run_name="lr_with_shapiq"
    )
    print(f"✓ Logistic Regression complete: {lr_run_id[:8]}")

    print(f"\n🌲 Training Random Forest with SHAP-IQ...")
    rf_run_id = train_random_forest_optimized(
        X_train, y_train, X_test, y_test,
        feature_names, target_names,
        n_trials=10,  # Reduced for demo
        run_name="rf_with_shapiq"
    )
    print(f"✓ Random Forest complete: {rf_run_id[:8]}")

    # Show logged SHAP-IQ metrics
    print(f"\n📊 SHAP-IQ Metrics from MLflow:")
    print("-" * 50)

    try:
        # Get the latest run (Random Forest)
        with mlflow.start_run(run_id=rf_run_id):
            run_data = mlflow.get_run(rf_run_id)
            metrics = run_data.data.metrics

            # Filter SHAP-IQ metrics
            shapiq_metrics = {k: v for k, v in metrics.items() if k.startswith('shapiq_')}

            if shapiq_metrics:
                print(f"Found {len(shapiq_metrics)} SHAP-IQ metrics:")
                for metric, value in sorted(shapiq_metrics.items()):
                    if 'order' in metric and 'count' not in metric:
                        print(f"  {metric:35} = {value:.6f}")
                    elif 'total' in metric or 'unique' in metric or 'max' in metric:
                        print(f"  {metric:35} = {int(value)}")
            else:
                print("  No SHAP-IQ metrics found (may take longer to compute)")

    except Exception as e:
        print(f"  Error retrieving metrics: {e}")

    # Compare models
    print(f"\n🏆 Comparing Models:")
    print("-" * 30)
    try:
        best_run = get_best_run("accuracy", maximize=True)
        run_id = best_run["run_id"]
        accuracy = best_run.get("metrics.accuracy", "N/A")
        print(f"Best model: {run_id[:8]} (accuracy: {accuracy})")

        # Check if SHAP-IQ metrics are available for best model
        shapiq_count = best_run.get("metrics.shapiq_total_interactions")
        if shapiq_count:
            print(f"SHAP-IQ interactions: {int(shapiq_count)} computed")

    except Exception as e:
        print(f"Error comparing models: {e}")


def demo_manual_shapiq_logging():
    """Demonstrate manual SHAP-IQ logging outside of training."""
    print(f"\n\n🔧 Manual SHAP-IQ Logging Demo")
    print("=" * 50)

    # Load data and train model
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()

    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    # Manual MLflow run with SHAP-IQ logging
    setup_mlflow_experiment("shapiq_demo")

    with mlflow.start_run(run_name="manual_shapiq_demo"):
        # Log basic metrics
        accuracy = model.score(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)

        # Log SHAP-IQ interactions
        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        print("Computing and logging SHAP-IQ interactions...")

        log_shapiq_interactions(
            model,
            X_test_df,
            feature_names,
            max_order=2,
            top_n=5,
            budget=64,
            n_samples=15  # Sample for faster computation
        )

        current_run = mlflow.active_run()
        print(f"✓ SHAP-IQ logged to run: {current_run.info.run_id[:8]}")


def main():
    """Run all SHAP-IQ demos."""
    print("🌟 SHAP-IQ Integration Demonstration")
    print("=" * 60)
    print("This demo shows how Shapley interactions are computed and logged")
    print("in the MLOps pipeline to understand feature interactions.")
    print()

    try:
        # Demo 1: Standalone computation
        demo_standalone_shapiq()

        # Demo 2: Integrated training
        demo_integrated_training()

        # Demo 3: Manual logging
        demo_manual_shapiq_logging()

        print(f"\n\n🎉 SHAP-IQ Demo Complete!")
        print("=" * 60)
        print("✓ Standalone SHAP-IQ computation")
        print("✓ Integrated training with automatic SHAP-IQ logging")
        print("✓ Manual SHAP-IQ logging")
        print()
        print("🔍 Check MLflow UI to see logged SHAP-IQ metrics and artifacts:")
        print("   - Metrics: shapiq_order1_*, shapiq_order2_*, etc.")
        print("   - Artifacts: shapiq_interactions.csv, shapiq_interactions_summary.csv")

    except Exception as e:
        logger.error(f"Demo failed: {e}")
        print(f"\n❌ Demo failed: {e}")
        print("This might be due to SHAP-IQ dependency issues or data problems.")


if __name__ == "__main__":
    main()

Overwriting src/backend/ML/examples/shapiq_demo.py


In [13]:
%%writefile src/backend/ML/examples/select_best_and_dashboard.py
#!/usr/bin/env python3
"""
Select best model and launch dashboard (training is done elsewhere).

Usage:
    python src/examples/select_best_and_dashboard.py
"""

from __future__ import annotations

from src.backend.ML.mlops.utils import add_project_root_to_sys_path
PROJECT_ROOT = add_project_root_to_sys_path()

from src.backend.ML.mlops.experiment_utils import get_best_run
from src.backend.ML.mlops.model_registry import load_model_from_run
from src.backend.ML.mlops.explainer import dashboard_best_run

# Configuration variables
METRIC = "accuracy"  # Metric to optimize (e.g., 'accuracy', 'f1')
PORT = 8050           # Port for the dashboard
MAXIMIZE = True       # Whether to maximize (True) or minimize (False) the metric

def main() -> None:
    print(f"🔍 Searching MLflow runs by {METRIC}…")

    # Retrieve the best run based on the specified metric
    best = get_best_run(metric_key=METRIC, maximize=MAXIMIZE)
    run_id = best["run_id"]
    score = best.get(f"metrics.{METRIC}", "N/A")

    print(f"🏆 Best run: {run_id[:8]} — {METRIC}: {score}")

    # Load the model from the run registry
    model = load_model_from_run(run_id)
    if model is None:
        raise RuntimeError("Model could not be loaded from registry")

    print("✓ Model loaded – launching dashboard")
    # Launch the explainer dashboard for the best model
    dashboard_best_run(METRIC, maximize=MAXIMIZE, port=PORT)

if __name__ == "__main__":
    main()


Overwriting src/backend/ML/examples/select_best_and_dashboard.py


In [None]:
%%writefile src/backend/ML/scripts/run_training.py
#!/usr/bin/env python3
"""
Simple training runner script.

Run with:
    python src/backend/ML/scripts/run_training.py
    # or inside Jupyter:
    %run src/backend/ML/scripts/run_training.py
"""

import os
import sys
import logging
import shutil
from pathlib import Path
import mlflow
from mlflow.tracking import MlflowClient

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add project root to Python path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
sys.path.insert(0, project_root)

from src.backend.ML.mlops.training import run_all_trainings
from src.backend.ML.mlops.training_bayes import train_bayes_logreg
from src.backend.ML.mlops.experiment_utils import setup_mlflow_experiment


def _is_mount(path: Path) -> bool:
    """Return True if *path* is a mount point."""
    try:
        return path.is_mount()
    except AttributeError:
        import os as _os
        return _os.path.ismount(path)


def clean_mlruns() -> None:
    """
    Remove contents of ./mlruns when it is a mount point, otherwise remove the entire directory.

    This avoids 'Device or resource busy' errors when ./mlruns is bind-mounted.
    """
    mlruns_dir = Path("mlruns")
    mlruns_dir.mkdir(exist_ok=True)

    if _is_mount(mlruns_dir):
        logger.warning("⚠️  '%s' is a mount-point – deleting contents only", mlruns_dir)
        for child in mlruns_dir.iterdir():
            if child.name == ".trash":
                continue  # preserve special folder if present
            if child.is_dir():
                shutil.rmtree(child, ignore_errors=True)
            else:
                try:
                    child.unlink(missing_ok=True)
                except TypeError:
                    # Python < 3.8 compatibility
                    if child.exists():
                        child.unlink()
    else:
        logger.info("🧹 Removing %s entirely", mlruns_dir)
        shutil.rmtree(mlruns_dir, ignore_errors=True)
        mlruns_dir.mkdir(exist_ok=True)


def promote_to_production(model_name: str) -> None:
    """Promote *latest* version of <model_name> to Production if it exists."""
    client = MlflowClient()
    versions = client.search_model_versions(f"name='{model_name}'")

    if not versions:
        logger.warning("⚠️  No versions for %s – skipping promotion", model_name)
        return  # graceful exit when model not yet registered

    # pick numerically highest version (latest)
    latest = max(versions, key=lambda v: int(v.version))

    # Archive current Production versions
    for mv in versions:
        if mv.current_stage == "Production":
            client.transition_model_version_stage(
                name=model_name,
                version=mv.version,
                stage="Archived",
            )

    client.transition_model_version_stage(
        name=model_name,
        version=latest.version,
        stage="Production",
    )
    logger.info("✅ Promoted %s version %s to Production", model_name, latest.version)


def needs_training(model_name: str, min_acc: float = 0.9) -> bool:
    """Return True if <model_name> needs (re)training.

    Logic:
    1. No *Production* version exists ⇒ train.
    2. Accuracy metric below *min_acc* ⇒ train.
    """
    client = MlflowClient()
    try:
        prod_versions = client.get_latest_versions(model_name, stages=["Production"])
    except Exception as err:
        logger.warning("⚠️  Could not query model %s – assuming training needed (%s)", model_name, err)
        return True

    if not prod_versions:
        logger.info("ℹ️  No Production version for %s", model_name)
        return True

    run_id = prod_versions[0].run_id
    run = client.get_run(run_id)
    acc = run.data.metrics.get("accuracy", 0.0)
    logger.info("🔎 %s v%s accuracy=%.3f", model_name, prod_versions[0].version, acc)
    return acc < min_acc


def main() -> None:
    """Run training pipelines only if required."""
    try:
        logger.info("🚀 Training pipeline started (cwd=%s)", Path.cwd())

        # Clean MLflow runs only in CI to ensure reproducibility
        if os.getenv("CI") == "1":
            clean_mlruns()

        # Ensure experiment / tracking URI
        setup_mlflow_experiment()

        # 1️⃣  Iris models --------------------------------------------------
        if needs_training("iris_random_forest") or needs_training("iris_logreg"):
            logger.info("🛠  Training Iris models…")
            run_all_trainings(n_trials=5)
            promote_to_production("iris_random_forest")
            promote_to_production("iris_logreg")
        else:
            logger.info("✅ Iris models meet accuracy threshold – skipping training")

        # 2️⃣  Breast Cancer Bayesian model -------------------------------
        if needs_training("breast_cancer_bayes", min_acc=0.93):
            logger.info("🛠  Training Breast Cancer Bayesian model…")
            train_bayes_logreg(draws=50, tune=25)
            promote_to_production("breast_cancer_bayes")
        else:
            logger.info("✅ Cancer model meets accuracy threshold – skipping training")

        logger.info("🏁 Training script completed successfully")

    except Exception as e:
        logger.error("❌ Training failed: %s", e)
        sys.exit(1)


if __name__ == "__main__":
    main()


Overwriting src/backend/ML/scripts/run_training.py


In [15]:
%%writefile src/backend/ML/examples/iris_classification_example.py
#!/usr/bin/env python3
"""
Iris Classification Example (argparse-free, notebook-safe).

Configuration:
    • export EXPLAINER_DASHBOARD=1   # launch dashboard
    • export EXPLAINER_PORT=8150     # optional port override
"""

from __future__ import annotations
import os
import logging

from src.backend.ML.mlops.utils import ensure_src_on_path
ensure_src_on_path()

from src.backend.ML.mlops.training import (
    load_and_prepare_iris_data,
    train_logistic_regression,
    train_random_forest_optimized,
    compare_models,
)
from src.backend.ML.mlops.model_registry import load_model_from_run
from src.backend.ML.mlops.experiment_utils import get_best_run

logging.basicConfig(level=logging.INFO)


def _bool_env(var: str, default: bool = False) -> bool:
    v = os.getenv(var)
    return default if v is None else v.lower() in {"1", "true", "yes"}


def main(*, dashboard: bool = False, dashboard_port: int | None = None) -> None:
    print("🌸 Iris Classification with MLflow\n" + "=" * 50)

    # 1 Load data ------------------------------------------------------------
    X_train, X_test, y_train, y_test, feat_names, tgt_names, _ = (
        load_and_prepare_iris_data()
    )
    print(f"✓ Training samples: {len(X_train)} | Test: {len(X_test)}")

    # 2 Logistic Regression --------------------------------------------------
    lr_run = train_logistic_regression(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        run_name="lr_baseline",
        register=True,
    )
    print(f"✓ Logistic run {lr_run[:8]}")

    # 3 Random Forest + Optuna ----------------------------------------------
    rf_run = train_random_forest_optimized(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        n_trials=20,
        run_name="rf_optimized",
        register=True,
    )
    print(f"✓ RF run {rf_run[:8]}")

    # 4 Compare & test best --------------------------------------------------
    compare_models()
    best = get_best_run()
    mdl = load_model_from_run(best["run_id"])
    if mdl is not None:
        acc = (mdl.predict(X_test) == y_test).mean()
        print(f"🏆 Best model accuracy: {acc:.4f}")
    else:
        print("❌ Could not load best model")

    if dashboard:
        port = dashboard_port or int(os.getenv("EXPLAINER_PORT", "8050"))
        print(f"\n🚀 ExplainerDashboard running on http://localhost:{port}")
        # Import and run dashboard for best model
        from src.backend.ML.mlops.explainer import dashboard_best_run
        dashboard_best_run("accuracy", port=port)


if __name__ == "__main__":
    main(
        dashboard=_bool_env("EXPLAINER_DASHBOARD", False),
        dashboard_port=int(os.getenv("EXPLAINER_PORT", "8050")),
    )

Overwriting src/backend/ML/examples/iris_classification_example.py


In [16]:
%%writefile src/backend/ML/model_api/__init__.py
# Model API package

Overwriting src/backend/ML/model_api/__init__.py


In [None]:
# %%writefile src/backend/ML/model_api/main.py
"""
FastAPI service for Iris and Breast-Cancer predictions.

This module provides a dual-dataset prediction service that:
1. Loads/trains point-estimate models for Iris (Random Forest)
2. Loads/trains Bayesian models for Breast Cancer (PyMC)
3. Exposes them at /predict/iris/{algo} and /predict/cancer/bayes endpoints
4. Includes an embedded smoke test
"""

from __future__ import annotations
import os
import logging
from typing import List, Dict, Any, Optional, Sequence
from contextlib import asynccontextmanager
import numpy as np
import pandas as pd
import mlflow
from fastapi import FastAPI, HTTPException, APIRouter, Path, Body, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, validator, field_validator

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global service instance
service = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load models once at startup."""
    global service
    service = _ModelService()
    try:
        service.load_all()
        yield
    finally:
        service = None

# Initialize FastAPI app with metadata
app = FastAPI(
    title="Iris & Cancer Prediction Service",
    description="Dual-dataset prediction service with model switching and metrics.",
    version="2.0.0",
    lifespan=lifespan
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create routers
iris_router = APIRouter(prefix="/predict/iris", tags=["Iris Models"])
cancer_router = APIRouter(prefix="/predict/cancer", tags=["Cancer Models"])
metrics_router = APIRouter(prefix="/models", tags=["Model Metrics"])

class IrisFeatures(BaseModel):
    """Single Iris-flower measurement."""
    sepal_length: float = Field(..., ge=4.0, le=8.0, description="Sepal length (cm)")
    sepal_width: float = Field(..., ge=2.0, le=4.5, description="Sepal width (cm)")
    petal_length: float = Field(..., ge=1.0, le=7.0, description="Petal length (cm)")
    petal_width: float = Field(..., ge=0.1, le=2.5, description="Petal width (cm)")

    class Config:
        json_schema_extra = {
            "example": {
                "sepal_length": 5.1,
                "sepal_width": 3.5,
                "petal_length": 1.4,
                "petal_width": 0.2
            }
        }

class CancerFeatures(BaseModel):
    """30-dimensional input in **exact** `load_breast_cancer().feature_names` order."""
    values: List[float] = Field(
        ..., min_items=30, max_items=30,
        description="All 30 features from scikit-learn breast-cancer dataset."
    )

    @field_validator("values")
    def _exactly_30(cls, v: List[float]) -> List[float]:
        if len(v) != 30:
            raise ValueError("Must provide exactly 30 features")
        return v

class IrisPredictRequest(BaseModel):
    rows: List[IrisFeatures]

class CancerPredictRequest(BaseModel):
    rows: List[CancerFeatures]
    posterior_samples: Optional[int] = Field(None, description="n posterior draws")

class PredictResponse(BaseModel):
    predictions: List[float]
    model_used: str
    uncertainty: Optional[List[Dict[str, float]]] = None

class CancerRetrainRequest(BaseModel):
    """Hyper-parameters for Bayesian retraining via PyMC."""
    draws: int = Field(500, ge=100, le=5_000)
    tune: int = Field(250, ge=50, le=2_000)
    target_accept: float = Field(0.9, ge=0.5, le=0.99)

class _ModelService:
    """Caches all Production models (iris_rf, iris_logreg, cancer_bayes)."""

    def __init__(self) -> None:
        self.iris: dict[str, mlflow.pyfunc.PyFuncModel] = {}
        self.cancer: mlflow.pyfunc.PyFuncModel | None = None
        self.model_info: dict[str, dict[str, Any]] = {}

    def _load_prod(self, name: str) -> mlflow.pyfunc.PyFuncModel:
        """Load *Production* version of *name* from the MLflow registry.

        Works with both HTTP and local *FileStore* back-ends because it relies
        on ``MlflowClient.get_latest_versions`` instead of the more restrictive
        SQL-like filter syntax that is **not** implemented for the file store.
        When the model (or the registry itself) is missing the helper will –
        if the *DEV_AUTOTRAIN* environment variable is set – auto-train a quick
        baseline model and promote it to *Production* so subsequent startups
        do not repeat the bootstrap step.
        """
        from mlflow.tracking import MlflowClient  # local import to keep top clean
        client = MlflowClient()

        def _promote_latest(model_name: str) -> None:
            """Promote the latest *None*-stage version to *Production*."""
            latest = client.get_latest_versions(model_name, stages=["None"])
            if latest:
                ver = latest[0].version
                client.transition_model_version_stage(
                    name=model_name,
                    version=ver,
                    stage="Production",
                    archive_existing_versions=True,
                )
                logger.info("🔄 Promoted %s version %s → Production", model_name, ver)

        # ------------------------------------------------------------------
        # 1️⃣  Try to fetch an existing Production version ------------------
        # ------------------------------------------------------------------
        try:
            versions = client.get_latest_versions(name, stages=["Production"])
            if versions:
                mv = versions[0]
                run = client.get_run(mv.run_id)
                self.model_info[name] = {
                    "version": mv.version,
                    "stage": mv.current_stage,
                    "run_id": mv.run_id,
                    "creation_timestamp": mv.creation_timestamp,
                    "metrics": run.data.metrics,
                }
                return mlflow.pyfunc.load_model(f"models:/{name}/{mv.current_stage}")
            raise RuntimeError("No Production version found")

        except Exception as exc:
            # ------------------------------------------------------------------
            # 2️⃣  Auto-train in development mode if enabled ---------------------
            # ------------------------------------------------------------------
            if os.getenv("DEV_AUTOTRAIN") != "1":
                logger.error("Failed to load model %s: %s", name, exc)
                raise HTTPException(
                    status_code=503,
                    detail=f"Model {name} missing or broken – set DEV_AUTOTRAIN=1 to bootstrap automatically: {exc}",
                ) from exc

            logger.warning("⚠️  %s not found – auto-training baseline model (DEV_AUTOTRAIN)", name)

            try:
                if name in {"iris_random_forest", "iris_logreg"}:
                    # This helper trains **both** iris models – inexpensive (<2 s)
                    from src.backend.ML.mlops.training import run_all_trainings
                    run_all_trainings(n_trials=3)  # fast bootstrap
                    _promote_latest(name)

                elif name == "breast_cancer_bayes":
                    # Attempt the full PyMC training – may fail if PyMC absent
                    try:
                        from src.backend.ML.mlops.training_bayes import train_bayes_logreg
                        train_bayes_logreg(draws=50, tune=25)
                        _promote_latest(name)
                    except Exception as bayes_err:
                        logger.warning("PyMC training failed (%s) – falling back to sklearn logistic regression", bayes_err)
                        # Lightweight sklearn fallback so the endpoint still works
                        from sklearn.datasets import load_breast_cancer
                        from sklearn.linear_model import LogisticRegression
                        import pandas as pd
                        X, y = load_breast_cancer(return_X_y=True, as_frame=True)
                        model = LogisticRegression(max_iter=1000).fit(X, y)

                        class _SklearnPyFunc(mlflow.pyfunc.PythonModel):
                            def __init__(self, mdl):
                                self._mdl = mdl

                            def predict(self, context, model_input):  # noqa: D401
                                import pandas as _pd
                                if not isinstance(model_input, _pd.DataFrame):
                                    model_input = _pd.DataFrame(model_input)
                                proba = self._mdl.predict_proba(model_input)[:, 1]
                                return proba

                        with mlflow.start_run(run_name="bootstrap_breast_cancer_lr"):
                            mlflow.pyfunc.log_model(
                                "model",
                                python_model=_SklearnPyFunc(model),
                                registered_model_name=name,
                                input_example=X.iloc[:5],
                            )
                        _promote_latest(name)
                else:
                    raise RuntimeError(f"Unknown model name '{name}' for auto-train path")

                # Retry load after successful bootstrap
                return mlflow.pyfunc.load_model(f"models:/{name}/Production")

            except Exception as tr_err:
                logger.error("Auto-training fallback failed for %s: %s", name, tr_err)
                raise HTTPException(status_code=503, detail=str(tr_err)) from tr_err

    def load_all(self) -> None:
        """Call once at startup (lifespan)."""
        try:
            # Ensure the service points to the same MLflow backend used during training
            mlflow_uri = os.getenv("MLFLOW_TRACKING_URI", "file:./mlruns_local")
            mlflow.set_tracking_uri(mlflow_uri)
            logger.info("MLflow tracking URI set to %s", mlflow_uri)

            logger.info("Loading Iris Random Forest model...")
            self.iris["rf"] = self._load_prod("iris_random_forest")

            logger.info("Loading Iris Logistic Regression model...")
            self.iris["logreg"] = self._load_prod("iris_logreg")

            logger.info("Loading Breast Cancer Bayesian model...")
            self.cancer = self._load_prod("breast_cancer_bayes")  # Fixed model name

            logger.info("✅ All models loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load models: {e}")
            raise

@app.get("/health")
async def health():
    """Health check endpoint with detailed model information."""
    if service is None:
        return {
            "status": "initializing",
            "message": "Service is starting up"
        }

    model_status = {
        "iris_rf": {
            "loaded": service.iris.get("rf") is not None,
            "info": service.model_info.get("iris_random_forest", {})
        },
        "iris_logreg": {
            "loaded": service.iris.get("logreg") is not None,
            "info": service.model_info.get("iris_logreg", {})
        },
        "cancer_bayes": {
            "loaded": service.cancer is not None,
            "info": service.model_info.get("breast_cancer_bayes", {})
        }
    }

    all_loaded = all(m["loaded"] for m in model_status.values())

    return {
        "status": "healthy" if all_loaded else "degraded",
        "models": model_status,
        "mlflow_tracking_uri": mlflow.get_tracking_uri()
    }

@app.head("/health", include_in_schema=False)
async def health_head():
    """HEAD variant of /health for readiness probes (no body)."""
    return await health()

@iris_router.post(
    "/{algo}",
    summary="Iris ‣ Random-Forest or Logistic-Regression",
    response_model=PredictResponse,
    description="Set **algo** to `rf` or `logreg`."
)
async def predict_iris(
    algo: str = Path(..., pattern="^(rf|logreg)$"),
    req: IrisPredictRequest = Body(...)
):
    """Make predictions using the specified Iris model."""
    model = service.iris.get(algo)
    if model is None:
        raise HTTPException(503, f"Iris model '{algo}' not loaded")
    X = pd.DataFrame([r.dict() for r in req.rows])
    preds = model.predict(X)
    return PredictResponse(predictions=preds.tolist(), model_used=f"iris_{algo}")

@cancer_router.post(
    "/bayes",
    summary="Cancer ‣ Bayesian Logistic-Regression",
    response_model=PredictResponse
)
async def predict_cancer(req: CancerPredictRequest):
    """Make predictions using the Bayesian Cancer model."""
    if service.cancer is None:
        raise HTTPException(503, "Cancer model not loaded")

    # Convert features to DataFrame
    X = pd.DataFrame([r.values for r in req.rows])

    # Get predictions
    preds = service.cancer.predict(X)

    # Add uncertainty estimates if requested
    uncertainty = None
    if req.posterior_samples:
        # Simple bootstrap for uncertainty (illustrative)
        samples = []
        for _ in range(req.posterior_samples):
            boot_idx = np.random.choice(len(X), size=len(X))
            boot_preds = service.cancer.predict(X.iloc[boot_idx])
            samples.append(boot_preds)

        # Calculate confidence intervals
        samples = np.array(samples)
        uncertainty = [
            {
                "lower": float(np.percentile(samples[:, i], 2.5)),
                "upper": float(np.percentile(samples[:, i], 97.5))
            }
            for i in range(len(X))
        ]

    return PredictResponse(
        predictions=preds.tolist(),
        model_used="cancer_bayes",
        uncertainty=uncertainty
    )

@app.post(
    "/train/cancer/bayes/retrain",
    summary="Start background Bayesian retraining and promotion",
    response_model=dict,
)
async def retrain_cancer_bayes(
    req: CancerRetrainRequest,
    background: BackgroundTasks,
):
    """Spawn a background task that retrains the Bayesian Cancer model.

    The task runs **non-blocking** so the HTTP request returns instantly.
    It will:
      1. call :func:`train_bayes_logreg` with given hyper-params,
      2. register the resulting model in MLflow, and
      3. promote the newest version to *Production*.
    """
    def _job() -> None:
        import mlflow
        from src.backend.ML.mlops.training_bayes import train_bayes_logreg
        from mlflow.tracking import MlflowClient
        global service  # use the already initialised cache

        # Ensure we talk to the same MLflow store as the service
        mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "file:./mlruns_local"))

        # 1️⃣  Fit the model – this may take a while
        try:
            train_bayes_logreg(
                draws=req.draws,
                tune=req.tune,
                target_accept=req.target_accept,
                register=True,
            )
        except Exception as exc:  # pragma: no cover – background context
            logger.error("Retrain task failed: %s", exc)
            return

        # 2️⃣  Promote the latest *None* stage version
        client = MlflowClient()
        latest = client.get_latest_versions("breast_cancer_bayes", stages=["None"])
        if latest:
            mv = latest[0]
            client.transition_model_version_stage(
                name="breast_cancer_bayes",
                version=mv.version,
                stage="Production",
                archive_existing_versions=True,
            )
            logger.info("✅ Promoted breast_cancer_bayes v%s → Production", mv.version)

        # 3️⃣  Hot-reload in-memory cache so /health reflects the new version
        try:
            service.load_all()
        except Exception as exc:
            logger.warning("Service reload after retrain failed: %s", exc)

    background.add_task(_job)
    return {"status": "started", "detail": "Bayesian retraining job running in background"}

@metrics_router.get(
    "/iris/metrics",
    summary="Live metrics for Iris models (Production stage)"
)
def iris_metrics():
    """Get latest metrics for both Iris models in Production."""
    client = mlflow.tracking.MlflowClient()
    out: dict[str, dict[str, Any]] = {}
    for name in ("iris_random_forest", "iris_logreg"):
        mv = client.get_latest_versions(name, stages=["Production"])[0]
        run = client.get_run(mv.run_id)
        out[name] = {
            "version": mv.version,
            "accuracy": run.data.metrics.get("accuracy"),
            "f1_macro": run.data.metrics.get("f1_macro")
        }
    return out

@metrics_router.post("/reload", summary="Reload all Production models into memory")
def reload_models():
    """Force reloading cache after external changes (e.g. new Production model)."""
    service.load_all()
    return {"status": "reloaded"}

# Wire up routers
app.include_router(iris_router)
app.include_router(cancer_router)
app.include_router(metrics_router)

# ─── Smoke Test ────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import asyncio
    try:
        import nest_asyncio  # type: ignore
        nest_asyncio.apply()
    except ImportError:
        pass  # nest_asyncio is optional; continue without if unavailable

    # Local import to avoid affecting prod runtime
    from fastapi.testclient import TestClient
    from sklearn.datasets import load_iris, load_breast_cancer
    import numpy as np

    async def test_app():
        """Run smoke tests with proper lifespan handling."""
        async with app.router.lifespan_context(app):
            client = TestClient(app)

            # Test health endpoint
            print("\n🏥 Testing /health endpoint...")
            health = client.get("/health").json()
            print(f"Response: {health}")

            # Test Iris endpoint with first Setosa sample
            print("\n🌸 Testing /predict/iris/rf endpoint with first Setosa sample...")
            iris = load_iris()
            setosa_sample = iris.data[0]  # First sample is Setosa
            iris_request = {
                "rows": [{
                    "sepal_length": float(setosa_sample[0]),
                    "sepal_width": float(setosa_sample[1]),
                    "petal_length": float(setosa_sample[2]),
                    "petal_width": float(setosa_sample[3])
                }]
            }
            iris_response = client.post("/predict/iris/rf", json=iris_request).json()
            print(f"True class: Setosa")
            print(f"Features: {setosa_sample}")
            print(f"Prediction: {iris_response}")

            # Test Cancer endpoint with first malignant sample
            print("\n🔬 Testing /predict/cancer/bayes endpoint with first malignant sample...")
            cancer = load_breast_cancer()
            malignant_idx = np.where(cancer.target == 1)[0][0]
            malignant_sample = cancer.data[malignant_idx]
            cancer_request = {
                "rows": [{
                    "values": [float(x) for x in malignant_sample]
                }],
                "posterior_samples": 100
            }
            cancer_response = client.post("/predict/cancer/bayes", json=cancer_request).json()
            print(f"True class: Malignant")
            print(f"Features: {malignant_sample[:5]}...")  # Show first 5 features
            print(f"Prediction with uncertainty: {cancer_response}")

            # Test metrics endpoint
            print("\n📊 Testing /models/iris/metrics endpoint...")
            metrics = client.get("/models/iris/metrics").json()
            print(f"Metrics: {metrics}")

    # Detect running loop (e.g. Jupyter) and execute accordingly
    try:
        _loop = asyncio.get_running_loop()
    except RuntimeError:
        _loop = None

    if _loop and _loop.is_running():
        _loop.run_until_complete(test_app())
    else:
        asyncio.run(test_app())


INFO:__main__:MLflow tracking URI set to http://mlflow:5000
INFO:__main__:Loading Iris Random Forest model...
  versions = client.get_latest_versions(name, stages=["Production"])
INFO:__main__:Loading Iris Logistic Regression model...
  versions = client.get_latest_versions(name, stages=["Production"])
INFO:__main__:Loading Breast Cancer Bayesian model...
  versions = client.get_latest_versions(name, stages=["Production"])
INFO:__main__:✅ All models loaded successfully
INFO:httpx:HTTP Request: GET http://testserver/health "HTTP/1.1 200 OK"
/tmp/ipykernel_32624/252283631.py:303: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  X = pd.DataFrame([r.dict() for r in req.rows])
INFO:httpx:HTTP Request: POST http://testserver/predict/iris/rf "HTTP/1.1 200 OK"
  return (1 / (1 + np.exp(-logits))).mean(0)
INFO:httpx:HTTP Request


🏥 Testing /health endpoint...
Response: {'status': 'healthy', 'models': {'iris_rf': {'loaded': True, 'info': {'version': '1', 'stage': 'Production', 'run_id': '510396fab8ac4cdda4fdf2bc4841fad1', 'creation_timestamp': 1751734398406, 'metrics': {'accuracy': 1.0, 'precision_macro': 1.0, 'recall_macro': 1.0, 'f1_macro': 1.0, 'precision_0': 1.0, 'recall_0': 1.0, 'f1_0': 1.0, 'support_0': 10.0, 'precision_1': 1.0, 'recall_1': 1.0, 'f1_1': 1.0, 'support_1': 9.0, 'precision_2': 1.0, 'recall_2': 1.0, 'f1_2': 1.0, 'support_2': 11.0, 'roc_auc_ovr_weighted': 1.0, 'log_loss': 2.2204460492503136e-16, 'mcc': 1.0, 'best_accuracy': 1.0, 'shapiq_total_interactions': 330.0, 'shapiq_unique_combinations': 11.0, 'shapiq_max_order': 2.0, 'shapiq_samples_analyzed': 30.0, 'shapiq_order0_': 0.29868026676967113, 'shapiq_order1_petal_length_cm': 0.11833661278222699, 'shapiq_order1_petal_width_cm': 0.11678626826884603, 'shapiq_order2_petal_length_cm_x_petal_width_cm': 0.0674760478235455, 'shapiq_order2_sepal_leng

INFO:httpx:HTTP Request: GET http://testserver/models/iris/metrics "HTTP/1.1 200 OK"


Metrics: {'iris_random_forest': {'version': '1', 'accuracy': 1.0, 'f1_macro': 1.0}, 'iris_logreg': {'version': '1', 'accuracy': 1.0, 'f1_macro': 1.0}}
