In [4]:
%%writefile src/mlops/config.py
"""Central MLflow configuration for consistent experiment tracking."""
import os

# ─── MLflow configuration ──────────────────────────────────────────────────
# Use Docker service-name so this works inside the compose network
# Falls back to local file store for standalone usage
TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000")
EXPERIMENT_NAME = "iris_classification"
ARTIFACT_ROOT = os.getenv("MLFLOW_ARTIFACT_ROOT", "./mlruns")

# ─── Model registry ────────────────────────────────────────────────────────
MODEL_NAME = "iris_classifier"
MODEL_STAGE_PRODUCTION = "Production"
MODEL_STAGE_STAGING = "Staging"

# ─── Dataset defaults ──────────────────────────────────────────────────────
RANDOM_STATE = 42
TEST_SIZE = 0.2 



Overwriting src/mlops/config.py


In [5]:
%%writefile src/mlops/logging.py
"""
Extended MLflow logging helpers.
"""
from __future__ import annotations
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Any, Sequence, Optional
from matplotlib.figure import Figure
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    log_loss,
    matthews_corrcoef,
)


def _log_fig(fig: Figure, name: str) -> None:
    """Log a Matplotlib figure directly without temp files."""
    mlflow.log_figure(fig, artifact_file=name)
    plt.close(fig)


def log_full_metrics(
    y_true, y_pred, *, label_list: Optional[Sequence[int]] = None, prefix: str = ""
) -> Dict[str, float]:
    """
    Compute & log *all* useful classification metrics.

    Returns a flat dict so callers can unit-test easily.
    
    Args:
        y_true: True labels
        y_pred: Predicted labels  
        label_list: Optional list of label integers (for compatibility)
        prefix: Optional prefix for metric names
        
    Returns:
        Dictionary of all calculated metrics
    """
    # (1) macro metrics ------------------------------------------------------
    macro = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division="warn"
    )
    metrics: Dict[str, float] = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision_macro": float(macro[0]),
        "recall_macro": float(macro[1]),
        "f1_macro": float(macro[2]),
    }

    # (2) per-class ----------------------------------------------------------
    report = classification_report(y_true, y_pred, output_dict=True, zero_division="warn")
    if isinstance(report, dict):
        for klass, d in report.items():
            if isinstance(klass, str) and klass.isdigit():  # skip 'accuracy', 'macro avg', …
                k = int(klass)
                if isinstance(d, dict):
                    precision_val = d.get("precision", 0.0)
                    recall_val = d.get("recall", 0.0)
                    f1_val = d.get("f1-score", 0.0)
                    support_val = d.get("support", 0.0)
                    
                    metrics[f"precision_{k}"] = float(precision_val) if precision_val is not None else 0.0
                    metrics[f"recall_{k}"] = float(recall_val) if recall_val is not None else 0.0
                    metrics[f"f1_{k}"] = float(f1_val) if f1_val is not None else 0.0
                    metrics[f"support_{k}"] = float(support_val) if support_val is not None else 0.0

    # (3) derived – try/except so we never crash ----------------------------
    try:
        metrics["roc_auc_ovr_weighted"] = roc_auc_score(
            y_true, pd.get_dummies(y_pred), multi_class="ovr", average="weighted"
        )
    except Exception:
        pass
    try:
        metrics["log_loss"] = log_loss(y_true, pd.get_dummies(y_pred))
    except Exception:
        pass
    try:
        metrics["mcc"] = matthews_corrcoef(y_true, y_pred)
    except Exception:
        pass

    # (4) optional prefix for nested CV, etc. -------------------------------
    if prefix:
        metrics = {f"{prefix}_{k}": v for k, v in metrics.items()}

    mlflow.log_metrics(metrics)
    return metrics


def log_confusion_matrix(
    y_true, y_pred, *, class_names: Optional[Sequence[str]] = None, artifact_name: str = "confusion_matrix.png"
) -> None:
    """Create + log confusion matrix using mlflow.log_figure."""
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names if class_names is not None else "auto",
        yticklabels=class_names if class_names is not None else "auto",
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Confusion Matrix")
    _log_fig(fig, artifact_name)


def log_feature_importance(
    feature_names: list, importances: list, artifact_name: str = "feature_importance.png"
):
    """Bar plot logged via mlflow.log_figure (no disk I/O)."""
    imp_df = (
        pd.DataFrame({"feature": feature_names, "importance": importances})
        .sort_values("importance")
    )
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(data=imp_df, x="importance", y="feature", ax=ax)
    ax.set_title("Feature Importances")
    _log_fig(fig, artifact_name)


def log_parameters(params: Dict[str, Any]) -> None:
    """
    Log parameters to MLflow.
    
    Args:
        params: Dictionary of parameter names and values
    """
    mlflow.log_params(params)


def log_dataset_info(X_train, X_test, y_train, y_test) -> None:
    """
    Log dataset information as parameters.
    
    Args:
        X_train: Training features
        X_test: Test features
        y_train: Training labels
        y_test: Test labels
    """
    dataset_params = {
        "train_size": len(X_train),
        "test_size": len(X_test),
        "n_features": (X_train.shape[1] if hasattr(X_train, "shape") else len(X_train[0])),
        "n_classes": (len(set(y_train)) if hasattr(y_train, "__iter__") else 1),
    }

    log_parameters(dataset_params)


# Legacy compatibility - keep old function name as alias
log_model_metrics = log_full_metrics 

Overwriting src/mlops/logging.py


In [6]:
%%writefile src/mlops/experiment.py

"""Training utilities with MLflow integration."""
import mlflow
import optuna
from typing import Optional
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from .config import RANDOM_STATE, TEST_SIZE
from .experiment_utils import setup_mlflow_experiment

# Re-export for convenience
__all__ = ['setup_mlflow_experiment', 'load_and_prepare_iris_data',
           'train_logistic_regression', 'train_random_forest_with_optimization']
from .logging import (
    log_model_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)


# ─────────────────────────── src/mlops/training.py (excerpt) ───────────────
def load_and_prepare_iris_data(
    test_size: float = TEST_SIZE,
    random_state: int = RANDOM_STATE
) -> DatasetTuple:
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    # ✅ re-wrap as DataFrame so feature names propagate downstream
    import pandas as pd
    feat_names = iris.feature_names
    X_train_df = pd.DataFrame(X_train_scaled, columns=feat_names)
    X_test_df  = pd.DataFrame(X_test_scaled,  columns=feat_names)

    return (X_train_df, X_test_df, y_train, y_test,
            feat_names, list(iris.target_names), scaler)



Overwriting src/mlops/experiment.py


In [7]:
%%writefile src/mlops/model_registry.py
"""MLflow model registry utilities."""
import mlflow
from typing import Optional, Dict, Any
from .config import MODEL_NAME, MODEL_STAGE_PRODUCTION


def register_model(model_uri: str, 
                   model_name: Optional[str] = None,
                   description: Optional[str] = None) -> str:
    """
    Register a model in the MLflow model registry using the fluent client API.
    
    Args:
        model_uri: URI of the model to register
        model_name: Name for the registered model
        description: Optional description
        
    Returns:
        Model version
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        # Create registered model if it doesn't exist
        if not client.get_registered_model(name, silent=True):
            client.create_registered_model(name)
            print(f"Created new registered model: {name}")
            
        # Create new version
        mv = client.create_model_version(
            name=name,
            source=model_uri,
            description=description
        )
        print(f"Created version {mv.version} of model {name}")
        return mv.version
        
    except Exception as e:
        print(f"Failed to register model: {e}")
        raise


def promote_model_to_stage(model_name: Optional[str] = None,
                           version: Optional[str] = None,
                           stage: str = MODEL_STAGE_PRODUCTION) -> None:
    """
    Promote a model version to a specific stage using the fluent client.
    
    Args:
        model_name: Name of the registered model
        version: Version to promote (if None, promotes latest)
        stage: Target stage
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        # Get latest version if not specified
        if version is None:
            latest = client.get_latest_versions(name, stages=["None"])
            if not latest:
                raise ValueError(f"No versions found for model {name}")
            version = latest[0].version
        
        # Transition to stage
        client.transition_model_version_stage(
            name=name,
            version=version,
            stage=stage
        )
        print(f"Promoted model {name} version {version} to {stage}")
        
    except Exception as e:
        print(f"Failed to promote model: {e}")
        raise


def load_model_from_registry(model_name: Optional[str] = None,
                             stage: str = MODEL_STAGE_PRODUCTION):
    """
    Load a model from the registry by name and stage.
    
    Args:
        model_name: Name of the registered model
        stage: Stage to load from
        
    Returns:
        Loaded model
    """
    name = model_name or MODEL_NAME
    model_uri = f"models:/{name}/{stage}"
    
    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model {name} from {stage} stage")
        return model
    except Exception as e:
        print(f"Failed to load model from registry: {e}")
        raise


def load_model_from_run(run_id: str, artifact_path: str = "model"):
    """
    Load a model from a specific run.
    
    Args:
        run_id: MLflow run ID
        artifact_path: Path to the model artifact
        
    Returns:
        Loaded model
    """
    model_uri = f"runs:/{run_id}/{artifact_path}"
    
    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model from run {run_id}")
        return model
    except Exception as e:
        print(f"Failed to load model from run: {e}")
        raise


def get_model_info(model_name: Optional[str] = None,
                   stage: str = MODEL_STAGE_PRODUCTION) -> Dict[str, Any]:
    """
    Get information about a registered model using the fluent client.
    
    Args:
        model_name: Name of the registered model
        stage: Stage to get info for
        
    Returns:
        Model information dictionary
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        model_version = client.get_latest_versions(name, stages=[stage])[0]
        
        return {
            "name": model_version.name,
            "version": model_version.version,
            "stage": model_version.current_stage,
            "description": model_version.description,
            "creation_timestamp": model_version.creation_timestamp,
            "last_updated_timestamp": model_version.last_updated_timestamp,
            "run_id": model_version.run_id
        }
    except Exception as e:
        print(f"Failed to get model info: {e}")
        raise 


Overwriting src/mlops/model_registry.py


In [8]:
%%writefile src/mlops/training.py
"""Training utilities with MLflow integration."""
import mlflow
from mlflow import sklearn  # type: ignore
from mlflow import models  # type: ignore
import optuna
from optuna.integration.mlflow import MLflowCallback
import numpy as np
import pandas as pd
from typing import Optional, Tuple, List, Callable, cast, Any, Dict, TypeAlias
from numpy.typing import NDArray
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import Bunch

from .config import RANDOM_STATE, TEST_SIZE
from .experiment_utils import setup_mlflow_experiment
from .logging import (
    log_full_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)
from .shapiq_utils import log_shapiq_interactions

# Type aliases for complex types
FloatArray: TypeAlias = NDArray[np.float64]
IntArray: TypeAlias = NDArray[np.int64]
DatasetTuple: TypeAlias = Tuple[FloatArray, FloatArray, IntArray, IntArray, List[str], List[str], StandardScaler]


def load_and_prepare_iris_data(
    test_size: float = TEST_SIZE,
    random_state: int = RANDOM_STATE
) -> DatasetTuple:
    """
    Load and prepare the Iris dataset.
    
    Args:
        test_size: Fraction of data to use for testing
        random_state: Random state for reproducibility
        
    Returns:
        Tuple of (X_train_scaled, X_test_scaled, y_train, y_test, 
                 feature_names, target_names, scaler)
    """
    # Load dataset
    iris: Any = load_iris()
    X: NDArray[np.float64] = cast(NDArray[np.float64], iris.data)
    y: NDArray[np.int64] = cast(NDArray[np.int64], iris.target)
    feature_names: List[str] = list(iris.feature_names)
    target_names: List[str] = list(iris.target_names)
    
    # Split data
    X_train: NDArray[np.float64]
    X_test: NDArray[np.float64]
    y_train: NDArray[np.int64]
    y_test: NDArray[np.int64]
    X_train, X_test, y_train, y_test = cast(
        Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int64], NDArray[np.int64]],
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.fit_transform(X_train))
    X_test_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.transform(X_test))
    
    return (X_train_scaled, X_test_scaled, y_train, y_test,
            feature_names, target_names, scaler)


# === (A) LOGISTIC REGRESSION (training only, NO dashboard) ================
def train_logistic_regression(
    X_train, y_train, X_test, y_test, feature_names, target_names,
    *, run_name: str = "lr_baseline", register: bool = True
) -> str:
    """Train logistic regression model without dashboard integration."""
    setup_mlflow_experiment()
    mlflow.sklearn.autolog(log_models=True)
    
    with mlflow.start_run(run_name=run_name) as run:
        model = LogisticRegression(random_state=RANDOM_STATE, max_iter=1_000).fit(
            X_train, y_train
        )

        y_pred = model.predict(X_test)
        log_full_metrics(y_test, y_pred)
        log_confusion_matrix(y_test, y_pred, class_names=target_names)

        signature = mlflow.models.infer_signature(X_train, model.predict(X_train))
        sklearn.log_model(
            model, "model",
            registered_model_name="iris_logreg" if register else None,
            signature=signature, input_example=X_test[:5],
        )
        
        # SHAP-IQ: compute & log feature interaction values
        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        log_shapiq_interactions(model, X_test_df, feature_names, max_order=2)
        
        return run.info.run_id


def _create_rf_objective(X_train, y_train, X_test, y_test) -> Callable[[optuna.trial.Trial], float]:
    """Create Optuna objective function for Random Forest optimization."""
    def objective(trial: optuna.trial.Trial) -> float:
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 200),
            "max_depth": trial.suggest_int("max_depth", 2, 20),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "random_state": RANDOM_STATE,
        }
        m = RandomForestClassifier(**params).fit(X_train, y_train)
        return float(accuracy_score(y_test, m.predict(X_test)))
    return objective


# === (B) RANDOM-FOREST + Optuna (training only) ===========================
def train_random_forest_optimized(
    X_train, y_train, X_test, y_test, feature_names, target_names,
    *, n_trials: int = 50, run_name: str = "rf_optimized", register: bool = True
) -> str:
    """Train optimized Random Forest model without dashboard integration."""
    setup_mlflow_experiment()
    mlflow.sklearn.autolog(disable=True)        # Optuna will log

    with mlflow.start_run(run_name=run_name) as run:
        study = optuna.create_study(direction="maximize")
        study.optimize(_create_rf_objective(X_train, y_train, X_test, y_test), n_trials=n_trials,
                       callbacks=[MLflowCallback(
                           tracking_uri=mlflow.get_tracking_uri(),
                           metric_name="accuracy", mlflow_kwargs={"nested": True}
                       )])

        best = RandomForestClassifier(**study.best_params).fit(X_train, y_train)
        y_pred = best.predict(X_test)
        log_full_metrics(y_test, y_pred)
        log_confusion_matrix(y_test, y_pred, class_names=target_names)
        log_feature_importance(feature_names, best.feature_importances_)
        mlflow.log_metric("best_accuracy", study.best_value)

        signature = mlflow.models.infer_signature(X_train, best.predict(X_train))
        sklearn.log_model(
            best, "model",
            registered_model_name="iris_random_forest" if register else None,
            signature=signature, input_example=X_test[:5],
        )
        
        # SHAP-IQ: compute & log feature interaction values
        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        log_shapiq_interactions(best, X_test_df, feature_names, max_order=2)
        
        return run.info.run_id


# === (C) ONE-STOP helper: train both models ===============================
def run_all_trainings(*,
    test_size: float = TEST_SIZE, random_state: int = RANDOM_STATE, n_trials: int = 50) -> None:
    """Train both logistic regression and random forest models."""
    X_tr, X_te, y_tr, y_te, feats, tgts, _ = load_and_prepare_iris_data(
        test_size, random_state
    )
    train_logistic_regression(
        X_tr, y_tr, X_te, y_te, feats, tgts, run_name="lr_baseline"
    )
    train_random_forest_optimized(
        X_tr, y_tr, X_te, y_te, feats, tgts,
        n_trials=n_trials, run_name="rf_optimized"
    )


# === (D) Robust comparator ===============================================
def compare_models(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> None:
    """
    Print the best run according to *metric_key* while gracefully
    falling-back to common alternates when the preferred key is missing.
    """
    from .experiment_utils import get_best_run

    fallback_keys = ["accuracy_score", "best_accuracy"]
    try:
        best = get_best_run(experiment_name, metric_key, maximize)
        rid = best["run_id"]

        # choose first key that exists
        score = best.get(f"metrics.{metric_key}")
        if score is None:
            for alt in fallback_keys:
                score = best.get(f"metrics.{alt}")
                if score is not None:
                    metric_key = alt
                    break

        model_type = best.get("params.model_type", "unknown")
        print(f"🏆 Best run: {rid}")
        print(f"📈 {metric_key}: {score if score is not None else 'N/A'}")
        print(f"🔖 Model type: {model_type}")
    except Exception as err:
        print(f"❌ Error comparing models: {err}")


# Legacy compatibility functions (with dashboard support)
train_logistic_regression_autolog = train_logistic_regression
train_random_forest_with_optimization = train_random_forest_optimized


if __name__ == "__main__":
    run_all_trainings()


Overwriting src/mlops/training.py


In [9]:
%%writefile src/mlops/explainer.py
from __future__ import annotations
import os
import socket
import logging
from pathlib import Path
from typing import Any, Sequence, Optional
from contextlib import closing

import mlflow
import psutil  # lightweight; already added to pyproject deps
from sklearn.utils.multiclass import type_of_target
from explainerdashboard import (
    ClassifierExplainer,
    RegressionExplainer,
    ExplainerDashboard,
)

logging.basicConfig(level=logging.INFO)

__all__ = ["build_and_log_dashboard", "load_dashboard_yaml", "dashboard_best_run", "_first_free_port", "_port_details"]


# ---------------------------------------------------------------------------
def _port_details(port: int) -> str:
    """
    Return a one-line string with PID & cmdline of the process
    listening on *port*, or '' if none / not discoverable.
    """
    for c in psutil.net_connections(kind="tcp"):
        if c.status == psutil.CONN_LISTEN and c.laddr and c.laddr.port == port:
            try:
                p = psutil.Process(c.pid)
                return f"[PID {p.pid} – {p.name()}] cmd={p.cmdline()}"
            except psutil.Error:
                return f"[PID {c.pid}] (no detail)"
    return ""

def _first_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return first free TCP port ≥ *start* on localhost."""
    for port in range(start, start + tries):
        try:
            with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
                s.settimeout(0.05)
                s.bind(("127.0.0.1", port))
                return port
        except OSError:
            # Port is in use, try next one
            continue
    raise RuntimeError("⚠️  No free ports found in range")

def _next_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return the first free TCP port ≥ *start*. (Alias for backward compatibility)"""
    return _first_free_port(start, tries)

def _port_in_use(port: int) -> bool:
    """Check if a port is already in use on any interface."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(0.05)
        # Check both localhost and 0.0.0.0 to be thorough
        try:
            # First check localhost (127.0.0.1)
            if s.connect_ex(("127.0.0.1", port)) == 0:
                return True
            # Also check if anything is bound to all interfaces
            if s.connect_ex(("0.0.0.0", port)) == 0:
                return True
        except (socket.gaierror, OSError):
            # If we can't connect, assume port is free
            pass
        return False


# ---------------------------------------------------------------------------
# -------------------------------------------------------------- #
#  src/mlops/explainer.py (only this function changed)           #
# -------------------------------------------------------------- #
def build_and_log_dashboard(
    model: Any,
    X_test,
    y_test,
    *,
    # ---- explainer kwargs (unchanged) -------------------------
    cats: Optional[Sequence[str]] = None,
    idxs: Optional[Sequence[Any]] = None,
    descriptions: Optional[dict[str, str]] = None,
    target: Optional[str] = None,
    labels: Optional[Sequence[str]] = None,
    X_background=None,
    model_output: str = "probability",
    shap: str = "guess",
    shap_interaction: bool = True,
    simple: bool = False,
    mode: str = "dash",         # 🆕 safest default for docker
    title: str = "Model Explainer",
    # ---- infra -----------------------------------------------
    run: mlflow.ActiveRun | None = None,
    port: int | None = None,
    serve: bool = False,
    server_backend: str = "waitress",   # 🆕 waitress|gunicorn|jupyterdash
    conflict_strategy: str = "next",
    max_tries: int = 20,
    save_yaml: bool = True,
    output_dir: os.PathLike | str | None = None,
) -> Path:
    """
    Build + (optionally) serve the dashboard.

    server_backend
        'waitress'    – production WSGI server (binds 0.0.0.0)  
        'gunicorn'    – spawn via subprocess (needs gunicorn installed)  
        'jupyterdash' – fallback; use only for notebook demos
    """
    # ------------ build explainer (unchanged) ------------------
    problem = type_of_target(y_test)
    ExplainerCls = RegressionExplainer if problem.startswith("continuous") else ClassifierExplainer
    expl_kwargs = dict(
        cats=cats, idxs=idxs, descriptions=descriptions, target=target,
        labels=labels, X_background=X_background, model_output=model_output, shap=shap,
    )
    expl_kwargs = {k: v for k, v in expl_kwargs.items() if v is not None}
    explainer = ExplainerCls(model, X_test, y_test, **expl_kwargs)

    dash = ExplainerDashboard(
        explainer, title=title, shap_interaction=shap_interaction,
        simple=simple, mode=mode,
    )

    out_dir = Path(output_dir or "."); out_dir.mkdir(parents=True, exist_ok=True)
    html_path = out_dir / "explainer_dashboard.html"; dash.save_html(html_path); mlflow.log_artifact(str(html_path))
    if save_yaml:
        yaml = out_dir / "dashboard.yaml"; dash.to_yaml(yaml); mlflow.log_artifact(str(yaml))

    # ------------ serve ----------------------------------------
    if not serve:
        return html_path

    chosen = port or _first_free_port()
    attempts = 0
    while _port_in_use(chosen):
        if conflict_strategy == "raise":
            raise RuntimeError(f"Port {chosen} in use {_port_details(chosen)}")
        if conflict_strategy == "kill":
            pid = int((_port_details(chosen) or "PID 0").split()[1]); psutil.Process(pid).terminate()
            break
        attempts += 1
        if attempts >= max_tries:
            raise RuntimeError(f"No free port after {max_tries} tries")
        chosen += 1

    logging.info("🌐 Dashboard on http://0.0.0.0:%s via %s", chosen, server_backend)

    if server_backend == "waitress":
        dash.run(chosen, host="0.0.0.0", use_waitress=True, mode="dash")
    elif server_backend == "gunicorn":
        import subprocess, shlex
        cmd = f"gunicorn -w 3 -b 0.0.0.0:{chosen} dashboard:app"
        subprocess.Popen(shlex.split(cmd), cwd=str(out_dir))
    else:  # jupyterdash
        dash.run(chosen, host="0.0.0.0")

    return html_path




# ---------------------------------------------------------------------------
def load_dashboard_yaml(path: os.PathLike | str) -> ExplainerDashboard:
    """Reload a YAML config – unchanged but kept for public API."""
    return ExplainerDashboard.from_config(path) 


# ────────────────────────────────────────────────────────────────────────────
def dashboard_best_run(metric: str = "accuracy",
                       maximize: bool = True,
                       *, port: int | None = None) -> None:
    """
    Load the *best* run (by `metric`) from the active experiment and
    launch an ExplainerDashboard **once** for that model.

    Example
    -------
    >>> from mlops.explainer import dashboard_best_run
    >>> dashboard_best_run("accuracy")      # opens http://0.0.0.0:8050
    """
    from .experiment_utils import get_best_run
    from .model_registry  import load_model_from_run
    from sklearn.datasets import load_iris
    import pandas as pd

    best = get_best_run(metric_key=metric, maximize=maximize)
    run_id = best["run_id"]
    model  = load_model_from_run(run_id)

    iris = load_iris()
    X_df  = pd.DataFrame(iris.data, columns=iris.feature_names)
    build_and_log_dashboard(
        model, X_df, iris.target,
        labels=list(iris.target_names),
        run=None, serve=True, port=port or 8050
    )

   

Overwriting src/mlops/explainer.py


In [10]:
%%writefile src/mlops/utils.py
from pathlib import Path
import os
# Add near the top of utils.py
import sys
from pathlib import Path
import inspect

def add_project_root_to_sys_path(levels_up: int = 2) -> Path:
    """
    Ensure the repository root (default: two directories up) is on sys.path.

    Returns
    -------
    Path
        The absolute Path object pointing to the directory inserted.
    """
    try:
        here = Path(__file__).resolve()
    except NameError:           # running in Jupyter / IPython
        # Use the file of the *caller* if possible,
        # otherwise fall back to the current working directory.
        caller = inspect.stack()[1].filename
        here = Path(caller).resolve() if caller != "<stdin>" else Path.cwd()

    root = here.parents[levels_up]
    sys.path.insert(0, str(root))
    return root


_added_src_flag: bool = False          # module-level cache

def project_root() -> Path:
    """
    Return the absolute path to the repo root *without* relying on __file__.

    • If running from a .py file, use that file's parent/parent (…/src/..)
    • If running interactively (no __file__), fall back to CWD.
    """
    if "__file__" in globals():
        return Path(__file__).resolve().parent.parent
    return Path.cwd()

def ensure_src_on_path(verbose: bool = True) -> None:
    """
    Ensure <repo-root>/src is the *first* entry in sys.path exactly once.
    The verbose flag prints the helper line the first time only.
    """
    import sys
    global _added_src_flag
    root = project_root()
    src_path = root / "src"

    if str(src_path) not in sys.path:
        sys.path.insert(0, str(src_path))
        if verbose and not _added_src_flag:
            print(f"🔧 Added {src_path} to sys.path")
        _added_src_flag = True



Overwriting src/mlops/utils.py


In [11]:
%%writefile src/mlops/experiment_utils.py
"""MLflow experiment utilities."""
import os
import pathlib
import mlflow
import mlflow.tracking
from typing import Optional, Dict, Any
import requests

from src.mlops.config import EXPERIMENT_NAME, TRACKING_URI

import re, shutil, logging

_HEALTH_ENDPOINTS = ("/health", "/version")
_hex32 = re.compile(r"^[0-9a-f]{32}$", re.I)
logger = logging.getLogger(__name__)

def _ping_tracking_server(uri: str, timeout: float = 2.0) -> bool:
    """Return True iff an HTTP MLflow server is reachable at *uri*."""
    if not uri.startswith("http"):
        return False                        # file store – nothing to ping
    try:
        # Use new health endpoints
        for ep in _HEALTH_ENDPOINTS:
            response = requests.get(uri.rstrip("/") + ep, timeout=timeout)
            response.raise_for_status()
        return True
    except Exception:
        return False


# ─────────────────────────── src/mlops/experiment_utils.py ──────────────────


def _sanitize_mlruns_dir(root: pathlib.Path) -> None:
    """
    Remove or archive directories inside *root* that cannot possibly be valid
    MLflow experiments (file-store experiments MUST be numeric).
    """
    for p in root.iterdir():
        if p.is_dir() and _hex32.match(p.name) and not (p / "meta.yaml").exists():
            logging.warning("🧹 Removing orphan MLflow dir %s", p)
            shutil.rmtree(p, ignore_errors=True)

def _fallback_uri() -> str:
    """Local file-store *outside* the default ./mlruns to avoid collisions."""
    local = pathlib.Path.cwd() / "mlruns_local"
    local.mkdir(exist_ok=True)
    _sanitize_mlruns_dir(local)          # one-time clean-up
    return f"file:{local}"

# ---------------------------------------------------------------------
# Add this just below the imports at module top (once per module)


# ---------------------------------------------------------------------
def setup_mlflow_experiment(experiment_name: Optional[str] = None) -> None:
    """
    Resolve a reachable MLflow tracking URI and make sure the experiment exists.
    Falls back to a local file store if the remote /health or /version ping fails.
    """
    from .config import EXPERIMENT_NAME, TRACKING_URI

    exp_name = experiment_name or EXPERIMENT_NAME
    uri = TRACKING_URI

    def _ping(u: str) -> bool:
        if not u.startswith("http"):
            return False
        try:
            for ep in ("/health", "/version"):
                r = requests.get(u.rstrip("/") + ep, timeout=2)
                r.raise_for_status()
            return True
        except requests.RequestException as exc:
            logger.debug("MLflow server ping failed: %s", exc)
            return False

    if not _ping(uri):
        uri = _fallback_uri()
        logger.warning("⚠️  MLflow server unreachable – using local store %s", uri)

    mlflow.set_tracking_uri(uri)

    # guarantee the experiment exists
    if mlflow.get_experiment_by_name(exp_name) is None:
        mlflow.create_experiment(exp_name, artifact_location=f"{uri}/artifacts")

    mlflow.set_experiment(exp_name)
    logger.info("🗂  Using MLflow experiment '%s' @ %s", exp_name, uri)


def get_best_run(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> Dict[str, Any]:
    """
    Return a *shallow* dict with run_id, metrics.*, and params.* keys
    so downstream code can use predictable dotted paths.
    """
    exp_name = experiment_name or EXPERIMENT_NAME
    setup_mlflow_experiment(exp_name)

    client = mlflow.tracking.MlflowClient()
    exp = mlflow.get_experiment_by_name(exp_name)
    if exp is None:
        raise ValueError(f"Experiment '{exp_name}' not found")

    order = "DESC" if maximize else "ASC"
    run = client.search_runs(
        [exp.experiment_id],
        order_by=[f"metrics.{metric_key} {order}"],
        max_results=1,
    )[0]

    # Build a *flat* mapping -------------------------------------------------
    flat: Dict[str, Any] = {"run_id": run.info.run_id}

    # Metrics
    for k, v in run.data.metrics.items():
        flat[f"metrics.{k}"] = v

    # Params
    for k, v in run.data.params.items():
        flat[f"params.{k}"] = v

    # Tags (optional but handy)
    for k, v in run.data.tags.items():
        flat[f"tags.{k}"] = v

    return flat



Overwriting src/mlops/experiment_utils.py


In [2]:
%%writefile src/examples/shapiq_demo.py
#!/usr/bin/env python3
"""
SHAP-IQ Integration Demo

This script demonstrates the new SHAP-IQ (Shapley Interaction) functionality
integrated into the MLOps pipeline. It shows how Shapley interaction values
are computed and logged alongside regular model metrics.

Usage:
    python src/examples/shapiq_demo.py
"""

from __future__ import annotations
import logging

# ─── Path setup ─────────────────────────────────────────────────────────────
from src.mlops.utils import add_project_root_to_sys_path
PROJECT_ROOT = add_project_root_to_sys_path(levels_up=2)  # safe in both .py and interactive :contentReference[oaicite:8]{index=8}

# ─── Imports ────────────────────────────────────────────────────────────────
from src.mlops.training import (
    load_and_prepare_iris_data,
    train_logistic_regression,
    train_random_forest_optimized
)
from src.mlops.shapiq_utils import (
    compute_shapiq_interactions,
    log_shapiq_interactions,
    get_top_interactions
)
from src.mlops.experiment_utils import setup_mlflow_experiment, get_best_run
import mlflow
import pandas as pd

# ─── Logging Setup ─────────────────────────────────────────────────────────
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def demo_standalone_shapiq():
    """Demonstrate standalone SHAP-IQ computation without MLflow logging."""
    print("🔬 SHAP-IQ Standalone Demo")
    print("=" * 50)
    
    # Load data and train a simple model
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()
    
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=20, random_state=42)
    model.fit(X_train, y_train)
    
    print(f"✓ Trained RandomForest on {len(X_train)} samples")
    print(f"✓ Test accuracy: {model.score(X_test, y_test):.3f}")
    
    # Compute SHAP-IQ interactions
    X_test_df = pd.DataFrame(X_test, columns=feature_names)
    print(f"\n🧮 Computing SHAP-IQ interactions...")
    
    shapiq_df = compute_shapiq_interactions(
        model, 
        X_test_df.head(10),  # Use subset for demo
        feature_names, 
        max_order=2,
        budget=128
    )
    
    if not shapiq_df.empty:
        print(f"✓ Computed {len(shapiq_df)} interaction values")
        
        # Show top interactions
        top_interactions = get_top_interactions(shapiq_df, top_n=5)
        print(f"\n🏆 Top 5 Feature Interactions:")
        print("-" * 60)
        
        for idx, row in top_interactions.iterrows():
            feature_combo = ' × '.join(row['feature_names'])
            if not feature_combo:
                feature_combo = "baseline"
            print(f"  {feature_combo:30} | Order {row['order']} | {row['abs_mean']:.4f}")
        
        # Show order breakdown
        order_counts = shapiq_df['order'].value_counts().sort_index()
        print(f"\n📊 Interaction Order Breakdown:")
        for order, count in order_counts.items():
            if order == 0:
                print(f"  Order {order} (main effects):     {count:4d} values")
            elif order == 1:
                print(f"  Order {order} (individual):       {count:4d} values")
            elif order == 2:
                print(f"  Order {order} (pairwise):         {count:4d} values")
            else:
                print(f"  Order {order} (higher-order):     {count:4d} values")
    else:
        print("⚠️  No interactions computed (this can happen with simple models/data)")


def demo_integrated_training():
    """Demonstrate SHAP-IQ integration in the training pipeline."""
    print("\n\n🚀 SHAP-IQ Integrated Training Demo") 
    print("=" * 50)
    
    # Setup MLflow experiment
    setup_mlflow_experiment("shapiq_demo")
    
    # Load data
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()
    print(f"✓ Loaded Iris dataset: {len(X_train)} train, {len(X_test)} test samples")
    
    # Train model with SHAP-IQ integration
    print(f"\n🤖 Training Logistic Regression with SHAP-IQ...")
    lr_run_id = train_logistic_regression(
        X_train, y_train, X_test, y_test, 
        feature_names, target_names,
        run_name="lr_with_shapiq"
    )
    print(f"✓ Logistic Regression complete: {lr_run_id[:8]}")
    
    print(f"\n🌲 Training Random Forest with SHAP-IQ...")
    rf_run_id = train_random_forest_optimized(
        X_train, y_train, X_test, y_test,
        feature_names, target_names,
        n_trials=10,  # Reduced for demo
        run_name="rf_with_shapiq"
    )
    print(f"✓ Random Forest complete: {rf_run_id[:8]}")
    
    # Show logged SHAP-IQ metrics
    print(f"\n📊 SHAP-IQ Metrics from MLflow:")
    print("-" * 50)
    
    try:
        # Get the latest run (Random Forest)
        with mlflow.start_run(run_id=rf_run_id):
            run_data = mlflow.get_run(rf_run_id)
            metrics = run_data.data.metrics
            
            # Filter SHAP-IQ metrics
            shapiq_metrics = {k: v for k, v in metrics.items() if k.startswith('shapiq_')}
            
            if shapiq_metrics:
                print(f"Found {len(shapiq_metrics)} SHAP-IQ metrics:")
                for metric, value in sorted(shapiq_metrics.items()):
                    if 'order' in metric and 'count' not in metric:
                        print(f"  {metric:35} = {value:.6f}")
                    elif 'total' in metric or 'unique' in metric or 'max' in metric:
                        print(f"  {metric:35} = {int(value)}")
            else:
                print("  No SHAP-IQ metrics found (may take longer to compute)")
                
    except Exception as e:
        print(f"  Error retrieving metrics: {e}")
    
    # Compare models
    print(f"\n🏆 Comparing Models:")
    print("-" * 30)
    try:
        best_run = get_best_run("accuracy", maximize=True)
        run_id = best_run["run_id"]
        accuracy = best_run.get("metrics.accuracy", "N/A")
        print(f"Best model: {run_id[:8]} (accuracy: {accuracy})")
        
        # Check if SHAP-IQ metrics are available for best model
        shapiq_count = best_run.get("metrics.shapiq_total_interactions")
        if shapiq_count:
            print(f"SHAP-IQ interactions: {int(shapiq_count)} computed")
        
    except Exception as e:
        print(f"Error comparing models: {e}")


def demo_manual_shapiq_logging():
    """Demonstrate manual SHAP-IQ logging outside of training."""
    print(f"\n\n🔧 Manual SHAP-IQ Logging Demo")
    print("=" * 50)
    
    # Load data and train model
    X_train, X_test, y_train, y_test, feature_names, target_names, _ = load_and_prepare_iris_data()
    
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)
    
    # Manual MLflow run with SHAP-IQ logging
    setup_mlflow_experiment("shapiq_demo") 
    
    with mlflow.start_run(run_name="manual_shapiq_demo"):
        # Log basic metrics
        accuracy = model.score(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)
        
        # Log SHAP-IQ interactions
        X_test_df = pd.DataFrame(X_test, columns=feature_names)
        print("Computing and logging SHAP-IQ interactions...")
        
        log_shapiq_interactions(
            model, 
            X_test_df,
            feature_names,
            max_order=2,
            top_n=5,
            budget=64,
            n_samples=15  # Sample for faster computation
        )
        
        current_run = mlflow.active_run()
        print(f"✓ SHAP-IQ logged to run: {current_run.info.run_id[:8]}")


def main():
    """Run all SHAP-IQ demos."""
    print("🌟 SHAP-IQ Integration Demonstration")
    print("=" * 60)
    print("This demo shows how Shapley interactions are computed and logged")
    print("in the MLOps pipeline to understand feature interactions.")
    print()
    
    try:
        # Demo 1: Standalone computation
        demo_standalone_shapiq()
        
        # Demo 2: Integrated training
        demo_integrated_training()
        
        # Demo 3: Manual logging
        demo_manual_shapiq_logging()
        
        print(f"\n\n🎉 SHAP-IQ Demo Complete!")
        print("=" * 60)
        print("✓ Standalone SHAP-IQ computation")
        print("✓ Integrated training with automatic SHAP-IQ logging") 
        print("✓ Manual SHAP-IQ logging")
        print()
        print("🔍 Check MLflow UI to see logged SHAP-IQ metrics and artifacts:")
        print("   - Metrics: shapiq_order1_*, shapiq_order2_*, etc.")
        print("   - Artifacts: shapiq_interactions.csv, shapiq_interactions_summary.csv")
        
    except Exception as e:
        logger.error(f"Demo failed: {e}")
        print(f"\n❌ Demo failed: {e}")
        print("This might be due to SHAP-IQ dependency issues or data problems.")


if __name__ == "__main__":
    main() 

Overwriting src/examples/shapiq_demo.py


In [4]:
# %%writefile src/examples/select_best_and_dashboard.py
#!/usr/bin/env python3
"""
Select best model and launch dashboard (training is done elsewhere).

Usage:
    python src/examples/select_best_and_dashboard.py
"""

from __future__ import annotations

from src.mlops.utils import add_project_root_to_sys_path
PROJECT_ROOT = add_project_root_to_sys_path()

from src.mlops.experiment_utils import get_best_run
from src.mlops.model_registry import load_model_from_run
from src.mlops.explainer import dashboard_best_run

# Configuration variables
METRIC = "accuracy"  # Metric to optimize (e.g., 'accuracy', 'f1')
PORT = 8050           # Port for the dashboard
MAXIMIZE = True       # Whether to maximize (True) or minimize (False) the metric

def main() -> None:
    print(f"🔍 Searching MLflow runs by {METRIC}…")

    # Retrieve the best run based on the specified metric
    best = get_best_run(metric_key=METRIC, maximize=MAXIMIZE)
    run_id = best["run_id"]
    score = best.get(f"metrics.{METRIC}", "N/A")

    print(f"🏆 Best run: {run_id[:8]} — {METRIC}: {score}")

    # Load the model from the run registry
    model = load_model_from_run(run_id)
    if model is None:
        raise RuntimeError("Model could not be loaded from registry")

    print("✓ Model loaded – launching dashboard")
    # Launch the explainer dashboard for the best model
    dashboard_best_run(METRIC, maximize=MAXIMIZE, port=PORT)

if __name__ == "__main__":
    main()



🔍 Searching MLflow runs by accuracy…


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns
🏆 Best run: 94f66ced — accuracy: 1.0


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Loaded model from run 94f66ceda2834bf090861cfadb7a2012
✓ Model loaded – launching dashboard


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Loaded model from run 94f66ceda2834bf090861cfadb7a2012
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
Generating layout...
Calculating shap values...
Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating shap interaction values... (this may take a while)
Reminder: TreeShap computational complexity is O(TLD^2), where T is the number of trees, L is the maximum number of leaves in any tree and D 


invalid value encountered in scalar divide


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names



Calculating predictions...
Calculating ShadowDecTree for each individual decision tree...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...



X has feature names, but RandomForestClassifier was fitted without feature names


invalid value encountered in scalar divide


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names



Dumping configuration .yaml to /workspace/dashboard.yaml...


INFO:root:🌐 Dashboard on http://0.0.0.0:8050 via waitress


Starting ExplainerDashboard on http://172.18.0.2:8050


INFO:waitress:Serving on http://0.0.0.0:8050

X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClas

In [3]:
# %%writefile src/scripts/run_training.py
#!/usr/bin/env python3
"""
Simple training runner script.

Run with:
    python src/scripts/run_training.py
    # or inside Jupyter:
    %run src/scripts/run_training.py
"""

from src.mlops.utils import add_project_root_to_sys_path

# Ensure src/ is importable in both script and notebook contexts
PROJECT_ROOT = add_project_root_to_sys_path()

from src.mlops.training import run_all_trainings


def main() -> None:
    print("🚀 Running all training pipelines from", PROJECT_ROOT)
    run_all_trainings(n_trials=20)
    print("✅ Training complete!")


if __name__ == "__main__":
    main()


🚀 Running all training pipelines from /workspace


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


Registered model 'iris_logreg' already exists. Creating a new version of this model...
Created version '2' of model 'iris_logreg'.
Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


[I 2025-06-26 00:19:57,309] A new study created in memory with name: no-name-392926de-1feb-42f2-8d3b-ccb7cab304f6
[I 2025-06-26 00:19:57,344] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 44, 'max_depth': 3, 'min_samples_split': 8, 'min_samples_leaf': 5}. Best is trial 0 with value: 1.0.
Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line

✅ Training complete!


In [3]:
# %%writefile src/examples/iris_classification_example.py
#!/usr/bin/env python3
"""
Iris Classification Example (argparse-free, notebook-safe).

Configuration:
    • export EXPLAINER_DASHBOARD=1   # launch dashboard
    • export EXPLAINER_PORT=8150     # optional port override
"""

from __future__ import annotations
import os
import logging

from src.mlops.utils import ensure_src_on_path
ensure_src_on_path()

from src.mlops.training import (
    load_and_prepare_iris_data,
    train_logistic_regression,
    train_random_forest_optimized,
    compare_models,
)
from src.mlops.model_registry import load_model_from_run
from src.mlops.experiment_utils import get_best_run

logging.basicConfig(level=logging.INFO)


def _bool_env(var: str, default: bool = False) -> bool:
    v = os.getenv(var)
    return default if v is None else v.lower() in {"1", "true", "yes"}


def main(*, dashboard: bool = False, dashboard_port: int | None = None) -> None:
    print("🌸 Iris Classification with MLflow\n" + "=" * 50)

    # 1 Load data ------------------------------------------------------------
    X_train, X_test, y_train, y_test, feat_names, tgt_names, _ = (
        load_and_prepare_iris_data()
    )
    print(f"✓ Training samples: {len(X_train)} | Test: {len(X_test)}")

    # 2 Logistic Regression --------------------------------------------------
    lr_run = train_logistic_regression(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        run_name="lr_baseline",
        register=True,
    )
    print(f"✓ Logistic run {lr_run[:8]}")

    # 3 Random Forest + Optuna ----------------------------------------------
    rf_run = train_random_forest_optimized(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        n_trials=20,
        run_name="rf_optimized",
        register=True,
    )
    print(f"✓ RF run {rf_run[:8]}")

    # 4 Compare & test best --------------------------------------------------
    compare_models()
    best = get_best_run()
    mdl = load_model_from_run(best["run_id"])
    if mdl is not None:
        acc = (mdl.predict(X_test) == y_test).mean()
        print(f"🏆 Best model accuracy: {acc:.4f}")
    else:
        print("❌ Could not load best model")

    if dashboard:
        port = dashboard_port or int(os.getenv("EXPLAINER_PORT", "8050"))
        print(f"\n🚀 ExplainerDashboard running on http://localhost:{port}")
        # Import and run dashboard for best model
        from src.mlops.explainer import dashboard_best_run
        dashboard_best_run("accuracy", port=port)


if __name__ == "__main__":
    main(
        dashboard=_bool_env("EXPLAINER_DASHBOARD", False),
        dashboard_port=int(os.getenv("EXPLAINER_PORT", "8050")),
    ) 

🔧 Added /workspace/src/src to sys.path
🌸 Iris Classification with MLflow
✓ Training samples: 120 | Test: 30


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


Successfully registered model 'iris_logreg'.
Created version '1' of model 'iris_logreg'.


✓ Logistic run 947454c6


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


[I 2025-06-26 00:06:49,084] A new study created in memory with name: no-name-7a2cffd4-ca19-4882-ab8a-aaa9e68469cc
  callbacks=[MLflowCallback(
[I 2025-06-26 00:06:49,184] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 172, 'max_depth': 6, 'min_samples_split': 8, 'min_samples_leaf': 2}. Best is trial 0 with value: 1.0.
Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store

✓ RF run b72eb478


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns
🏆 Best run: b72eb478af964ec08c534478bbf7e404
📈 accuracy: 1.0
🔖 Model type: unknown


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

⚠️  MLflow server unreachable at http://mlflow:5000 – falling back to local store file:/workspace/mlruns


Traceback (most recent call last):
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 356, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 454, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1595, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/store/tracking/file_store.py", line 1588, in _read_helper
    result = read_yaml(root, file_name)
  File "/workspace/.venv/lib/python3.10/site-packages/mlflow/utils/yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' does not exist.")
mlflow.exceptions.MissingConfigException: Yaml file '/workspace

🗂  Using MLflow experiment 'iris_classification' @ file:/workspace/mlruns


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Loaded model from run b72eb478af964ec08c534478bbf7e404
🏆 Best model accuracy: 1.0000


In [3]:
%%writefile src/mlops/shapiq_utils.py
"""
SHAP-IQ (Shapley Interaction) utilities for MLflow integration.

This module provides functions to compute and log Shapley interaction values
for machine learning models. Shapley interactions help understand how features
work together to influence model predictions.
"""

from __future__ import annotations
import os
import pandas as pd
import numpy as np
import mlflow
from shapiq import TabularExplainer
from typing import Optional, Sequence, Union
import logging

logger = logging.getLogger(__name__)


def compute_shapiq_interactions(
    model,
    X: pd.DataFrame,
    feature_names: Sequence[str],
    max_order: int = 2,
    budget: int = 256,
    n_samples: Optional[int] = None,
) -> pd.DataFrame:
    """
    Robust wrapper around shapiq.TabularExplainer to return a tidy DataFrame
    with Shapley-interaction values.  Handles the two public APIs:
      •  .dict_values   (mapping)
      •  .values        (np.ndarray)  →  use  .to_dict()
    """
    logger.info(
        "Computing SHAP-IQ (max_order=%s, budget=%s, n_samples=%s)",
        max_order,
        budget,
        n_samples,
    )

    X_sample = (
        X.sample(n=n_samples, random_state=42) if n_samples and len(X) > n_samples else X
    )

    explainer = TabularExplainer(
        model=model,
        data=X_sample.values,
        index="k-SII",
        max_order=max_order,
    )

    rows: list[dict[str, Any]] = []
    for i, vec in enumerate(X_sample.values):
        try:
            iv = explainer.explain(vec, budget=budget)

            # --- unify both APIs ------------------------------------------------
            if hasattr(iv, "dict_values"):                    # shapiq ≥ 0.4
                items = iv.dict_values.items()
            elif hasattr(iv, "to_dict"):                      # fallback
                items = iv.to_dict().items()
            else:
                # last resort – try attribute access
                items = dict(iv.values).items()

            for combo, val in items:
                rows.append(
                    {
                        "sample_idx": i,
                        "combination": combo,
                        "value": float(val),
                        "order": len(combo),
                        "feature_names": tuple(feature_names[j] for j in combo)
                        if combo
                        else (),
                    }
                )
        except Exception as exc:  # noqa: BLE001
            logger.warning("SHAP-IQ failed on sample %s: %s", i, exc)

    df = pd.DataFrame(rows)
    logger.info("✓ %s interaction rows computed", len(df))
    return df



def log_shapiq_interactions(
    model,
    X: pd.DataFrame,
    feature_names: Sequence[str],
    max_order: int = 2,
    top_n: int = 10,
    budget: int = 256,
    n_samples: Optional[int] = None,
    output_path: Optional[str] = None
) -> None:
    """
    Compute Shapley interaction values and log them to MLflow.

    This function:
    1. Computes interactions using compute_shapiq_interactions
    2. Logs the top N interactions as MLflow metrics
    3. Saves the full interaction table as CSV and logs as artifact

    Args:
        model: Trained sklearn-like model.
        X: DataFrame of features.
        feature_names: List of feature column names.
        max_order: Maximum interaction order (default: 2).
        top_n: Number of top interactions to log as metrics (default: 10).
        budget: Evaluation budget for interaction approximation (default: 256).
        n_samples: If provided, sample this many rows for computation.
        output_path: Optional path for CSV output (default: "shapiq_interactions.csv").
    """
    logger.info("Starting SHAP-IQ interaction logging")
    
    # Compute interactions
    df = compute_shapiq_interactions(
        model, X, feature_names, max_order, budget, n_samples
    )
    
    if df.empty:
        logger.warning("No interactions computed - skipping logging")
        return
    
    # Aggregate: mean absolute value per combination across all samples
    agg = (
        df.groupby(['combination', 'feature_names', 'order'])['value']
          .apply(lambda x: x.abs().mean())
          .reset_index()
          .sort_values('value', ascending=False)
    )
    
    # Log summary statistics
    mlflow.log_metric("shapiq_total_interactions", len(df))
    mlflow.log_metric("shapiq_unique_combinations", len(agg))
    mlflow.log_metric("shapiq_max_order", max_order)
    mlflow.log_metric("shapiq_samples_analyzed", len(X) if n_samples is None else min(n_samples, len(X)))
    
    # Log top N interactions as metrics
    logger.info(f"Logging top {top_n} interactions as MLflow metrics")
    for idx, row in agg.head(top_n).iterrows():
        combo = row['combination']
        feature_combo = row['feature_names'] 
        value = row['value']
        order = row['order']
        
        # Create metric name from feature names or indices
        if feature_combo:
            name = f"shapiq_order{order}_{'_x_'.join(feature_combo)}"
        else:
            name = f"shapiq_order{order}_{'_'.join(map(str, combo))}"
        
        # Sanitize metric name (MLflow has restrictions)
        name = name.replace(' ', '_').replace('(', '').replace(')', '').replace(',', '_')[:250]
        
        mlflow.log_metric(name, float(value))
    
    # Log order-specific summaries
    order_summary = df.groupby('order')['value'].agg(['count', 'mean', 'std']).fillna(0)
    for order_val in order_summary.index:
        mlflow.log_metric(f"shapiq_order{order_val}_count", order_summary.loc[order_val, 'count'])
        mlflow.log_metric(f"shapiq_order{order_val}_mean_abs", abs(order_summary.loc[order_val, 'mean']))
        if order_summary.loc[order_val, 'std'] > 0:
            mlflow.log_metric(f"shapiq_order{order_val}_std", order_summary.loc[order_val, 'std'])
    
    # Save and log full DataFrame as artifact
    output_file = output_path or "shapiq_interactions.csv"
    
    try:
        # Add readable feature names to the full DataFrame
        df_export = df.copy()
        df_export['feature_names_str'] = df_export['feature_names'].apply(lambda x: ' x '.join(x) if x else 'baseline')
        
        df_export.to_csv(output_file, index=False)
        mlflow.log_artifact(output_file)
        logger.info(f"Logged SHAP-IQ interactions artifact: {output_file}")
        
        # Also create and log a summary file
        summary_file = output_path.replace('.csv', '_summary.csv') if output_path else "shapiq_interactions_summary.csv"
        agg_export = agg.copy()
        agg_export['feature_names_str'] = agg_export['feature_names'].apply(lambda x: ' x '.join(x) if x else 'baseline')
        agg_export.to_csv(summary_file, index=False)
        mlflow.log_artifact(summary_file)
        logger.info(f"Logged SHAP-IQ summary artifact: {summary_file}")
        
    except Exception as e:
        logger.error(f"Error saving SHAP-IQ artifacts: {e}")
    
    logger.info("SHAP-IQ interaction logging completed")


def get_top_interactions(
    shapiq_df: pd.DataFrame,
    top_n: int = 10,
    order: Optional[int] = None
) -> pd.DataFrame:
    """
    Extract top interactions from a SHAP-IQ DataFrame.
    
    Args:
        shapiq_df: DataFrame returned by compute_shapiq_interactions.
        top_n: Number of top interactions to return.
        order: If provided, filter to interactions of this order only.
    
    Returns:
        DataFrame with top interactions, aggregated across samples.
    """
    df = shapiq_df.copy()
    
    if order is not None:
        df = df[df['order'] == order]
    
    if df.empty:
        return df
    
    # Aggregate and sort by absolute mean value
    agg = (
        df.groupby(['combination', 'feature_names', 'order'])['value']
          .agg(['mean', 'std', 'count'])
          .reset_index()
    )
    agg['abs_mean'] = agg['mean'].abs()
    agg = agg.sort_values('abs_mean', ascending=False)
    
    return agg.head(top_n)


Overwriting src/mlops/shapiq_utils.py


In [12]:
%%writefile tests/test_mlflow_integration.py

"""Tests for MLflow integration modules."""
import sys
import os

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

from mlops.experiment import setup_mlflow_experiment
from mlops.training import (
    load_and_prepare_iris_data, 
    train_logistic_regression
)
from mlops.model_registry import load_model_from_run


def test_data_loading():
    """Test that data loading works correctly."""
    data = load_and_prepare_iris_data()
    X_train, X_test, y_train, y_test, feature_names, target_names, scaler = data
    
    assert len(X_train) > 0
    assert len(X_test) > 0
    assert len(feature_names) == 4
    assert len(target_names) == 3
    assert X_train.shape[1] == 4  # 4 features


def test_experiment_setup():
    """Test that MLflow experiment setup works."""
    # This should not raise an exception
    setup_mlflow_experiment("test_experiment")
    

def test_model_training_and_loading():
    """Test end-to-end model training and loading."""
    # Load data
    data = load_and_prepare_iris_data()
    X_train, X_test, y_train, y_test, feature_names, target_names, scaler = data
    
    # Train a simple model
    run_id = train_logistic_regression(
        X_train, y_train, X_test, y_test,
        feature_names, target_names,
        run_name="test_lr",
        register=False  # Don't register for tests
    )
    
    assert run_id is not None
    assert len(run_id) > 0
    
    # Load the model back
    model = load_model_from_run(run_id, "model")
    
    # Test prediction
    predictions = model.predict(X_test)
    assert len(predictions) == len(y_test)
    
    # Check accuracy is reasonable (should be > 0.8 for iris)
    accuracy = (predictions == y_test).mean()
    assert accuracy > 0.8


if __name__ == "__main__":
    # Run tests
    test_data_loading()
    print("✓ Data loading test passed")
    
    test_experiment_setup()
    print("✓ Experiment setup test passed")
    
    test_model_training_and_loading()
    print("✓ Model training and loading test passed")
    
    print("\nAll tests passed! 🎉") 

Overwriting tests/test_mlflow_integration.py


In [13]:
%%writefile tests/test_explainer.py
import sys
import os
import pytest
from pathlib import Path

# Add src to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

def test_yaml_roundtrip(tmp_path):
    """Test that a dashboard can be saved to YAML and reloaded."""
    from src.mlops.explainer import build_and_log_dashboard, load_dashboard_yaml
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    import mlflow
    import pandas as pd

    iris = load_iris()
    X, y = iris.data, iris.target
    X_df = pd.DataFrame(X, columns=iris.feature_names)
    model = LogisticRegression(max_iter=1000).fit(X, y)
    with mlflow.start_run():
        yaml_path = build_and_log_dashboard(
            model, X_df, y,
            serve=False,
            save_yaml=True,
            output_dir=tmp_path
        )
        # Reload
        dash = load_dashboard_yaml(yaml_path)
        assert dash.explainer.model.__class__.__name__ == "LogisticRegression"


def test_build_dashboard(tmp_path):
    """Test that a dashboard can be built and saved."""
    from src.mlops.explainer import build_and_log_dashboard
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    import mlflow
    import pandas as pd

    iris = load_iris()
    X, y = iris.data, iris.target
    X_df = pd.DataFrame(X, columns=iris.feature_names)
    model = LogisticRegression(max_iter=1000).fit(X, y)
    with mlflow.start_run():
        html = build_and_log_dashboard(
            model, X_df, y,
            serve=False,
            save_yaml=False,
            output_dir=tmp_path
        )
        assert html.exists() and html.suffix == ".html" 


Overwriting tests/test_explainer.py


In [None]:
%%writefile tests/test_shapiq_utils.py
"""
Tests for SHAP-IQ utilities module.
"""

import sys
import os
import pytest
import pandas as pd
import numpy as np
import tempfile
from unittest.mock import patch, MagicMock

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

from mlops.shapiq_utils import (
    compute_shapiq_interactions,
    log_shapiq_interactions,
    get_top_interactions
)


@pytest.fixture
def sample_data():
    """Create sample classification data for testing."""
    X, y = make_classification(
        n_samples=50,
        n_features=4,
        n_informative=3,
        n_redundant=1,
        random_state=42
    )
    feature_names = [f"feature_{i}" for i in range(4)]
    X_df = pd.DataFrame(X, columns=feature_names)
    return X_df, y, feature_names


@pytest.fixture
def trained_model(sample_data):
    """Create a trained model for testing."""
    X_df, y, _ = sample_data
    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_df, y)
    return model


def test_compute_shapiq_interactions_basic(sample_data, trained_model):
    """Test basic functionality of compute_shapiq_interactions."""
    X_df, _, feature_names = sample_data
    
    # Test with small sample to speed up test
    result_df = compute_shapiq_interactions(
        trained_model, 
        X_df.head(5),  # Use only 5 samples for testing
        feature_names, 
        max_order=2, 
        budget=64  # Small budget for fast testing
    )
    
    # Check structure
    expected_columns = ['sample_idx', 'combination', 'value', 'order', 'feature_names']
    assert all(col in result_df.columns for col in expected_columns)
    
    # Check data types
    assert result_df['sample_idx'].dtype in [np.int64, int]
    assert result_df['value'].dtype in [np.float64, float]
    assert result_df['order'].dtype in [np.int64, int]
    
    # Check that we have interactions of different orders
    if not result_df.empty:
        orders = result_df['order'].unique()
        assert len(orders) > 0
        assert all(order <= 2 for order in orders)  # max_order=2


def test_compute_shapiq_interactions_with_sampling(sample_data, trained_model):
    """Test compute_shapiq_interactions with n_samples parameter."""
    X_df, _, feature_names = sample_data
    
    result_df = compute_shapiq_interactions(
        trained_model, 
        X_df, 
        feature_names, 
        max_order=1,  # Simple interactions only
        budget=32,
        n_samples=3   # Sample only 3 rows
    )
    
    if not result_df.empty:
        # Should have at most 3 different sample indices
        unique_samples = result_df['sample_idx'].nunique()
        assert unique_samples <= 3


def test_compute_shapiq_interactions_empty_result():
    """Test handling of edge cases that might result in empty results."""
    # Create trivial data that might not generate interactions
    X = pd.DataFrame([[1, 1], [1, 1]], columns=['a', 'b'])
    y = [0, 0]
    
    model = LogisticRegression()
    model.fit(X, y)
    
    result_df = compute_shapiq_interactions(
        model, X, ['a', 'b'], max_order=1, budget=16
    )
    
    # Should return a DataFrame with correct structure even if empty
    expected_columns = ['sample_idx', 'combination', 'value', 'order', 'feature_names']
    assert all(col in result_df.columns for col in expected_columns)


@patch('mlflow.log_metric')
@patch('mlflow.log_artifact')
def test_log_shapiq_interactions(mock_log_artifact, mock_log_metric, sample_data, trained_model):
    """Test log_shapiq_interactions with mocked MLflow calls."""
    X_df, _, feature_names = sample_data
    
    with tempfile.TemporaryDirectory() as tmpdir:
        output_path = os.path.join(tmpdir, "test_interactions.csv")
        
        # Call the function
        log_shapiq_interactions(
            trained_model,
            X_df.head(5),  # Small sample for testing
            feature_names,
            max_order=2,
            top_n=3,
            budget=32,
            output_path=output_path
        )
        
        # Check that MLflow functions were called
        assert mock_log_metric.called
        assert mock_log_artifact.called
        
        # Check some expected metric calls
        metric_calls = [call[0][0] for call in mock_log_metric.call_args_list]
        expected_metrics = [
            "shapiq_total_interactions",
            "shapiq_unique_combinations", 
            "shapiq_max_order",
            "shapiq_samples_analyzed"
        ]
        
        for expected in expected_metrics:
            assert any(expected in call for call in metric_calls), f"Expected metric {expected} not found"


def test_get_top_interactions(sample_data, trained_model):
    """Test get_top_interactions utility function."""
    X_df, _, feature_names = sample_data
    
    # First compute interactions
    shapiq_df = compute_shapiq_interactions(
        trained_model, 
        X_df.head(10), 
        feature_names, 
        max_order=2, 
        budget=64
    )
    
    if not shapiq_df.empty:
        # Test getting top interactions
        top_interactions = get_top_interactions(shapiq_df, top_n=5)
        assert len(top_interactions) <= 5
        
        # Check structure
        expected_columns = ['combination', 'feature_names', 'order', 'mean', 'std', 'count', 'abs_mean']
        assert all(col in top_interactions.columns for col in expected_columns)
        
        # Test filtering by order
        if len(shapiq_df['order'].unique()) > 1:
            order_filtered = get_top_interactions(shapiq_df, top_n=3, order=1)
            if not order_filtered.empty:
                assert all(order_filtered['order'] == 1)


def test_compute_shapiq_interactions_error_handling():
    """Test error handling in compute_shapiq_interactions."""
    # Create data that might cause issues
    X = pd.DataFrame([[np.nan, 1], [2, np.nan]], columns=['a', 'b'])
    y = [0, 1]
    
    model = LogisticRegression()
    
    # This should handle errors gracefully and return empty DataFrame
    try:
        model.fit([[1, 1], [2, 2]], [0, 1])  # Fit with clean data
        result_df = compute_shapiq_interactions(model, X, ['a', 'b'], max_order=1, budget=16)
        
        # Should return DataFrame with expected structure even on error
        expected_columns = ['sample_idx', 'combination', 'value', 'order', 'feature_names']
        assert all(col in result_df.columns for col in expected_columns)
        
    except Exception:
        # If an exception occurs, that's also acceptable for this edge case
        pass


@patch('mlflow.log_metric')
@patch('mlflow.log_artifact')
def test_log_shapiq_interactions_empty_result(mock_log_artifact, mock_log_metric):
    """Test log_shapiq_interactions when no interactions are computed."""
    # Mock compute_shapiq_interactions to return empty DataFrame
    with patch('mlops.shapiq_utils.compute_shapiq_interactions') as mock_compute:
        mock_compute.return_value = pd.DataFrame(columns=['sample_idx', 'combination', 'value', 'order', 'feature_names'])
        
        # This should handle empty results gracefully
        log_shapiq_interactions(
            MagicMock(),  # Mock model
            pd.DataFrame([[1, 2]], columns=['a', 'b']),
            ['a', 'b'],
            max_order=1
        )
        
        # Should not log metrics or artifacts for empty results
        assert not mock_log_metric.called
        assert not mock_log_artifact.called


if __name__ == "__main__":
    pytest.main([__file__]) 