In [1]:
%%writefile src/mlops/config.py
"""Central MLflow configuration for consistent experiment tracking."""
import os

# ─── MLflow configuration ──────────────────────────────────────────────────
# Use Docker service-name so this works inside the compose network
# Falls back to local file store for standalone usage
TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000")
EXPERIMENT_NAME = "iris_classification"
ARTIFACT_ROOT = os.getenv("MLFLOW_ARTIFACT_ROOT", "./mlruns")

# ─── Model registry ────────────────────────────────────────────────────────
MODEL_NAME = "iris_classifier"
MODEL_STAGE_PRODUCTION = "Production"
MODEL_STAGE_STAGING = "Staging"

# ─── Dataset defaults ──────────────────────────────────────────────────────
RANDOM_STATE = 42
TEST_SIZE = 0.2 



Overwriting src/mlops/config.py


In [2]:
%%writefile src/mlops/logging.py
"""MLflow logging utilities for metrics, parameters, and artifacts."""
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Any, Optional
from matplotlib.figure import Figure
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)


def log_model_metrics(y_true, y_pred, prefix: str = "") -> Dict[str, float]:
    """
    Calculate and log model evaluation metrics.
    
    Args:
        y_true: True labels
        y_pred: Predicted labels
        prefix: Optional prefix for metric names
        
    Returns:
        Dictionary of calculated metrics
    """
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision_macro": precision_score(y_true, y_pred, average='macro'),
        "recall_macro": recall_score(y_true, y_pred, average='macro'),
        "f1_macro": f1_score(y_true, y_pred, average='macro')
    }
    
    # Add prefix if provided
    if prefix:
        metrics = {f"{prefix}_{k}": v for k, v in metrics.items()}
    
    # Log metrics to MLflow
    mlflow.log_metrics(metrics)
    
    return metrics


def _log_fig(fig: Figure, artifact_name: str) -> None:
    """Log a Matplotlib figure directly without temp files."""
    mlflow.log_figure(fig, artifact_file=artifact_name)
    plt.close(fig)


def log_confusion_matrix(y_true, y_pred, *, class_names=None,
                         artifact_name: str = "confusion_matrix.png"):
    """Create + log confusion matrix using mlflow.log_figure."""
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set(xlabel="Predicted", ylabel="Actual", title="Confusion Matrix")
    _log_fig(fig, artifact_name)


def log_feature_importance(feature_names: list, importances: list,
                           artifact_name: str = "feature_importance.png"):
    """Bar plot logged via mlflow.log_figure (no disk I/O)."""
    imp_df = (pd.DataFrame({"feature": feature_names,
                            "importance": importances})
              .sort_values("importance"))
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(data=imp_df, x="importance", y="feature", ax=ax)
    ax.set_title("Feature Importances")
    _log_fig(fig, artifact_name)


def log_parameters(params: Dict[str, Any]) -> None:
    """
    Log parameters to MLflow.
    
    Args:
        params: Dictionary of parameter names and values
    """
    mlflow.log_params(params)


def log_dataset_info(X_train, X_test, y_train, y_test) -> None:
    """
    Log dataset information as parameters.
    
    Args:
        X_train: Training features
        X_test: Test features
        y_train: Training labels
        y_test: Test labels
    """
    dataset_params = {
        "train_size": len(X_train),
        "test_size": len(X_test),
        "n_features": (X_train.shape[1] if hasattr(X_train, 'shape') 
                       else len(X_train[0])),
        "n_classes": (len(set(y_train)) if hasattr(y_train, '__iter__') 
                      else 1)
    }
    
    log_parameters(dataset_params) 


Overwriting src/mlops/logging.py


In [3]:
%%writefile src/mlops/experiment.py

"""Training utilities with MLflow integration."""
import mlflow
import optuna
from typing import Optional
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from .config import RANDOM_STATE, TEST_SIZE
from .experiment_utils import setup_mlflow_experiment

# Re-export for convenience
__all__ = ['setup_mlflow_experiment', 'load_and_prepare_iris_data',
           'train_logistic_regression', 'train_random_forest_with_optimization']
from .logging import (
    log_model_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)


def load_and_prepare_iris_data(test_size: float = TEST_SIZE,
                               random_state: int = RANDOM_STATE):
    """
    Load and prepare the Iris dataset.
    
    Args:
        test_size: Fraction of data to use for testing
        random_state: Random state for reproducibility
        
    Returns:
        Tuple of (X_train_scaled, X_test_scaled, y_train, y_test, 
                 feature_names, target_names, scaler)
    """
    # Load dataset
    iris = load_iris()
    X, y = iris.data, iris.target
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return (X_train_scaled, X_test_scaled, y_train, y_test,
            iris.feature_names, iris.target_names, scaler)



Overwriting src/mlops/experiment.py


In [4]:
%%writefile src/mlops/model_registry.py
"""MLflow model registry utilities."""
import mlflow
from typing import Optional, Dict, Any
from .config import MODEL_NAME, MODEL_STAGE_PRODUCTION


def register_model(model_uri: str, 
                   model_name: Optional[str] = None,
                   description: Optional[str] = None) -> str:
    """
    Register a model in the MLflow model registry using the fluent client API.
    
    Args:
        model_uri: URI of the model to register
        model_name: Name for the registered model
        description: Optional description
        
    Returns:
        Model version
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        # Create registered model if it doesn't exist
        if not client.get_registered_model(name, silent=True):
            client.create_registered_model(name)
            print(f"Created new registered model: {name}")
            
        # Create new version
        mv = client.create_model_version(
            name=name,
            source=model_uri,
            description=description
        )
        print(f"Created version {mv.version} of model {name}")
        return mv.version
        
    except Exception as e:
        print(f"Failed to register model: {e}")
        raise


def promote_model_to_stage(model_name: Optional[str] = None,
                           version: Optional[str] = None,
                           stage: str = MODEL_STAGE_PRODUCTION) -> None:
    """
    Promote a model version to a specific stage using the fluent client.
    
    Args:
        model_name: Name of the registered model
        version: Version to promote (if None, promotes latest)
        stage: Target stage
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        # Get latest version if not specified
        if version is None:
            latest = client.get_latest_versions(name, stages=["None"])
            if not latest:
                raise ValueError(f"No versions found for model {name}")
            version = latest[0].version
        
        # Transition to stage
        client.transition_model_version_stage(
            name=name,
            version=version,
            stage=stage
        )
        print(f"Promoted model {name} version {version} to {stage}")
        
    except Exception as e:
        print(f"Failed to promote model: {e}")
        raise


def load_model_from_registry(model_name: Optional[str] = None,
                             stage: str = MODEL_STAGE_PRODUCTION):
    """
    Load a model from the registry by name and stage.
    
    Args:
        model_name: Name of the registered model
        stage: Stage to load from
        
    Returns:
        Loaded model
    """
    name = model_name or MODEL_NAME
    model_uri = f"models:/{name}/{stage}"
    
    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model {name} from {stage} stage")
        return model
    except Exception as e:
        print(f"Failed to load model from registry: {e}")
        raise


def load_model_from_run(run_id: str, artifact_path: str = "model"):
    """
    Load a model from a specific run.
    
    Args:
        run_id: MLflow run ID
        artifact_path: Path to the model artifact
        
    Returns:
        Loaded model
    """
    model_uri = f"runs:/{run_id}/{artifact_path}"
    
    try:
        model = mlflow.sklearn.load_model(model_uri)
        print(f"Loaded model from run {run_id}")
        return model
    except Exception as e:
        print(f"Failed to load model from run: {e}")
        raise


def get_model_info(model_name: Optional[str] = None,
                   stage: str = MODEL_STAGE_PRODUCTION) -> Dict[str, Any]:
    """
    Get information about a registered model using the fluent client.
    
    Args:
        model_name: Name of the registered model
        stage: Stage to get info for
        
    Returns:
        Model information dictionary
    """
    name = model_name or MODEL_NAME
    client = mlflow.tracking.MlflowClient()
    
    try:
        model_version = client.get_latest_versions(name, stages=[stage])[0]
        
        return {
            "name": model_version.name,
            "version": model_version.version,
            "stage": model_version.current_stage,
            "description": model_version.description,
            "creation_timestamp": model_version.creation_timestamp,
            "last_updated_timestamp": model_version.last_updated_timestamp,
            "run_id": model_version.run_id
        }
    except Exception as e:
        print(f"Failed to get model info: {e}")
        raise 


Overwriting src/mlops/model_registry.py


In [5]:
%%writefile src/mlops/training.py
"""Training utilities with MLflow integration."""
import mlflow
from mlflow import sklearn  # type: ignore
from mlflow import models  # type: ignore
import optuna
from optuna.integration.mlflow import MLflowCallback
import numpy as np
import pandas as pd
from typing import Optional, Tuple, List, Callable, cast, Any, Dict, NoReturn, TypeAlias
from numpy.typing import NDArray
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import Bunch

from .config import RANDOM_STATE, TEST_SIZE
from .experiment_utils import setup_mlflow_experiment
from .logging import (
    log_model_metrics,
    log_confusion_matrix,
    log_feature_importance,
    log_dataset_info,
    log_parameters
)
from .explainer import build_and_log_dashboard

# Type aliases for complex types
FloatArray: TypeAlias = NDArray[np.float64]
IntArray: TypeAlias = NDArray[np.int64]
DatasetTuple: TypeAlias = Tuple[FloatArray, FloatArray, IntArray, IntArray, List[str], List[str], StandardScaler]

def load_and_prepare_iris_data(
    test_size: float = TEST_SIZE,
    random_state: int = RANDOM_STATE
) -> DatasetTuple:
    """
    Load and prepare the Iris dataset.
    
    Args:
        test_size: Fraction of data to use for testing
        random_state: Random state for reproducibility
        
    Returns:
        Tuple of (X_train_scaled, X_test_scaled, y_train, y_test, 
                 feature_names, target_names, scaler)
    """
    # Load dataset
    iris: Any = load_iris()
    X: NDArray[np.float64] = cast(NDArray[np.float64], iris.data)
    y: NDArray[np.int64] = cast(NDArray[np.int64], iris.target)
    feature_names: List[str] = list(iris.feature_names)
    target_names: List[str] = list(iris.target_names)
    
    # Split data
    X_train: NDArray[np.float64]
    X_test: NDArray[np.float64]
    y_train: NDArray[np.int64]
    y_test: NDArray[np.int64]
    X_train, X_test, y_train, y_test = cast(
        Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int64], NDArray[np.int64]],
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.fit_transform(X_train))
    X_test_scaled: NDArray[np.float64] = cast(NDArray[np.float64], scaler.transform(X_test))
    
    return (X_train_scaled, X_test_scaled, y_train, y_test,
            feature_names, target_names, scaler)


# === (A) Logistic-Regression baseline =====================================
def train_logistic_regression_autolog(
    X_train: FloatArray,
    y_train: IntArray,
    X_test: FloatArray,
    y_test: IntArray,
    feature_names: list[str],
    target_names: list[str],
    run_name: str = "lr_autolog",
    register: bool = True,
    dashboard: bool = False,
    dashboard_port: int | None = None,
) -> str:
    """
    Fit + evaluate a Logistic-Regression baseline.

    Key improvements
    ----------------
    • Manually logs `accuracy` (and friends) via `log_model_metrics`
      so downstream code can rely on the key.  
    • Keeps the robust signature / input_example logic.
    """
    setup_mlflow_experiment()
    mlflow.sklearn.autolog(log_models=True)

    with mlflow.start_run(run_name=run_name) as run:
        model = LogisticRegression(random_state=RANDOM_STATE, max_iter=1_000).fit(
            X_train, y_train
        )

        # ── 1) manual metric logging ──────────────────────────────────────
        y_pred_test = model.predict(X_test)
        log_model_metrics(y_test, y_pred_test)              # <-- new
        log_confusion_matrix(y_test, y_pred_test, class_names=target_names)

        # ── 2) model artefact with signature ──────────────────────────────
        signature = mlflow.models.infer_signature(X_train, model.predict(X_train))
        sklearn.log_model(
            model,
            artifact_path="model",
            registered_model_name="iris_logistic_regression" if register else None,
            signature=signature,
            input_example=X_test[:5],
        )

        mlflow.evaluate(
            model=f"runs:/{run.info.run_id}/model",
            data=X_test,
            targets=y_test,
            model_type="classifier",
            evaluator_config={"label_list": list(range(len(target_names)))},
        )

        # ── 3) optional dashboard ─────────────────────────────────────────
        if dashboard:
            X_test_df = pd.DataFrame(X_test, columns=feature_names)
            build_and_log_dashboard(
                model, X_test_df, y_test, labels=target_names,
                run=run, port=dashboard_port, serve=True
            )
        return run.info.run_id



def create_rf_objective(
    X_train: NDArray[np.float64],
    y_train: NDArray[np.int64],
    X_valid: NDArray[np.float64],
    y_valid: NDArray[np.int64],
) -> Callable[[optuna.trial.Trial], float]:
    """
    Optuna objective that returns validation accuracy – *no* MLflow calls here.
    All logging is delegated to Optuna's MLflowCallback.
    """
    def objective(trial: optuna.trial.Trial) -> float:
        params: Dict[str, Any] = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 200),
            "max_depth": trial.suggest_int("max_depth", 2, 20),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "random_state": RANDOM_STATE,
        }
        clf = RandomForestClassifier(**params)
        clf.fit(X_train, y_train)
        preds = clf.predict(X_valid)
        return accuracy_score(y_valid, preds)

    return objective


# === (B) Random-Forest with Optuna ========================================
def train_random_forest_with_optimization(
    X_train: FloatArray,
    y_train: IntArray,
    X_test: FloatArray,
    y_test: IntArray,
    feature_names: list[str],
    target_names: list[str],
    *,
    n_trials: int = 50,
    run_name: str = "rf_optimization",
    register: bool = True,
    dashboard: bool = False,
    dashboard_port: int | None = None,
) -> str:
    """
    Optuna hyper-parameter search + robust metric logging (now includes `accuracy`).
    """
    setup_mlflow_experiment()
    disable_flag = mlflow.sklearn.autolog(disable=True)

    try:
        with mlflow.start_run(run_name=run_name) as parent:
            log_dataset_info(X_train, X_test, y_train, y_test)

            study = optuna.create_study(direction="maximize")
            study.optimize(
                create_rf_objective(X_train, y_train, X_test, y_test),
                n_trials=n_trials,
                callbacks=[
                    MLflowCallback(
                        tracking_uri=mlflow.get_tracking_uri(),
                        metric_name="accuracy",
                        mlflow_kwargs={"nested": True},
                    )
                ],
            )

            best_model = RandomForestClassifier(**study.best_params).fit(X_train, y_train)

            # ── 1) manual metric logging ──────────────────────────────────
            y_pred_test = best_model.predict(X_test)
            log_model_metrics(y_test, y_pred_test)           # <-- new
            log_confusion_matrix(y_test, y_pred_test, class_names=target_names)
            log_feature_importance(feature_names, best_model.feature_importances_)

            mlflow.log_metric("best_accuracy", study.best_value)

            # ── 2) model artefact with signature ──────────────────────────
            signature = mlflow.models.infer_signature(X_train, best_model.predict(X_train))
            sklearn.log_model(
                best_model,
                artifact_path="model",
                registered_model_name="iris_random_forest" if register else None,
                signature=signature,
                input_example=X_test[:5],
            )

            mlflow.evaluate(
                model=f"runs:/{parent.info.run_id}/model",
                data=X_test,
                targets=y_test,
                model_type="classifier",
                evaluator_config={"label_list": list(range(len(target_names)))},
            )

            if dashboard:
                X_test_df = pd.DataFrame(X_test, columns=feature_names)
                build_and_log_dashboard(
                    best_model, X_test_df, y_test, labels=target_names,
                    run=parent, port=dashboard_port, serve=True
                )
            return parent.info.run_id
    finally:
        if not disable_flag:
            mlflow.sklearn.autolog(disable=False)





# === (C) Robust comparator ===============================================
def compare_models(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> None:
    """
    Print the best run according to *metric_key* while gracefully
    falling-back to common alternates when the preferred key is missing.
    """
    from .experiment_utils import get_best_run

    fallback_keys = ["accuracy_score", "best_accuracy"]
    try:
        best = get_best_run(experiment_name, metric_key, maximize)
        rid = best["run_id"]

        # choose first key that exists
        score = best.get(f"metrics.{metric_key}")
        if score is None:
            for alt in fallback_keys:
                score = best.get(f"metrics.{alt}")
                if score is not None:
                    metric_key = alt
                    break

        model_type = best.get("params.model_type", "unknown")
        print(f"🏆 Best run: {rid}")
        print(f"📈 {metric_key}: {score if score is not None else 'N/A'}")
        print(f"🔖 Model type: {model_type}")
    except Exception as err:
        print(f"❌ Error comparing models: {err}")


Overwriting src/mlops/training.py


In [6]:
%%writefile src/mlops/explainer.py
from __future__ import annotations
import os
import socket
import logging
from pathlib import Path
from typing import Any, Sequence, Optional
from contextlib import closing

import mlflow
import psutil  # lightweight; already added to pyproject deps
from sklearn.utils.multiclass import type_of_target
from explainerdashboard import (
    ClassifierExplainer,
    RegressionExplainer,
    ExplainerDashboard,
)

logging.basicConfig(level=logging.INFO)

__all__ = ["build_and_log_dashboard", "load_dashboard_yaml", "_first_free_port", "_port_details"]


# ---------------------------------------------------------------------------
def _port_details(port: int) -> str:
    """
    Return a one-line string with PID & cmdline of the process
    listening on *port*, or '' if none / not discoverable.
    """
    for c in psutil.net_connections(kind="tcp"):
        if c.status == psutil.CONN_LISTEN and c.laddr and c.laddr.port == port:
            try:
                p = psutil.Process(c.pid)
                return f"[PID {p.pid} – {p.name()}] cmd={p.cmdline()}"
            except psutil.Error:
                return f"[PID {c.pid}] (no detail)"
    return ""

def _first_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return first free TCP port ≥ *start* on localhost."""
    for port in range(start, start + tries):
        try:
            with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
                s.settimeout(0.05)
                s.bind(("127.0.0.1", port))
                return port
        except OSError:
            # Port is in use, try next one
            continue
    raise RuntimeError("⚠️  No free ports found in range")

def _next_free_port(start: int = 8050, tries: int = 50) -> int:
    """Return the first free TCP port ≥ *start*. (Alias for backward compatibility)"""
    return _first_free_port(start, tries)

def _port_in_use(port: int) -> bool:
    """Check if a port is already in use on any interface."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(0.05)
        # Check both localhost and 0.0.0.0 to be thorough
        try:
            # First check localhost (127.0.0.1)
            if s.connect_ex(("127.0.0.1", port)) == 0:
                return True
            # Also check if anything is bound to all interfaces
            if s.connect_ex(("0.0.0.0", port)) == 0:
                return True
        except (socket.gaierror, OSError):
            # If we can't connect, assume port is free
            pass
        return False


# ---------------------------------------------------------------------------
def build_and_log_dashboard(
    model: Any,
    X_test,                        # 2-D ndarray / DataFrame
    y_test,                        # 1-D labels or targets
    *,
    # ---- new kwargs mirrored from ExplainerDashboard ----------------------
    cats: Optional[Sequence[str]] = None,
    idxs: Optional[Sequence[Any]] = None,
    descriptions: Optional[dict[str, str]] = None,
    target: Optional[str] = None,
    labels: Optional[Sequence[str]] = None,
    X_background=None,
    model_output: str = "probability",
    shap: str = "guess",
    shap_interaction: bool = True,
    simple: bool = False,
    mode: str = "external",        # inline · jupyterlab · external
    title: str = "Model Explainer",
    # ---- infra ------------------------------------------------------------
    run: mlflow.ActiveRun | None = None,
    port: int | None = None,
    serve: bool = False,
    save_yaml: bool = True,
    output_dir: os.PathLike | str | None = None,
) -> Path:
    """
    Build + log ExplainerDashboard, with improved port handling:

    • If *port* is None we auto-select the first free port ≥ 8050  
    • If *port* is occupied we print owner details & **abort** (caller decides)  
      -- avoids silent failure that confused you earlier.

    Returns
    -------
    Path to the saved ``dashboard.yaml``    (or the HTML file if save_yaml=False)
    """
    # 1️⃣ Pick correct explainer ------------------------------------------------
    problem = type_of_target(y_test)
    if problem in {"continuous", "continuous-multioutput"}:
        ExplainerCls = RegressionExplainer
        # RegressionExplainer doesn't support 'labels' or 'model_output' parameters
        explainer_kwargs = {
            "cats": cats,
            "idxs": idxs,
            "descriptions": descriptions,
            "target": target,
            "X_background": X_background,
            "shap": shap,
        }
    else:
        ExplainerCls = ClassifierExplainer
        # ClassifierExplainer supports all parameters
        explainer_kwargs = {
            "cats": cats,
            "idxs": idxs,
            "descriptions": descriptions,
            "target": target,
            "labels": labels,
            "X_background": X_background,
            "model_output": model_output,
            "shap": shap,
        }

    # Filter out None values to avoid issues
    explainer_kwargs = {k: v for k, v in explainer_kwargs.items() if v is not None}
    
    explainer = ExplainerCls(
        model,
        X_test,
        y_test,
        **explainer_kwargs
    )

    dash = ExplainerDashboard(
        explainer,
        title=title,
        shap_interaction=shap_interaction,
        simple=simple,
        mode=mode,
    )

    # 2️⃣ Persist + log artefacts ----------------------------------------------
    out_dir = Path(output_dir or ".")
    out_dir.mkdir(parents=True, exist_ok=True)

    html_path = out_dir / "explainer_dashboard.html"
    dash.save_html(html_path)
    mlflow.log_artifact(str(html_path))

    yaml_path: Path | None = None
    if save_yaml:
        yaml_path = out_dir / "dashboard.yaml"
        dash.to_yaml(yaml_path)
        mlflow.log_artifact(str(yaml_path))

    # 3️⃣ Optional serving ----------------------------------------------------
    if serve:
        chosen = port or _first_free_port()
        if _port_in_use(chosen):
            details = _port_details(chosen)
            raise RuntimeError(
                f"❌ Port {chosen} already in use {details}. "
                "Either pass a different --port or stop the process."
            )
        logging.info("🌐 Serving dashboard on http://0.0.0.0:%s", chosen)
        dash.run(chosen, host="0.0.0.0", use_waitress=True, open_browser=False)

    return yaml_path or html_path

# ---------------------------------------------------------------------------
def load_dashboard_yaml(path: os.PathLike | str) -> ExplainerDashboard:
    """Reload a YAML config – unchanged but kept for public API."""
    return ExplainerDashboard.from_config(path) 

   

Overwriting src/mlops/explainer.py


In [7]:
%%writefile src/mlops/utils.py
from pathlib import Path
import os

_added_src_flag: bool = False          # module-level cache

def project_root() -> Path:
    """
    Return the absolute path to the repo root *without* relying on __file__.

    • If running from a .py file, use that file's parent/parent (…/src/..)
    • If running interactively (no __file__), fall back to CWD.
    """
    if "__file__" in globals():
        return Path(__file__).resolve().parent.parent
    return Path.cwd()

def ensure_src_on_path(verbose: bool = True) -> None:
    """
    Ensure <repo-root>/src is the *first* entry in sys.path exactly once.
    The verbose flag prints the helper line the first time only.
    """
    import sys
    global _added_src_flag
    root = project_root()
    src_path = root / "src"

    if str(src_path) not in sys.path:
        sys.path.insert(0, str(src_path))
        if verbose and not _added_src_flag:
            print(f"🔧 Added {src_path} to sys.path")
        _added_src_flag = True



Overwriting src/mlops/utils.py


In [8]:
%%writefile src/mlops/experiment_utils.py
"""MLflow experiment utilities."""
import os
import pathlib
import mlflow
import mlflow.tracking
from typing import Optional, Dict, Any
import requests

from .config import EXPERIMENT_NAME, TRACKING_URI

_HEALTH_ENDPOINTS = ("/health", "/version")


def _ping_tracking_server(uri: str, timeout: float = 2.0) -> bool:
    """Return True iff an HTTP MLflow server is reachable at *uri*."""
    if not uri.startswith("http"):
        return False                        # file store – nothing to ping
    try:
        # Use new health endpoints
        for ep in _HEALTH_ENDPOINTS:
            response = requests.get(uri.rstrip("/") + ep, timeout=timeout)
            response.raise_for_status()
        return True
    except Exception:
        return False


def _fallback_uri() -> str:
    """Return a local file-based URI relative to the repo root."""
    root = pathlib.Path.cwd()
    return f"file:{root}/mlruns"


def setup_mlflow_experiment(experiment_name: Optional[str] = None) -> None:
    """
    Idempotently configure MLflow **and** guarantee the experiment exists.
    • If TRACKING_URI points at an HTTP server that is unreachable, we fall
      back to a local file store so the script never crashes.
    
    Args:
        experiment_name: Name of the experiment. If None, uses default.
    """
    # Use provided name or fall back to config default
    exp_name = experiment_name or EXPERIMENT_NAME
    
    uri = TRACKING_URI
    if not _ping_tracking_server(uri):
        local_uri = _fallback_uri()
        print(
            f"⚠️  MLflow server unreachable at {uri} – "
            f"falling back to local store {local_uri}"
        )
        uri = local_uri

    mlflow.set_tracking_uri(uri)

    if mlflow.get_experiment_by_name(exp_name) is None:
        mlflow.create_experiment(
            exp_name,
            artifact_location=os.getenv("MLFLOW_ARTIFACT_ROOT",
                                        _fallback_uri())
        )
    mlflow.set_experiment(exp_name)
    print(f"🗂  Using MLflow experiment '{exp_name}' @ {uri}")


def get_best_run(
    experiment_name: Optional[str] = None,
    metric_key: str = "accuracy",
    maximize: bool = True,
) -> Dict[str, Any]:
    """
    Return a *shallow* dict with run_id, metrics.*, and params.* keys
    so downstream code can use predictable dotted paths.
    """
    exp_name = experiment_name or EXPERIMENT_NAME
    setup_mlflow_experiment(exp_name)

    client = mlflow.tracking.MlflowClient()
    exp = mlflow.get_experiment_by_name(exp_name)
    if exp is None:
        raise ValueError(f"Experiment '{exp_name}' not found")

    order = "DESC" if maximize else "ASC"
    run = client.search_runs(
        [exp.experiment_id],
        order_by=[f"metrics.{metric_key} {order}"],
        max_results=1,
    )[0]

    # Build a *flat* mapping -------------------------------------------------
    flat: Dict[str, Any] = {"run_id": run.info.run_id}

    # Metrics
    for k, v in run.data.metrics.items():
        flat[f"metrics.{k}"] = v

    # Params
    for k, v in run.data.params.items():
        flat[f"params.{k}"] = v

    # Tags (optional but handy)
    for k, v in run.data.tags.items():
        flat[f"tags.{k}"] = v

    return flat



Overwriting src/mlops/experiment_utils.py


In [9]:
# %%writefile src/examples/iris_classification_example.py
#!/usr/bin/env python3
"""
Iris Classification Example (argparse-free, notebook-safe).

Configuration:
    • export EXPLAINER_DASHBOARD=1   # launch dashboard
    • export EXPLAINER_PORT=8150     # optional port override
"""

from __future__ import annotations
import os, logging

from src.mlops.utils import ensure_src_on_path
ensure_src_on_path()

from src.mlops.training import (
    load_and_prepare_iris_data,
    train_logistic_regression_autolog,
    train_random_forest_with_optimization,
    compare_models,
)
from src.mlops.model_registry import load_model_from_run
from src.mlops.experiment_utils import get_best_run

logging.basicConfig(level=logging.INFO)


def _bool_env(var: str, default: bool = False) -> bool:
    v = os.getenv(var)
    return default if v is None else v.lower() in {"1", "true", "yes"}


def main(*, dashboard: bool = False, dashboard_port: int | None = None) -> None:
    print("🌸 Iris Classification with MLflow\n" + "=" * 50)

    # 1 Load data ------------------------------------------------------------
    X_train, X_test, y_train, y_test, feat_names, tgt_names, _ = (
        load_and_prepare_iris_data()
    )
    print(f"✓ Training samples: {len(X_train)} | Test: {len(X_test)}")

    # 2 Logistic Regression --------------------------------------------------
    lr_run = train_logistic_regression_autolog(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        run_name="lr_baseline",
        register=True,
        dashboard=dashboard,
        dashboard_port=dashboard_port,
    )
    print(f"✓ Logistic run {lr_run[:8]}")

    # 3 Random Forest + Optuna ----------------------------------------------
    rf_run = train_random_forest_with_optimization(
        X_train,
        y_train,
        X_test,
        y_test,
        feat_names,
        tgt_names,
        n_trials=20,
        run_name="rf_optimized",
        register=True,
        dashboard=dashboard,
        dashboard_port=dashboard_port,
    )
    print(f"✓ RF run {rf_run[:8]}")

    # 4 Compare & test best --------------------------------------------------
    compare_models()
    best = get_best_run()
    mdl = load_model_from_run(best["run_id"])
    acc = (mdl.predict(X_test) == y_test).mean()
    print(f"🏆 Best model accuracy: {acc:.4f}")

    if dashboard:
        port = dashboard_port or int(os.getenv("EXPLAINER_PORT", "8050"))
        print(f"\n🚀 ExplainerDashboard running on http://localhost:{port}")


if __name__ == "__main__":
    main(
        dashboard=_bool_env("EXPLAINER_DASHBOARD", True),
        dashboard_port=int(os.getenv("EXPLAINER_PORT", "8050")),
    )


🔧 Added /workspace/src/src to sys.path


🌸 Iris Classification with MLflow
✓ Training samples: 120 | Test: 30
🗂  Using MLflow experiment 'iris_classification' @ http://mlflow:5000


Registered model 'iris_logistic_regression' already exists. Creating a new version of this model...
2025/06/25 18:15:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_logistic_regression, version 2
Created version '2' of model 'iris_logistic_regression'.


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

2025/06/25 18:15:39 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as multiclass dataset, number of classes is inferred as 3. If this is incorrect, please specify the `label_list` parameter in `evaluator_config`.
2025/06/25 18:15:39 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2025/06/25 18:15:40 INFO mlflow.models.evaluation.evaluators.shap: Shap explainer ExactExplainer is used.


🏃 View run lr_baseline at: http://mlflow:5000/#/experiments/1/runs/643f66f3664946d5b9b94fee5c29679c
🧪 View experiment at: http://mlflow:5000/#/experiments/1


KeyboardInterrupt: 

In [12]:
%%writefile tests/test_mlflow_integration.py

"""Tests for MLflow integration modules."""
import sys
import os

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

from mlops.experiment import setup_mlflow_experiment
from mlops.training import (
    load_and_prepare_iris_data, 
    train_logistic_regression
)
from mlops.model_registry import load_model_from_run


def test_data_loading():
    """Test that data loading works correctly."""
    data = load_and_prepare_iris_data()
    X_train, X_test, y_train, y_test, feature_names, target_names, scaler = data
    
    assert len(X_train) > 0
    assert len(X_test) > 0
    assert len(feature_names) == 4
    assert len(target_names) == 3
    assert X_train.shape[1] == 4  # 4 features


def test_experiment_setup():
    """Test that MLflow experiment setup works."""
    # This should not raise an exception
    setup_mlflow_experiment("test_experiment")
    

def test_model_training_and_loading():
    """Test end-to-end model training and loading."""
    # Load data
    data = load_and_prepare_iris_data()
    X_train, X_test, y_train, y_test, feature_names, target_names, scaler = data
    
    # Train a simple model
    run_id = train_logistic_regression(
        X_train, y_train, X_test, y_test,
        feature_names, target_names,
        run_name="test_lr",
        register=False  # Don't register for tests
    )
    
    assert run_id is not None
    assert len(run_id) > 0
    
    # Load the model back
    model = load_model_from_run(run_id, "model")
    
    # Test prediction
    predictions = model.predict(X_test)
    assert len(predictions) == len(y_test)
    
    # Check accuracy is reasonable (should be > 0.8 for iris)
    accuracy = (predictions == y_test).mean()
    assert accuracy > 0.8


if __name__ == "__main__":
    # Run tests
    test_data_loading()
    print("✓ Data loading test passed")
    
    test_experiment_setup()
    print("✓ Experiment setup test passed")
    
    test_model_training_and_loading()
    print("✓ Model training and loading test passed")
    
    print("\nAll tests passed! 🎉") 

Overwriting tests/test_mlflow_integration.py


In [13]:
%%writefile tests/test_explainer.py
import sys
import os
import pytest
from pathlib import Path

# Add src to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

def test_yaml_roundtrip(tmp_path):
    """Test that a dashboard can be saved to YAML and reloaded."""
    from src.mlops.explainer import build_and_log_dashboard, load_dashboard_yaml
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    import mlflow
    import pandas as pd

    iris = load_iris()
    X, y = iris.data, iris.target
    X_df = pd.DataFrame(X, columns=iris.feature_names)
    model = LogisticRegression(max_iter=1000).fit(X, y)
    with mlflow.start_run():
        yaml_path = build_and_log_dashboard(
            model, X_df, y,
            serve=False,
            save_yaml=True,
            output_dir=tmp_path
        )
        # Reload
        dash = load_dashboard_yaml(yaml_path)
        assert dash.explainer.model.__class__.__name__ == "LogisticRegression"


def test_build_dashboard(tmp_path):
    """Test that a dashboard can be built and saved."""
    from src.mlops.explainer import build_and_log_dashboard
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    import mlflow
    import pandas as pd

    iris = load_iris()
    X, y = iris.data, iris.target
    X_df = pd.DataFrame(X, columns=iris.feature_names)
    model = LogisticRegression(max_iter=1000).fit(X, y)
    with mlflow.start_run():
        html = build_and_log_dashboard(
            model, X_df, y,
            serve=False,
            save_yaml=False,
            output_dir=tmp_path
        )
        assert html.exists() and html.suffix == ".html" 


Overwriting tests/test_explainer.py
