In [None]:
# Launch the MLflow tracking server that the Python cells in this notebook
# connect to (they use http://localhost:5000, matching --port 5000 here).
# NOTE(review): this is shell syntax and the cell has no execution count --
# presumably it is meant to be run in a separate terminal; confirm.
# Options: --backend-store-uri points at the SQLite file mlflow.db,
# --default-artifact-root at ./mlruns, and --host 0.0.0.0 asks the server to
# listen on all network interfaces (TODO confirm this exposure is intended).
mlflow server \
--host 0.0.0.0 --port 5000 \
--backend-store-uri sqlite:///mlflow.db \
--default-artifact-root ./mlruns

# Sklearn Autologging (Easy Mode)

In [4]:
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

# Configure the tracking server and the experiment that receives this run
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("sklearn-demo")
mlflow.sklearn.autolog()  # log params/metrics/model automatically

# Prepare the dataset: diabetes regression data, 80/20 train/test split
X, y = load_diabetes(return_X_y=True)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42)

# Start the run
with mlflow.start_run():
    # Hyperparameters are collected in one place and unpacked into the estimator
    enet_params = {"alpha": 0.1, "l1_ratio": 0.5, "random_state": 42}
    model = ElasticNet(**enet_params)
    model.fit(Xtr, ytr)
    preds = model.predict(Xte)

    # Compute RMSE on the held-out split (this is the corrected part):
    # take the square root of the MSE explicitly
    mse = mean_squared_error(yte, preds)
    rmse = np.sqrt(mse)

    mlflow.log_metric(key="rmse", value=rmse)


🏃 View run fun-trout-767 at: http://localhost:5000/#/experiments/1/runs/f15de8c72b0c4cd4806352ed62370c07
🧪 View experiment at: http://localhost:5000/#/experiments/1


In [5]:
# --- imports ---
import mlflow
import mlflow.sklearn

import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- MLflow setup ---
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("sklearn-demo")
mlflow.sklearn.autolog()  # will log params/metrics/model for each run automatically

# --- data ---
X, y = load_diabetes(return_X_y=True)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42)

# --- model zoo & small param grids (tweak as you like) ---
# Each entry maps a display name to the estimator class ("cls") and the
# ParameterGrid of constructor keyword arguments to try ("grid").
model_spaces = {
    "ElasticNet": {
        "cls": ElasticNet,
        "grid": ParameterGrid({"alpha": [0.01, 0.1, 1.0], "l1_ratio": [0.2, 0.5, 0.8], "random_state": [42]})
    },
    "RandomForest": {
        "cls": RandomForestRegressor,
        "grid": ParameterGrid({"n_estimators": [200, 500], "max_depth": [None, 8, 16], "random_state": [42], "n_jobs": [-1]})
    },
    "GradientBoosting": {
        "cls": GradientBoostingRegressor,
        "grid": ParameterGrid({"n_estimators": [200, 500], "learning_rate": [0.05, 0.1], "max_depth": [2, 3], "random_state": [42]})
    },
    "SVR": {
        "cls": SVR,
        "grid": ParameterGrid({"C": [1.0, 10.0], "epsilon": [0.1, 0.2], "kernel": ["rbf"]})
        # Note: SVR usually benefits from feature scaling. For a quick demo we skip it.
    },
    "KNN": {
        "cls": KNeighborsRegressor,
        "grid": ParameterGrid({"n_neighbors": [3, 5, 11], "weights": ["uniform", "distance"]})
    },
}

# One dict per child run; turned into a DataFrame once the sweep is done.
summary_rows = []

# --- Parent run to group everything ---
with mlflow.start_run(run_name="multi-model sweep") as parent_run:
    # Single batched call instead of one request per tag.
    mlflow.set_tags({
        "purpose": "multi-model-comparison",
        "data": "sklearn.load_diabetes",
        "split": "80/20 random_state=42",
    })

    for model_name, spec in model_spaces.items():
        # The estimator class is the same for every configuration of this model,
        # so look it up once per model rather than once per configuration.
        ModelClass = spec["cls"]

        for params in spec["grid"]:
            # child run per configuration
            with mlflow.start_run(run_name=model_name, nested=True) as child_run:
                mlflow.set_tag("model_name", model_name)

                # build & fit
                model = ModelClass(**params)
                model.fit(Xtr, ytr)

                # predict & metrics on the held-out split
                preds = model.predict(Xte)

                # RMSE is derived from the plain MSE with an explicit sqrt instead
                # of relying on the `squared=` keyword, so it works regardless of
                # which scikit-learn version is installed.
                mse = mean_squared_error(yte, preds)
                rmse = float(np.sqrt(mse))
                mae = float(mean_absolute_error(yte, preds))
                r2 = float(r2_score(yte, preds))

                # additional explicit logs (autolog already logs many things);
                # sent as one batch rather than three separate requests
                mlflow.log_metrics({"rmse": rmse, "mae": mae, "r2": r2})

                # keep for summary
                row = {"run_id": child_run.info.run_id, "model": model_name, **params, "RMSE": rmse, "MAE": mae, "R2": r2}
                summary_rows.append(row)

    # after all children finish, log a summary artifact on the parent run
    df_summary = pd.DataFrame(summary_rows)
    # Sort best first (lowest RMSE, ties broken by lowest MAE)
    df_summary_sorted = df_summary.sort_values(by=["RMSE", "MAE"], ascending=[True, True]).reset_index(drop=True)

    # save & log
    summary_path = "multi_model_summary.csv"
    df_summary_sorted.to_csv(summary_path, index=False)
    mlflow.log_artifact(summary_path)

# show quick top-10 in notebook output
df_summary_sorted.head(10)


🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/82f4539a314242879d055bf67d441ebe
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/789bd1419cc54e8991fb29c7e0a5c02b
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/4f27e464897341a298626d1b8f203ef9
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/daeceb4804ee43a7b25498bbbbaae4ff
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/820c5bffa4664f0f8c5ad8267129fc6e
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: http://localhost:5000/#/experiments/1/runs/d8579ee3e22c4aef90417dcad491fbe3
🧪 View experiment at: http://localhost:5000/#/experiments/1
🏃 View run ElasticNet at: ht

Unnamed: 0,run_id,model,alpha,l1_ratio,random_state,RMSE,MAE,R2,max_depth,n_estimators,n_jobs,learning_rate,C,epsilon,kernel,n_neighbors,weights
0,4c7faa4abe3d44afbbc554b5931c3e81,SVR,,,,51.754973,41.230223,0.494432,,,,,10.0,0.2,rbf,,
1,3b845c81714f4d8e9382a4a8ad9467c7,SVR,,,,51.766236,41.243197,0.494212,,,,,10.0,0.1,rbf,,
2,6f3045264247484a8045f9a185e0184b,GradientBoosting,,,42.0,52.871462,42.778329,0.472384,2.0,200.0,,0.05,,,,,
3,1611d7dfda514cfb8e47178107c3780b,GradientBoosting,,,42.0,53.183771,43.420623,0.466132,3.0,200.0,,0.05,,,,,
4,c0d93ee475fb4829b00f959cdcaa0351,RandomForest,,,42.0,54.235353,44.004402,0.444811,8.0,200.0,-1.0,,,,,,
5,4c00427ada9a4a9e80e25ed9140e8105,KNN,,,,54.408039,44.198823,0.44127,,,,,,,,11.0,distance
6,525dedc4c13e49b0900616bfab79c219,RandomForest,,,42.0,54.461217,44.276124,0.440178,,200.0,-1.0,,,,,,
7,49f29b43a2524de287d397b7d3a2a2a1,RandomForest,,,42.0,54.523721,44.292855,0.438892,8.0,500.0,-1.0,,,,,,
8,eb9a3454163d4df89b89af7f4b1a396b,KNN,,,,54.526545,42.260874,0.438834,,,,,,,,5.0,distance
9,8d80a910275f4a328084d3bb61ad551e,RandomForest,,,42.0,54.550112,44.28676,0.438349,16.0,200.0,-1.0,,,,,,
