In [1]:
# Set the MLFLOW_TRACKING_URI environment variable for this kernel so runs are
# sent to the tracking server at 127.0.0.1:5001 (a server must already be
# listening there).
%env MLFLOW_TRACKING_URI=http://127.0.0.1:5001

env: MLFLOW_TRACKING_URI=http://127.0.0.1:5001


In [2]:
import mlflow

import numpy as np
from sklearn import datasets, metrics
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split


def eval_metrics(pred, actual):
    """Score predictions against ground truth with three regression metrics.

    Args:
        pred: Predicted target values.
        actual: Ground-truth target values.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and R^2 score, in that order.
    """
    # RMSE is the square root of the mean squared error.
    mse = metrics.mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        metrics.mean_absolute_error(actual, pred),
        metrics.r2_score(actual, pred),
    )

In [3]:
# Set the experiment name
mlflow.set_experiment("wine-quality")
# Enable scikit-learn auto-logging to MLflow
mlflow.sklearn.autolog()

In [4]:
# Load the wine dataset bundled with scikit-learn as (features, target) arrays
X, y = datasets.load_wine(return_X_y=True)
# Hold out 25% of the samples for testing. The fixed seed keeps the split, and
# therefore the logged metrics, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Start a run and train a model with ElasticNet's default hyperparameters
with mlflow.start_run(run_name="default-params"):
    lr = ElasticNet()
    lr.fit(X_train, y_train)

    # Evaluate on the held-out test set
    y_pred = lr.predict(X_test)
    rmse, mae, r2 = eval_metrics(y_pred, y_test)
    mlflow.log_metrics(
        {
            # eval_metrics returns the *root* mean squared error, so the key
            # names it as such instead of labelling it as plain MSE.
            "root_mean_squared_error_X_test": rmse,
            "mean_absolute_error_X_test": mae,
            "r2_score_X_test": r2,
        }
    )

2024/10/29 20:40:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run default-params at: http://127.0.0.1:5001/#/experiments/180333391132533584/runs/24503bffd09e4f57934598769de968a6.
2024/10/29 20:40:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5001/#/experiments/180333391132533584.


In [5]:
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV

lr = ElasticNet()

# Define distributions to pick parameter values from.
# scipy's uniform(loc, scale) samples from the interval [loc, loc + scale].
distributions = dict(
    alpha=uniform(loc=0, scale=10),  # sample alpha uniformly from [0, 10.0]
    l1_ratio=uniform(),  # sample l1_ratio uniformly from [0, 1.0]
)

# Initialize random search instance
clf = RandomizedSearchCV(
    estimator=lr,
    param_distributions=distributions,
    # Optimize for mean absolute error
    scoring="neg_mean_absolute_error",
    # Use 5-fold cross validation
    cv=5,
    # Try 100 samples. Note that MLflow only logs the top 5 runs.
    n_iter=100,
    # Fix the seed so the same candidates are sampled on every re-run
    random_state=42,
)

# Start a parent run
with mlflow.start_run(run_name="hyperparameter-tuning"):
    # fit() returns the fitted search object itself; keep it under `search`
    # so the name stays available to later cells.
    search = clf.fit(X_train, y_train)

    # Evaluate the best model on test dataset
    y_pred = search.best_estimator_.predict(X_test)
    rmse, mae, r2 = eval_metrics(y_pred, y_test)
    mlflow.log_metrics(
        {
            # eval_metrics returns the *root* mean squared error, so the key
            # names it as such instead of labelling it as plain MSE.
            "root_mean_squared_error_X_test": rmse,
            "mean_absolute_error_X_test": mae,
            "r2_score_X_test": r2,
        }
    )


2024/10/29 20:40:45 INFO mlflow.sklearn.utils: Logging the 5 best runs, 95 runs will be omitted.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run sedate-bee-74 at: http://127.0.0.1:5001/#/experiments/180333391132533584/runs/75b7d0ddc46942cbbd3d4606dbe9123a.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5001/#/experiments/180333391132533584.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run clumsy-ray-675 at: http://127.0.0.1:5001/#/experiments/180333391132533584/runs/4fc418d9abad4661b7ddcefbf586d6ce.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5001/#/experiments/180333391132533584.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run sedate-shoat-834 at: http://127.0.0.1:5001/#/experiments/180333391132533584/runs/49b3440174844572bd9b4b6cc769da3d.
2024/10/29 20:40:46 INFO mlflow.trackin

2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run nosy-tern-179 at: http://127.0.0.1:5001/#/experiments/180333391132533584/runs/5fc75329917a4751866d73acb071d2ed.
2024/10/29 20:40:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5001/#/experiments/180333391132533584.
