In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.datasets import load_breast_cancer
import mlflow
import mlflow.sklearn

In [11]:
np.random.seed(42)

# loading the dataset
X, y = load_breast_cancer(return_X_y=True)

#split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)

exp_name = "mlflow-randomforest-cancer"
exiting_exp = mlflow.get_experiment_by_name(exp_name)
if not exiting_exp:
    exp_id = mlflow.create_experiment(exp_name, artifact_location="...")
else:
    exp_id = dict(exiting_exp)['experiment_id']
mlflow.set_experiment(exp_name)


<Experiment: artifact_location='file:///home/ajeet/Documents/Officials/AI-for-Engineers/Tools-and-Frameworks/MLFlow/mlruns/824072893854305905', creation_time=1753425995835, experiment_id='824072893854305905', last_update_time=1753425995835, lifecycle_stage='active', name='mlflow-randomforest-cancer', tags={}>

In [12]:
def eval_metrics(actual, pred, pred_proba):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    roc_auc = roc_auc_score(actual, pred_proba)
    return rmse, roc_auc

In [13]:
for idx, n_estimators in enumerate([5, 10, 20]):
    rf=RF(n_estimators= n_estimators, random_state=42)
    rf.fit(X_train, y_train)

    pred_probs = rf.predict_proba(X_test)
    pred_labels = rf.predict(X_test)

    # compute rms and roc-auc for the random forest model
    # prediction on the test set
    rmse, roc_auc  = eval_metrics(actual=y_test,
    pred = pred_labels,
    pred_proba = [iter[1] for iter in pred_probs])

    # start mlflow
    RUN_NAME = f"run_{idx}"
    with mlflow.start_run(experiment_id=exp_id, run_name=RUN_NAME) as run:
        # retrieve run id
        RUN_ID = run.info.run_id
        # track parameters
        mlflow.log_param("n_estimators", n_estimators)
        # track metrics
        mlflow.log_metric("rmse", rmse)
        # track metrics
        mlflow.log_metric("roc_auc", roc_auc)
        # track model
        mlflow.sklearn.log_model(rf, "model")






In [16]:
from mlflow.tracking import  MlflowClient

exp_name = 'mlflow-randomforest-cancer'

client = MlflowClient()

# retrieve the experiment information
exp_id = client.get_experiment_by_name(exp_name).experiment_id

In [17]:
# retrieve runs information (parameter: 'n_estimators', metric: 'roc_auc')
experiment_info = mlflow.search_runs([exp_id])
# extracting run ids for the specified experiment
runs_id = experiment_info.run_id.values
# extracting parameters of different runs
runs_param = [client.get_run(run_id).data.params["n_estimators"] for run_id in runs_id]
# extracting roc-auc across different runs
runs_metric = [client.get_run(run_id).data.metrics["roc_auc"] for run_id in runs_id]

In [18]:
# retrieve artifact from best run
df = mlflow.search_runs([exp_id], order_by=["metrics.roc_auc"])
best_run_id = df.loc[0,'run_id']
best_model_path = client.download_artifacts(best_run_id, "model")
best_model = mlflow.sklearn.load_model(best_model_path)
print("Best model: {}".format(best_model))

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 6450.79it/s] 

Best model: RandomForestClassifier(n_estimators=5, random_state=42)





Run the following command to Envoke the MLFlow UI for Tracking Experiments 
mlflow ui --port 5000


In [20]:

# delete runs (mke sure you are certain about deleting the runs)
for run_id in runs_id:
    client.delete_run(run_id)

# delete experiment (make sure you are certain about deleting the experiment)
client.delete_experiment(exp_id)

In [None]:
https://mlflow.org/docs/latest/ml/