In [1]:
import os

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
# 1. Set MLflow tracking URI
# Use MLFLOW_TRACKING_URI when it is set (and non-empty) so the notebook can be
# pointed at another tracking server without editing code; otherwise fall back
# to the local server address this notebook has always used.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://localhost:5000"
mlflow.set_tracking_uri(tracking_uri)

In [3]:
# 2. Prepare data
# Load iris and wrap the feature matrix in a DataFrame with named columns.
iris = load_iris()
all_features = pd.DataFrame(iris.data, columns=iris.feature_names)
all_labels = iris.target

# Keep only two classes for simplicity: drop every row labelled 2.
# The boolean mask is built once and applied to both features and labels so
# the two stay aligned row for row.
keep_rows = all_labels != 2
X = all_features[keep_rows]
y = all_labels[keep_rows]

# Fixed random_state makes the train/test split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [4]:
# 3. Start MLflow experiment & train model
# Everything inside the `with` block is recorded against a single run named
# "rf-run" in the experiment below.
experiment_name = "my_experiment"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="rf-run"):
    # Hyperparameters
    n_estimators = 50
    max_depth = 5
    # One dict feeds both the estimator and the MLflow log, so the logged
    # params cannot drift from the values the model was actually built with.
    hyperparams = {"n_estimators": n_estimators, "max_depth": max_depth}

    rf = RandomForestClassifier(**hyperparams, random_state=42)
    rf.fit(X_train, y_train)

    # Evaluate: mean accuracy on the held-out test split.
    accuracy = rf.score(X_test, y_test)

    # 4. Log params, metrics
    mlflow.log_params(hyperparams)
    mlflow.log_metric("accuracy", accuracy)

    # 5. Log model
    # The input example documents the expected feature columns alongside the
    # registered model (and lets MLflow record a signature where supported).
    mlflow.sklearn.log_model(
        rf,
        artifact_path="model",
        registered_model_name="my_sklearn_model",
        input_example=X_train.head(5),
    )

    # Optional, intentionally disabled: promote the registered version.
    # NOTE(review): `version=1` is hard-coded; presumably a re-run of this cell
    # registers a higher version, so this would not target the newest model.
    # NOTE(review): recent MLflow releases deprecate model stages in favour of
    # aliases -- confirm against the installed version before enabling.
    # from mlflow.tracking import MlflowClient
    # client = MlflowClient()
    # client.transition_model_version_stage(
    #     name="my_sklearn_model",
    #     version=1,
    #     stage="Production"
    # )

    print(f"Model logged with accuracy: {accuracy}")

2025/02/23 19:18:15 INFO mlflow.tracking.fluent: Experiment with name 'my_experiment' does not exist. Creating a new experiment.
Successfully registered model 'my_sklearn_model'.
2025/02/23 19:18:30 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: my_sklearn_model, version 1


Model logged with accuracy: 1.0


Created version '1' of model 'my_sklearn_model'.
