In [19]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature

# NOTE: review the links mentioned above for guidance on connecting to a managed tracking server, such as the Databricks Managed MLflow

mlflow.set_tracking_uri(uri="http://127.0.0.1:8081")

In [26]:
# Load the Iris dataset
X, y = datasets.load_iris(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model hyperparameters
params = {"solver": "lbfgs", "max_iter": 100, "multi_class": "auto", "random_state": 42}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the test set
y_pred = lr.predict(X_test)

# Calculate accuracy as a target loss metric
accuracy = accuracy_score(y_test, y_pred)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
mlflow.set_experiment("MLflow Quickstart")

2025/06/29 13:38:14 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Quickstart' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/693506764269325148', creation_time=1751184494117, experiment_id='693506764269325148', last_update_time=1751184494117, lifecycle_stage='active', name='MLflow Quickstart', tags={}>

In [27]:
# Start an MLflow run
with mlflow.start_run():
  # Log the hyperparameters
  mlflow.log_params(params)

  # Log the loss metric
  mlflow.log_metric("accuracy", accuracy)

  # Set a tag that we can use to remind ourselves what this run was for
  mlflow.set_tag("Training Info", "Basic LR model for iris data")

  # Infer the model signature
  signature = infer_signature(X_train, lr.predict(X_train))

  # Log the model
  model_info = mlflow.sklearn.log_model(
      sk_model=lr,
      name="iris_model",
      signature=signature,
      input_example=X_train,
      registered_model_name="tracking-quickstart",
  )

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/06/29 13:40:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 2


🏃 View run tasteful-lynx-296 at: http://127.0.0.1:8081/#/experiments/693506764269325148/runs/4c8fd403cdb8432b839c196c8aa39cbd
🧪 View experiment at: http://127.0.0.1:8081/#/experiments/693506764269325148


Created version '2' of model 'tracking-quickstart'.


In [29]:
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [30]:
predictions = loaded_model.predict(X_test)

iris_feature_names = datasets.load_iris().feature_names

# Convert X_test validation feature data to a Pandas DataFrame
result = pd.DataFrame(X_test, columns=iris_feature_names)

# Add the actual classes to the DataFrame
result["actual_class"] = y_test

# Add the model predictions to the DataFrame
result["predicted_class"] = predictions

result[:4]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1
