## 1 - Train a model and prepare metadata for logging

In [1]:
import mlflow
from mlflow.models import infer_signature
from datetime import datetime, UTC

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before anything
# else runs; a later cell reads MLFLOW_TRACKING_URI via os.getenv().
load_dotenv()

True

### Train Model

In [2]:
# Load the Iris dataset as a (features, labels) pair
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model hyperparameters (a later cell logs this dict to MLflow as-is).
# `multi_class` is intentionally omitted: it has been deprecated since
# scikit-learn 1.5 and "auto" was already its default, so the fitted model is
# unchanged while the cell keeps working on newer scikit-learn versions.
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "random_state": 8888,
}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = lr.predict(X_test)

# Fraction of test samples classified correctly
accuracy = accuracy_score(y_test, y_pred)



### Log Model and Metadata

In [3]:
# Log the model to MLflow

# Point the MLflow client at the tracking server configured in the environment
tracking_server = os.getenv("MLFLOW_TRACKING_URI")
print(tracking_server)
mlflow.set_tracking_uri(uri=tracking_server)

# Set variables for model name, version, and experiment
model_name = "dev.ml_team.iris_classifier"
model_version = "1"
model_experiment = "ml_team/iris_classifier"
# MLflow artifact paths are "/"-separated regardless of the local OS, so the
# path is built explicitly instead of with os.path.join (which would insert
# backslashes on Windows).
model_artifact_path = f"models/{model_name}/{model_version}"

# Select the experiment; MLflow creates it if it does not exist yet
mlflow.set_experiment(model_experiment)

# Start an MLflow run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the accuracy measured on the held-out test set
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature from the training inputs and their predictions
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model and register it under `model_name`.
    # Only a handful of rows are stored as the input example; the full
    # training split is not needed to illustrate the expected input.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path=model_artifact_path,
        signature=signature,
        input_example=X_train[:5],
        registered_model_name=model_name,
    )

# The registry assigns the version number, so re-running this cell yields
# 2, 3, ... rather than the placeholder above. Keep `model_version` in sync
# with what was actually registered so later cells report the right value.
# getattr() keeps this working on MLflow releases whose ModelInfo lacks the
# attribute; in that case the placeholder is left untouched.
registered_version = getattr(model_info, "registered_model_version", None)
if registered_version is not None:
    model_version = str(registered_version)

http://4.236.9.68


2024/10/24 20:55:55 INFO mlflow.tracking.fluent: Experiment with name 'ml_team/iris_classifier' does not exist. Creating a new experiment.
Successfully registered model 'dev.ml_team.iris_classifier'.
2024/10/24 20:56:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: dev.ml_team.iris_classifier, version 1
Created version '1' of model 'dev.ml_team.iris_classifier'.


In [4]:

# Echo the registered model's identifiers, one per line
print(
    f"model_name='{model_name}'",
    f"model_version='{model_version}'",
    sep="\n",
)

model_name='dev.ml_team.iris_classifier'
model_version='1'


In [5]:

# Persist the model identifiers to the project's .env file, which is expected
# one directory above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
env_file = os.path.join(project_root, ".env")

# Timestamp (UTC, ISO 8601) recorded in the generated comment header
iso_date_utc = datetime.now(UTC).isoformat()

env_lines = [
    "\n",  # blank separator in front of the appended block
    "# Script 01_train_model.ipynb output variables\n",
    f"# Generated on {iso_date_utc}\n",
    f"MODEL_NAME={model_name}\n",
    f"MODEL_VERSION={model_version}\n",
]

# Append mode: existing content is preserved, and every execution of this
# cell adds another block to the end of the file.
with open(env_file, 'a') as f:
    f.writelines(env_lines)