In [38]:
# mlflow models serve -m "models:/<model_name>/<version>" --host 0.0.0.0 --port 5001

In [46]:
# mlflow models serve -m "4/064c1ab8713c40149f92bd6849cee7fc/artifacts/logistic_regression/" --no-conda --host 0.0.0.0 --port 5001

In [47]:
# kill -9 $(lsof -ti :5000)

In [1]:
import numpy as np
import requests
import json
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier

In [2]:
data = fetch_covtype()
X, y = data.data, data.target

In [3]:
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y

In [4]:
df_sampled = df.sample(n=1000, random_state=42)

In [5]:
X_sampled = df_sampled.drop(columns=['target'])
y_sampled = df_sampled['target']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_sampled, y_sampled, test_size=0.2, random_state=42, stratify=y_sampled)

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, solver='saga'),
    "Decision Tree": DecisionTreeClassifier(max_depth=10),
    "CatBoost": CatBoostClassifier(verbose=0)
}

In [9]:
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"{name}: Accuracy = {acc:.4f}")

Logistic Regression: Accuracy = 0.7350
Decision Tree: Accuracy = 0.6600
CatBoost: Accuracy = 0.7800


In [10]:
mlflow.get_tracking_uri()

'file:///home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns'

In [11]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# mlflow.create_experiment("new_experiment")
# mlflow.set_experiment("new_experiment")

In [12]:
# Start MLflow experiment
mlflow.set_experiment("experiment_1")

2025/02/10 15:54:33 INFO mlflow.tracking.fluent: Experiment with name 'experiment_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns/169155279765312299', creation_time=1739195673073, experiment_id='169155279765312299', last_update_time=1739195673073, lifecycle_stage='active', name='experiment_1', tags={}>

In [13]:
input_example = X_train[0]

In [14]:
input_example = input_example.reshape(1, -1)

In [15]:
models

{'Logistic Regression': LogisticRegression(max_iter=1000, solver='saga'),
 'Decision Tree': DecisionTreeClassifier(max_depth=10),
 'CatBoost': <catboost.core.CatBoostClassifier at 0x75eb21338a60>}

In [16]:

results = {}
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        # Log model parameters
        if hasattr(model, "get_params"):
            params = model.get_params()
            mlflow.log_params(params)

        # Log additional metadata
        mlflow.log_param("model_name", name)
        mlflow.log_param("dataset_size", len(X_sampled))
        mlflow.log_param("test_size_ratio", 0.2)
        mlflow.log_param("feature_scaling", "StandardScaler")

        # Log metrics
        mlflow.log_metric("accuracy", acc)

        # Log model itself
        mlflow.sklearn.log_model(model, name, input_example=input_example)

        results[name] = acc
        print(f"{name}: Accuracy = {acc:.4f}")

print("\n✅ Experiment tracking complete. Run `mlflow ui` to view results.")


Logistic Regression: Accuracy = 0.7350
🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/169155279765312299/runs/9b179cf37dd247fcaa54dcc36f2e2d98
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/169155279765312299
Decision Tree: Accuracy = 0.6800
🏃 View run Decision Tree at: http://127.0.0.1:5000/#/experiments/169155279765312299/runs/91a4b196f0da4fa296cbd6c08296ab27
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/169155279765312299
CatBoost: Accuracy = 0.7800
🏃 View run CatBoost at: http://127.0.0.1:5000/#/experiments/169155279765312299/runs/7efe2da061c64680b9ed0eeca36e59d0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/169155279765312299

✅ Experiment tracking complete. Run `mlflow ui` to view results.


In [31]:
print(mlflow.get_tracking_uri())

http://127.0.0.1:5000


# mlflow Registry

In [17]:
mlflow.set_experiment("experiment_2")

2025/02/10 15:55:01 INFO mlflow.tracking.fluent: Experiment with name 'experiment_2' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns/497038454285270979', creation_time=1739195701664, experiment_id='497038454285270979', last_update_time=1739195701664, lifecycle_stage='active', name='experiment_2', tags={}>

In [18]:
mdl = LogisticRegression()
mdl.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [19]:
acc = accuracy_score(y_test, y_pred)
print(acc)

0.78


In [20]:
model = mdl

with mlflow.start_run():
    # Log model parameters (optional)
    mlflow.log_param("model_type", "logistic_regression")

    # Log the trained model
    model_uri = "logistic_regression"
    mlflow.sklearn.log_model(model, model_uri, input_example=input_example)

    # Register the model in the MLflow Model Registry
    result = mlflow.register_model(f"runs:/{mlflow.active_run().info.run_id}/{model_uri}", "logistic_regression")

print(f"Model registered as: {result.name}, version: {result.version}")


Successfully registered model 'logistic_regression'.
2025/02/10 15:55:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression, version 1


🏃 View run nimble-shrew-62 at: http://127.0.0.1:5000/#/experiments/497038454285270979/runs/939817291cba41449a81e2d5fa70ff97
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/497038454285270979
Model registered as: logistic_regression, version: 1


Created version '1' of model 'logistic_regression'.


In [21]:
mlflow.get_tracking_uri()

'http://127.0.0.1:5000'

In [22]:
client = MlflowClient()

In [23]:
model_versions = client.get_latest_versions("logistic_regression")
for v in model_versions:
    print(f"Version {v.version} - {v.current_stage}")

Version 1 - None


  model_versions = client.get_latest_versions("logistic_regression")


In [24]:
mdl.predict(input_example)

array([2], dtype=int32)

In [26]:
url = "http://localhost:5001/invocations"

data = json.dumps({"instances": input_example.tolist()})

headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=data)

print(response.json())

{'predictions': [2]}
