In [1]:
# mlflow server --backend-store-uri mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000

In [2]:
# mlflow models serve -m "4/064c1ab8713c40149f92bd6849cee7fc/artifacts/logistic_regression/" --no-conda --host 0.0.0.0 --port 5001

In [3]:
# kill -9 $(lsof -ti :5000)

In [4]:
import numpy as np
import requests
import json
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from catboost import CatBoostClassifier

In [5]:
data = fetch_covtype()
X, y = data.data, data.target

In [6]:
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y

In [7]:
df_sampled = df.sample(n=1000, random_state=42)

In [8]:
X_sampled = df_sampled.drop(columns=['target'])
y_sampled = df_sampled['target']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_sampled, y_sampled, test_size=0.2, random_state=42, stratify=y_sampled)

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, solver='saga'),
    "Decision Tree": DecisionTreeClassifier(max_depth=10),
    "CatBoost": CatBoostClassifier(verbose=0)
}

In [12]:
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"{name}: Accuracy = {acc:.4f}")

Logistic Regression: Accuracy = 0.7350
Decision Tree: Accuracy = 0.6800
CatBoost: Accuracy = 0.7800


In [13]:
mlflow.get_tracking_uri()

'file:///home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns'

In [15]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# mlflow.create_experiment("new_experiment")
# mlflow.set_experiment("new_experiment")

In [16]:
mlflow.get_tracking_uri()

'http://127.0.0.1:5000'

In [17]:
# Start MLflow experiment
mlflow.set_experiment("experiment_3")

2025/02/10 17:50:43 INFO mlflow.tracking.fluent: Experiment with name 'experiment_3' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns/125197709437508121', creation_time=1739202643386, experiment_id='125197709437508121', last_update_time=1739202643386, lifecycle_stage='active', name='experiment_3', tags={}>

In [18]:
input_example = X_train[0]

In [19]:
input_example = input_example.reshape(1, -1)

In [20]:
models

{'Logistic Regression': LogisticRegression(max_iter=1000, solver='saga'),
 'Decision Tree': DecisionTreeClassifier(max_depth=10),
 'CatBoost': <catboost.core.CatBoostClassifier at 0x7beb615b42b0>}

In [21]:
results = {}
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        # Log model parameters
        if hasattr(model, "get_params"):
            params = model.get_params()
            mlflow.log_params(params)

        # Log additional metadata
        mlflow.log_param("model_name", name)
        mlflow.log_param("dataset_size", len(X_sampled))
        mlflow.log_param("test_size_ratio", 0.2)
        mlflow.log_param("feature_scaling", "StandardScaler")

        # Log metrics
        mlflow.log_metric("accuracy", acc)

        # Log model itself
        mlflow.sklearn.log_model(model, name, input_example=input_example)

        results[name] = acc
        print(f"{name}: Accuracy = {acc:.4f}")

print("\n✅ Experiment tracking complete. Run `mlflow ui` to view results.")


Logistic Regression: Accuracy = 0.7350
🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/125197709437508121/runs/c7ba49aafeb349cd9ed1212193e82322
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/125197709437508121
Decision Tree: Accuracy = 0.6800
🏃 View run Decision Tree at: http://127.0.0.1:5000/#/experiments/125197709437508121/runs/55b2a32fbea548129e6719e2055f73bf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/125197709437508121
CatBoost: Accuracy = 0.7800
🏃 View run CatBoost at: http://127.0.0.1:5000/#/experiments/125197709437508121/runs/7b2509af2181454fa4b28c567496df4d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/125197709437508121

✅ Experiment tracking complete. Run `mlflow ui` to view results.


In [23]:
mlflow.get_tracking_uri()

'http://127.0.0.1:5000'

# mlflow Registry

In [24]:
mlflow.set_experiment("registry_1")

2025/02/10 17:58:35 INFO mlflow.tracking.fluent: Experiment with name 'registry_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/chen/Desktop/lectures/10x-ML-engineer/mlflow/mlruns/578789159822799541', creation_time=1739203115541, experiment_id='578789159822799541', last_update_time=1739203115541, lifecycle_stage='active', name='registry_1', tags={}>

In [25]:
mdl = DecisionTreeClassifier()
mdl.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [26]:
acc = accuracy_score(y_test, y_pred)
print(acc)

0.78


In [None]:
model = mdl

In [29]:
with mlflow.start_run():
    # Log model parameters (optional)
    mlflow.log_param("model_type", "decision tree")

    # Log the trained model
    model_uri = "decision tree"
    mlflow.sklearn.log_model(model, model_uri, input_example=input_example)

    # Register the model in the MLflow Model Registry
    result = mlflow.register_model(f"runs:/{mlflow.active_run().info.run_id}/{model_uri}", "decision_tree")

print(f"Model registered as: {result.name}, version: {result.version}")


Successfully registered model 'decision_tree'.
2025/02/10 18:02:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree, version 1


🏃 View run gaudy-quail-824 at: http://127.0.0.1:5000/#/experiments/578789159822799541/runs/e80775f90a0d4c95a70266637c579046
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/578789159822799541
Model registered as: decision_tree, version: 1


Created version '1' of model 'decision_tree'.


In [None]:
mlflow.get_tracking_uri()

In [30]:
client = MlflowClient()

In [34]:
model_versions = client.get_latest_versions("decision_tree")
for v in model_versions:
    print(f"Version {v.version} - {v.current_stage}")

Version 1 - None


  model_versions = client.get_latest_versions("decision_tree")


In [48]:
mdl = LogisticRegression()
mdl.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [49]:
mdl.predict(input_example)

array([2], dtype=int32)

In [46]:
url = "http://localhost:5001/invocations"

data = json.dumps({"instances": input_example.tolist()})

headers = {"Content-Type": "application/json"}

response = requests.post(url, headers=headers, data=data)

print(response.json())

{'predictions': [2]}
