In [19]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
import pandas as pd

# Set MLflow tracking server
mlflow.set_tracking_uri("http://localhost:8080")
mlflow.set_experiment("iris_classification")




<Experiment: artifact_location='mlflow-artifacts:/841751806059865456', creation_time=1744756879890, experiment_id='841751806059865456', last_update_time=1744756879890, lifecycle_stage='active', name='iris_classification', tags={}>

In [20]:
# Load and split data
X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)



In [21]:
models = {
    "RandomForest": RandomForestClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
}



In [25]:
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import pandas as pd

# Example input
input_example = pd.DataFrame({
    "sepal length (cm)": [5.1],
    "sepal width (cm)": [3.5],
    "petal length (cm)": [1.4],
    "petal width (cm)": [0.2]
})

# Iterate over models and log them with MLflow
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Train the model
        model.fit(X_train, y_train)
        
        # Make predictions
        preds = model.predict(X_test)
        
        # Calculate metrics
        acc = accuracy_score(y_test, preds)
        f1 = f1_score(y_test, preds, average="macro")
        
        # Log metrics
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("f1_score", f1)
        
        # Infer model signature after training
        signature = infer_signature(X_train, model.predict(X_train))
        
        # Log the model with signature and input example
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            registered_model_name="iris_model",
            signature=signature,
            input_example=input_example,
            pip_requirements=[
                "scikit-learn",
                "xgboost",
                "pandas",
                "mlflow",
                "fastapi",
                "evidently"
            ]
        )
        
        # Print metrics
        print(f"{name} - Accuracy: {acc:.4f}, F1: {f1:.4f}")


Registered model 'iris_model' already exists. Creating a new version of this model...
2025/04/15 20:21:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_model, version 9
Created version '9' of model 'iris_model'.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


RandomForest - Accuracy: 1.0000, F1: 1.0000
🏃 View run RandomForest at: http://localhost:8080/#/experiments/841751806059865456/runs/3cbe421323f54f44959cd4169df209b6
🧪 View experiment at: http://localhost:8080/#/experiments/841751806059865456


Registered model 'iris_model' already exists. Creating a new version of this model...
2025/04/15 20:21:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_model, version 10


XGBoost - Accuracy: 1.0000, F1: 1.0000
🏃 View run XGBoost at: http://localhost:8080/#/experiments/841751806059865456/runs/19a032fae0324da9b73ec51b9b8e703e
🧪 View experiment at: http://localhost:8080/#/experiments/841751806059865456


Created version '10' of model 'iris_model'.


In [27]:
from mlflow.sklearn import load_model

# Load the model
model_uri = "models:/iris_model/latest"
model = load_model(model_uri)

# Test inference
test_input = pd.DataFrame({
    "sepal length (cm)": [5.1],
    "sepal width (cm)": [3.5],
    "petal length (cm)": [1.4],
    "petal width (cm)": [0.2]
})
predictions = model.predict(test_input)
print(predictions)

Downloading artifacts:  29%|██▊       | 2/7 [00:00<00:00, 89.46it/s]

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 98.06it/s] 

[0]





In [32]:
import requests
import pandas as pd

# Create input data
data = pd.DataFrame(
    [[5.1, 3.5, 1.4, 0.2]],
    columns=["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]
)


# Prediction endpoint
url = "http://127.0.0.1:12345/invocations"

# Send the request using dataframe_split format
response = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    json={
        "dataframe_split": {
            "columns": data.columns.tolist(),
            "data": data.values.tolist()
        }
    }
)

# Response
raw_prediction = response.json()

# Assuming the prediction is in a key like 'predictions' or similar
# Adjust this based on the actual structure of the response
predicted_class = raw_prediction[0] if isinstance(raw_prediction, list) else raw_prediction.get("predictions", [])[0]

# Map class to label
label_map = {0: "setosa", 1: "versicolor", 2: "virginica"}
label = label_map.get(predicted_class, "unknown")

print(f"Prediction: {label}")

Prediction: setosa
