<a href="https://colab.research.google.com/github/ever-oli/MLby22/blob/main/MLOPsPipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install mlflow fastapi uvicorn pydantic scikit-learn pandas

Collecting mlflow
  Downloading mlflow-3.10.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.10.0 (from mlflow)
  Downloading mlflow_skinny-3.10.0-py3-none-any.whl.metadata (32 kB)
Collecting mlflow-tracing==3.10.0 (from mlflow)
  Downloading mlflow_tracing-3.10.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<26 (from mlflow)
  Downloading gunicorn-25.1.0-py3-none-any.whl.metadata (5.5 kB)
Collecting huey<3,>=2.5.4 (from mlflow)
  Downloading huey-2.6.0-py3-none-any.whl.metadata (4.3 kB)
Collecting skops<1 (from mlflow)
  Downloading skops-0.13.0-py3-none-any.whl.metadata (5.6 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.10.0->mlflow)
  D

In [2]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# --- Experiment --------------------------------------------------------------
# Runs started below are grouped under this experiment name.
# NOTE(review): the recorded output mentions creating database tables, so the
# default tracking store may be a local database rather than an 'mlruns'
# directory -- confirm for the installed MLflow version.
mlflow.set_experiment("Iris_Classification_Pipeline")

# --- Data --------------------------------------------------------------------
# Features go into a DataFrame so the column names travel with the data.
data = load_iris()
y = data.target
X = pd.DataFrame(data.data, columns=data.feature_names)

# 80/20 hold-out split; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Hyperparameters ---------------------------------------------------------
# Kept in one mapping so the values used for training and the values logged to
# MLflow cannot drift apart.
n_estimators, max_depth = 50, 5
hyperparams = {"n_estimators": n_estimators, "max_depth": max_depth}

# --- Tracked training run ----------------------------------------------------
with mlflow.start_run() as run:
    # Fit the classifier.
    model = RandomForestClassifier(random_state=42, **hyperparams)
    model.fit(X_train, y_train)

    # Score it on the held-out rows.
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Model Accuracy: {accuracy:.4f}")

    # Record the run: settings first, then the result, then the fitted model.
    for param_name, param_value in hyperparams.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(model, "random_forest_model")

    # Later cells (and the API script) locate the model through this run ID.
    run_id = run.info.run_id
    print(f"Run successfully logged to MLflow! Run ID: {run_id}")

2026/02/26 00:08:54 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/02/26 00:08:54 INFO mlflow.store.db.utils: Updating database tables
2026/02/26 00:08:57 INFO mlflow.tracking.fluent: Experiment with name 'Iris_Classification_Pipeline' does not exist. Creating a new experiment.


Model Accuracy: 1.0000
Run successfully logged to MLflow! Run ID: 1c171fe9303a41edbb6fae5c4798e7a5


In [3]:
# Sanity check: the model logged by the run above can be loaded back through
# its runs:/ URI and used for inference.
logged_model_uri = f"runs:/{run_id}/random_forest_model"
print(f"Model URI to be used for deployment: {logged_model_uri}")

# Load through the pyfunc interface, the same call the API script uses.
loaded_model = mlflow.pyfunc.load_model(logged_model_uri)

# The double brackets keep the selection a one-row DataFrame (not a Series).
test_sample = X_test.iloc[[0]]
reloaded_prediction = loaded_model.predict(test_sample)

print("Prediction from MLflow loaded model:", reloaded_prediction)

Model URI to be used for deployment: runs:/1c171fe9303a41edbb6fae5c4798e7a5/random_forest_model


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Prediction from MLflow loaded model: [1]


In [4]:
%%writefile app.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import mlflow.pyfunc
import pandas as pd
import os

# Application object; the route and startup decorators below register on it.
app = FastAPI(
    title="Iris Classifier API",
    description="MLOps Model Deployment",
)

# Request-body schema for POST /predict: four required numeric measurements,
# each declared as a float on a pydantic BaseModel.
class IrisFeatures(BaseModel):
    # predict_iris() copies these into the "sepal length (cm)" ... "petal width (cm)"
    # columns, so the values are presumably in centimetres -- confirm with callers.
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float

# Module-level handle to the loaded MLflow model. It starts as None, is assigned
# by load_model() at startup, and is read by predict_iris() and health_check().
model = None

@app.on_event("startup")
def load_model():
    """Load the MLflow model into the module-level ``model`` at application startup.

    The run to serve is taken from the ``MLFLOW_RUN_ID`` environment variable
    (falling back to the in-code default below, which may be replaced when
    deploying manually). The model is loaded from
    ``runs:/<run_id>/random_forest_model``.

    Loading is best-effort: if no usable run ID is configured, or the load
    raises, a message is printed and ``model`` stays ``None`` so the app still
    starts and the endpoints can report that no model is available.
    """
    global model

    run_id = os.getenv("MLFLOW_RUN_ID", "YOUR_RUN_ID_HERE").strip()

    # A blank value or the untouched placeholder can never resolve to a run;
    # skip the lookup and say exactly what is missing.
    if run_id in ("", "YOUR_RUN_ID_HERE"):
        print(
            "Error loading model: no run ID configured. "
            "Set the MLFLOW_RUN_ID environment variable to the run that logged the model."
        )
        return

    model_uri = f"runs:/{run_id}/random_forest_model"
    try:
        print(f"Loading model from {model_uri}...")
        model = mlflow.pyfunc.load_model(model_uri)
        print("Model loaded successfully!")
    except Exception as e:
        # Deliberately non-fatal: keep serving so "/" can report model_loaded=False.
        print(f"Error loading model: {e}")

@app.post("/predict")
def predict_iris(features: IrisFeatures):
    # Guard: nothing to predict with until the startup hook has set `model`.
    if model is None:
        raise HTTPException(status_code=500, detail="Model is not loaded.")

    # Build a one-row frame from the validated request fields. The column names
    # presumably mirror the ones the model was trained on -- verify against the
    # training code if they change.
    row = {
        "sepal length (cm)": features.sepal_length,
        "sepal width (cm)": features.sepal_width,
        "petal length (cm)": features.petal_length,
        "petal width (cm)": features.petal_width,
    }
    input_df = pd.DataFrame([row])

    # Run inference and take the single prediction as a plain int.
    prediction = model.predict(input_df)
    class_index = int(prediction[0])

    # Translate the class index into its species label.
    target_names = ['setosa', 'versicolor', 'virginica']
    species = target_names[class_index]

    return {"prediction_raw": class_index, "predicted_species": species}

@app.get("/")
def health_check():
    # "status" is always "healthy"; "model_loaded" tells whether the startup
    # hook managed to set the module-level model.
    model_loaded = model is not None
    return {"status": "healthy", "model_loaded": model_loaded}

Writing app.py
