# End-to-End UAT Test: Wine Quality Predictor

This notebook demonstrates a complete end-to-end machine learning pipeline using Kubeflow, MLflow, and KServe. The pipeline covers the following steps:
1. **Data Ingestion**: Downloading a wine quality dataset from a public URL.
2. **Data Preprocessing**: Cleaning and transforming the dataset into a format suitable for model training.
3. **Model Training**: Training an ElasticNet regression model to predict wine quality, with automatic logging of model artifacts to MLflow.
4. **Model Deployment**: Deploying the trained model as a scalable inference service using KServe.
5. **Model Inference**: Making predictions on new data using the deployed model and verifying the end-to-end functionality.
6. **Cleanup**: Removing the deployed inference service after the test is completed to free up resources.

This UAT demonstrates the integration of Kubeflow Pipelines with MLflow (model management) and KServe (model deployment). It also cleans up after itself: the deployed inference service and the registered MLflow model are deleted once the test completes.

In [None]:
# Install the packages listed in requirements.txt (IPython shell escape).
!pip install -r requirements.txt

In [None]:
import os

import kfp
import mlflow
import requests
from kfp.dsl import Input, InputPath, Model, OutputPath, component, pipeline
from kserve import KServeClient
from mlflow.tracking import MlflowClient
from tenacity import (
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_exponential,
)

In [None]:
# Proxy settings are used only when all three variables are present and
# non-empty in the notebook's environment; otherwise all three stay None.
# NOTE: NO_PROXY is taken verbatim. Per the original note, it needs to contain
# `.kubeflow` for pipelines -- this cell does not append it.
_proxy_env = {name: os.environ.get(name) for name in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY")}

if all(_proxy_env.values()):
    HTTP_PROXY = _proxy_env["HTTP_PROXY"]
    HTTPS_PROXY = _proxy_env["HTTPS_PROXY"]
    NO_PROXY = _proxy_env["NO_PROXY"]
else:
    HTTP_PROXY = HTTPS_PROXY = NO_PROXY = None


def add_proxy(obj, http_proxy=HTTP_PROXY, https_proxy=HTTPS_PROXY, no_proxy=NO_PROXY):
    """Set the proxy env vars, in lower and upper case, on a PipelineTask.

    The defaults are the module-level proxy values as they were when this
    function was defined. Returns whatever the last ``set_env_variable``
    call returns, so the result can be used like the task itself.
    """
    proxy_settings = (
        ("http_proxy", http_proxy),
        ("https_proxy", https_proxy),
        ("HTTP_PROXY", http_proxy),
        ("HTTPS_PROXY", https_proxy),
        ("no_proxy", no_proxy),
        ("NO_PROXY", no_proxy),
    )
    task = obj
    for env_name, env_value in proxy_settings:
        task = task.set_env_variable(name=env_name, value=env_value)
    return task


def proxy_envs_set() -> bool:
    """Return True only when all three module-level proxy values are truthy."""
    return bool(HTTP_PROXY and HTTPS_PROXY and NO_PROXY)

In [None]:
# Name of the KServe Inference Service that the pipeline creates and that the
# later cells query and delete.
ISVC_NAME = "wine-regressor3"
# Run name passed to the training step (used for mlflow.start_run).
MLFLOW_RUN_NAME = "elastic_net_models"
# Name under which the training step registers the model in MLflow; the final
# cleanup cell deletes the registered model with this name.
MLFLOW_MODEL_NAME = "wine-elasticnet"


@component(
    base_image="python:3.11",  # Use Python 3.11 base image
    packages_to_install=["requests==2.32.5", "pandas==2.3.3"],
)
def download_dataset(url: str, dataset_path: OutputPath("Dataset")) -> None:
    """Download a semicolon-separated CSV and store it as a comma-separated CSV.

    Args:
        url: Location of the source CSV file.
        dataset_path: Output path the re-serialised CSV is written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    from io import StringIO

    import pandas as pd
    import requests

    # Without a timeout a stalled connection would block the step forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # The source file uses ";" as separator and has a header row.
    dataset = pd.read_csv(StringIO(response.text), header=0, sep=";")

    # Save the DataFrame to a CSV file at the specified output path
    dataset.to_csv(dataset_path, index=False)


@component(
    base_image="python:3.11",
    packages_to_install=["pandas==2.3.3", "pyarrow==19.0.1"],
)
def preprocess_dataset(dataset: InputPath("Dataset"), output_file: OutputPath("Dataset")) -> None:
    # Reads the CSV at `dataset`, lower-cases the column names and replaces
    # spaces in them with underscores, then writes the result to `output_file`
    # as Parquet.
    import pandas as pd

    def normalise(column_name):
        # Splitting on a single space and re-joining keeps runs of spaces as
        # runs of underscores.
        return "_".join(column_name.lower().split(" "))

    frame = pd.read_csv(dataset, header=0)
    frame.columns = [normalise(column_name) for column_name in frame.columns]
    frame.to_parquet(output_file)


@component(
    base_image="python:3.11",  # Use Python 3.11 base image
    packages_to_install=[
        "pandas==2.3.3",
        "scikit-learn==1.8.0",
        "mlflow==2.22.4",
        "pyarrow==19.0.1",
        "boto3==1.42.37",
    ],
)
def train_model(dataset: InputPath("Dataset"), run_name: str, model_name: str) -> str:
    """Train an ElasticNet regressor on the wine data and log it to MLflow.

    Args:
        dataset: Path to the preprocessed Parquet file.
        run_name: Name given to the MLflow run.
        model_name: Name under which the model is registered in MLflow.

    Returns:
        The model artifact URI, built as ``<run artifact URI>/model``.
    """
    import mlflow
    import pandas as pd
    from sklearn.linear_model import ElasticNet
    from sklearn.model_selection import train_test_split

    # Load the preprocessed dataset
    df = pd.read_parquet(dataset)

    # Define the target column for prediction
    target_column = "quality"

    # Stratified 75/25 split. Only the training part is used below; the
    # held-out part is kept so the split itself stays unchanged.
    train_x, _test_x, train_y, _test_y = train_test_split(
        df.drop(columns=[target_column]),
        df[target_column],
        test_size=0.25,
        random_state=42,
        stratify=df[target_column],
    )

    # Enable MLflow auto logging for scikit-learn models
    mlflow.sklearn.autolog()

    # Start an MLflow run and train the model
    with mlflow.start_run(run_name=run_name) as run:
        mlflow.set_tag("author", "kf-testing")
        lr = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)
        lr.fit(train_x, train_y)
        # NOTE(review): autolog is enabled and the model is also logged
        # explicitly here; the explicit call is what registers it under
        # `model_name`. Confirm that logging the model twice is intended.
        mlflow.sklearn.log_model(lr, "model", registered_model_name=model_name)

        # Return the model artifact URI as a string
        model_uri = f"{run.info.artifact_uri}/model"
        print(model_uri)
        return model_uri


@component(
    base_image="python:3.11",
    packages_to_install=["kserve==0.15.2", "kubernetes==30.1.0", "tenacity==9.1.2"],
)
def deploy_model_with_kserve(model_uri: str, isvc_name: str) -> str:
    # Creates a KServe Inference Service named `isvc_name` that serves the
    # scikit-learn model stored at `model_uri`, polls until the service reports
    # ready, and returns the URL found under status.address.url.
    from kubernetes.client import V1ObjectMeta
    from kserve import (
        constants,
        KServeClient,
        V1beta1InferenceService,
        V1beta1InferenceServiceSpec,
        V1beta1PredictorSpec,
        V1beta1SKLearnSpec,
    )
    from tenacity import retry, wait_exponential, stop_after_attempt

    # Assemble the resource from its parts: metadata, predictor, then the service.
    metadata = V1ObjectMeta(
        name=isvc_name,
        annotations={"sidecar.istio.io/inject": "false"},
    )
    predictor = V1beta1PredictorSpec(
        service_account_name="kserve-controller-s3",
        sklearn=V1beta1SKLearnSpec(storage_uri=model_uri),
    )
    inference_service = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND_INFERENCESERVICE,
        metadata=metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve = KServeClient()
    kserve.create(inference_service)

    # Poll readiness: up to 30 attempts with exponential back-off capped at 10s;
    # the last AssertionError is re-raised if the service never becomes ready.
    @retry(
        wait=wait_exponential(multiplier=2, min=1, max=10),
        stop=stop_after_attempt(30),
        reraise=True,
    )
    def wait_until_ready():
        assert kserve.is_isvc_ready(isvc_name), f"Failed to create Inference Service {isvc_name}."

    wait_until_ready()

    endpoint = kserve.get(isvc_name)["status"]["address"]["url"]
    print("Inference URL:", endpoint)
    return endpoint


# Read the MLflow tracking settings and AWS credentials from the notebook's
# environment. They are expected to be present thanks to MLflow's PodDefault; see
# https://documentation.ubuntu.com/charmed-mlflow/en/latest/tutorial/mlflow-kubeflow/
# A variable that is not set yields None.

mlflow_tracking_uri = os.environ.get("MLFLOW_TRACKING_URI")
mlflow_s3_endpoint_url = os.environ.get("MLFLOW_S3_ENDPOINT_URL")
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")


@pipeline(name="download-preprocess-train-deploy-pipeline")
def download_preprocess_train_deploy_pipeline(url: str):
    # Chains the four components: download -> preprocess -> train -> deploy.
    # `url` is the location of the dataset handed to the download step.

    # Steps 1 and 2: fetch the raw data, then preprocess it
    raw_data = download_dataset(url=url)
    prepared = preprocess_dataset(dataset=raw_data.outputs["dataset_path"])

    # Step 3: train; the step needs the MLflow endpoints and the credentials
    train_task = train_model(
        dataset=prepared.outputs["output_file"],
        run_name=MLFLOW_RUN_NAME,
        model_name=MLFLOW_MODEL_NAME,
    )
    training_env = {
        "MLFLOW_TRACKING_URI": mlflow_tracking_uri,
        "MLFLOW_S3_ENDPOINT_URL": mlflow_s3_endpoint_url,
        "AWS_ACCESS_KEY_ID": aws_access_key_id,
        "AWS_SECRET_ACCESS_KEY": aws_secret_access_key,
    }
    for env_name, env_value in training_env.items():
        train_task = train_task.set_env_variable(name=env_name, value=env_value)

    # Step 4: deploy the model whose URI the training step returned
    deploy_task = deploy_model_with_kserve(model_uri=train_task.output, isvc_name=ISVC_NAME)
    deploy_task = deploy_task.set_env_variable(
        name="AWS_SECRET_ACCESS_KEY", value=aws_secret_access_key
    )


# Proxy-aware twin of the pipeline above: the steps are the same, but every task
# additionally gets the proxy environment variables via add_proxy() so that it
# can run behind a proxy. The next cell picks one of the two definitions
# depending on whether the proxy environment variables are set.
@pipeline(name="download-preprocess-train-deploy-pipeline")
def download_preprocess_train_deploy_pipeline_proxy(url: str):
    # Steps 1 and 2: fetch the raw data, then preprocess it
    raw_data = add_proxy(download_dataset(url=url))
    prepared = add_proxy(preprocess_dataset(dataset=raw_data.outputs["dataset_path"]))

    # Step 3: train; MLflow/AWS settings are applied first, proxy settings after
    train_task = train_model(
        dataset=prepared.outputs["output_file"],
        run_name=MLFLOW_RUN_NAME,
        model_name=MLFLOW_MODEL_NAME,
    )
    training_env = {
        "MLFLOW_TRACKING_URI": mlflow_tracking_uri,
        "MLFLOW_S3_ENDPOINT_URL": mlflow_s3_endpoint_url,
        "AWS_ACCESS_KEY_ID": aws_access_key_id,
        "AWS_SECRET_ACCESS_KEY": aws_secret_access_key,
    }
    for env_name, env_value in training_env.items():
        train_task = train_task.set_env_variable(name=env_name, value=env_value)
    train_task = add_proxy(train_task)

    # Step 4: deploy the model whose URI the training step returned
    deploy_task = deploy_model_with_kserve(model_uri=train_task.output, isvc_name=ISVC_NAME)
    deploy_task = deploy_task.set_env_variable(
        name="AWS_SECRET_ACCESS_KEY", value=aws_secret_access_key
    )
    deploy_task = add_proxy(deploy_task)

In [None]:
# KFP client used to talk to the Kubeflow Pipelines API.
client = kfp.Client()

# Dataset that the pipeline downloads and processes.
url = "https://raw.githubusercontent.com/canonical/kubeflow-examples/main/e2e-wine-kfp-mlflow/winequality-red.csv"

# Use the `_proxy` pipeline definition when the proxy environment variables
# are set, the plain one otherwise.
if proxy_envs_set():
    selected_pipeline = download_preprocess_train_deploy_pipeline_proxy
    package_path = "download_preprocess_train_deploy_pipeline_proxy.yaml"
else:
    selected_pipeline = download_preprocess_train_deploy_pipeline
    package_path = "download_preprocess_train_deploy_pipeline.yaml"

# Compile the Python pipeline definition into a YAML file that Kubeflow
# Pipelines can run.
kfp.compiler.Compiler().compile(selected_pipeline, package_path)

# Start a run of the selected pipeline with the dataset URL as its argument.
# Setting enable_caching to False to overcome https://github.com/canonical/bundle-kubeflow/issues/1067
run = client.create_run_from_pipeline_func(
    selected_pipeline,
    arguments={"url": url},
    enable_caching=False,
)

In [None]:
class _KfpRunFailedError(AssertionError):
    """The KFP run ended in a state from which it cannot reach SUCCEEDED.

    Subclasses AssertionError so code that expects an AssertionError from
    ``assert_kfp_run_succeeded`` keeps working.
    """


# Run states after which further polling cannot succeed.
_KFP_FAILED_STATES = frozenset({"FAILED", "CANCELED"})


@retry(
    # Keep retrying on everything (not-yet-finished runs, transient API errors)
    # except a run that has definitively failed.
    retry=retry_if_not_exception_type(_KfpRunFailedError),
    wait=wait_exponential(multiplier=2, min=1, max=10),
    stop=stop_after_attempt(90),
    reraise=True,
)
def assert_kfp_run_succeeded(client, run_id):
    """Wait for the run to complete successfully.

    Polls the run state with exponential back-off (at most 90 attempts).

    Args:
        client: KFP client used to fetch the run.
        run_id: ID of the run to wait for.

    Raises:
        AssertionError: If the run is FAILED or CANCELED (raised immediately,
            without further retries), or if it has not reached SUCCEEDED once
            the attempts are exhausted.
    """
    run = client.get_run(run_id=run_id)
    state = run.state  # Assuming the status is directly under run object in V2beta1Run
    if state in _KFP_FAILED_STATES:
        # Fail fast instead of polling a finished run until the budget runs out.
        raise _KfpRunFailedError(f"KFP run is in {state} state.")
    assert state == "SUCCEEDED", f"KFP run is in {state} state."

In [None]:
# Block until the run started above reports SUCCEEDED; raises otherwise.
assert_kfp_run_succeeded(client, run.run_id)

In [None]:
# Initialize the KServe client
# This client is used to interact with the KServe Inference Service.
kserve_client = KServeClient()

# Retrieve the Inference Service details
# Fetches the Inference Service by name and extracts the URL for making predictions.
isvc_resp = kserve_client.get(ISVC_NAME)
inference_service_url = isvc_resp["status"]["address"]["url"]
print("Inference URL:", inference_service_url)

# Define the input data for prediction
# Each instance is a list of 11 feature values.
input_data = {
    "instances": [
        [7.4, 0.7, 0.0, 1.9, 0.076, 11.0, 34.0, 0.9978, 3.51, 0.56, 9.4],
        [7.8, 0.88, 0.0, 2.6, 0.098, 25.0, 67.0, 0.9968, 3.2, 0.68, 9.8],
    ]
}

# Send a prediction request to the Inference Service
# The timeout keeps the test from hanging on an unresponsive endpoint.
response = requests.post(
    f"{inference_service_url}/v1/models/{ISVC_NAME}:predict",
    json=input_data,
    timeout=60,
)
# Fail the test on an HTTP error instead of just printing the error body.
response.raise_for_status()
print(response.text)

## Delete Inference Service

In [None]:
# Request deletion of the Inference Service created by the pipeline.
kserve_client.delete(ISVC_NAME)

In [None]:
@retry(
    wait=wait_exponential(multiplier=2, min=1, max=10),
    stop=stop_after_attempt(30),
    reraise=True,
)
def assert_isvc_deleted(kserve_client, isvc_name):
    """Poll until the Inference Service can no longer be fetched.

    A RuntimeError whose message contains "Not Found" counts as success. Any
    other RuntimeError, or a non-empty result, fails the assertion and
    triggers another attempt (up to 30).
    """
    try:
        remaining = kserve_client.get(isvc_name)
    except RuntimeError as err:
        assert "Not Found" in str(err), f"Caught unexpected exception: {err}"
    else:
        # The lookup succeeded, so it must have returned nothing.
        assert not remaining, f"Failed to delete Inference Service {isvc_name}!"

In [None]:
# Wait until the Inference Service is gone; raises if it is still there after the retries.
assert_isvc_deleted(kserve_client, ISVC_NAME)

## Delete MLflow data

In [None]:
# NOTE(review): this rebinds `client`, which earlier in the notebook holds the
# kfp.Client instance. Re-running the KFP cells after this one would hand them
# an MlflowClient instead.
client = MlflowClient()

# Remove the model that the training step registered under MLFLOW_MODEL_NAME
client.delete_registered_model(name=MLFLOW_MODEL_NAME)