In [1]:
! pip install mlflow  kfp-kubernetes boto3

Collecting mlflow
  Using cached mlflow-2.18.0-py3-none-any.whl.metadata (29 kB)
Collecting kfp-kubernetes
  Using cached kfp_kubernetes-1.4.0-py3-none-any.whl
Collecting boto3
  Downloading boto3-1.35.77-py3-none-any.whl.metadata (6.7 kB)
Collecting mlflow-skinny==2.18.0 (from mlflow)
  Using cached mlflow_skinny-2.18.0-py3-none-any.whl.metadata (30 kB)
Collecting Flask<4 (from mlflow)
  Using cached flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Using cached alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Using cached docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Using cached graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting pyarrow<19,>=4.0.0 (from mlflow)
  Using cached pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting sqlalchemy<3,>=1.4.0 (from mlflow)
  Using cached SQLAlchemy-2.0.36-cp311-cp311-manylinu

In [60]:
from kfp import dsl, compiler
from kfp.dsl import InputPath, OutputPath, Output
import os

# Pipeline components: data download/split, model training, and KServe deployment
@dsl.component(
    base_image="python:3.9",
    packages_to_install=["kagglehub", "pandas", "scikit-learn"],
)
def download_and_split_data(
    output_train: OutputPath(),
    output_val: OutputPath(),
    output_test: OutputPath(),
):
    """Download the credit-card fraud dataset and write train/val/test CSV files.

    The full CSV is split 60% / 20% / 20% (train / validation / test) using a
    fixed ``random_state`` of 42 for both splits.

    Args:
        output_train: Path the training split is written to (CSV, no index column).
        output_val: Path the validation split is written to (CSV, no index column).
        output_test: Path the test split is written to (CSV, no index column).
    """
    # Imports are kept inside the body, as in the other components in this file.
    from sklearn.model_selection import train_test_split
    import pandas as pd
    import kagglehub

    dataset_dir = kagglehub.dataset_download("nelgiriyewithana/credit-card-fraud-detection-dataset-2023")
    full_df = pd.read_csv(f"{dataset_dir}/creditcard_2023.csv")

    # First carve off 60% for training, then halve the remaining 40%.
    train_part, holdout = train_test_split(full_df, train_size=0.6, random_state=42)
    val_part, test_part = train_test_split(holdout, train_size=0.5, random_state=42)

    splits = (
        (train_part, output_train),
        (val_part, output_val),
        (test_part, output_test),
    )
    for frame, destination in splits:
        frame.to_csv(destination, index=False)

@dsl.component(
    base_image="python:3.9",
    packages_to_install=["tensorflow", "scikit-learn", "pandas", "onnx", "onnxruntime", "tf2onnx", "mlflow", "boto3"],
)
def train_model(
    train_data: InputPath(), 
    val_data: InputPath(), 
    model_output: OutputPath(),
    mlflow_tracking_uri: str,
    minio_endpoint: str,
    aws_access_key_id: str,
    aws_secret_access_key: str,
):
    """Train a small dense fraud classifier and log it to MLflow.

    Reads the train/validation CSVs, drops the ``Class`` and ``id`` columns to
    form the feature matrix, standardises the features, fits a Keras model and
    logs parameters, per-epoch metrics and the model itself to MLflow. The
    model is registered once per run under ``fraud_detection_model``.

    Args:
        train_data: Path to the training CSV (must contain ``Class`` and ``id``).
        val_data: Path to the validation CSV (must contain ``Class`` and ``id``).
        model_output: Declared output path. NOTE(review): nothing is written
            here; the trained model is only stored through MLflow.
        mlflow_tracking_uri: URI passed to ``mlflow.set_tracking_uri``.
        minio_endpoint: Value exported as ``MLFLOW_S3_ENDPOINT_URL``.
        aws_access_key_id: Value exported as ``AWS_ACCESS_KEY_ID``.
        aws_secret_access_key: Value exported as ``AWS_SECRET_ACCESS_KEY``.
    """
    import pandas as pd
    import tensorflow as tf
    from sklearn.preprocessing import StandardScaler
    import mlflow
    import os

    # Single source of truth for the hyper-parameters, so the values logged to
    # MLflow are the values actually used by ``model.fit``.
    epochs = 3
    batch_size = 32
    optimizer_name = "adam"
    
    # Set MLflow tracking URI
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    
    # Configure MLflow for MinIO
    os.environ['MLFLOW_S3_ENDPOINT_URL'] = minio_endpoint
    os.environ['AWS_ACCESS_KEY_ID'] = aws_access_key_id
    os.environ['AWS_SECRET_ACCESS_KEY'] = aws_secret_access_key
    
    # Start MLflow run
    with mlflow.start_run(run_name="fraud_detection_training") as run:
        df_train = pd.read_csv(train_data)
        df_val = pd.read_csv(val_data)
        
        X_train = df_train.drop(['Class', 'id'], axis=1)
        y_train = df_train['Class']
        X_val = df_val.drop(['Class', 'id'], axis=1)
        y_val = df_val['Class']
        
        # Fit the scaler on training data only; reuse its statistics for validation.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        
        # Create a sample input for model signature
        input_example = X_train[0:1]
        
        # Input width follows the data instead of a hard-coded feature count.
        n_features = X_train.shape[1]
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(n_features,)),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])
        
        model.compile(optimizer=optimizer_name, loss='binary_crossentropy', metrics=['accuracy'])
        
        # Weight the positive class by the negative/positive ratio of the training labels.
        class_weights = {0: 1, 1: len(y_train[y_train==0])/len(y_train[y_train==1])}
        
        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            class_weight=class_weights
        )
        
        # Log parameters and metrics
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("optimizer", optimizer_name)
        
        for epoch, val_accuracy in enumerate(history.history['val_accuracy']):
            mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)
            mlflow.log_metric("train_accuracy", history.history['accuracy'][epoch], step=epoch)
            mlflow.log_metric("val_loss", history.history['val_loss'][epoch], step=epoch)
            mlflow.log_metric("train_loss", history.history['loss'][epoch], step=epoch)
        
        # Log and register the model exactly once. Calling log_model twice with
        # the same registered_model_name would upload the artifacts twice and
        # create two model versions for a single training run.
        mlflow.tensorflow.log_model(
            model,
            "model",
            registered_model_name="fraud_detection_model",
            input_example=input_example  # Add input example for signature
        )
        

@dsl.component(
    base_image="python:3.9",
    packages_to_install=[
        "mlflow==2.8.1",
        "kserve==0.14.0",
        "kubernetes==25.3.0",
        "urllib3==1.26.18"
    ]
)
def deploy_model_kserve(
    mlflow_tracking_uri: str,
    minio_endpoint: str,
    aws_access_key_id: str,
    aws_secret_access_key: str,
    namespace: str = "admin"
) -> str:
    """Deploy the newest registered ``fraud_detection_model`` version with KServe.

    Looks up the most recently created model version in MLflow, builds an
    ``InferenceService`` named ``fraud-detection-<version>`` pointing at that
    run's model artifacts, replaces any existing service of the same name,
    waits for readiness and returns a message containing the service URL.

    Args:
        mlflow_tracking_uri: URI passed to ``mlflow.set_tracking_uri``.
        minio_endpoint: Written to the ``serving.kubeflow.org/s3-endpoint``
            annotation of the InferenceService.
        aws_access_key_id: Accepted for signature parity with the training
            component; not used in this function body.
        aws_secret_access_key: Accepted for signature parity with the training
            component; not used in this function body.
        namespace: Kubernetes namespace the InferenceService is created in.

    Returns:
        A string of the form ``"Model deployed at: <url>"``; the URL part is
        ``"URL not available"`` when the service status carries no ``url``.

    Raises:
        RuntimeError: If no version of the model is registered in MLflow.
        Exception: Any error raised while creating or waiting for the service
            is printed and re-raised unchanged.
    """
    import mlflow
    from kubernetes import client as k8s_client, config
    from kserve import KServeClient
    from kserve import V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1ModelSpec, V1beta1ModelFormat
    import time

    # Initialize Kubernetes and KServe clients
    config.load_incluster_config()
    kserve_client = KServeClient()

    # Get the latest model version
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    mlflow_client = mlflow.MlflowClient()
    model_name = "fraud_detection_model"
    versions = mlflow_client.search_model_versions(f"name='{model_name}'")
    if not versions:
        # Fail with an actionable message instead of an IndexError on an empty list.
        raise RuntimeError(
            f"No registered versions found for model '{model_name}' at {mlflow_tracking_uri}; "
            "run the training step before deploying."
        )
    latest_version = max(versions, key=lambda v: v.creation_timestamp)

    # Create service name
    service_name = f"fraud-detection-{latest_version.version}"

    # Create InferenceService
    # NOTE(review): the storage URI assumes the run lives in MLflow experiment 0
    # inside a bucket called "mlflow" — confirm against the tracking server setup.
    isvc = V1beta1InferenceService(
        api_version='serving.kserve.io/v1beta1',
        kind='InferenceService',
        metadata=k8s_client.V1ObjectMeta(
            name=service_name,
            namespace=namespace,
            annotations={
                "serving.kubeflow.org/s3-endpoint": minio_endpoint,
                "serving.kubeflow.org/s3-usehttps": "0"
            }
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(
                service_account_name='kserve-service-credentials',
                model=V1beta1ModelSpec(
                    model_format=V1beta1ModelFormat(name="tensorflow"),
                    storage_uri=f"s3://mlflow/0/{latest_version.run_id}/artifacts/model",
                    resources=k8s_client.V1ResourceRequirements(
                        requests={"cpu": "100m", "memory": "1Gi"},
                        limits={"cpu": "1", "memory": "2Gi"}
                    )
                )
            )
        )
    )

    try:
        # Delete existing service if it exists. This is deliberately best-effort:
        # a failure here (typically "not found") must not block the create below.
        try:
            kserve_client.delete(name=service_name, namespace=namespace)
            print(f"Deleted existing service {service_name}")
            # Give the cluster a moment to finish removing the old service.
            time.sleep(10)
        except Exception as e:
            print(f"Error deleting service (may not exist): {str(e)}")

        # Create new service
        response = kserve_client.create(isvc)
        print(f"Created new service {service_name}")
        print(f"Response: {response}")

        # Wait for the service to be ready
        kserve_client.wait_isvc_ready(
            name=service_name,
            namespace=namespace,
            timeout_seconds=300
        )

        # Get the URL
        service = kserve_client.get_inference_service(
            name=service_name,
            namespace=namespace
        )

        return f"Model deployed at: {service.get('status', {}).get('url', 'URL not available')}"

    except Exception as e:
        print(f"Full error details: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise


@dsl.pipeline(name='fraud-detection-pipeline')
def fraud_detection_pipeline(accuracy_threshold: float = 0.9):
    """Download data, train the fraud model, then deploy it with KServe.

    The MLflow/MinIO connection settings are read from the environment of the
    process that builds the pipeline and passed to the components as plain
    string arguments.

    NOTE(review): because the credentials are passed as ordinary arguments they
    presumably end up in the compiled pipeline package — consider mounting them
    from a Kubernetes secret instead.

    Args:
        accuracy_threshold: Currently not used by any step in this pipeline.

    Raises:
        EnvironmentError: If any required environment variable is unset.
    """
    # Get environment variables, failing fast with a clear message when one is
    # missing rather than handing ``None`` to a component that expects ``str``.
    env_names = (
        'MLFLOW_TRACKING_URI',
        'MLFLOW_S3_ENDPOINT_URL',
        'AWS_ACCESS_KEY_ID',
        'AWS_SECRET_ACCESS_KEY',
    )
    missing = [name for name in env_names if os.getenv(name) is None]
    if missing:
        raise EnvironmentError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    mlflow_tracking_uri = os.getenv('MLFLOW_TRACKING_URI')
    minio_endpoint = os.getenv('MLFLOW_S3_ENDPOINT_URL')
    aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
    aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    
    data_op = download_and_split_data()
    
    train_op = train_model(
        train_data=data_op.outputs['output_train'],
        val_data=data_op.outputs['output_val'],
        mlflow_tracking_uri=mlflow_tracking_uri,
        minio_endpoint=minio_endpoint,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key
    )
    
    deploy_op = deploy_model_kserve(
        mlflow_tracking_uri=mlflow_tracking_uri,
        minio_endpoint=minio_endpoint,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key
    )
    # The deploy step consumes no output of the training step, so without an
    # explicit ordering the two could run concurrently and the deployment would
    # pick up a stale (or missing) model version.
    deploy_op.after(train_op)
    

if __name__ == '__main__':
    # Compile the pipeline definition into a YAML package next to the notebook.
    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(
        package_path='fraud_detection_pipeline.yaml',
        pipeline_func=fraud_detection_pipeline,
    )

In [59]:
import kfp
# Submit the compiled pipeline package as a run in the 'Fraud Detection'
# experiment; the returned run result is shown as the cell output.
client = kfp.Client()
run_options = dict(arguments={}, experiment_name='Fraud Detection')
client.create_run_from_pipeline_package('fraud_detection_pipeline.yaml', **run_options)

RunPipelineResult(run_id=59915719-bae9-4568-b248-b1b27be8587d)