In [12]:
import kfp
from kfp.v2 import dsl
from kfp.v2.dsl import (Dataset, Input, Model, Output)
from kfp.v2.compiler import Compiler

@dsl.component(
    base_image='python:3.8',
    packages_to_install=['pandas', 'minio']
)
def load_data_from_minio(output_data: Output[Dataset]):
    """Fetch the penguin CSV from MinIO and write it to the output artifact.

    Args:
        output_data: Dataset artifact that receives the CSV (written without
            the pandas index column).
    """
    import pandas as pd
    from minio import Minio

    # Scratch location for the object downloaded from the bucket.
    local_csv = "/tmp/penguins_size.csv"

    storage = Minio(
        "10.96.204.210:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False,
    )
    storage.fget_object("penguin-dataset", "penguins_size.csv", local_csv)

    # Round-trip through pandas and emit the frame at the artifact path.
    pd.read_csv(local_csv).to_csv(output_data.path, index=False)

@dsl.component(
    base_image='python:3.8',
    packages_to_install=['pandas', 'scikit-learn']
)
def preprocess_data(input_data: Input[Dataset], output_train_data: Output[Dataset], output_test_data: Output[Dataset]):
    """Impute missing values, encode the ``sex`` column and split the data.

    Args:
        input_data: Dataset artifact holding the raw CSV.
        output_train_data: Dataset artifact that receives the training split.
        output_test_data: Dataset artifact that receives the test split.
    """
    import pandas as pd
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import LabelEncoder
    from sklearn.model_selection import train_test_split

    data = pd.read_csv(input_data.path)

    # Fill every missing cell with the most frequent value of its column.
    imputer = SimpleImputer(strategy='most_frequent')
    data.iloc[:, :] = imputer.fit_transform(data)

    # Replace the categorical "sex" column with integer codes.
    lb = LabelEncoder()
    data["sex"] = lb.fit_transform(data["sex"])

    # Hold out 20% of the rows for testing. The previous test_size=0.9 left
    # only 10% of the rows for training, which starves the downstream model.
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

    train_data.to_csv(output_train_data.path, index=False)
    test_data.to_csv(output_test_data.path, index=False)

@dsl.component(
    base_image='python:3.8',
    packages_to_install=['pandas', 'tensorflow', 'scikit-learn', 'joblib', 'minio']
)
def train_model(input_train_data: Input[Dataset], model: Output[Model], model_path: str):
    """Train a small Keras classifier on the penguin training split.

    The model is saved under a ``1/`` version subdirectory of the output
    artifact so that the parent directory can be served by TensorFlow Serving.

    Args:
        input_train_data: Dataset artifact with the training CSV; must contain
            a ``species`` column, which is used as the target.
        model: Model artifact that receives the saved model. Its ``uri`` is
            extended with ``/1/`` before saving.
        model_path: Currently unused; kept so existing callers keep working.
    """
    import pandas as pd
    from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
    from sklearn.compose import ColumnTransformer
    import tensorflow as tf
    from minio import Minio

    # Initialize MinIO client
    client = Minio(
        "10.96.204.210:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False
    )

    # Make sure the artifact bucket exists before anything is written to it.
    bucket_name = "mlpipeline"
    found = client.bucket_exists(bucket_name)
    if not found:
        client.make_bucket(bucket_name)

    train_data = pd.read_csv(input_train_data.path)
    X_train = train_data.drop('species', axis=1)  # 'species' is the target
    y_train = train_data['species']

    # Define which columns are categorical and which are numerical
    categorical_features = X_train.select_dtypes(include=['object']).columns
    numerical_features = X_train.select_dtypes(include=['number']).columns

    # Scale numeric columns and one-hot encode categorical columns
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(), categorical_features)
        ]
    )

    # Preprocess the features
    X_train = preprocessor.fit_transform(X_train)

    # Encode the labels
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)

    # Define a simple neural network model
    model_keras = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')  # Assuming 3 classes for species
    ])

    model_keras.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    model_keras.fit(X_train, y_train, epochs=1)

    # Point the artifact at a "1" version subfolder BEFORE saving, so that
    # model.path (derived from model.uri) resolves inside that folder.
    # Previously the model was saved first and ended up outside the version
    # folder that the URI advertised.
    model.uri = model.uri+"/1/"
    model_keras.save(model.path)
    print("MAtni: " + model.uri)
    
@dsl.component(
    base_image='python:3.8',
    packages_to_install=['kserve', 'kubernetes', 'tensorflow']
)
def model_serving(model_trained: Input[Model]):
    """
    Create a KServe InferenceService for the trained model.

    Any existing service named "penguin-classifier" in the default target
    namespace is deleted first, then a new one is created.

    Args:
        model_trained: Model artifact whose ``uri`` ends with a ``/1/``
            version subfolder; the parent folder is used as the storage URI.
    """
    from kubernetes import client
    from kserve import KServeClient
    from kserve import constants
    from kserve import utils
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TFServingSpec
    import time

    # Get model uri
    uri = model_trained.uri
    print(f"matni: {uri}")
    # Swap only the URI scheme (minio:// -> s3://). A plain
    # str.replace("minio", "s3") would also rewrite any "minio" that appears
    # in the bucket name or object path.
    minio_scheme = "minio://"
    if uri.startswith(minio_scheme):
        uri = "s3://" + uri[len(minio_scheme):]
    # Removing last subfolder /1/
    uri = uri.rsplit("/", 2)[0]
    print(uri)

    # Inference server config
    namespace = utils.get_default_target_namespace()
    name = "penguin-classifier"
    kserve_version = 'v1beta1'
    api_version = constants.KSERVE_GROUP + '/' + kserve_version

    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'},
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(
                service_account_name="sa-minio-kserve",
                tensorflow=V1beta1TFServingSpec(storage_uri=uri),
            )
        ),
    )

    kserve = KServeClient()

    # Replace old inference service with a new one. Deletion is best-effort
    # (the service may not exist yet), but only ordinary errors are swallowed
    # and the reason is reported.
    try:
        kserve.delete(name=name, namespace=namespace)
        print("Previous model deleted")
    except Exception as exc:
        print("Cannot delete previous model")
        print(exc)
    # Pause between the delete and the create, as the original did.
    time.sleep(10)

    kserve.create(isvc)

@dsl.pipeline(
    name='penguin_classification_pipeline',
    description='A pipeline to classify penguins using a TensorFlow model and KServe'
)
def penguin_pipeline():
    # Stage 1: pull the raw CSV from object storage.
    ingest = load_data_from_minio()

    # Stage 2: clean the data and split it into train/test sets.
    prepare = preprocess_data(input_data=ingest.outputs['output_data'])

    # Stage 3: fit the classifier on the training split.
    fit = train_model(
        input_train_data=prepare.outputs['output_train_data'],
        model_path='penguin-model',
    )

    # Stage 4: deploy the trained model, explicitly ordered after training.
    deploy = model_serving(model_trained=fit.outputs['model'])
    deploy.after(fit)

# Serialize the pipeline definition to a package file on disk.
_pipeline_package = 'penguin_pipeline.yaml'
Compiler().compile(
    pipeline_func=penguin_pipeline,
    package_path=_pipeline_package,
)

# Submit the compiled package as a new run (the pipeline takes no arguments).
client = kfp.Client()

client.create_run_from_pipeline_package(pipeline_file=_pipeline_package, arguments={})


RunPipelineResult(run_id=e73d8917-73ef-4ff0-87f9-59255f9eddd3)

In [10]:
def model_serving():
    """
    Create a KServe InferenceService from a hard-coded model URI.

    Standalone variant for running outside the pipeline. Any existing service
    named "penguin-classifier" in the default target namespace is deleted
    first, then a new one is created.
    """
    from kubernetes import client
    from kserve import KServeClient
    from kserve import constants
    from kserve import utils
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TFServingSpec
    import time

    # Get model uri
    uri = "minio://mlpipeline/v2/artifacts/penguin-classification-pipeline/716929e6-4592-4377-b7e2-e5e4b94e1edb/train-model/model"
    print(f"matni: {uri}")
    # Swap only the URI scheme (minio:// -> s3://). A plain
    # str.replace("minio", "s3") would also rewrite any "minio" that appears
    # in the bucket name or object path.
    minio_scheme = "minio://"
    if uri.startswith(minio_scheme):
        uri = "s3://" + uri[len(minio_scheme):]
    # This URI has no trailing /1/ version subfolder, so nothing is stripped.
    print(uri)

    # Inference server config
    namespace = utils.get_default_target_namespace()
    name = "penguin-classifier"
    kserve_version = 'v1beta1'
    api_version = constants.KSERVE_GROUP + '/' + kserve_version

    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'},
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(
                service_account_name="sa-minio-kserve",
                tensorflow=V1beta1TFServingSpec(storage_uri=uri),
            )
        ),
    )

    kserve = KServeClient()

    # Replace old inference service with a new one. Deletion is best-effort
    # (the service may not exist yet), but only ordinary errors are swallowed
    # and the reason is reported.
    try:
        kserve.delete(name=name, namespace=namespace)
        print("Previous model deleted")
    except Exception as exc:
        print("Cannot delete previous model")
        print(exc)
    # Pause between the delete and the create, as the original did.
    time.sleep(10)

    kserve.create(isvc)
    
# Run the standalone serving step immediately (outside of any pipeline run).
model_serving()
    

matni: minio://mlpipeline/v2/artifacts/penguin-classification-pipeline/716929e6-4592-4377-b7e2-e5e4b94e1edb/train-model/model
s3://mlpipeline/v2/artifacts/penguin-classification-pipeline/716929e6-4592-4377-b7e2-e5e4b94e1edb/train-model/model
Previous model deleted
