In [None]:

import kfp
from kfp import dsl, kubernetes
from kfp.dsl import Dataset, Input, Output, Model
from typing import NamedTuple

# Step 1: load the MNIST data, build and train the CNN, then upload the model to MinIO.
@dsl.component(
    base_image="ghcr.io/kubeflow/kubeflow/notebook-servers/jupyter-tensorflow-full:v1.10.0",
    packages_to_install=["pandas", "minio", "numpy"]
)
def train_model():
    """Train a small CNN on MNIST and upload the saved model to MinIO.

    The model is written to /tmp/models/detect-digits.keras and uploaded to the
    "mlpipeline" bucket under the "models" prefix. The MinIO endpoint and
    credentials can be overridden with the MINIO_ENDPOINT, MINIO_ACCESS_KEY and
    MINIO_SECRET_KEY environment variables; when unset, the in-cluster defaults
    below are used.

    Raises FileNotFoundError if the saved model cannot be found when uploading.
    """
    # KFP lightweight components are shipped as standalone source, so every
    # import the step needs has to live inside the function body.
    import os

    from minio import Minio
    from tensorflow import keras

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Log the raw shapes as a quick sanity check.
    print(f"x_train shape: {x_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    print(f"x_test shape: {x_test.shape}")
    print(f"y_test shape: {y_test.shape}")

    # Log the label of one sample; change the index to inspect other examples.
    img_no = 0
    print(f"correct number: {y_train[img_no]}")

    # Reshape to 28x28 greyscale images with one channel, as the Conv2D input expects.
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    # Each pixel has a value between 0 and 255; divide by 255 to scale to 0-1.
    x_train = x_train / 255
    x_test = x_test / 255

    print(f"train X shape: {x_train.shape}")
    print(f"test X shape: {x_test.shape}")

    # Three conv/pool stages followed by a small dense classifier head.
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(keras.layers.MaxPool2D(2, 2))

    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPool2D(2, 2))

    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPool2D(2, 2))

    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(32, activation='relu'))
    # The output layer has 10 classes, one per digit 0-9.
    model.add(keras.layers.Dense(10, activation='softmax'))

    model.summary()

    # Labels are integer class ids, hence the sparse categorical loss.
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.fit(
        x=x_train,
        y=y_train,
        epochs=1
    )
    print("======训练结束=========")

    # Report held-out metrics so the prepared test split is actually used.
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    print(f"test loss: {test_loss:.4f}, test accuracy: {test_accuracy:.4f}")

    # Create the target directory if needed (exist_ok=True tolerates an existing one).
    save_path = "/tmp/models/detect-digits.keras"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    keras.models.save_model(model, save_path)

    # MinIO client; the defaults match the original hard-coded configuration.
    minio_client = Minio(
        os.environ.get("MINIO_ENDPOINT", "minio-service.kubeflow:9000"),
        access_key=os.environ.get("MINIO_ACCESS_KEY", "minio"),
        secret_key=os.environ.get("MINIO_SECRET_KEY", "minio123"),
        secure=False
    )
    minio_bucket = "mlpipeline"
    minio_prefix = "models"

    def smart_upload_to_minio(local_path, bucket_name, minio_base_path):
        """Upload a single file, or every file under a directory, to MinIO.

        A file is stored as <minio_base_path>/<file name>; a directory is walked
        and each file keeps its path relative to local_path.

        Raises FileNotFoundError when local_path does not exist, so the pipeline
        step fails instead of reporting success without an uploaded model.
        """
        if not os.path.exists(local_path):
            raise FileNotFoundError(f"错误: 本地路径 {local_path} 不存在")

        if os.path.isfile(local_path):
            # Single file (the .keras archive).
            file_name = os.path.basename(local_path)
            remote_path = os.path.join(minio_base_path, file_name).replace(os.sep, '/')
            minio_client.fput_object(bucket_name, remote_path, local_path)
            print(f"成功上传文件: {local_path} -> {remote_path}")

        elif os.path.isdir(local_path):
            # Directory (the older SavedModel layout).
            for root, _dirs, files in os.walk(local_path):
                for file in files:
                    local_file = os.path.join(root, file)
                    # Keep the directory structure by using the relative path.
                    rel_path = os.path.relpath(local_file, local_path)
                    remote_path = os.path.join(minio_base_path, rel_path).replace(os.sep, '/')

                    minio_client.fput_object(bucket_name, remote_path, local_file)
                    print(f"成功上传文件夹内文件: {local_file} -> {remote_path}")

    smart_upload_to_minio(
        local_path=save_path,
        bucket_name=minio_bucket,
        minio_base_path=minio_prefix  # stored under the models/ prefix in MinIO
    )
    # Final location: s3://mlpipeline/models/detect-digits.keras
    print(f"s3://{minio_bucket}/{minio_prefix}/{os.path.basename(save_path)}")
    

In [None]:

@dsl.component(
    base_image="ghcr.io/kubeflow/kubeflow/notebook-servers/jupyter-tensorflow-full:v1.10.0",
    packages_to_install=["minio", "kserve==0.11.0", "kubernetes"]
)
def deploy_model_to_kserve():
    """Create or update the KServe InferenceService for the trained model.

    Deploys s3://mlpipeline/models/detect-digits.keras as the service
    "detect-digits-2" in the "kubeflow-user-example-com" namespace. The bearer
    token can be supplied through the KSERVE_ACCESS_TOKEN environment variable;
    when unset, the original placeholder value is used.
    """
    import os
    from types import SimpleNamespace

    def deploy_model_to_kserve_func(
            model: object,
            service_name: str,
            namespace: str
    ) -> str:
        """Create the InferenceService, falling back to patch if creation fails.

        model only needs a `uri` attribute holding the storage URI.
        Returns "created" or "updated" depending on which call succeeded; an
        error raised by the patch fallback propagates to the caller.
        """
        from kserve import KServeClient
        from kserve import V1beta1InferenceService
        from kserve import V1beta1InferenceServiceSpec
        from kserve import V1beta1PredictorSpec
        from kserve import V1beta1SKLearnSpec
        from kubernetes import client as k8s_client

        # Manually configure the host and token.
        configuration = k8s_client.Configuration()
        configuration.host = "http://kserve-controller-manager-service.kubeflow:8443"  # or the API server address
        configuration.verify_ssl = False  # enable and configure certificates in production
        access_token = os.environ.get("KSERVE_ACCESS_TOKEN", "ACCESS_TOKEN")
        configuration.api_key = {"authorization": "Bearer " + access_token}
        kserve_client = KServeClient(client_configuration=configuration)

        # Define the InferenceService.
        # NOTE(review): the predictor below is the scikit-learn runtime, which
        # expects a joblib/pickle artifact, while the URI passed in points at a
        # .keras file -- confirm the serving runtime matches the model format.
        isvc = V1beta1InferenceService(
            api_version="serving.kserve.io/v1beta1",
            kind="InferenceService",
            metadata=k8s_client.V1ObjectMeta(
                name=service_name,
                namespace=namespace,
                annotations={'sidecar.istio.io/inject': 'false'}
            ),
            spec=V1beta1InferenceServiceSpec(
                predictor=V1beta1PredictorSpec(
                    sklearn=V1beta1SKLearnSpec(
                        # Storage URI supplied by the caller (an s3:// path here).
                        storage_uri=model.uri
                    )
                )
            )
        )

        # Try to create first; if that fails (presumably because the service
        # already exists) patch it instead. The creation error is logged so an
        # unrelated failure such as bad credentials is not silently hidden.
        try:
            kserve_client.create(isvc)
        except Exception as create_err:
            print(f"Create failed for {service_name}: {create_err}")
            kserve_client.patch(service_name, isvc)
            print(f"Service {service_name} updated.")
            return "updated"

        print(f"Service {service_name} created.")
        return "created"

    service_name = "detect-digits-2"
    action = deploy_model_to_kserve_func(
        model=SimpleNamespace(uri="s3://mlpipeline/models/detect-digits.keras"),
        service_name=service_name,
        namespace="kubeflow-user-example-com"
    )
    print(f"Deployment of {service_name} finished: {action}")
    

In [None]:


import kfp
from kfp import dsl

@dsl.pipeline(
    name="detect_digits_pipeline",
    description="detect digits pipeline demo"
)
def detect_digits_pipeline():
    # Two-step pipeline: train the digit classifier on a GPU node, then deploy it.

    # 1. Training task with its CPU / memory limits.
    train_task = train_model()
    train_task.set_cpu_limit('1').set_memory_limit('4G')

    # GPU resources: request one 'nvidia.com/gpu' accelerator.
    # set_accelerator_type replaces the deprecated add_node_selector_constraint.
    train_task.set_accelerator_type('nvidia.com/gpu')
    train_task.set_accelerator_limit(1)

    # Pin the task to nodes labelled with the RTX 3090 GPU product.
    kubernetes.add_node_selector(
        train_task,
        label_key='nvidia.com/gpu.product',
        label_value='NVIDIA-GeForce-RTX-3090',
    )

    # 2. Deployment task. There is no data dependency between the two steps,
    # so the ordering is made explicit.
    deploy_task = deploy_model_to_kserve()
    deploy_task.after(train_task)
    

In [None]:


# Step 5: compile the pipeline into a YAML package.
import os

from kfp import compiler
from kfp.client import Client

# Single source of truth for the package path used by both compile and submit.
PIPELINE_PACKAGE = 'detect_digits_pipeline.yaml'

compiler.Compiler().compile(
    pipeline_func=detect_digits_pipeline,
    package_path=PIPELINE_PACKAGE,
)

# Step 6: submit a run of the compiled package.
# The token is read from KFP_ACCESS_TOKEN when set; the fallback is the
# original placeholder value.
client = Client(host='http://ml-pipeline.kubeflow:8888',
                namespace="kubeflow-user-example-com",
                existing_token=os.environ.get("KFP_ACCESS_TOKEN", "ACCESS_TOKEN"),
                verify_ssl=False)
run = client.create_run_from_pipeline_package(
    PIPELINE_PACKAGE,
    enable_caching=False,  # caching disabled for this run
    experiment_name='detect_digits_pipeline-test-experiment'
)
