# In [None]:
import kfp
from kfp import dsl
from kfp.dsl import Dataset, Input, Output, Model
from typing import NamedTuple

# Step 1: 加载数据并保存为 Dataset Artifact
@dsl.component(
    base_image="python:3.11",
    packages_to_install=["pandas", "scikit-learn"],
)
def load_data(dataset_output: Output[Dataset]):
    """Write the Iris dataset to the output artifact as a CSV file.

    The feature columns keep scikit-learn's feature names and the class
    labels are stored in an extra ``target`` column. No index column is
    written.

    Args:
        dataset_output: Dataset artifact whose ``path`` receives the CSV.
    """
    # Imports live inside the body, as in the other components of this file,
    # so they are resolved in the component's own container image.
    import pandas as pd
    from sklearn.datasets import load_iris

    bunch = load_iris()
    features = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    table = features.assign(target=bunch.target)
    table.to_csv(dataset_output.path, index=False)

# Step 2: 训练模型并输出 Model Artifact
@dsl.component(
    base_image="python:3.10",
    packages_to_install=["scikit-learn", "pandas", "joblib"],
)
def train_model(
        input_dataset: Input[Dataset],
        model_output: Output[Model],
) -> NamedTuple("Output", [("accuracy", float)]):
    """Train a random forest on the input CSV and export it as a Model artifact.

    The CSV is split 80/20 into train and test partitions. Neither the split
    nor the classifier is given a ``random_state``, so the reported accuracy
    can differ between runs.

    Args:
        input_dataset: Dataset artifact pointing at a CSV that contains a
            ``target`` column; every other column is used as a feature.
        model_output: Model artifact. Its ``path`` is created as a directory
            and the fitted classifier is stored inside it as
            ``model.joblib``. The ``framework`` and ``accuracy`` metadata
            keys are set on it.

    Returns:
        A one-element tuple holding the accuracy on the held-out test split.
    """
    import os

    import joblib
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    # Load the data and separate the label column from the features.
    frame = pd.read_csv(input_dataset.path)
    labels = frame['target']
    features = frame.drop(columns=['target'])
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.2
    )

    # Fit the classifier with scikit-learn's default hyperparameters.
    forest = RandomForestClassifier().fit(train_x, train_y)

    # Treat the artifact path as a directory and store the model under a
    # fixed file name (presumably the name the KServe sklearn runtime looks
    # for; confirm against the serving runtime in use).
    model_dir = model_output.path
    os.makedirs(model_dir, exist_ok=True)
    joblib.dump(forest, os.path.join(model_dir, "model.joblib"))

    # Score the model on the held-out partition.
    acc = float(accuracy_score(test_y, forest.predict(test_x)))

    # Optional metadata attached to the artifact (intended for display in
    # the UI).
    model_output.metadata.update(
        {"framework": "scikit-learn", "accuracy": acc}
    )

    print(f"Model accuracy: {acc}")
    return (acc,)

# Step 3: Deploy the model to KServe
@dsl.component(
    base_image="python:3.10",
    packages_to_install=["kserve==0.11.0", "kubernetes"]
)
def deploy_model_to_kserve(
        model: Input[Model],
        service_name: str,
        namespace: str
):
    """Create a KServe InferenceService for the model, or patch an existing one.

    A v1beta1 ``InferenceService`` with an sklearn predictor is built from
    ``model.uri`` and submitted with ``KServeClient.create``. If creation
    fails with an API-level error, the error is printed and the same spec is
    applied with ``KServeClient.patch`` instead.

    Args:
        model: Model artifact whose ``uri`` is used as the predictor's
            ``storage_uri``.
        service_name: Name of the InferenceService to create or update.
        namespace: Kubernetes namespace the InferenceService belongs to.

    Raises:
        Exception: Errors raised by the fallback ``patch`` call, and any
            error from ``create`` other than ``RuntimeError`` or
            ``ApiException``, propagate to the caller.
    """
    from kserve import (
        KServeClient,
        V1beta1InferenceService,
        V1beta1InferenceServiceSpec,
        V1beta1PredictorSpec,
        V1beta1SKLearnSpec,
    )
    from kubernetes import client as k8s_client
    from kubernetes.client.rest import ApiException

    # Manually configured host and token.
    # NOTE(review): the host below names the KServe controller-manager
    # service; the original author's comment says it may need to be the
    # Kubernetes API server address instead - confirm for your cluster.
    configuration = k8s_client.Configuration()
    configuration.host = "http://kserve-controller-manager-service.kubeflow:8443"
    # NOTE(review): TLS verification is disabled; enable it and configure
    # certificates for production use.
    configuration.verify_ssl = False
    # NOTE(review): "ACCESS_TOKEN" is a literal placeholder, not a real
    # credential - supply a real token before running.
    configuration.api_key = {"authorization": "Bearer " + "ACCESS_TOKEN"}
    kserve_client = KServeClient(client_configuration=configuration)

    # InferenceService definition. For scikit-learn models the storage
    # location is expected to contain a joblib/pickle file.
    isvc = V1beta1InferenceService(
        api_version="serving.kserve.io/v1beta1",
        kind="InferenceService",
        metadata=k8s_client.V1ObjectMeta(
            name=service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'}
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(
                sklearn=V1beta1SKLearnSpec(
                    # model.uri is expected to be the remote artifact
                    # location (e.g. an s3:// or gs:// URI).
                    storage_uri=model.uri
                )
            )
        )
    )

    # Create the service; fall back to patch when creation is rejected
    # (typically because the service already exists). Only API-level
    # failures trigger the fallback, so interrupts and programming errors
    # are no longer swallowed, and the reason for the failed create is
    # printed rather than discarded.
    # NOTE(review): KServeClient appears to re-raise the Kubernetes
    # ApiException as RuntimeError; both are caught to be safe.
    try:
        kserve_client.create(isvc)
    except (RuntimeError, ApiException) as create_error:
        print(
            f"Create of {service_name} failed, falling back to patch: "
            f"{create_error}"
        )
        kserve_client.patch(service_name, isvc)
        print(f"Service {service_name} updated.")
    else:
        print(f"Service {service_name} created.")


# Step 4: 定义 Pipeline
@dsl.pipeline(
    name="kserver_pipeline",
    description="Full ML pipeline with Dataset and Model artifacts."
)
def kserver_pipeline():
    """Wire the load, train and deploy components into one pipeline.

    The dataset artifact produced by ``load_data`` feeds ``train_model``,
    and the resulting model artifact is handed to ``deploy_model_to_kserve``,
    which targets the ``iris-svc`` service in the
    ``kubeflow-user-example-com`` namespace.
    """
    # Data loading.
    data_task = load_data()
    data_task.set_cpu_limit('1').set_memory_limit('2G')

    # Model training.
    train_task = train_model(
        input_dataset=data_task.outputs["dataset_output"]
    )

    # Resource configuration for training, including one GPU.
    train_task.set_cpu_limit('1')
    train_task.set_memory_limit('4G')
    train_task.set_accelerator_limit(1)
    # set_accelerator_type replaces the deprecated
    # add_node_selector_constraint. A specific device name (e.g.
    # 'NVIDIA-GeForce-RTX-3090') could be passed instead of the generic
    # resource name.
    train_task.set_accelerator_type('nvidia.com/gpu')
    # If the container runtime needs them, NVIDIA_VISIBLE_DEVICES and
    # NVIDIA_DRIVER_CAPABILITIES can be set with
    # train_task.set_env_variable(...).

    # Passing the model artifact here is what links deployment to training.
    deploy_model_to_kserve(
        model=train_task.outputs["model_output"],
        service_name="iris-svc",
        namespace="kubeflow-user-example-com"
    )

# Step 5: Compile the pipeline function into a YAML package on disk.
from kfp import compiler

compiler.Compiler().compile(
    pipeline_func=kserver_pipeline,
    package_path='kserver_pipeline.yaml',
)

# Step 6: Submit the compiled package as a run.
from kfp.client import Client

# "ACCESS_TOKEN" is a literal placeholder and TLS verification is disabled;
# both need attention before this is used outside a test cluster.
client = Client(
    host='http://ml-pipeline.kubeflow:8888',
    namespace="kubeflow-user-example-com",
    existing_token="ACCESS_TOKEN",
    verify_ssl=False,
)
run = client.create_run_from_pipeline_package(
    pipeline_file='kserver_pipeline.yaml',
    experiment_name='kserver_pipeline-test-experiment',
    enable_caching=False,  # caching is turned off for this run
)
