In [1]:
 # Use the %pip magic (not bare `pip`, which relies on IPython automagic) so the
 # packages are installed into the environment of the running kernel.
 %pip install "kfp>=2" pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
# iris_regression_pipeline.py
#
# Works with: kfp >= 2.0
# $ pip install "kfp>=2" pandas scikit-learn

from typing import Annotated
from kfp import dsl, compiler


# ──────────────────── Components ──────────────────── #

@dsl.component(
    base_image='python:3.9',
    packages_to_install=['pandas', 'scikit-learn'],
)
def load_iris_data() -> str:
    """Serialise scikit-learn's bundled Iris dataset to CSV text.

    Returns:
        CSV text with a header row and no index column. The feature columns
        (named after ``feature_names``) come first, followed by a final
        ``target`` column holding the class labels.
    """
    # Imports stay inside the function: a KFP lightweight component has to be
    # self-contained, so module-level imports are not available to it.
    import pandas as pd
    from sklearn.datasets import load_iris

    dataset = load_iris()
    features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    labels = pd.Series(dataset.target, name="target")

    # Place features and labels side by side, then emit the table as a string.
    table = pd.concat([features, labels], axis=1)
    return table.to_csv(index=False)


@dsl.component(
    base_image='python:3.9',
    packages_to_install=['pandas', 'scikit-learn'],
)
def train_model(
    csv_data: str,
) -> str:
    """Fit a LinearRegression on CSV data and report its metrics as JSON.

    Args:
        csv_data: CSV text containing the feature columns plus a ``target``
            column.

    Returns:
        A JSON object string with the keys ``mse`` (mean squared error on the
        held-out 20% split), ``coefficients`` (list of fitted coefficients)
        and ``intercept``.
    """
    # Imports stay inside the function: a KFP lightweight component has to be
    # self-contained, so module-level imports are not available to it.
    import json
    from io import StringIO

    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    frame = pd.read_csv(StringIO(csv_data))
    features = frame.drop(columns=["target"])
    target = frame["target"]

    # Fixed random_state keeps the 80/20 split reproducible between runs.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = LinearRegression()
    regressor.fit(features_train, target_train)

    predictions = regressor.predict(features_test)
    report = {
        "mse": mean_squared_error(target_test, predictions),
        "coefficients": regressor.coef_.tolist(),
        "intercept": regressor.intercept_,
    }
    return json.dumps(report)


# ──────────────────── Pipeline ──────────────────── #

@dsl.pipeline(
    # Plain ASCII, lowercase, hyphen-separated name. The previous literal used
    # U+2011 non-breaking hyphens and capitals, which are not valid in the
    # Kubernetes-style resource names KFP works with.
    name="iris-regression-pipeline",
    description="Loads Iris data and trains a LinearRegression model."
)
def iris_regression_pipeline():
    """Two-step pipeline: load the Iris data as CSV, then train on it.

    The CSV string returned by ``load_iris_data`` is passed to
    ``train_model`` through the task's ``output``, which also makes the
    training step depend on the loading step.
    """
    iris_csv = load_iris_data()
    train_model(csv_data=iris_csv.output)


# ──────────────────── Compile to YAML ──────────────────── #

if __name__ == "__main__":
    # Compile the pipeline definition into a YAML package on disk.
    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(
        pipeline_func=iris_regression_pipeline,
        package_path="iris_regression_pipeline.yaml",
    )


In [3]:
import kfp

# Names used for this submission, gathered in one place for easy tuning.
EXPERIMENT_NAME = "iris-regression"
RUN_NAME = "iris-regression-run"
PIPELINE_PACKAGE = "iris_regression_pipeline.yaml"

# Connect to your Kubeflow endpoint (no arguments: default connection settings).
client = kfp.Client()

# Create the experiment that the run is filed under.
experiment = client.create_experiment(EXPERIMENT_NAME)

# Submit the compiled pipeline package as a run in that experiment.
run = client.run_pipeline(
    experiment_id=experiment.experiment_id,
    job_name=RUN_NAME,
    pipeline_package_path=PIPELINE_PACKAGE,
)

