# Basic Kubeflow Pipeline

Demonstrate how to build Python components and a simple pipeline

1. Create 3 components: say_hello, add_numbers, and compose_message
2. Build a pipeline that runs the say_hello and add_numbers tasks in parallel. Because compose_message requires the output of add_numbers, those two tasks run sequentially.

In [None]:
# Install or upgrade (quietly) the packages this notebook relies on:
# google-cloud-aiplatform, kfp, and google-cloud-storage.
! pip install --upgrade --quiet google-cloud-aiplatform kfp google-cloud-storage

## Import required libraries and set variables

In [None]:
import os
import kfp
from kfp import compiler
from kfp.dsl import component, pipeline
from google.cloud import aiplatform

# Change these values to match your project setup.
# The IPython `!` shell capture yields the command output as a list of lines,
# so the project ID is read from the first line.
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]

# Define the project ID manually if gcloud reported "(unset)", i.e. the
# lookup above did not produce a configured project.
if PROJECT_ID == "(unset)":
  PROJECT_ID = "[your-project-id]" # @param {type:"string"}

# Echo the resolved project ID so it can be checked before continuing.
print(PROJECT_ID)

# Location passed to aiplatform.init below.
REGION = "us-central1" # @param {type:"string"}

# An existing bucket to store pipeline artifacts.
BUCKET_NAME = "basic-kf-pipelines-mlops-dar" # @param {type:"string"}

# Set default project, location, and staging bucket for later aiplatform calls.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=f"gs://{BUCKET_NAME}")


## Define the Pipeline components.

These are just Python functions with the component decorator.

In [None]:
@component(
    base_image="python:3.9",  # Python image the component runs in
    packages_to_install=["pandas"],  # example only: extra packages can be listed here
)
def say_hello(name: str) -> str:
    """Greet ``name``: print the greeting and return the same text."""
    greeting = f"Hello {name}!"
    print(greeting)
    return greeting

@component(
    base_image="python:3.9",
    packages_to_install=[],
)
def add_numbers(x: float, y: float) -> float:
    """Compute ``x + y``, print the calculation, and return the sum."""
    result = x + y
    # Log the operands and the sum so the step's work shows up in its output.
    print(f"Adding {x} + {y} = {result}")
    return result

@component(
    base_image="python:3.9",
    packages_to_install=[],
)
def compose_message(name: str, x: float, y: float, result: float) -> str:
    """Build, print, and return a thank-you note with the name, both numbers, and their sum."""
    summary = f"Thanks, {name}. {x} and {y} added together equals {result}."
    print(summary)
    return summary


## Define the Pipeline using the Components

In [None]:
@pipeline(
    description="A simple pipeline that adds two numbers and outputs the results.",
    name="simple-pipeline",
)
def my_simple_pipeline(
    name_value: str = "Joe",
    x_value: float = 1.0,
    y_value: float = 4.0,
):
    # The greeting and the addition take only pipeline parameters as inputs,
    # so neither depends on the other's output.
    greeting_step = say_hello(name=name_value)
    sum_step = add_numbers(x=x_value, y=y_value)

    # The thank-you message consumes the addition's output, which makes it a
    # downstream step of add_numbers.
    message_step = compose_message(
        name=name_value,
        x=x_value,
        y=y_value,
        result=sum_step.output,
    )



## Compile the pipeline to a JSON file

In [None]:
# Import the compiler from the top-level ``kfp`` package. The ``kfp.v2``
# namespace is a deprecated alias in the KFP v2 SDK, and the rest of this
# notebook already uses the top-level ``kfp`` / ``kfp.dsl`` API.
from kfp import compiler

# File that receives the compiled pipeline definition.
pipeline_filename = "my_simple_pipeline_job.json"
compiler.Compiler().compile(
    pipeline_func=my_simple_pipeline,
    package_path=pipeline_filename,
)

print(f"Pipeline compiled to {pipeline_filename}")


## Run the Pipeline on Vertex AI Pipelines

Note: You could run a pipeline on any machine, but since our goal is to run pipelines that use Google components, we will run them on Google Cloud.

In [None]:
from google.cloud.aiplatform import PipelineJob

# Caching note: with enable_caching=True, a task is re-run when its parameters
# change and is not re-run when its parameters stay the same.
job = PipelineJob(
    template_path=pipeline_filename,  # the JSON artifact produced by compilation
    display_name="my-simple-demo-pipeline-job",
    pipeline_root=f"gs://{BUCKET_NAME}/pipeline_root",  # where Vertex AI stores pipeline artifacts
    enable_caching=True,  # set to False to disable caching
    parameter_values={  # each entry overrides a pipeline default
        "name_value": "Steve",
        "x_value": 27.4,
        "y_value": 53.22,
    },
)

# Alternative: job.run() runs the job and waits for it to finish.
# submit() hands the job to Vertex AI Pipelines and returns without waiting.
job.submit()