# Sample Kubeflow pipeline

In [4]:
# Install components

!pip3 install google-cloud-aiplatform
!pip3 install kfp google-cloud-pipeline-components


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting kfp
  Using cached kfp-1.8.19-py3-none-any.whl
Collecting google-cloud-pipeline-components
  Using cached google_cloud_pipeline_components-1.0.41-py3-none-any.whl (1.0 MB)
Collecting kfp-server-api<2.0.0,>=1.1.2
  Using cached kfp_server_api-1.8.5-py3-none-any.whl
Collecting kfp-pipeline-spec<0.2.0,>=0.1.16
  Using cached kfp_pipeline_spec-0.1.16-py3-none-any.whl (19 kB)
Collecting requests-toolbelt<1,>=0.8.0
  Using cached requests_toolbelt-0.10.1-py2.py3-none-any.whl (54 kB)
Collecting pydantic<2,>=1.8.2
  Downloading pydantic-1.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0

In [None]:
# The kernel must be restarted after the installs above so the new packages
# are picked up.
import os

# The automatic restart is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import IPython

    # Automatically restart the kernel after the installs.
    IPython.Application.instance().kernel.do_shutdown(True)

In [1]:
# Finally, check that you have correctly installed the packages by printing the
# KFP SDK version and the google_cloud_pipeline_components version.
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
!python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 1.8.19
google_cloud_pipeline_components version: 1.0.41


In [53]:
# Step 2: Set your project ID and bucket
PROJECT_ID = "[your-project-id]"
# Get your Google Cloud project ID from gcloud
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Project ID: kamal-anidine-dev


In [54]:
# Region for the Vertex AI resources; the "[your-region]" placeholder falls
# back to us-central1.
PROJECT_REGION = "[your-region]"  # @param {type: "string"}
PROJECT_REGION = "us-central1" if PROJECT_REGION == "[your-region]" else PROJECT_REGION

In [55]:
BUCKET_URI = f"gs://aip-{PROJECT_ID}-aip"
GENERATE_BUCKET_URI = False  # @param {type:"boolean"}


if GENERATE_BUCKET_URI:
    bucket_name = "gs://aip-{}".format(PROJECT_ID)
    !gsutil mb -p {PROJECT_ID} -l {REGION} {bucket_name}

    # set GCS bucket object TTL to 7 days
    !echo '{"rule":[{"action": {"type": "Delete"},"condition": {"age": 7}}]}' > gcs_lifecycle.tmp
    !gsutil lifecycle set gcs_lifecycle.tmp {bucket_name}
    !rm gcs_lifecycle.tmp

    BUCKET_URI = bucket_name
    print(f"changed BUCKET_URI to {BUCKET_URI} due to GENERATE_BUCKET_URI is True")

if BUCKET_URI == "" or BUCKET_URI is None or BUCKET_URI == "gs://[your-bucket-name]":
    BUCKET_URI = f"gs://aip-{PROJECT_ID}"
BUCKET_URI

'gs://aip-kamal-anidine-dev-aip'

### Import libraries


In [56]:
from typing import NamedTuple
import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component

### Creating your first pipeline
Create a short pipeline using the KFP SDK. This pipeline doesn't do anything ML-related (don't worry, you'll get there!); the purpose of this exercise is to teach you:

- How to create custom components in the KFP SDK
- How to run and monitor a pipeline in Vertex Pipelines

You'll create a pipeline that prints out a sentence using two outputs: a product name and an emoji description. This pipeline will consist of three components:

- product_name: This component will take a product name as input, and return that string as output.

- emoji: This component will take the text description of an emoji and convert it to an emoji. For example, the text code for ✨ is "sparkles". This component uses an emoji library to show you how to manage external dependencies in your pipeline.

- build_sentence: This final component will consume the output of the previous two to build a sentence that uses the emoji. For example, the resulting output might be "Vertex Pipelines is ✨".

In [57]:
import os

# Path the component definition is written to by the decorator below and then
# loaded back from.
output_file = "tmp/first-component.yaml"
# Nothing earlier in this notebook creates the relative tmp/ directory, so make
# sure it exists before the component YAML is written there.
os.makedirs(os.path.dirname(output_file), exist_ok=True)


@component(base_image="python:3.9", output_component_file=output_file)
def product_name(text: str) -> str:
    """Return the given product name unchanged.

    Args:
        text: The product name.

    Returns:
        The same string that was passed in.
    """
    return text


product_name_component = kfp.components.load_component_from_file(output_file)

In [58]:
# Component that turns an emoji alias (e.g. "sparkles") into the emoji itself.
# It returns two named outputs: the original text and the converted emoji.
@component(packages_to_install=["emoji"])
def emoji(text: str) -> NamedTuple("Outputs", [("emoji_text", str), ("emoji", str)]):
    # Imported inside the function because the `emoji` package is installed
    # for this component via packages_to_install.
    import emoji

    alias_code = ":{}:".format(text)
    rendered = emoji.emojize(alias_code, language="alias")
    print("output one: {}; output_two: {}".format(text, rendered))
    return (text, rendered)

In [59]:
# Component that builds "<product> is <emoji>", using the emoji text instead
# when the emoji string is empty.
@component
def build_sentence(product: str, emoji: str, emojitext: str) -> str:
    print("We completed the pipeline, hooray!")
    # Prefer the emoji; use its text description when the emoji is empty.
    ending = emoji if len(emoji) > 0 else emojitext
    return product + " is " + ending

### Putting the components together into a pipeline
The component definitions above created factory functions that can be used in a pipeline definition to create steps.

To set up a pipeline, use the @dsl.pipeline decorator, give the pipeline a name and description, and provide the root path where your pipeline's artifacts should be written (in this notebook the root path is passed as `pipeline_root` when the pipeline job is created). Artifacts are any output files generated by your pipeline. This intro pipeline doesn't generate any, but your next pipeline will.

In the next block of code you define a `pipeline` function. This is where you specify the inputs to your initial pipeline steps, and how steps connect to each other:

product_task takes a product name as input. Here you're passing "Vertex Pipelines" but you can change this to whatever you'd like.

emoji_task takes the text code for an emoji as input. You can also change this to whatever you'd like. For example, "partying_face" refers to the 🥳 emoji. Note that since neither this component nor the product_task component has any steps that feed input into it, you manually specify the input for these when you define your pipeline.

The last step in the pipeline, the `build_sentence` step, has three input parameters:

- The output of product_task. Since this step only produces one output, you can reference it via product_task.output.
- The emoji output of the emoji_task step. See the emoji component defined above where you named the output parameters.
- Similarly, the emoji_text named output from the emoji component. In case your pipeline is passed text that doesn't correspond with an emoji, it'll use this text to construct a sentence.

In [60]:
# Root location under the bucket that is passed to the pipeline job as its
# pipeline_root.
PIPELINE_ROOT = "{}/pipeline_root/".format(BUCKET_URI)
PIPELINE_ROOT

'gs://aip-kamal-anidine-dev-aip/pipeline_root/'

In [61]:
# Intro pipeline: product_name and emoji run on the pipeline inputs, and
# build_sentence consumes their outputs.
# You can change the `text` and `emoji_str` parameters here to update the pipeline output
@dsl.pipeline(name="hello-world", description="An intro pipeline")
def pipeline(text: str = "Vertex Pipelines", emoji_str: str = "sparkles"):
    name_step = product_name(text=text)
    emoji_step = emoji(text=emoji_str)
    # product_name has a single output (.output); emoji exposes named outputs.
    build_sentence(
        product=name_step.output,
        emoji=emoji_step.outputs["emoji"],
        emojitext=emoji_step.outputs["emoji_text"],
    )

import os

# Compile the pipeline function into the job spec file that the pipeline job
# is later created from.
pipeline_package_path = "tmp/intro_pipeline_job.json"
# The relative tmp/ directory is not guaranteed to exist; create it so that
# writing the compiled spec does not fail on a fresh run.
os.makedirs(os.path.dirname(pipeline_package_path), exist_ok=True)

compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path=pipeline_package_path,
)

In [63]:
from google.cloud import aiplatform as aip

# Before initializing, make sure to set the GOOGLE_APPLICATION_CREDENTIALS
# environment variable to the file path of your service account.
aip.init(location=PROJECT_REGION, project=PROJECT_ID)

# Create the pipeline job from the compiled spec, with artifacts rooted at
# PIPELINE_ROOT.
job = aip.PipelineJob(
    template_path="tmp/intro_pipeline_job.json",
    pipeline_root=PIPELINE_ROOT,
    display_name="intro_pipeline_job",
)

# Submit the job.
job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/1052232325248/locations/us-central1/pipelineJobs/hello-world-20230404143035
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1052232325248/locations/us-central1/pipelineJobs/hello-world-20230404143035')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/hello-world-20230404143035?project=1052232325248
