# Kubeflow Pipeline with a Custom Pipeline

In [2]:
# Install components

!pip3 install -q google-cloud-aiplatform
!pip3 install -q  kfp google-cloud-pipeline-components

In [None]:
# Freshly installed packages only become importable once the kernel restarts.
import os

if not os.environ.get("IS_TESTING"):
    # Not running under automated testing: restart the kernel programmatically
    # (the argument True requests a restart rather than a plain shutdown).
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [3]:
# Finally, check that the packages were installed correctly by printing the
# installed versions of the KFP SDK and of google_cloud_pipeline_components.
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
!python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 1.8.19
google_cloud_pipeline_components version: 1.0.41


In [24]:
import random
import string

def generate_uuid(length: int = 8) -> str:
    """Return a random identifier made of lowercase letters and digits.

    Args:
        length: Number of characters in the identifier (default 8).

    Returns:
        A string of ``length`` characters drawn (with replacement) from
        ``a-z`` and ``0-9``.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


# Identifier generated once per session; used later to make resource names unique.
UUID = generate_uuid()

In [25]:
PROJECT_ID = "[your-project-id]"
# Get your Google Cloud project ID from gcloud
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]

# Setting Region
PROJECT_REGION = "[your-region]"  # @param {type: "string"}
if PROJECT_REGION == "[your-region]":
    PROJECT_REGION = "us-central1"


In [26]:
# Setting Bucket 
BUCKET_URI = f"gs://aip-{PROJECT_ID}-aip"
GENERATE_BUCKET_URI = False  # @param {type:"boolean"}


if GENERATE_BUCKET_URI:
    bucket_name = "gs://aip-{}".format(PROJECT_ID)
    !gsutil mb -p {PROJECT_ID} -l {REGION} {bucket_name}

    # set GCS bucket object TTL to 7 days
    !echo '{"rule":[{"action": {"type": "Delete"},"condition": {"age": 7}}]}' > gcs_lifecycle.tmp
    !gsutil lifecycle set gcs_lifecycle.tmp {bucket_name}
    !rm gcs_lifecycle.tmp

    BUCKET_URI = bucket_name
    print(f"changed BUCKET_URI to {BUCKET_URI} due to GENERATE_BUCKET_URI is True")

if BUCKET_URI == "" or BUCKET_URI is None or BUCKET_URI == "gs://[your-bucket-name]":
    BUCKET_URI = f"gs://aip-{PROJECT_ID}"

In [27]:
# Constants for Vertex AI Pipelines: the Cloud Storage location used as the
# pipeline root, and the local path the compiled pipeline definition is written to.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/flowers"
PACKAGE_PATH = "_".join("tmp/custom training pipeline.json".split(" "))

### Import libraries


In [28]:
import os
import kfp
import google.cloud.aiplatform as aip
from kfp.v2 import compiler
from typing import Any, Dict, List


### Creating a custom model
Consider a pipeline with the following steps:

* Ingest data: This step loads training data into the pipeline.
* Preprocess data: This step preprocesses the ingested training data.
* Train model: This step uses the preprocessed training data to train a model.
* Evaluate model: This step evaluates the trained model.
* Deploy: This step deploys the trained model for predictions.

### Initialize Vertex AI SDK for Python
Initialize the Vertex AI SDK for Python for your project and corresponding bucket.



In [29]:
# Bind the SDK to the configured project, region and staging bucket. Passing the
# region explicitly keeps pipeline jobs in PROJECT_REGION instead of the SDK's
# default location.
aip.init(project=PROJECT_ID, location=PROJECT_REGION, staging_bucket=BUCKET_URI)

In [1]:
# Create and deploy an image classification Model resource using a Dataset resource.

@kfp.dsl.pipeline(name="automl-image-training-v2")
def pipeline(project: str = PROJECT_ID, region: str = PROJECT_REGION):
    """Train an AutoML image classifier on the flowers dataset and deploy it.

    The pipeline has four steps: create an image dataset from the public
    flowers CSV, run an AutoML image training job on it, create an endpoint,
    and deploy the trained model to that endpoint.

    Args:
        project: Google Cloud project in which the resources are created.
        region: Location passed to every step that accepts one, so the dataset,
            the trained model and the endpoint all live in the same region.
    """
    # Imported here so the pipeline definition is self-contained.
    from google_cloud_pipeline_components import aiplatform as gcc_aip
    from google_cloud_pipeline_components.v1.endpoint import (EndpointCreateOp,
                                                              ModelDeployOp)

    # Step 1: create the dataset from the public flowers CSV.
    ds_op = gcc_aip.ImageDatasetCreateOp(
        project=project,
        location=region,
        display_name="flowers",
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
        import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
    )

    # Step 2: train with a 60/20/20 train/validation/test split and a budget of
    # 8000 milli node hours.
    training_job_run_op = gcc_aip.AutoMLImageTrainingJobRunOp(
        project=project,
        location=region,
        display_name="train-automl-flowers",
        prediction_type="classification",
        model_type="CLOUD",
        dataset=ds_op.outputs["dataset"],
        model_display_name="train-automl-flowers",
        training_fraction_split=0.6,
        validation_fraction_split=0.2,
        test_fraction_split=0.2,
        budget_milli_node_hours=8000,
    )

    # Step 3: create the endpoint the model will be served from.
    endpoint_op = EndpointCreateOp(
        project=project,
        location=region,
        display_name="train-automl-flowers",
    )

    # Step 4: deploy the trained model to the endpoint on a single replica.
    ModelDeployOp(
        model=training_job_run_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        automatic_resources_min_replica_count=1,
        automatic_resources_max_replica_count=1,
    )

NameError: name 'kfp' is not defined

In [None]:
# Compile the pipeline into a JSON definition at PACKAGE_PATH.
# PACKAGE_PATH points into a sub-directory, so make sure that directory exists
# before the compiler tries to write the file.
package_dir = os.path.dirname(PACKAGE_PATH)
if package_dir:
    os.makedirs(package_dir, exist_ok=True)

compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path=PACKAGE_PATH
)



In [None]:
# Submit the compiled pipeline to Vertex AI Pipelines and run it.
# The display name carries the session UUID as a suffix.
DISPLAY_NAME = "flowers_" + UUID

job = aip.PipelineJob(
    template_path=PACKAGE_PATH,
    pipeline_root=PIPELINE_ROOT,
    display_name=DISPLAY_NAME,
    enable_caching=False,  # caching of step results is switched off
)
job.run()


Creating PipelineJob
PipelineJob created. Resource name: projects/1052232325248/locations/us-central1/pipelineJobs/automl-image-training-v2-20230404165859
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1052232325248/locations/us-central1/pipelineJobs/automl-image-training-v2-20230404165859')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-image-training-v2-20230404165859?project=1052232325248
PipelineJob projects/1052232325248/locations/us-central1/pipelineJobs/automl-image-training-v2-20230404165859 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/1052232325248/locations/us-central1/pipelineJobs/automl-image-training-v2-20230404165859 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/1052232325248/locations/us-central1/pipelineJobs/automl-image-training-v2-20230404165859 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob