# Kubeflow Pipeline That Uses Vertex AI to Train and Deploy a Model

Demonstrates how to use the Google Cloud Pipeline Components for Kubeflow Pipelines to do the following:

1. Create a Vertex AI Tabular Dataset
2. Run the Training job with AutoML
3. Create an Endpoint
4. Deploy the Model to the Endpoint

In [None]:
# Install or upgrade the Vertex AI SDK, the Google Cloud Pipeline Components, and the KFP SDK.
!pip install --upgrade google-cloud-aiplatform google-cloud-pipeline-components kfp

In [None]:
# Change these values to match your project setup
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]

# define project information manually if the above code didn't work
if PROJECT_ID == "(unset)":
  PROJECT_ID = "[your-project-id]" # @param {type:"string"}

print(PROJECT_ID)

REGION = "us-central1" # @param {type:"string"}

# An existing bucket to store pipeline artifacts.
BUCKET_NAME = "basic-kf-pipelines-mlops-dar" # @param {type:"string"}

# GCP settings
GCS_BUCKET = "gs://mlops-data-sources" # @param {type:"string"}
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline-artifacts"


In [None]:
import google_cloud_pipeline_components

# Sanity check: the printed value should be a valid version number.
installed_version = google_cloud_pipeline_components.__version__
print(installed_version)


## Import the Google Cloud Pipeline Components for a Simple ML Pipeline

In [None]:
import kfp
from kfp import compiler
from kfp.dsl import pipeline
from google.cloud import aiplatform

from google_cloud_pipeline_components.v1.automl.training_job import AutoMLTabularTrainingJobRunOp
from google_cloud_pipeline_components.v1.dataset import TabularDatasetCreateOp
from google_cloud_pipeline_components.v1.endpoint import (EndpointCreateOp,ModelDeployOp)

# Point the Vertex AI SDK at the configured project and region.
aiplatform.init(
    location=REGION,
    project=PROJECT_ID,
)

## Build the Pipeline

1. Create a Vertex AI Tabular Dataset
2. Run the Training job with AutoML
3. Create an Endpoint
4. Deploy the Model to the Endpoint

In [None]:
@pipeline(
    name="flights-delay-prediction-pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def flight_prediction_pipeline():
    """Create a tabular dataset, train an AutoML regression model, and deploy it to a new endpoint."""
    # Every step is pinned to REGION so that the dataset, model, and endpoint
    # are all created in the same location; a model can only be deployed to
    # an endpoint in its own region.

    # Create Vertex AI Dataset from the CSV file in the data-source bucket
    dataset_create_op = TabularDatasetCreateOp(
        project=PROJECT_ID,
        location=REGION,
        display_name="flights-dataset",
        gcs_source=f"{GCS_BUCKET}/flights.csv",
    )

    # Train AutoML Model: regression on the ARR_DELAY column
    training_op = AutoMLTabularTrainingJobRunOp(
        project=PROJECT_ID,
        location=REGION,
        display_name="flights-delay-model",
        optimization_prediction_type="regression",
        dataset=dataset_create_op.outputs["dataset"],
        target_column="ARR_DELAY",
        # 1000 milli node hours == 1 node hour of training budget
        budget_milli_node_hours=1000,
    )

    # Create the endpoint that will serve the trained model
    endpoint_op = EndpointCreateOp(
        project=PROJECT_ID,
        location=REGION,
        display_name="flights-delay-model_endpoint",
    )

    # Deploy the trained model to the endpoint on a single dedicated replica;
    # the task handle is not needed afterwards, hence the throwaway name.
    _ = ModelDeployOp(
        model=training_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        dedicated_resources_machine_type="n1-standard-4",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )


## Compile the Pipeline to a JSON file

In [None]:
# Compile the pipeline function into a local pipeline definition file
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    package_path="flight_prediction_pipeline.json",
    pipeline_func=flight_prediction_pipeline,
)

## Run the Job on Vertex AI Pipelines

In [None]:
# Build the job from the compiled pipeline definition, then run it on
# Vertex AI Pipelines.
job_settings = dict(
    display_name="flights-delay-prediction-pipeline",
    template_path="flight_prediction_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
)
pipeline_job = aiplatform.PipelineJob(**job_settings)

pipeline_job.run()