# An end-to-end Vertex Training Pipeline Demonstration

In [2]:
!pip install kfp

Collecting kfp
  Downloading kfp-1.8.11.tar.gz (298 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.6/298.6 KB 8.0 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting PyYAML<6,>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 636.6/636.6 KB 22.6 MB/s eta 0:00:00
Collecting google-cloud-storage<2,>=1.20.0
  Downloading google_cloud_storage-1.44.0-py2.py3-none-any.whl (106 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 106.8/106.8 KB 16.3 MB/s eta 0:00:00
Collecting kubernetes<19,>=8.0.0
  Downloading kubernetes-18.20.0-py2.py3-none-any.whl (1.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 61.5 MB/s eta 0:00:00
Collecting google-api-python-client<2,>=1.7.8
  Downloading google_api_pytho

Finally, check that you have correctly installed the packages. The KFP SDK version should be >=1.6:

In [1]:
# Print the installed KFP SDK version from a `python3` subprocess.
# NOTE(review): `python3` is resolved by the shell, which may not be the same
# interpreter/environment as the notebook kernel — confirm they match.
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

KFP SDK version: 1.8.11


In [1]:
import os
import json
from functools import partial

import kfp
import pprint
import yaml
from jinja2 import Template
from kfp.v2 import dsl
from kfp.v2.compiler import compiler
from kfp.v2.dsl import Dataset
from kfp.v2.google.client import AIPlatformClient

In [2]:
# GCP project settings.
# project_id is rendered into the component templates, interpolated into the
# container image URIs and passed to the pipeline; project_number is only used
# to build the custom-job service-account e-mail.
project_id='petcircle-science-playground'
project_number='9527'

In [3]:
# Artifact Registry location and repository name. Both are rendered into the
# component templates and interpolated into the training/serving image URIs.
af_registry_location='australia-southeast1'
af_registry_name='mlops-vertex-kit'

In [7]:
# Root folder of the component templates; each component is read from
# <components_dir>/<component_name>/component.yaml.jinja.
components_dir='../components/'

In [8]:
def _load_custom_component(project_id: str,
                           af_registry_location: str,
                           af_registry_name: str,
                           components_dir: str,
                           component_name: str):
    """Render a component's Jinja template and load it as a KFP component.

    The template is read from
    ``<components_dir>/<component_name>/component.yaml.jinja``, rendered with
    the project and registry values, and the resulting text is handed to
    ``kfp.components.load_component_from_text``.

    Args:
        project_id: Value substituted for ``project_id`` in the template.
        af_registry_location: Value substituted for ``af_registry_location``.
        af_registry_name: Value substituted for ``af_registry_name``.
        components_dir: Directory that contains one sub-folder per component.
        component_name: Name of the component sub-folder to load.

    Returns:
        The object returned by ``kfp.components.load_component_from_text``
        for the rendered component text.
    """
    template_path = os.path.join(
        components_dir, component_name, 'component.yaml.jinja')

    with open(template_path, 'r') as template_file:
        raw_template = template_file.read()

    # Only these three values are exposed to the template.
    render_context = {
        'project_id': project_id,
        'af_registry_location': af_registry_location,
        'af_registry_name': af_registry_name,
    }
    rendered_spec = Template(raw_template).render(**render_context)

    return kfp.components.load_component_from_text(rendered_spec)

# Pre-bind the project/registry/directory settings so that callers only need
# to supply `component_name`.
load_custom_component = partial(_load_custom_component,
                                project_id=project_id,
                                af_registry_location=af_registry_location,
                                af_registry_name=af_registry_name,
                                components_dir=components_dir)

In [9]:
# Load one component factory per template folder under `components_dir`.
# Only preprocess_op, train_op and check_metrics_op are used by the pipeline
# defined in this notebook; the remaining ops are loaded but not referenced
# in the cells shown here.
preprocess_op = load_custom_component(component_name='data_preprocess')
train_op = load_custom_component(component_name='train_model')
check_metrics_op = load_custom_component(component_name='check_model_metrics')
create_endpoint_op = load_custom_component(component_name='create_endpoint')
test_endpoint_op = load_custom_component(component_name='test_endpoint')
deploy_model_op = load_custom_component(component_name='deploy_model')
monitor_model_op = load_custom_component(component_name='monitor_model')
hpo_op = load_custom_component(component_name='hpo')

Then define the pipeline using the following function:

In [11]:
# Region used when creating the pipeline API client, and the GCS root passed
# as `pipeline_root` when the run is submitted.
pipeline_region='us-central1'
pipeline_root='gs://vertex_pipeline_demo_root_hy/pipeline_root'

In [12]:
# Data-related pipeline parameters; all of these are forwarded through
# `pipeline_params` when the run is submitted.
data_region='us-central1'
# Previous input table, kept for reference:
#input_dataset_uri='bq://petcircle-science-playground.vertex_pipeline_demo.banknote_authentication'
input_dataset_uri='bq://petcircle-science-playground.datalake.review_product_2013_2022'
gcs_data_output_folder='gs://vertex_pipeline_demo_root_hy/datasets/training'
# Schema string passed to the training step as `input_data_schema`
# (appears to be `name:type` pairs separated by `;` — confirm against the component).
training_data_schema='reviewtext:string;Class:int'

data_pipeline_root='gs://vertex_pipeline_demo_root_hy/compute_root'

In [13]:
# Container image URIs of the form
# <af_registry_location>-docker.pkg.dev/<project_id>/<af_registry_name>/<image>:latest,
# and the service-account e-mail (built from the project number) that is
# passed to the training step as `custom_job_service_account`.
training_container_image_uri=f'{af_registry_location}-docker.pkg.dev/{project_id}/{af_registry_name}/training:latest'
serving_container_image_uri=f'{af_registry_location}-docker.pkg.dev/{project_id}/{af_registry_name}/serving:latest'
custom_job_service_account=f'{project_number}-compute@developer.gserviceaccount.com'

In [14]:
# Display the derived values for a quick sanity check.
# NOTE(review): the recorded output shows a `us-central1-docker.pkg.dev` host,
# but af_registry_location is set to 'australia-southeast1' in this notebook;
# the output looks stale — restart the kernel and re-run to confirm.
training_container_image_uri,serving_container_image_uri,custom_job_service_account

('us-central1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/training:latest',
 'us-central1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/serving:latest',
 '9527-compute@developer.gserviceaccount.com')

In [15]:
@dsl.pipeline(name='training-pipeline-template')
def pipeline(project_id: str,
             data_region: str,
             gcs_data_output_folder: str,
             input_dataset_uri: str,
             training_data_schema: str,
             data_pipeline_root: str,
             
             training_container_image_uri: str,
             serving_container_image_uri: str,
             custom_job_service_account: str,
             hptune_region: str,
             hp_config_suggestions_per_request: int,
             hp_config_max_trials: int,
             
             metrics_name: str,
             metrics_threshold: float,
             
             endpoint_machine_type: str,
             endpoint_min_replica_count: int,
             endpoint_max_replica_count: int,
             endpoint_test_instances: str,
             
             output_model_file_name: str = 'model.h5',
             machine_type: str = "n1-standard-8",
             accelerator_count: int = 0,
             accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
             vpc_network: str = "",
             enable_model_monitoring: str = 'False',
            task_type: str = 'training'):
    """Training pipeline: preprocess the data, train a model, check its metrics.

    Three steps are wired together: the preprocess step's ``output_dataset``
    feeds the training step, and the training step's ``basic_metrics`` feeds
    the metrics check.

    NOTE(review): ``input_dataset_uri``, ``endpoint_machine_type``,
    ``endpoint_min_replica_count``, ``endpoint_max_replica_count``,
    ``endpoint_test_instances`` and ``enable_model_monitoring`` are accepted
    but not referenced anywhere in this function body. In particular, the
    preprocess step is not given ``input_dataset_uri`` — confirm whether the
    component obtains its input some other way or whether this is an omission.

    Args:
        project_id: Forwarded to the preprocess and training steps.
        data_region: Forwarded to the preprocess and training steps.
        gcs_data_output_folder: Passed to preprocessing as ``gcs_output_folder``.
        input_dataset_uri: Not referenced in this function body.
        training_data_schema: Passed to training as ``input_data_schema``.
        data_pipeline_root: Forwarded to the training step.
        training_container_image_uri: Forwarded to the training step.
        serving_container_image_uri: Forwarded to the training step.
        custom_job_service_account: Forwarded to the training step.
        hptune_region: Forwarded to the training step.
        hp_config_suggestions_per_request: Forwarded to the training step.
        hp_config_max_trials: Forwarded to the training step.
        metrics_name: Forwarded to the metrics-check step.
        metrics_threshold: Forwarded to the metrics-check step.
        endpoint_machine_type: Not referenced in this function body.
        endpoint_min_replica_count: Not referenced in this function body.
        endpoint_max_replica_count: Not referenced in this function body.
        endpoint_test_instances: Not referenced in this function body.
        output_model_file_name: Forwarded to the training step.
        machine_type: Forwarded to the training step.
        accelerator_count: Forwarded to the training step.
        accelerator_type: Forwarded to the training step.
        vpc_network: Forwarded to the training step.
        enable_model_monitoring: Not referenced in this function body.
        task_type: Forwarded to the preprocess step.
    """

    # Step 1: preprocessing; the output format is fixed to CSV here.
    preprocess_task = preprocess_op(
      project_id=project_id,
      data_region=data_region,
      gcs_output_folder=gcs_data_output_folder,
      gcs_output_format="CSV",
      task_type=task_type)

    # Step 2: training, consuming the dataset produced by preprocessing.
    train_task = train_op(
      project_id=project_id,
      data_region=data_region,
      data_pipeline_root=data_pipeline_root,
      input_data_schema=training_data_schema,
      training_container_image_uri=training_container_image_uri,
      serving_container_image_uri=serving_container_image_uri,
      custom_job_service_account=custom_job_service_account,
      input_dataset=preprocess_task.outputs['output_dataset'],
      output_model_file_name=output_model_file_name,
      machine_type=machine_type,
      accelerator_count=accelerator_count,
      accelerator_type=accelerator_type,
      hptune_region=hptune_region,
      hp_config_max_trials=hp_config_max_trials,
      hp_config_suggestions_per_request=hp_config_suggestions_per_request,
      vpc_network=vpc_network)
    
    # Step 3: metrics check on the training step's `basic_metrics` output.
    check_metrics_task = check_metrics_op(
      metrics_name=metrics_name,
      metrics_threshold=metrics_threshold,
      basic_metrics=train_task.outputs['basic_metrics'])

### Compile and run the end-to-end ML pipeline
With our full pipeline defined, it's time to compile it:

In [16]:
# Compile the pipeline function into a job-spec file; the same path is passed
# as `job_spec_path` when the run is submitted below.
compiler.Compiler().compile(
    pipeline_func=pipeline, 
    package_path="training_pipeline_job.json"
)



Next, instantiate an API client:

In [17]:
# Client used to submit the compiled job spec, bound to the project and
# pipeline region configured earlier in the notebook.
api_client = AIPlatformClient(
    project_id=project_id,
    region=pipeline_region)



Next, kick off a pipeline run:

In [18]:
# Sample review instances, serialized to a JSON string and passed to the
# pipeline as `endpoint_test_instances`.
# NOTE(review): `Class` values are strings here while `training_data_schema`
# declares `Class:int` — confirm which type the consumer expects.
test_instances = json.dumps([
    {'reviewtext': review_text, 'Class': class_label}
    for review_text, class_label in (
        ('pet circle is not recommended', '0'),
        ('pet circle is highly recommended', '1'),
        ('think twice before you buy', '0'),
        ('great product. will buy again.', '1'),
    )
])
test_instances

'[{"reviewtext": "pet circle is not recommended", "Class": "0"}, {"reviewtext": "pet circle is highly recommended", "Class": "1"}, {"reviewtext": "think twice before you buy", "Class": "0"}, {"reviewtext": "great product. will buy again.", "Class": "1"}]'

In [19]:
# Runtime parameter values for the compiled pipeline; the keys correspond to
# the parameters of `pipeline`.
pipeline_params = {
    'project_id': project_id,
    'data_region': data_region,
    'gcs_data_output_folder': gcs_data_output_folder,
    # Same value as the pipeline's default for this parameter.
    'output_model_file_name': 'model.h5',
    'input_dataset_uri': input_dataset_uri,
    'training_data_schema': training_data_schema,
    'data_pipeline_root': data_pipeline_root,
    
    'training_container_image_uri': training_container_image_uri,
    'serving_container_image_uri': serving_container_image_uri,
    'custom_job_service_account': custom_job_service_account,
    # NOTE(review): differs from data_region / pipeline_region ('us-central1') — confirm this is intended.
    'hptune_region':"asia-east1",
    'hp_config_suggestions_per_request': 5,
    'hp_config_max_trials': 30,
    
    'metrics_name': 'au_prc',
    'metrics_threshold': 0.4,
    
    # The endpoint_* values are accepted by `pipeline` but not referenced in its body.
    'endpoint_machine_type': 'n1-standard-4',
    'endpoint_min_replica_count': 1,
    'endpoint_max_replica_count': 1,
    'endpoint_test_instances': test_instances
}

# Submit a run of the compiled job spec with caching disabled.
response = api_client.create_run_from_job_spec(
    job_spec_path="training_pipeline_job.json", 
    pipeline_root=pipeline_root,
    parameter_values=pipeline_params,
    enable_caching=False)

In [27]:
# Display a few configuration values.
# NOTE(review): gcs_data_output_folder appears twice in this tuple — possibly
# one entry was meant to be a different variable.
gcs_data_output_folder, serving_container_image_uri, gcs_data_output_folder

('gs://vertex_pipeline_demo_root_hy/datasets/training',
 'us-central1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/serving:latest',
 'gs://vertex_pipeline_demo_root_hy/datasets/training')

In [2]:
from google.cloud import aiplatform

# These values repeat settings defined earlier in the notebook as literals.
# NOTE(review): the low execution count suggests this cell was run in a fresh
# kernel — confirm; if not needed, reuse the earlier variables instead.
project_id='petcircle-science-playground'
data_region='us-central1'
training_container_image_uri='us-central1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/training:latest'
data_pipeline_root = 'gs://vertex_pipeline_demo_root_hy/compute_root'

# Initialise the SDK with the project, region and staging bucket.
aiplatform.init(
  project=project_id,
  location=data_region,
  staging_bucket=data_pipeline_root)


# Construct a custom-container training job that uses the training image.
# The job object is only created here; no cell shown in this notebook runs it.
# NOTE(review): the display name is 'batch_prediction' although this is a
# training-job class — confirm the name is intended.
job = aiplatform.CustomContainerTrainingJob(
  display_name='batch_prediction',
  location=data_region,
  container_uri=training_container_image_uri)

## Test: timestamp-based model names

In [29]:
from datetime import datetime
# Current local time as an integer number of milliseconds since the epoch.
int(datetime.now().timestamp() * 1000)

1646958858507

In [28]:
# Current local time formatted as YYYYMMDDHHMMSS.
datetime.now().strftime('%Y%m%d%H%M%S')

'20220311003414'

In [31]:
# Example of a model name with the YYYYMMDDHHMMSS timestamp appended.
f'sentiment-analysis-model-{datetime.now().strftime("%Y%m%d%H%M%S")}'

'sentiment-analysis-model-20220311003532'