# An end-to-end Vertex Training Pipeline Demonstration

In [1]:
!pip install kfp==1.8.11

Collecting kfp==1.8.11
  Downloading kfp-1.8.11.tar.gz (298 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.6/298.6 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting PyYAML<6,>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m636.6/636.6 kB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-cloud-storage<2,>=1.20.0
  Downloading google_cloud_storage-1.44.0-py2.py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.8/106.8 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting kubernetes<19,>=8.0.0
  Downloading kubernetes-18.20.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-api-python-client<2,>=1.7.8
  Downloading google_api_python

Next, check that the packages were installed correctly. The KFP SDK version should be >=1.6:

In [2]:
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

KFP SDK version: 1.8.11


In [4]:
import os
import json
from functools import partial

import kfp
import pprint
import yaml
from jinja2 import Template
from kfp.v2 import dsl
from kfp.v2.compiler import compiler
from kfp.v2.dsl import Dataset
from kfp.v2.google.client import AIPlatformClient

from google.cloud import aiplatform, firestore
from datetime import datetime

In [2]:
# GCP project used throughout the notebook, and its numeric project number
# (the number is used further down to build the compute service-account e-mail).
project_id='petcircle-science-playground'
project_number='734227425472'

In [3]:
# Registry location and repository name; passed into the component templates
# and used below to build the `*-docker.pkg.dev` container image URIs.
af_registry_location='australia-southeast1'
af_registry_name='mlops-vertex-kit'

In [4]:
# Root folder of the custom components; each one is read from
# `<components_dir>/<component_name>/component.yaml.jinja`.
components_dir='../components/'

In [5]:
def _load_custom_component(project_id: str,
                           af_registry_location: str,
                           af_registry_name: str,
                           components_dir: str,
                           component_name: str):
    """Render a component's Jinja template and load the result as a KFP component.

    The template is read from
    `<components_dir>/<component_name>/component.yaml.jinja` and rendered with
    `project_id`, `af_registry_location` and `af_registry_name`.

    Args:
        project_id: Value substituted for `project_id` in the template.
        af_registry_location: Value substituted for `af_registry_location`.
        af_registry_name: Value substituted for `af_registry_name`.
        components_dir: Folder that contains one sub-folder per component.
        component_name: Name of the component sub-folder to load.

    Returns:
        Whatever `kfp.components.load_component_from_text` returns for the
        rendered component text.
    """
    template_file = os.path.join(components_dir, component_name, 'component.yaml.jinja')
    with open(template_file, 'r') as template_handle:
        raw_template = template_handle.read()

    render_context = {
        'project_id': project_id,
        'af_registry_location': af_registry_location,
        'af_registry_name': af_registry_name,
    }
    rendered_spec = Template(raw_template).render(**render_context)
    return kfp.components.load_component_from_text(rendered_spec)


# Pre-bind the project / registry / folder settings so that callers only need
# to pass `component_name=...`.
load_custom_component = partial(
    _load_custom_component,
    project_id=project_id,
    af_registry_location=af_registry_location,
    af_registry_name=af_registry_name,
    components_dir=components_dir,
)

In [6]:
# Load every custom component used by the pipelines below. Each call renders
# `<components_dir>/<component_name>/component.yaml.jinja` and returns the
# result of `kfp.components.load_component_from_text`.
preprocess_op = load_custom_component(component_name='data_preprocess')
train_op = load_custom_component(component_name='train_model')
check_metrics_op = load_custom_component(component_name='check_model_metrics')
create_endpoint_op = load_custom_component(component_name='create_endpoint')
test_endpoint_op = load_custom_component(component_name='test_endpoint')
deploy_model_op = load_custom_component(component_name='deploy_model')
monitor_model_op = load_custom_component(component_name='monitor_model')

Then define the pipeline using the following function:

In [7]:
# Region given to the pipeline API client, and the GCS folder passed as
# `pipeline_root` when a run is submitted.
pipeline_region='australia-southeast1'
pipeline_root='gs://vertex_pipeline_demo_root_hy_syd/pipeline_root'

In [8]:
# Data settings that are passed to the pipelines as parameters.
data_region='australia-southeast1'
# Previous input table, kept for reference:
#input_dataset_uri='bq://petcircle-science-playground.vertex_pipeline_demo.banknote_authentication'
input_dataset_uri='bq://petcircle-science-playground.datalake.review_product_2013_2022'
gcs_data_output_folder='gs://vertex_pipeline_demo_root_hy_syd/datasets/training'
# Schema string written as `name:type` pairs separated by `;`.
training_data_schema='reviewtext:string;Class:int'

# Passed to the pipelines as `data_pipeline_root`; the SDK cells further down
# also use it as staging bucket and base output directory.
data_pipeline_root='gs://vertex_pipeline_demo_root_hy_syd/compute_root'

In [9]:
# Container image URIs (training, serving, HPO) built from the registry
# settings above, and the service-account e-mail derived from the project number.
training_container_image_uri=f'{af_registry_location}-docker.pkg.dev/{project_id}/{af_registry_name}/training:latest'
serving_container_image_uri=f'{af_registry_location}-docker.pkg.dev/{project_id}/{af_registry_name}/batch_prediction:latest'
hpo_container_image_uri=f'{af_registry_location}-docker.pkg.dev/{project_id}/{af_registry_name}/hpo:latest'
custom_job_service_account=f'{project_number}-compute@developer.gserviceaccount.com'

In [10]:
def save_to_firestore(project_id, solution_name, as_at_date, warehouse, **params):
    """Merge `params` into the Firestore document for one solution/date/warehouse.

    The target document is
    `models/<solution_name>/HPO/<as_at_date>/<warehouse>/params`; the write uses
    `merge=True`, so fields already stored there and not present in `params`
    are left untouched.

    Args:
        project_id: Project passed to `firestore.Client`.
        solution_name: Document id under the `models` collection.
        as_at_date: Document id under the `HPO` sub-collection.
        warehouse: Name of the sub-collection that holds the `params` document.
        **params: Fields to store.
    """
    client = firestore.Client(project=project_id)
    params_doc = (
        client.collection("models")
        .document(solution_name)
        .collection("HPO")
        .document(as_at_date)
        .collection(warehouse)
        .document("params")
    )
    params_doc.set(params, merge=True)

In [46]:
# Seed Firestore with the same sample HPO result for every warehouse/date pair.
warehouses = ['EC', 'MEL']
dates = ['2023-01-15', '2023-01-01']
hpo_best_dict = {"batch_size": 32, "lr": 0.0001, "val_balanced_acc": 0.886807382106781}
solution_name = 'hpo-pipeline-template'

# Write through `save_to_firestore` instead of a global `db` client: no cell
# defines `db` at module scope, so the previous loop only worked with leftover
# kernel state and failed after "Restart Kernel -> Run All".
for warehouse in warehouses:
    for date in dates:
        save_to_firestore(project_id, solution_name, date, warehouse, **hpo_best_dict)

In [None]:
def query_from_firestore(project_id, solution_name='hpo-pipeline-template'):
    """Return the stored HPO parameters of the latest snapshot, keyed by warehouse.

    Looks at `models/<solution_name>/HPO`, picks the document with the greatest
    id (the ids written elsewhere in this notebook are `YYYY-MM-DD` strings, so
    the greatest id is the most recent date), and reads the `params` document
    of every sub-collection (one per warehouse) below it.

    Args:
        project_id: Project passed to `firestore.Client`.
        solution_name: Document id under the `models` collection.

    Returns:
        dict mapping warehouse name -> content of its `params` document
        (as returned by `DocumentSnapshot.to_dict()`). An empty dict is
        returned when no HPO snapshot exists yet.
    """
    db = firestore.Client(project=project_id)
    hpo_collection = db.collection('models').document(solution_name).collection("HPO")

    snapshot_ids = [doc.id for doc in hpo_collection.list_documents()]
    if not snapshot_ids:
        # Nothing stored yet; max() on an empty list would raise ValueError.
        return {}
    latest_snapshot = hpo_collection.document(max(snapshot_ids))

    hpo_warehouses = {}
    for warehouse_collection in latest_snapshot.collections():
        params_doc = warehouse_collection.document("params")
        hpo_warehouses[warehouse_collection.id] = params_doc.get().to_dict()

    return hpo_warehouses

In [14]:
training_container_image_uri,serving_container_image_uri,custom_job_service_account, hpo_container_image_uri

('australia-southeast1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/training:latest',
 'australia-southeast1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/batch_prediction:latest',
 '734227425472-compute@developer.gserviceaccount.com',
 'australia-southeast1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/hpo:latest')

In [15]:
from google_cloud_pipeline_components.experimental import hyperparameter_tuning_job
from google_cloud_pipeline_components.v1.hyperparameter_tuning_job import HyperparameterTuningJobRunOp
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.types import artifact_types
from kfp.v2.components import importer_node

In [45]:
@dsl.pipeline(name='training-pipeline-template')
def pipeline(project_id: str,
             data_region: str,
             gcs_data_output_folder: str,
             input_dataset_uri: str,
             training_data_schema: str,
             data_pipeline_root: str,
             
             training_container_image_uri: str,
             serving_container_image_uri: str,
             custom_job_service_account: str,
             hptune_region: str,
             hp_config_suggestions_per_request: int,
             hp_config_max_trials: int,
             
             metrics_name: str,
             metrics_threshold: float,
             
             endpoint_machine_type: str,
             endpoint_min_replica_count: int,
             endpoint_max_replica_count: int,
             endpoint_test_instances: str,
             
             output_model_file_name: str = 'model.h5',
             machine_type: str = "n1-standard-8",
             accelerator_count: int = 0,
             accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
             vpc_network: str = "",
             enable_model_monitoring: str = 'False',
            task_type: str = 'training'):
    """Training pipeline: preprocess the data, train a model, check its metrics.

    Steps wired in this function:
      1. `preprocess_op` writes the dataset as CSV to `gcs_data_output_folder`.
      2. `train_op` consumes the `output_dataset` output of step 1.
      3. `check_metrics_op` consumes the `basic_metrics` output of step 2.

    NOTE(review): `input_dataset_uri`, the `endpoint_*` parameters and
    `enable_model_monitoring` are accepted but not referenced in this body.
    NOTE(review): later cells define other functions that are also named
    `pipeline`; once they run, this definition is shadowed.
    """

    # Step 1: data preprocessing.
    preprocess_task = preprocess_op(
      project_id=project_id,
      data_region=data_region,
      gcs_output_folder=gcs_data_output_folder,
      gcs_output_format="CSV",
      task_type=task_type)

    # Step 2: model training on the preprocessed dataset.
    train_task = train_op(
      project_id=project_id,
      data_region=data_region,
      data_pipeline_root=data_pipeline_root,
      input_data_schema=training_data_schema,
      training_container_image_uri=training_container_image_uri,
      serving_container_image_uri=serving_container_image_uri,
      custom_job_service_account=custom_job_service_account,
      input_dataset=preprocess_task.outputs['output_dataset'],
      output_model_file_name=output_model_file_name,
      machine_type=machine_type,
      accelerator_count=accelerator_count,
      accelerator_type=accelerator_type,
      hptune_region=hptune_region,
      hp_config_max_trials=hp_config_max_trials,
      hp_config_suggestions_per_request=hp_config_suggestions_per_request,
      vpc_network=vpc_network)
    
    # Step 3: compare the training metrics against `metrics_threshold`.
    check_metrics_task = check_metrics_op(
      metrics_name=metrics_name,
      metrics_threshold=metrics_threshold,
      basic_metrics=train_task.outputs['basic_metrics'])

In [3]:
from kfp.v2.dsl import component
from kfp.v2.dsl import Dataset, Input, Metrics, Model, Output

@component
def worker_pool_specs(project_id: str,
    data_region: str,
    data_pipeline_root: str,
    hpo_container_image_uri: str,
    custom_job_service_account: str,
    warehouse: str,
   # input_dataset: Input[Dataset]
                     ) -> list:
    """Build the worker pool spec list for one warehouse's HPO job.

    Args:
        project_id: Not used in the body; kept so the component interface
            stays unchanged.
        data_region: Not used in the body (see `project_id`).
        data_pipeline_root: Not used in the body (see `project_id`).
        hpo_container_image_uri: Image placed in `container_spec.image_uri`.
        custom_job_service_account: Not used in the body (see `project_id`).
        warehouse: Forwarded to the container as `--warehouse=<warehouse>`.

    Returns:
        A one-element list holding a worker pool spec dict with a single
        `n1-standard-4` replica running `hpo_container_image_uri`.
    """
    # TODO: the training data URI is hard-coded; the commented-out
    # `input_dataset` parameter suggests it was meant to be
    # "--training_data_uri=" + str(input_dataset.uri).
    container_args = [
        "--warehouse=" + warehouse,
        "--training_data_uri=gs://vertex_pipeline_demo_root_hy_syd/datasets/training/processed_data-20230118012508.csv",
    ]

    # Machine type, replica count and Docker image of the single worker pool.
    # Returned directly, rather than via a local that shadows the function name.
    return [
        {
            "machine_spec": {
                "machine_type": "n1-standard-4",
            },
            "replica_count": 1,
            "container_spec": {"image_uri": hpo_container_image_uri, "args": container_args},
        }
    ]

@component(packages_to_install=['google-cloud-firestore==2.3'])
def best_hpo_to_args(hpo_best: str,
                    project_id: str,
                    solution_name: str,
                    as_at_date: str,
                    warehouse: str) -> str:
    """Flatten the best HPO trial, persist it to Firestore and return it as JSON.

    Args:
        hpo_best: The best trial as a string. JSON is expected; a Python-repr
            dict string (single-quoted) is still accepted as a fallback.
        project_id: Project passed to `firestore.Client`.
        solution_name: Document id under the `models` collection.
        as_at_date: Document id under the `HPO` sub-collection.
        warehouse: Name of the sub-collection that holds the `params` document.

    Returns:
        JSON object string mapping each `parameterId` and each final
        `metricId` of the trial to its value.

    Raises:
        KeyError: if the trial has no `parameters` or no
            `finalMeasurement.metrics` entry.
    """
    # Imports are kept inside the function body, as in the original component.
    import ast
    import json
    from google.cloud import firestore

    try:
        best_trial = json.loads(hpo_best)
    except json.JSONDecodeError:
        # Blindly replacing ' with " corrupts values that contain an apostrophe
        # and cannot handle True/False/None; parse the Python repr properly.
        best_trial = ast.literal_eval(hpo_best)

    hpo_best_dict = {p['parameterId']: p['value'] for p in best_trial['parameters']}
    hpo_best_dict.update(
        {m['metricId']: m['value'] for m in best_trial['finalMeasurement']['metrics']})

    db = firestore.Client(project=project_id)
    db.collection("models").document(solution_name).collection("HPO").document(
        as_at_date).collection(warehouse).document("params").set(hpo_best_dict, merge=True)

    # json.dumps always yields valid JSON, which str(dict) + quote replacement
    # does not guarantee.
    return json.dumps(hpo_best_dict)

def hpo_warehouse(project_id,
                  data_region,
                  data_pipeline_root,
                  display_name,
                  metric_spec,
                  parameter_spec,
                  warehouse,
                  gcp_resources
                  ):
    """Add the "fetch trials -> pick best -> persist" steps for one warehouse.

    Must be called from inside a pipeline function, because it instantiates
    pipeline tasks.

    Args:
        project_id: Forwarded to `best_hpo_to_args`.
        data_region: Only needed by the disabled tuning-job launch (see TODO).
        data_pipeline_root: Only needed by the disabled tuning-job launch.
        display_name: Used as `solution_name` when the best trial is stored.
        metric_spec: Serialized study metrics, passed to `GetBestTrialOp`.
        parameter_spec: Only needed by the disabled tuning-job launch.
        warehouse: Warehouse code the results are stored under.
        gcp_resources: `gcp_resources` JSON string that identifies an existing
            hyperparameter tuning job.

    Returns:
        The `best_hpo_to_args` task, so callers can read `.output` or order
        other tasks after it. (Previously the plain `warehouse` string was
        returned, on which `.output` does not exist.)
    """
    # TODO: launching the tuning job from the pipeline is currently disabled;
    # an already-finished job is referenced through `gcp_resources` instead.
    # Intended wiring, kept for reference:
    #     worker_pool_specs_op = worker_pool_specs(
    #         project_id=project_id,
    #         data_region=data_region,
    #         data_pipeline_root=data_pipeline_root,
    #         hpo_container_image_uri=hpo_container_image_uri,
    #         custom_job_service_account=custom_job_service_account,
    #         warehouse=warehouse)
    #     tuning_op = HyperparameterTuningJobRunOp(
    #         display_name=display_name+'-'+warehouse,
    #         project=project_id,
    #         location=data_region,
    #         worker_pool_specs=worker_pool_specs_op.output,
    #         study_spec_metrics=metric_spec,
    #         study_spec_parameters=parameter_spec,
    #         max_trial_count=4,
    #         parallel_trial_count=2,
    #         base_output_directory=data_pipeline_root,
    #         study_spec_algorithm='GRID_SEARCH')
    #     ...then pass gcp_resources=tuning_op.outputs["gcp_resources"] below.
    trials_op = hyperparameter_tuning_job.GetTrialsOp(
        gcp_resources=gcp_resources
    )

    best_trial_op = hyperparameter_tuning_job.GetBestTrialOp(
        trials=trials_op.output, study_spec_metrics=metric_spec
    )

    best_hpo_to_args_op = best_hpo_to_args(
        hpo_best=best_trial_op.output,
        project_id=project_id,
        solution_name=display_name,
        # Evaluated when the pipeline function is executed for compilation,
        # not when the pipeline runs.
        as_at_date=datetime.now().strftime('%Y-%m-%d'),
        warehouse=warehouse)
    return best_hpo_to_args_op


@dsl.pipeline(name='hpo-pipeline-template')
def pipeline(project_id: str,
             data_region: str,
             gcs_data_output_folder: str,
             input_dataset_uri: str,
             training_data_schema: str,
             data_pipeline_root: str,
             
             training_container_image_uri: str,
             serving_container_image_uri: str,
             custom_job_service_account: str,
             hptune_region: str,
             hp_config_suggestions_per_request: int,
             hp_config_max_trials: int,
             
             metrics_name: str,
             metrics_threshold: float,
             
             endpoint_machine_type: str,
             endpoint_min_replica_count: int,
             endpoint_max_replica_count: int,
             endpoint_test_instances: str,
             
             output_model_file_name: str = 'model.h5',
             machine_type: str = "n1-standard-8",
             accelerator_count: int = 0,
             accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
             vpc_network: str = "",
             enable_model_monitoring: str = 'False',
            task_type: str = 'training'):
    """HPO pipeline: preprocess, collect the best trial per warehouse, then train.

    For each warehouse the best trial of an existing hyperparameter tuning job
    is fetched and stored via `hpo_warehouse`; the training task is scheduled
    after all of those tasks have finished.

    NOTE(review): `input_dataset_uri`, `metrics_*`, the `endpoint_*` parameters
    and `enable_model_monitoring` are accepted but not referenced in this body.
    """
    display_name = 'hpo-pipeline-template'
    metric_spec = hyperparameter_tuning_job.serialize_metrics({"val_balanced_acc": "maximize"})
    parameter_spec = hyperparameter_tuning_job.serialize_parameters(
        {
            "batch_size": aiplatform.hyperparameter_tuning.DiscreteParameterSpec(
                values=[32, 64], scale=None
            ),
            "lr": aiplatform.hyperparameter_tuning.DiscreteParameterSpec(
                values=[0.0001, 0.0002], scale=None
            ),
        }
    )

    # `task_type` is no longer overwritten with a constant; its default is
    # still 'training', so default runs behave as before.
    preprocess_task = preprocess_op(
      project_id=project_id,
      data_region=data_region,
      gcs_output_folder=gcs_data_output_folder,
      gcs_output_format="CSV",
      task_type=task_type)

    # Existing tuning jobs (one per warehouse) whose trials are read back.
    existing_tuning_jobs = {
        "EC": '{"resources":[{"resourceType":"HyperparameterTuningJob","resourceUri":"https://australia-southeast1-aiplatform.googleapis.com/v1/projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/4881167522302263296"}]}',
        "MEL": '{"resources":[{"resourceType":"HyperparameterTuningJob","resourceUri":"https://australia-southeast1-aiplatform.googleapis.com/v1/projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/2070921354823073792"}]}',
    }
    hpo_tasks = [
        hpo_warehouse(project_id,
                      data_region,
                      data_pipeline_root,
                      display_name,
                      metric_spec,
                      parameter_spec,
                      warehouse,
                      gcp_resources)
        for warehouse, gcp_resources in existing_tuning_jobs.items()
    ]

    train_task = train_op(
      project_id=project_id,
      data_region=data_region,
      data_pipeline_root=data_pipeline_root,
      input_data_schema=training_data_schema,
      training_container_image_uri=training_container_image_uri,
      serving_container_image_uri=serving_container_image_uri,
      custom_job_service_account=custom_job_service_account,
      input_dataset=preprocess_task.outputs['output_dataset'],
      output_model_file_name=output_model_file_name,
      machine_type=machine_type,
      accelerator_count=accelerator_count,
      accelerator_type=accelerator_type,
      hptune_region=hptune_region,
      hp_config_max_trials=hp_config_max_trials,
      hp_config_suggestions_per_request=hp_config_suggestions_per_request,
      vpc_network=vpc_network,
      # TODO: pass the best parameters, e.g. train_additional_args=hpo_tasks[0].output
    )
    # Train only after every per-warehouse HPO task has finished. The previous
    # `dsl.Condition(len([...]) == 2)` handed a plain Python bool (computed at
    # compile time) to dsl.Condition, which is not a pipeline condition; an
    # explicit ordering dependency expresses the intent.
    train_task.after(*hpo_tasks)

NameError: name 'dsl' is not defined

### Compile and run the end-to-end ML pipeline
With our full pipeline defined, it's time to compile it:

In [82]:
# Compile the most recently defined `pipeline` function into a job spec file.
compiler.Compiler().compile(
    pipeline_func=pipeline, 
    package_path="training_pipeline_job.json"
)

Next, instantiate an API client:

In [83]:
# Client used to submit compiled job specs, bound to the project and pipeline region.
api_client = AIPlatformClient(
    project_id=project_id,
    region=pipeline_region)

Next, kick off a pipeline run:

In [78]:
# Sample (review text, label) pairs, serialized to the JSON string that is
# passed to the pipeline as `endpoint_test_instances`.
# NOTE(review): labels are strings here while `training_data_schema` declares
# `Class:int` - confirm this is what the endpoint test expects.
_sample_reviews = [
    ('pet circle is not recommended', "0"),
    ('pet circle is highly recommended', "1"),
    ('think twice before you buy', "0"),
    ('great product. will buy again.', "1"),
]
test_instances = json.dumps(
    [{'reviewtext': review_text, "Class": label} for review_text, label in _sample_reviews]
)

# Parameter values for the pipeline run; the keys are parameter names of the
# `pipeline` function.
pipeline_params = {
    # Data / preprocessing settings.
    'project_id': project_id,
    'data_region': data_region,
    'gcs_data_output_folder': gcs_data_output_folder,
    'output_model_file_name': 'model.h5',
    'input_dataset_uri': input_dataset_uri,
    'training_data_schema': training_data_schema,
    'data_pipeline_root': data_pipeline_root,
    
    # Training / hyperparameter tuning settings.
    'training_container_image_uri': training_container_image_uri,
    'serving_container_image_uri': serving_container_image_uri,
    'custom_job_service_account': custom_job_service_account,
    'hptune_region':"asia-east1",
    'hp_config_suggestions_per_request': 5,
    'hp_config_max_trials': 30,
    
    # Metric name and threshold passed to the pipeline.
    'metrics_name': 'au_prc',
    'metrics_threshold': 0.3,
    
    # Endpoint settings.
    'endpoint_machine_type': 'n1-standard-4',
    'endpoint_min_replica_count': 1,
    'endpoint_max_replica_count': 1,
    'endpoint_test_instances': test_instances
}

In [88]:
# Submit the compiled job spec as a new pipeline run, with caching enabled.
response = api_client.create_run_from_job_spec(
    job_spec_path="training_pipeline_job.json", 
    pipeline_root=pipeline_root,
    parameter_values=pipeline_params,
    enable_caching=True)

In [47]:
# Sample best-trial payload, used to try out the flattening logic locally.
hpo_best = { "id": "1", "state": 4, "parameters": [ { "parameterId": "batch_size", "value": 32.0}, {"parameterId": "epochs", "value": 16.0}], "finalMeasurement": { "stepCount": "32", "metrics": [ { "metricId": "val_balanced_acc", "value": 0.878764271736145 } ] }, "startTime": "2023-01-16T01:34:33.765892041Z", "endTime": "2023-01-16T01:42:56Z", "name": "", "measurements": [], "clientId": "", "infeasibleReason": "", "customJob": "", "webAccessUris": {} }

# Round-trip through real JSON. str(dict) followed by replacing ' with " only
# works by accident: it breaks on apostrophes, True/False and None.
hpo_best = json.loads(json.dumps(hpo_best))

# Flatten {parameterId: value} and {metricId: value} into a single mapping.
best_values = {p['parameterId']: p['value'] for p in hpo_best['parameters']}
best_values.update(
    {m['metricId']: m['value'] for m in hpo_best['finalMeasurement']['metrics']})

hpo_best_dict = json.dumps(best_values)
hpo_best_dict

'{"batch_size": 32.0, "epochs": 16.0, "val_balanced_acc": 0.878764271736145}'

In [48]:
# Turn the JSON object of best hyperparameters into a flat
# ['--name', value, ...] argument list (values keep their parsed JSON type).
train_additional_args = hpo_best_dict
arg_dict = json.loads(train_additional_args)

train_args = []
for flag_name, flag_value in arg_dict.items():
    train_args.extend(['--' + flag_name, flag_value])

In [49]:
train_args

['--batch_size',
 32.0,
 '--epochs',
 16.0,
 '--val_balanced_acc',
 0.878764271736145]

## Or

In [80]:
data_pipeline_root

'gs://vertex_pipeline_demo_root_hy_syd/compute_root'

In [79]:
# Alternative to the pipeline: launch the hyperparameter tuning job directly
# through the Vertex AI SDK.
task_type = 'training'

display_name = 'hpo-pipeline-template'
# Arguments handed to the HPO container.
CMDARGS = [
    "--training_data_uri=gs://vertex_pipeline_demo_root_hy_syd/datasets/training/processed_data-20230109041341.csv"
]

# NOTE(review): this rebinds the name `worker_pool_specs`, which another cell
# defines as a pipeline component function.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": "n1-standard-4",
        },
        "replica_count": 1,
        "container_spec": {"image_uri": hpo_container_image_uri, "args": CMDARGS},
    }
]

# Maximize `val_balanced_acc` over the two discrete batch sizes.
metric_spec = {"val_balanced_acc": "maximize"}
parameter_spec = {"batch_size": aiplatform.hyperparameter_tuning.DiscreteParameterSpec(
             values=[32, 64], scale=None)}


aiplatform.init(
  project=project_id,
  location=data_region,
  staging_bucket=data_pipeline_root)
    
# Job definition that is run once per trial.
hpo_custom_job = aiplatform.CustomJob(
    display_name=display_name,
    project=project_id,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=data_pipeline_root,
    base_output_dir=data_pipeline_root
)

# NOTE(review): max_trial_count is 5 although the grid above only has two
# batch_size values - confirm this is intended.
hp_job = aiplatform.HyperparameterTuningJob(
    display_name=display_name,
    custom_job=hpo_custom_job,
    metric_spec=metric_spec,
    parameter_spec=parameter_spec,
    max_trial_count=5,
    parallel_trial_count=2,
    search_algorithm='grid',
    project=project_id,
    location=data_region
)
print('start!')
# NOTE(review): presumably run() blocks until the job finishes (the output
# below shows state polling); verify what it returns before using `hpo_results`.
hpo_results = hp_job.run()

start!
Creating HyperparameterTuningJob
HyperparameterTuningJob created. Resource name: projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448
To use this HyperparameterTuningJob in another session:
hpt_job = aiplatform.HyperparameterTuningJob.get('projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448')
View HyperparameterTuningJob:
https://console.cloud.google.com/ai/platform/locations/australia-southeast1/training/6821093061792104448?project=734227425472
HyperparameterTuningJob projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448 cu

In [29]:
import hypertune
hpt = hypertune.HyperTune()

# Make sure the directory that holds the hypertune metric file exists.
# `exist_ok=True` replaces the separate exists() check, and the bare
# `hpt.metric_path` expression (a no-op in the middle of a cell) is dropped.
os.makedirs(os.path.dirname(hpt.metric_path), exist_ok=True)

In [43]:
os.path.dirname(hpt.metric_path)

'/tmp/hypertune'

In [44]:
os.listdir(os.path.dirname(hpt.metric_path))

[]

In [25]:
os.getcwd()

'/home/jupyter/vertex_hpo/vertex_pipeline/notebook'

In [19]:
import hypertune
hpt = hypertune.HyperTune()
# Plain-Python replacement for the IPython-only `obj?` introspection syntax.
help(hpt.report_hyperparameter_tuning_metric)

[0;31mSignature:[0m
[0mhpt[0m[0;34m.[0m[0mreport_hyperparameter_tuning_metric[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mhyperparameter_metric_tag[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmetric_value[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mglobal_step[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcheckpoint_path[0m[0;34m=[0m[0;34m''[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Method to report hyperparameter tuning metric.

Args:
  hyperparameter_metric_tag: The hyperparameter metric name this metric
    value is associated with. Should keep consistent with the tag
    specified in HyperparameterSpec.
  metric_value: float, the values for the hyperparameter metric to report.
  global_step: int, the global step this metric value is associated with.
  checkpoint_path: The checkpoint path which can be used to warmstart from.
[0;31mFile:[0m      /opt/conda/lib/python3.7/site-package

In [45]:
# Reload the custom components (same set as loaded earlier in the notebook)
# and additionally load the `hpo` component.
preprocess_op = load_custom_component(component_name='data_preprocess')
train_op = load_custom_component(component_name='train_model')
check_metrics_op = load_custom_component(component_name='check_model_metrics')
create_endpoint_op = load_custom_component(component_name='create_endpoint')
test_endpoint_op = load_custom_component(component_name='test_endpoint')
deploy_model_op = load_custom_component(component_name='deploy_model')
monitor_model_op = load_custom_component(component_name='monitor_model')
hpo_op = load_custom_component(component_name='hpo')

In [46]:
@dsl.pipeline(name='hpo-pipeline-template')
def pipeline(project_id: str,
             data_region: str,
             gcs_data_output_folder: str,
             input_dataset_uri: str,
             training_data_schema: str,
             data_pipeline_root: str,
             
             training_container_image_uri: str,
             hpo_container_image_uri: str,
             serving_container_image_uri: str,
             custom_job_service_account: str,
             hptune_region: str,
             hp_config_suggestions_per_request: int,
             hp_config_max_trials: int,
             
             metrics_name: str,
             metrics_threshold: float,
             
             endpoint_machine_type: str,
             endpoint_min_replica_count: int,
             endpoint_max_replica_count: int,
             endpoint_test_instances: str = "",
             
             output_model_file_name: str = 'model.h5',
             machine_type: str = "n1-standard-8",
             accelerator_count: int = 0,
             accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED',
             vpc_network: str = "",
             enable_model_monitoring: str = 'False',
            task_type: str = 'training'):
    """HPO pipeline built from the custom `hpo` component.

    Steps wired in this function:
      1. `preprocess_op` writes the dataset as CSV to `gcs_data_output_folder`.
      2. `hpo_op` consumes the `output_dataset` output of step 1.

    NOTE(review): only `project_id`, `data_region`, `gcs_data_output_folder`,
    `data_pipeline_root`, `hpo_container_image_uri`,
    `custom_job_service_account` and `task_type` are referenced in this body;
    the remaining parameters are accepted but unused here.
    NOTE(review): this redefines the name `pipeline` used by earlier cells.
    """

    # Step 1: data preprocessing.
    preprocess_task = preprocess_op(
      project_id=project_id,
      data_region=data_region,
      gcs_output_folder=gcs_data_output_folder,
      gcs_output_format="CSV",
      task_type=task_type)
    
    # Step 2: hyperparameter tuning on the preprocessed dataset.
    hpo_task = hpo_op(
        project_id=project_id,
        data_region=data_region,
        data_pipeline_root=data_pipeline_root,
        hpo_container_image_uri=hpo_container_image_uri,
        custom_job_service_account=custom_job_service_account,
        input_dataset=preprocess_task.outputs['output_dataset'])
    
# Compile the `pipeline` function defined just above into a job spec file.
compiler.Compiler().compile(
    pipeline_func=pipeline, 
    package_path="training_pipeline_job.json"
)

# Client used to submit the compiled job spec.
api_client = AIPlatformClient(
    project_id=project_id,
    region=pipeline_region)

# Parameter values for this run; compared with the earlier parameter dict this
# one adds `hpo_container_image_uri` and omits `endpoint_test_instances`
# (which has a default in this pipeline).
pipeline_params = {
    'project_id': project_id,
    'data_region': data_region,
    'gcs_data_output_folder': gcs_data_output_folder,
    'output_model_file_name': 'model.h5',
    'input_dataset_uri': input_dataset_uri,
    'training_data_schema': training_data_schema,
    'data_pipeline_root': data_pipeline_root,
    
    'training_container_image_uri': training_container_image_uri,
    'hpo_container_image_uri': hpo_container_image_uri,
    'serving_container_image_uri': serving_container_image_uri,
    'custom_job_service_account': custom_job_service_account,
    'hptune_region':"asia-east1",
    'hp_config_suggestions_per_request': 5,
    'hp_config_max_trials': 30,
    
    'metrics_name': 'au_prc',
    'metrics_threshold': 0.4,
    
    'endpoint_machine_type': 'n1-standard-4',
    'endpoint_min_replica_count': 1,
    'endpoint_max_replica_count': 1
}

# Submit the run; caching is disabled here.
response = api_client.create_run_from_job_spec(
    job_spec_path="training_pipeline_job.json", 
    pipeline_root=pipeline_root,
    parameter_values=pipeline_params,
    enable_caching=False)

In [179]:
gcp_resources

'{"resources": [{"resourceType": "HyperparameterTuningJob", "resourceUri": "https://australia-southeast1-aiplatform.googleapis.com/v1/projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448"}]}'

In [181]:
json.loads(gcp_resources)

{'resources': [{'resourceType': 'HyperparameterTuningJob',
   'resourceUri': 'https://australia-southeast1-aiplatform.googleapis.com/v1/projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448'}]}

In [114]:
from google.cloud import aiplatform
from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources
from google.protobuf.json_format import Parse
from google.cloud.aiplatform_v1.types import study
import json

# Reference to an existing hyperparameter tuning job, serialized to the
# `gcp_resources` JSON string format.
gcp_resources = {"resources": [{"resourceType": "HyperparameterTuningJob", 
                  "resourceUri": "https://australia-southeast1-aiplatform.googleapis.com/v1/projects/734227425472/locations/australia-southeast1/hyperparameterTuningJobs/6821093061792104448"}]}
gcp_resources = json.dumps(gcp_resources)

api_endpoint_suffix = '-aiplatform.googleapis.com'
gcp_resources_proto = Parse(gcp_resources, GcpResources())
# Split the first resource URI at the first occurrence of 'projects':
# [0] is everything before it, [1] + [2] is the 'projects/...' resource name.
gcp_resources_split = gcp_resources_proto.resources[0].resource_uri.partition(
  'projects')
resource_name = gcp_resources_split[1] + gcp_resources_split[2]
prefix_str = gcp_resources_split[0]
# Cut the prefix at the endpoint suffix, take what follows the last '//' (the
# region in the URI above) and re-append the suffix to get the API host name.
prefix_str = prefix_str[:prefix_str.find(api_endpoint_suffix)]
api_endpoint = prefix_str[(prefix_str.rfind('//') + 2):] + api_endpoint_suffix

# Fetch the tuning job from that endpoint.
# NOTE(review): `response` is also the name used for the pipeline-run
# responses in earlier cells; it is overwritten here.
client_options = {'api_endpoint': api_endpoint}
job_client = aiplatform.gapic.JobServiceClient(client_options=client_options)
response = job_client.get_hyperparameter_tuning_job(name=resource_name)

# Display every trial of the job as a JSON string.
[study.Trial.to_json(trial) for trial in response.trials]

['{\n  "id": "1",\n  "state": 4,\n  "parameters": [\n    {\n      "parameterId": "batch_size",\n      "value": 32.0\n    }\n  ],\n  "finalMeasurement": {\n    "stepCount": "32",\n    "metrics": [\n      {\n        "metricId": "val_balanced_acc",\n        "value": 0.878764271736145\n      }\n    ]\n  },\n  "startTime": "2023-01-15T21:46:41.744844329Z",\n  "endTime": "2023-01-15T21:56:18Z",\n  "name": "",\n  "measurements": [],\n  "clientId": "",\n  "infeasibleReason": "",\n  "customJob": "",\n  "webAccessUris": {}\n}',
 '{\n  "id": "2",\n  "state": 4,\n  "parameters": [\n    {\n      "parameterId": "batch_size",\n      "value": 64.0\n    }\n  ],\n  "finalMeasurement": {\n    "stepCount": "32",\n    "metrics": [\n      {\n        "metricId": "val_balanced_acc",\n        "value": 0.8759955167770386\n      }\n    ]\n  },\n  "startTime": "2023-01-15T21:46:41.744937138Z",\n  "endTime": "2023-01-15T21:52:18Z",\n  "name": "",\n  "measurements": [],\n  "clientId": "",\n  "infeasibleReason": "",

In [27]:
from google.cloud import aiplatform

# NOTE(review): this cell rebinds `data_region` and
# `training_container_image_uri` to us-central1 values, overriding the
# australia-southeast1 settings defined earlier; any cell run afterwards sees
# the new values.
project_id='petcircle-science-playground'
data_region='us-central1'
training_container_image_uri='us-central1-docker.pkg.dev/petcircle-science-playground/mlops-vertex-kit/training:latest'
data_pipeline_root = 'gs://vertex_pipeline_demo_root_hy_syd/compute_root'

aiplatform.init(
  project=project_id,
  location=data_region,
  staging_bucket=data_pipeline_root)


# Training job definition that uses the training container image.
job = aiplatform.CustomContainerTrainingJob(
  display_name='batch_prediction',
  location=data_region,
  container_uri=training_container_image_uri)