# 07 - Prediction Serving

The purpose of the notebook is to show how to use the deployed model for online and batch prediction.
The notebook covers the following tasks:
1. Test the endpoints for online prediction.
2. Use the uploaded custom model for batch prediction.
3. Run the batch prediction pipeline using `Vertex Pipelines`.

## Setup

### Import libraries

In [1]:
import os
from datetime import datetime
import tensorflow as tf

from google.cloud import aiplatform as vertex_ai

2022-09-11 00:45:32.402357: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-11 00:45:32.402397: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


### Setup Google Cloud project

In [2]:
PROJECT = 'mwpmltr' # Change to your project id.
REGION = 'us-central1' # Change to your region.
BUCKET = 'gcp-certification-chicago-taxi-demo' # Change to your bucket name.

if PROJECT == "" or PROJECT is None or PROJECT == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT = shell_output[0]
    
if BUCKET == "" or BUCKET is None or BUCKET == "[your-bucket-name]":
    # Get your bucket name to GCP project id
    BUCKET = PROJECT
    # Try to create the bucket if it doesn't exists
    ! gsutil mb -l $REGION gs://$BUCKET
    print("")
    
print("Project ID:", PROJECT)
print("Region:", REGION)
print("Bucket name:", BUCKET)

Project ID: mwpmltr
Region: us-central1
Bucket name: gcp-certification-chicago-taxi-demo


### Set configurations

In [3]:
# Version tag and the display names derived from the dataset name.
VERSION = 'v01'
DATASET_DISPLAY_NAME = 'chicago-taxi-tips'
ENDPOINT_DISPLAY_NAME = DATASET_DISPLAY_NAME + '-classifier'
MODEL_DISPLAY_NAME = ENDPOINT_DISPLAY_NAME + '-' + VERSION

# BigQuery location of the data used for serving.
SERVE_BQ_DATASET_NAME = 'playground_central' # Change to your serving BigQuery dataset name.
SERVE_BQ_TABLE_NAME = 'chicago_taxitrips_prep' # Change to your serving BigQuery table name.

## 1. Making Online Predictions


In [4]:
# Initialise the Vertex AI SDK for the configured project and region.
vertex_ai.init(
    project=PROJECT,
    location=REGION,
    # The SDK documents staging_bucket as a "gs://..." URI; BUCKET holds only
    # the bare bucket name, so add the scheme here.
    staging_bucket=f"gs://{BUCKET}",
)

# Look up the endpoints carrying the expected display name, ordered by
# update time.
matching_endpoints = vertex_ai.Endpoint.list(
    filter=f'display_name={ENDPOINT_DISPLAY_NAME}',
    order_by="update_time",
)

# Fail with an actionable message instead of a bare IndexError when no
# endpoint matches.
if not matching_endpoints:
    raise LookupError(
        f"No Vertex AI endpoint with display name '{ENDPOINT_DISPLAY_NAME}' "
        f"was found in project '{PROJECT}' (region '{REGION}'). "
        "Deploy the model to an endpoint before running this notebook."
    )

# Use the last entry of the update-time-ordered list.
endpoint_name = matching_endpoints[-1].gca_resource.name

endpoint = vertex_ai.Endpoint(endpoint_name)

In [5]:
# A single sample trip; every feature value is wrapped in a one-element list.
_sample_trip = {
    "dropoff_grid": ["POINT(-87.6 41.9)"],
    "euclidean": [2064.2696],
    "loc_cross": [""],
    "payment_type": ["Credit Card"],
    "pickup_grid": ["POINT(-87.6 41.9)"],
    "trip_miles": [1.37],
    "trip_day": [12],
    "trip_hour": [16],
    "trip_month": [2],
    "trip_day_of_week": [4],
    "trip_seconds": [555],
}

# The endpoint is called with a list of instances.
test_instances = [_sample_trip]

In [6]:
# Send the sample instances to the deployed endpoint and keep only the
# prediction payloads from the response.
prediction_response = endpoint.predict(test_instances)
predictions = prediction_response.predictions

# Print one prediction per line.
for prediction in predictions:
    print(prediction)

{'classes': ['tip<20%', 'tip>=20%'], 'scores': [0.164241076, 0.835758924]}


In [7]:
# Optional: request explanations for the same test instances.
# NOTE(review): left disabled; presumably the deployed model needs explanation
# support configured before `endpoint.explain` will work. Confirm before enabling.
# explanations = endpoint.explain(test_instances).explanations

# for explanation in explanations:
#     print(explanation)

## 2. Batch Prediction

In [8]:
# Root of this dataset's workspace in Cloud Storage (note the trailing slash).
WORKSPACE = "gs://{}/{}/".format(BUCKET, DATASET_DISPLAY_NAME)

# Serving data lives under the workspace, split into inputs and predictions.
SERVING_DATA_DIR = os.path.join(WORKSPACE, 'serving_data')
SERVING_INPUT_DATA_DIR, SERVING_OUTPUT_DATA_DIR = (
    os.path.join(SERVING_DATA_DIR, leaf)
    for leaf in ('input_data', 'output_predictions')
)

In [9]:
# Echo each resolved Cloud Storage location as "NAME = value".
for label, location in (
    ("WORKSPACE", WORKSPACE),
    ("SERVING_DATA_DIR", SERVING_DATA_DIR),
    ("SERVING_INPUT_DATA_DIR", SERVING_INPUT_DATA_DIR),
    ("SERVING_OUTPUT_DATA_DIR", SERVING_OUTPUT_DATA_DIR),
):
    print(f"{label} = {location}")

WORKSPACE = gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/
SERVING_DATA_DIR = gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/serving_data
SERVING_INPUT_DATA_DIR = gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/serving_data/input_data
SERVING_OUTPUT_DATA_DIR = gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/serving_data/output_predictions


In [10]:
# Start from a clean serving directory: drop any previous contents first.
serving_dir_exists = tf.io.gfile.exists(SERVING_DATA_DIR)
if serving_dir_exists:
    print("Removing previous serving data...")
    tf.io.gfile.rmtree(SERVING_DATA_DIR)

# Create the serving directory.
print("Creating serving data directory...")
tf.io.gfile.mkdir(SERVING_DATA_DIR)
print("Serving data directory is ready.")

Removing previous serving data...
Creating serving data directory...
Serving data directory is ready.


### Extract serving data to Cloud Storage as JSONL

In [11]:
from src.common import datasource_utils
from src.preprocessing import etl

In [12]:
# Number of rows to extract for serving. The original author noted that
# LIMIT = 10000 took about 25 minutes.
LIMIT = 100

# Build the serving-data query for the configured BigQuery dataset and table.
serving_query_params = dict(
    bq_dataset_name=SERVE_BQ_DATASET_NAME,
    bq_table_name=SERVE_BQ_TABLE_NAME,
    limit=LIMIT,
)
sql_query = datasource_utils.get_serving_source_query(**serving_query_params)

print(sql_query)


    SELECT 
        IF(trip_month IS NULL, -1, trip_month) trip_month,
        IF(trip_day IS NULL, -1, trip_day) trip_day,
        IF(trip_day_of_week IS NULL, -1, trip_day_of_week) trip_day_of_week,
        IF(trip_hour IS NULL, -1, trip_hour) trip_hour,
        IF(trip_seconds IS NULL, -1, trip_seconds) trip_seconds,
        IF(trip_miles IS NULL, -1, trip_miles) trip_miles,
        IF(payment_type IS NULL, 'NA', payment_type) payment_type,
        IF(pickup_grid IS NULL, 'NA', pickup_grid) pickup_grid,
        IF(dropoff_grid IS NULL, 'NA', dropoff_grid) dropoff_grid,
        IF(euclidean IS NULL, -1, euclidean) euclidean,
        IF(loc_cross IS NULL, 'NA', loc_cross) loc_cross
    FROM playground_central.chicago_taxitrips_prep 
    LIMIT 100


In [13]:
# Timestamped job name for this extraction run.
run_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
job_name = f"extract-{DATASET_DISPLAY_NAME}-serving-{run_timestamp}"

# Arguments passed to the extraction pipeline. The runner entry is left
# commented out, as in the original cell.
args = dict(
    job_name=job_name,
    # runner='DataflowRunner',
    sql_query=sql_query,
    exported_data_prefix=os.path.join(SERVING_INPUT_DATA_DIR, "data-"),
    temporary_dir=os.path.join(WORKSPACE, 'tmp'),
    gcs_location=os.path.join(WORKSPACE, 'bq_tmp'),
    project=PROJECT,
    region=REGION,
    setup_file='./setup.py',
)

In [14]:
# Only surface TensorFlow log records at ERROR level and above.
tf_logger = tf.get_logger()
tf_logger.setLevel('ERROR')

# Run the extraction pipeline with the arguments assembled earlier.
print("Data extraction started...")
etl.run_extract_pipeline(args)
print("Data extraction completed.")

Data extraction started...


  temp_location = pcoll.pipeline.options.view_as(


Data extraction completed.


In [15]:
# List the files under the serving input directory to confirm the extraction output.
!gsutil ls {SERVING_INPUT_DATA_DIR}

gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/serving_data/input_data/data--00000-of-00001.jsonl


### Submit the batch prediction job

In [16]:
# Look up the uploaded models carrying the expected display name, ordered by
# update time.
matching_models = vertex_ai.Model.list(
    filter=f'display_name={MODEL_DISPLAY_NAME}',
    order_by="update_time",
)

# Fail with an actionable message instead of a bare IndexError when no
# model matches.
if not matching_models:
    raise LookupError(
        f"No Vertex AI model with display name '{MODEL_DISPLAY_NAME}' was found. "
        "Upload the model to Vertex AI before submitting a batch prediction job."
    )

# Use the last entry of the update-time-ordered list.
model_name = matching_models[-1].gca_resource.name

In [17]:
# Display the resolved model resource name.
model_name

'projects/55590906972/locations/us-central1/models/5832755253723791360'

In [18]:
# Compute resources for the batch prediction job. The accelerator settings
# are left commented out, as in the original cell.
job_resources = dict(
    machine_type='n1-standard-2',
    # accelerator_count=1,
    # accelerator_type='NVIDIA_TESLA_T4',
    starting_replica_count=1,
    max_replica_count=10,
)

# Timestamped display name for this job.
submitted_at = datetime.now().strftime('%Y%m%d%H%M%S')
job_display_name = f"{MODEL_DISPLAY_NAME}-prediction-job-{submitted_at}"

# Read every JSONL file in the serving input directory and write JSONL
# predictions under the output directory. sync=True is passed so the call
# runs synchronously.
vertex_ai.BatchPredictionJob.create(
    job_display_name=job_display_name,
    model_name=model_name,
    gcs_source=f"{SERVING_INPUT_DATA_DIR}/*.jsonl",
    gcs_destination_prefix=SERVING_OUTPUT_DATA_DIR,
    instances_format='jsonl',
    predictions_format='jsonl',
    sync=True,
    **job_resources,
)

Creating BatchPredictionJob


INFO:google.cloud.aiplatform.jobs:Creating BatchPredictionJob


BatchPredictionJob created. Resource name: projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob created. Resource name: projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264


To use this BatchPredictionJob in another session:


INFO:google.cloud.aiplatform.jobs:To use this BatchPredictionJob in another session:


bpj = aiplatform.BatchPredictionJob('projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264')


INFO:google.cloud.aiplatform.jobs:bpj = aiplatform.BatchPredictionJob('projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264')


View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/1394945179479179264?project=55590906972


INFO:google.cloud.aiplatform.jobs:View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/1394945179479179264?project=55590906972


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_RUNNING


BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_SUCCEEDED


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264 current state:
JobState.JOB_STATE_SUCCEEDED


BatchPredictionJob run completed. Resource name: projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264


INFO:google.cloud.aiplatform.jobs:BatchPredictionJob run completed. Resource name: projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264


<google.cloud.aiplatform.jobs.BatchPredictionJob object at 0x7f529c5ab250> 
resource name: projects/55590906972/locations/us-central1/batchPredictionJobs/1394945179479179264

## 3. Run the batch prediction pipeline using Vertex Pipelines

In [19]:
# Workspace root in Cloud Storage (trailing slash included).
WORKSPACE = "gs://{}/{}/".format(BUCKET, DATASET_DISPLAY_NAME)
# Artifact store location under the workspace.
ARTIFACT_STORE = os.path.join(WORKSPACE, 'tfx_artifacts')
# Name of the prediction pipeline for this model version.
PIPELINE_NAME = MODEL_DISPLAY_NAME + '-predict-pipeline'

### Set the pipeline configurations for the Vertex AI run

In [20]:
# Export the pipeline settings as environment variables in one call.
os.environ.update({
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/{DATASET_DISPLAY_NAME}",
    "MODEL_DISPLAY_NAME": MODEL_DISPLAY_NAME,
    "PIPELINE_NAME": PIPELINE_NAME,
    "ARTIFACT_STORE_URI": ARTIFACT_STORE,
    "BATCH_PREDICTION_BQ_DATASET_NAME": SERVE_BQ_DATASET_NAME,
    "BATCH_PREDICTION_BQ_TABLE_NAME": SERVE_BQ_TABLE_NAME,
    "SERVE_LIMIT": "1000",
    "BEAM_RUNNER": "DirectRunner",
    "TFX_IMAGE_URI": f"gcr.io/{PROJECT}/{DATASET_DISPLAY_NAME}:{VERSION}",
})

In [21]:
# Additional environment settings: cache flag and serving runtime identifier.
os.environ.update(
    ENABLE_CACHE="1",
    SERVING_RUNTIME="tf-cpu.2-8",
)

In [22]:
import importlib
from src.tfx_pipelines import config

# Reload the config module; presumably this makes it pick up the environment
# variables set in the preceding cells. Verify against src/tfx_pipelines/config.
importlib.reload(config)

# Print every upper-case attribute of the config module.
for key, value in vars(config).items():
    if not key.isupper():
        continue
    print(f'{key}: {value}')

PROJECT: mwpmltr
REGION: us-central1
GCS_LOCATION: gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips
ARTIFACT_STORE_URI: gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/tfx_artifacts
MODEL_REGISTRY_URI: gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/model_registry
DATASET_DISPLAY_NAME: chicago-taxi-tips
MODEL_DISPLAY_NAME: chicago-taxi-tips-classifier-v01
PIPELINE_NAME: chicago-taxi-tips-classifier-v01-predict-pipeline
ML_USE_COLUMN: ml_use
EXCLUDE_COLUMNS: trip_start_timestamp
TRAIN_LIMIT: 0
TEST_LIMIT: 0
SERVE_LIMIT: 1000
NUM_TRAIN_SPLITS: 4
NUM_EVAL_SPLITS: 1
ACCURACY_THRESHOLD: 0.8
USE_KFP_SA: False
TFX_IMAGE_URI: gcr.io/mwpmltr/chicago-taxi-tips:v01
BEAM_RUNNER: DirectRunner
BEAM_DIRECT_PIPELINE_ARGS: ['--project=mwpmltr', '--temp_location=gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/temp']
BEAM_DATAFLOW_PIPELINE_ARGS: ['--project=mwpmltr', '--temp_location=gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/temp', '--region=us-

### (Optional) Build the ML container image

This is the `TFX` runtime environment for the prediction pipeline steps.

In [23]:
# Print the container image URI; TFX_IMAGE_URI is set through os.environ in the pipeline configuration cell.
!echo $TFX_IMAGE_URI

gcr.io/mwpmltr/chicago-taxi-tips:v01


In [24]:
# Submit the current directory to Cloud Build and tag the resulting image as $TFX_IMAGE_URI
# (15-minute timeout, e2-highcpu-8 build machine).
!gcloud builds submit --tag $TFX_IMAGE_URI . --timeout=15m --machine-type=e2-highcpu-8

Creating temporary tarball archive of 59 file(s) totalling 2.2 MiB before compression.
Some files were not included in the source upload.

Check the gcloud log [/home/jupyter/.config/gcloud/logs/2022.09.11/01.09.38.077000.log] to see which files and the contents of the
default gcloudignore file used (see `$ gcloud topic gcloudignore` to learn
more).

Uploading tarball of [.] to [gs://mwpmltr_cloudbuild/source/1662858578.17596-4a0ddedfc85c45c7bba9cad927fb3ee8.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/mwpmltr/locations/global/builds/d89a6bee-2ed8-4d2f-9068-8bc06f144e98].
Logs are available at [https://console.cloud.google.com/cloud-build/builds/d89a6bee-2ed8-4d2f-9068-8bc06f144e98?project=55590906972].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "d89a6bee-2ed8-4d2f-9068-8bc06f144e98"

FETCHSOURCE
Fetching storage object: gs://mwpmltr_cloudbuild/source/1662858578.17596-4a0ddedfc85c45c7bba9cad927fb3ee8.tgz#16628585787585

### Compile pipeline

In [25]:
from src.tfx_pipelines import runner

# The compiled pipeline definition is written to "<pipeline name>.json".
pipeline_definition_file = config.PIPELINE_NAME + '.json'
pipeline_definition = runner.compile_prediction_pipeline(
    pipeline_definition_file
)

### Submit run to Vertex Pipelines

In [26]:
from kfp.v2.google.client import AIPlatformClient

# NOTE(review): AIPlatformClient appears to be deprecated in later kfp releases
# in favour of google.cloud.aiplatform.PipelineJob. Confirm against the
# installed kfp version before upgrading.
pipeline_client = AIPlatformClient(project_id=PROJECT, region=REGION)

# Submit the compiled pipeline definition as a new run.
pipeline_client.create_run_from_job_spec(job_spec_path=pipeline_definition_file)



{'name': 'projects/55590906972/locations/us-central1/pipelineJobs/chicago-taxi-tips-classifier-v01-predict-pipeline-20220911011349',
 'displayName': 'chicago-taxi-tips-classifier-v01-predict-pipeline-20220911011349',
 'createTime': '2022-09-11T01:13:49.987038Z',
 'updateTime': '2022-09-11T01:13:49.987038Z',
 'pipelineSpec': {'deploymentConfig': {'@type': 'type.googleapis.com/ml_pipelines.PipelineDeploymentConfig',
   'executors': {'bigquery_data_gen_executor': {'container': {'image': 'gcr.io/mwpmltr/chicago-taxi-tips:v01',
      'command': ['python',
       '-m',
       'tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor'],
      'args': ['--executor_class_path',
       'src.tfx_pipelines.components.bigquery_data_gen_Executor',
       '--json_serialized_invocation_args',
       '{{$}}',
       '--project=mwpmltr',
       '--temp_location=gs://gcp-certification-chicago-taxi-demo/chicago-taxi-tips/temp']}},
    'datastore_prediction_writer_executor': {'container': {'image':