# Monitoring deployed models with Vertex Model Monitoring

In [13]:
import copy
import os
import pprint
import pandas as pd
import tensorflow as tf
import time

from google.cloud import aiplatform
from google.cloud import bigquery_datatransfer

from google.cloud.aiplatform_v1beta1.services.endpoint_service import \
    EndpointServiceClient
from google.cloud.aiplatform_v1beta1.services.job_service import \
    JobServiceClient
from google.cloud.aiplatform_v1beta1.services.prediction_service import \
    PredictionServiceClient
from google.cloud.aiplatform_v1beta1.types.io import BigQuerySource
from google.cloud.aiplatform_v1beta1.types.model_deployment_monitoring_job import (
    ModelDeploymentMonitoringJob, ModelDeploymentMonitoringObjectiveConfig,
    ModelDeploymentMonitoringScheduleConfig)
from google.cloud.aiplatform_v1beta1.types.model_monitoring import (
    ModelMonitoringAlertConfig, ModelMonitoringObjectiveConfig,
    SamplingStrategy, ThresholdConfig)
from google.cloud.aiplatform_v1beta1.types.prediction_service import \
    PredictRequest

from google.protobuf import json_format
from google.protobuf.duration_pb2 import Duration
from google.protobuf.struct_pb2 import Value

## Configure lab settings

In [14]:
# Identity of the lab environment; most names below are derived from these.
PROJECT_ID = "jk-vertex-workshop"
REGION = "us-central1"
PREFIX = "jkvw"

# Storage bucket and service account, both named after the lab prefix.
STAGING_BUCKET = f"gs://{PREFIX}-bucket"
VERTEX_SA = f"{PREFIX}-training-sa@{PROJECT_ID}.iam.gserviceaccount.com"

# Service hosts, built from the region.
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
PREDICT_API_ENDPOINT = f"{REGION}-prediction-aiplatform.googleapis.com"

# Display name used to look up the deployed endpoint.
ENDPOINT_DISPLAY_NAME = f"{PREFIX} Taxi tip predictor"

# BigQuery dataset, split table names and location.
BQ_DATASET_NAME = f"{PREFIX}_dataset"
BQ_TRAIN_SPLIT_NAME = "training"
BQ_VALID_SPLIT_NAME = "validation"
BQ_TEST_SPLIT_NAME = "testing"
BQ_LOCATION = "US"

## Copy the training dataset to the lab region

This works around an issue with Model Monitoring. Use the BigQuery SQL workspace to copy the dataset to the lab region, then point `BQ_DATASET_NAME` at the regional copy.

In [31]:
# Switch to the regional copy of the dataset; this replaces the
# BQ_DATASET_NAME value assigned in the lab settings cell.
BQ_DATASET_NAME = f'{PREFIX}_dataset_regional'
BQ_DATASET_NAME  # bare expression so the notebook displays the value

'jkvw_dataset_regional'

## Test the deployed model

In [32]:
# Look up the deployed endpoint by its display name. The filter expression is
# stored in `endpoint_filter` so the built-in `filter` is not shadowed.
endpoint_filter = f'display_name="{ENDPOINT_DISPLAY_NAME}"'

matching_endpoints = aiplatform.Endpoint.list(filter=endpoint_filter)
if not matching_endpoints:
    # Without this check an empty listing would surface as a NameError on
    # `endpoint_info` below, which hides the real problem.
    raise RuntimeError(
        f'No endpoint found with display name "{ENDPOINT_DISPLAY_NAME}"'
    )

for endpoint_info in matching_endpoints:
    print(endpoint_info)

# When several endpoints match, the last one listed is used (unchanged
# behavior from the original loop).
endpoint = aiplatform.Endpoint(endpoint_info.resource_name)

# Full resource name of the endpoint, reused by the monitoring cells.
ENDPOINT_ID = endpoint.resource_name

<google.cloud.aiplatform.models.Endpoint object at 0x7fba4c161f50> 
resource name: projects/910094146258/locations/us-central1/endpoints/3098810795165745152


In [33]:
# A single hand-written instance for a smoke test of the deployed model.
# Every feature value is wrapped in a one-element list.
test_instances = [  
    
    {
        "dropoff_grid": ["POINT(-87.6 41.9)"],
        "euclidean": [2064.2696],
        "payment_type": ["Credit Card"],
        "pickup_grid": ["POINT(-87.6 41.9)"],
        "trip_miles": [1.37],
        "trip_day": [12],
        "trip_hour": [16],
        "trip_month": [2],
        "trip_day_of_week": [4],
        "trip_seconds": [555]
    }
]

predictions = endpoint.predict(instances=test_instances)
# NOTE(review): predictions[0] is presumably the list of raw model outputs
# (logits); the sigmoid turns them into a probability — confirm against the
# model's serving signature.
prob = tf.nn.sigmoid(predictions[0])
print('Probability of tip > 20%:', prob.numpy())

Probability of tip > 20%: [[0.72956824]]


## Starting monitoring jobs

### Define helper functions

In [34]:
def send_predict_request(endpoint, input):
    """Send a single instance to an endpoint through the prediction service.

    Args:
        endpoint: Endpoint identifier placed in the ``PredictRequest``.
        input: JSON-like instance; converted to a protobuf ``Value`` with
            ``json_format.ParseDict``.

    Returns:
        The response returned by ``PredictionServiceClient.predict``.
    """
    prediction_client = PredictionServiceClient(
        client_options={"api_endpoint": PREDICT_API_ENDPOINT}
    )
    # No prediction parameters are supplied, so an empty dict is converted.
    empty_parameters = json_format.ParseDict({}, Value())
    predict_request = PredictRequest(
        endpoint=endpoint, parameters=empty_parameters
    )
    instance_value = json_format.ParseDict(input, Value())
    predict_request.instances.extend([instance_value])
    return prediction_client.predict(predict_request)


def list_monitoring_jobs():
    """List the model deployment monitoring jobs of the lab project/region.

    Returns:
        The response of
        ``JobServiceClient.list_model_deployment_monitoring_jobs`` for the
        parent ``projects/{PROJECT_ID}/locations/{REGION}``.
    """
    location_path = f"projects/{PROJECT_ID}/locations/{REGION}"
    job_client = JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    return job_client.list_model_deployment_monitoring_jobs(
        parent=location_path
    )

def get_monitoring_job(job):
    """Fetch a single model deployment monitoring job.

    Args:
        job: Full resource name of the monitoring job, in the form
            ``projects/.../locations/.../modelDeploymentMonitoringJobs/ID``.

    Returns:
        The response of
        ``JobServiceClient.get_model_deployment_monitoring_job``.
    """
    client_options = dict(api_endpoint=API_ENDPOINT)
    client = JobServiceClient(client_options=client_options)
    # The get call identifies the job by `name`; the original passed the
    # location as `parent` and never used the `job` argument.
    response = client.get_model_deployment_monitoring_job(name=job)
    return response

def pause_monitoring_job(job):
    """Pause a model deployment monitoring job and print the service reply.

    Args:
        job: Resource name of the monitoring job, passed as ``name``.
    """
    job_client = JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    print(job_client.pause_model_deployment_monitoring_job(name=job))


def delete_monitoring_job(job):
    """Request deletion of a monitoring job and print the service reply.

    Args:
        job: Resource name of the monitoring job, passed as ``name``.
    """
    job_client = JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    print(job_client.delete_model_deployment_monitoring_job(name=job))


### Configure the job

#### Configure skew and drift thresholds

In [35]:
# Per-feature thresholds used to build the training/prediction skew
# detection config.
SKEW_THRESHOLDS = {
    'trip_month': 0.3,
    'trip_day': 0.3,
    'trip_day_of_week': 0.3,
    'trip_hour': 0.3,
    'trip_seconds': 0.3,
    'trip_miles': 0.3,
    'payment_type': 0.3,
    'pickup_grid': 0.3,
    'dropoff_grid': 0.3,
    'euclidean': 0.3,
}

# Per-feature thresholds used to build the prediction drift detection config.
DRIFT_THRESHOLDS = {
    'trip_month': 0.3,
    'trip_day': 0.3,
    'trip_day_of_week': 0.3,
    'trip_hour': 0.3,
    'trip_seconds': 0.3,
    'trip_miles': 0.3,
    'payment_type': 0.3,
    'pickup_grid': 0.3,
    'dropoff_grid': 0.3,
    'euclidean': 0.3,
}

# Backward-compatible alias: the original name was misspelled and is still
# referenced elsewhere in the notebook.
DIRFT_THRESHOLDS = DRIFT_THRESHOLDS


In [36]:
# Wrap each numeric skew threshold in a ThresholdConfig, keyed by feature name.
skew_thresholds = {feature: ThresholdConfig(value=float(value)) for feature, value in SKEW_THRESHOLDS.items()}
skew_config = ModelMonitoringObjectiveConfig.TrainingPredictionSkewDetectionConfig(
    skew_thresholds=skew_thresholds
)


# Same wrapping for the drift thresholds (the constant's name is misspelled
# where it is defined).
drift_thresholds = {feature: ThresholdConfig(value=float(value)) for feature, value in DIRFT_THRESHOLDS.items()}
drift_config = ModelMonitoringObjectiveConfig.PredictionDriftDetectionConfig(
    drift_thresholds=drift_thresholds
)

#### Configure training dataset source location
This is used for schema generation

In [37]:
# Name of the target field and the BigQuery URI of the training split table.
TARGET = 'tip_bin'
BQ_TRAINING_DATA = f'bq://{PROJECT_ID}.{BQ_DATASET_NAME}.{BQ_TRAIN_SPLIT_NAME}'

# Describe the training dataset: target field first, then its BigQuery source.
training_dataset = ModelMonitoringObjectiveConfig.TrainingDataset(target_field=TARGET)
training_dataset.bigquery_source = BigQuerySource(input_uri=BQ_TRAINING_DATA)

#### Configure model monitoring objective

In [38]:
# Combine the training dataset with the skew and drift detection configs.
objective_config = ModelMonitoringObjectiveConfig(
    training_dataset=training_dataset,
    training_prediction_skew_detection_config=skew_config,
    prediction_drift_detection_config=drift_config,
)
# Template objective; it carries no deployed model id yet and is copied once
# per deployed model in a later cell.
objective_template = ModelDeploymentMonitoringObjectiveConfig(
    objective_config=objective_config
)

#### Get all deployed model ids on the monitored endpoint

In [39]:
# Fetch the monitored endpoint and collect the id of every deployed model.
client = EndpointServiceClient(client_options={"api_endpoint": API_ENDPOINT})
response = client.get_endpoint(name=ENDPOINT_ID)
model_ids = [deployed_model.id for deployed_model in response.deployed_models]
model_ids

['3328564744905818112']

#### Set objectives for each deployed model

In [40]:
# Build one objective per deployed model from the shared template.
objective_configs = []
for model_id in model_ids:
    # Use a dedicated loop name: the original reused `objective_config`,
    # overwriting the ModelMonitoringObjectiveConfig global of the same name
    # with an object of a different type.
    deployed_model_objective = copy.deepcopy(objective_template)
    deployed_model_objective.deployed_model_id = model_id
    objective_configs.append(deployed_model_objective)

#### Configure sampling strategy

In [41]:
# Sample rate handed to the random sampling config.
# NOTE(review): presumably the fraction of prediction requests that get
# logged for monitoring — confirm against the SamplingStrategy reference.
LOG_SAMPLE_RATE = 0.8

random_sampling = SamplingStrategy.RandomSampleConfig(sample_rate=LOG_SAMPLE_RATE)
sampling_config = SamplingStrategy(random_sample_config=random_sampling)

#### Configure monitoring schedule

In [42]:
# Monitoring interval in seconds (3600 s = one hour); it is passed to
# Duration(seconds=...) below.
MONITOR_INTERVAL = 3600

duration = Duration(seconds=MONITOR_INTERVAL)
schedule_config = ModelDeploymentMonitoringScheduleConfig(monitor_interval=duration)

#### Configure alerting

In [43]:
# Address placed in the email alert config; change it to your own before
# running the lab.
NOTIFY_EMAIL = "jarekk@gcp.solutions"
emails = [NOTIFY_EMAIL]

email_config = ModelMonitoringAlertConfig.EmailAlertConfig(user_emails=emails)
alerting_config = ModelMonitoringAlertConfig(email_alert_config=email_config)

#### Create a monitoring job

In [47]:
# Display name with a timestamp suffix (local time, YYYYMMDD_HHMMSS).
job_name = "TAXI_MONITORING_{}".format(time.strftime("%Y%m%d_%H%M%S"))

# Both schema URIs are left empty.
# NOTE(review): presumably the service then derives the schemas itself —
# confirm against the Model Monitoring documentation.
predict_schema = ""
analysis_schema = ""
    
# Assemble the job spec from the objects configured in the previous cells.
job = ModelDeploymentMonitoringJob(
    display_name=job_name,
    endpoint=ENDPOINT_ID,
    model_deployment_monitoring_objective_configs=objective_configs,
    logging_sampling_strategy=sampling_config,
    model_deployment_monitoring_schedule_config=schedule_config,
    model_monitoring_alert_config=alerting_config,
    predict_instance_schema_uri=predict_schema,
    analysis_instance_schema_uri=analysis_schema,
)
    
# Note: this rebinds `client`, which an earlier cell bound to an
# EndpointServiceClient.
options = dict(api_endpoint=API_ENDPOINT)
client = JobServiceClient(client_options=options)

# Create the job under the lab project and region.
parent = f"projects/{PROJECT_ID}/locations/{REGION}"
response = client.create_model_deployment_monitoring_job(
    parent=parent, model_deployment_monitoring_job=job
)
    
print("Created monitoring job:")
print(response.name)
print(response.state)
# Keep the resource name of the created job for later cells.
job_id = response.name

Created monitoring job:
projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/8870054970385235968
JobState.JOB_STATE_PENDING


#### List monitoring jobs

In [48]:
# Print the resource name and state of every monitoring job. The loop uses
# its own variable name so the `job` spec built when creating the monitoring
# job is not overwritten.
for monitoring_job in list_monitoring_jobs():
    print(monitoring_job.name, monitoring_job.state)

projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/8870054970385235968 JobState.JOB_STATE_PENDING
projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/298578979592339456 JobState.JOB_STATE_FAILED
projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/160093291050696704 JobState.JOB_STATE_FAILED
projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/7446917488136159232 JobState.JOB_STATE_FAILED
projects/910094146258/locations/us-central1/modelDeploymentMonitoringJobs/2892652364957745152 JobState.JOB_STATE_FAILED
