## Initial setup

In [None]:
import pandas as pd
import numpy as np

import google.cloud.aiplatform as aiplatform
from google.cloud.aiplatform import model_monitoring

In [None]:
# Project ID handed to aiplatform.init and to the monitoring job below.
# Left blank on purpose: fill it in before running the notebook.
PROJECT_ID = ""
# Region used for both aiplatform.init and the monitoring job.
REGION = "us-central1"

In [None]:
# Set the SDK-wide project and location from the constants defined above.
aiplatform.init(project=PROJECT_ID, location=REGION)

Copy the endpoint resource name from the AutoML notebook and paste it below. It looks like `projects/123/locations/us-central1/endpoints/456`.

In [None]:
# Endpoint identifier to attach to; blank placeholder, fill in before running.
ENDPOINT_NAME = ""
endpoint = aiplatform.Endpoint(ENDPOINT_NAME)

## Configure alerting specification

In [None]:
# Address placed in the alert config's recipient list.
USER_EMAIL = "1234@best-company.com"

# Email alert configuration with logging switched on.
alerting_config = model_monitoring.EmailAlertConfig(
    user_emails=[USER_EMAIL],
    enable_logging=True,
)

In [None]:
# Monitoring interval, measured in hours; 1 is the smallest allowed step.
MONITOR_INTERVAL = 1

# Schedule configuration built from that interval.
schedule_config = model_monitoring.ScheduleConfig(
    monitor_interval=MONITOR_INTERVAL,
)

In [None]:
# Sampling rate passed to RandomSampleConfig (0.95 i.e. 95%).
# NOTE(review): 0.8 (80%) is reportedly the default -- confirm against the SDK docs.
SAMPLE_RATE = 0.95

# Random-sampling configuration used as the job's logging sampling strategy.
logging_sampling_strategy = model_monitoring.RandomSampleConfig(
    sample_rate=SAMPLE_RATE,
)

In [None]:
# Single drift threshold shared by every monitored column.
DRIFT_THRESHOLD_VALUE = 0.05

# Column name -> threshold mapping; every listed column gets the same value.
DRIFT_THRESHOLDS = dict.fromkeys(
    (
        "age",
        "capital_gain",
        "capital_loss",
        "hours_per_week",
        "native_country",
    ),
    DRIFT_THRESHOLD_VALUE,
)

drift_config = model_monitoring.DriftDetectionConfig(
    drift_thresholds=DRIFT_THRESHOLDS,
)

In [None]:
# Location of the training CSV file; blank placeholder, fill in before running.
TRAIN_DATA_GCS_URI = ""
# Name of the label column in the training data.
TARGET = "income"

# Single skew threshold shared by every monitored column.
SKEW_THRESHOLD_VALUE = 0.05

# Column name -> threshold mapping; every listed column gets the same value.
SKEW_THRESHOLDS = {
    column: SKEW_THRESHOLD_VALUE
    for column in (
        "age",
        "capital_gain",
        "capital_loss",
        "hours_per_week",
        "native_country",
    )
}

# Skew detection configuration pointing at the training data source.
skew_config = model_monitoring.SkewDetectionConfig(
    data_source=TRAIN_DATA_GCS_URI,
    skew_thresholds=SKEW_THRESHOLDS,
    target_field=TARGET,
    data_format="csv",
)

In [None]:
# Bundle the skew and drift configurations into one monitoring objective.
objective_config = model_monitoring.ObjectiveConfig(
    skew_detection_config=skew_config,
    drift_detection_config=drift_config,
)

## Monitoring

In [None]:
# Create the monitoring job on the endpoint, wiring in the sampling, schedule,
# alerting and objective configurations built in the cells above.
monitoring_job = aiplatform.ModelDeploymentMonitoringJob.create(
    display_name="income_bracket",  # name shown in the GCP console
    project=PROJECT_ID,
    location=REGION,
    endpoint=endpoint,
    logging_sampling_strategy=logging_sampling_strategy,
    schedule_config=schedule_config,
    alert_config=alerting_config,
    objective_configs=objective_config,
)

print(monitoring_job)

Check current status

In [None]:
# Look the job up by the display name it was created with ("income_bracket").
# The value has to be spelled out in the filter expression.
JOB_FILTER = 'display_name="income_bracket"'

# Listing through the class means this cell does not need the `monitoring_job`
# object from the creation cell to still be alive in the kernel.
jobs = aiplatform.ModelDeploymentMonitoringJob.list(filter=JOB_FILTER)
if not jobs:
    # Fail with a readable message instead of an IndexError on jobs[0].
    raise RuntimeError(f"No monitoring job matches filter: {JOB_FILTER}")

# NOTE(review): if several jobs share this display name, the first one listed
# is used -- confirm that is the job you intend to inspect.
job = jobs[0]
print(job.state)

Wait for a few minutes and check again

In [None]:
# Read the job state again and show it.
current_state = job.state
print(current_state)

Generate synthetic data for prediction requests

In [None]:
import random


def _typical_request():
    """Build one synthetic prediction instance.

    Numeric fields are drawn with ``random.randint`` and converted to
    strings; the remaining fields are fixed string values.
    """
    return {
        "age": str(random.randint(20, 65)),
        "workclass": "State-gov",
        "fnlwgt": str(random.randint(50000, 200000)),
        "education": "University",
        "education_num": str(random.randint(5, 50)),
        "marital_status": "Happily_Married",
        "occupation": "Salaried",
        "relationship": "Committed",
        "race": "Proud_American",
        "sex": "Male",
        "capital_gain": str(random.randint(0, 5000)),
        "capital_loss": str(random.randint(0, 50000)),
        "hours_per_week": str(random.randint(10, 80)),
        "native_country": "USA",
    }


# 1000 synthetic instances to send to the endpoint in the next cell.
instances = [_typical_request() for _ in range(1000)]

In [None]:
# Send every instance as its own request and keep all responses, so the
# first one is still available after the loop (a plain loop variable would
# only hold the response to the last request).
responses = [endpoint.predict(instances=[instance]) for instance in instances]

# Element 0 of a response is presumably its predictions -- TODO confirm.
response = responses[0]
prediction = response[0]

# print the prediction for the first instance
print(prediction)

In [None]:
def _out_of_range_request():
    """Build one synthetic prediction instance with different value ranges.

    Only ``age`` and ``education_num`` are random; every other field is a
    fixed string.
    """
    return {
        "age": str(random.randint(2, 200)),
        "workclass": "State-gov",
        "fnlwgt": "100000",
        "education": "University",
        "education_num": str(random.randint(500, 550)),
        "marital_status": "Happily_Married",
        "occupation": "Salaried",
        "relationship": "Committed",
        "race": "Proud_American",
        "sex": "Male",
        "capital_gain": "500000",
        "capital_loss": "123456789",
        "hours_per_week": "256",
        "native_country": "Canada",
    }


# Replace the previous batch with 10 instances of the new shape.
instances = [_out_of_range_request() for _ in range(10)]

In [None]:
# Send every instance as its own request and keep all responses, so the
# first one is still available after the loop (a plain loop variable would
# only hold the response to the last request).
responses = [endpoint.predict(instances=[instance]) for instance in instances]

# Element 0 of a response is presumably its predictions -- TODO confirm.
response = responses[0]
prediction = response[0]

# print the prediction for the first instance
print(prediction)