## Define Variables

In [126]:
# --- Notebook-wide configuration -------------------------------------------
# Google Cloud project and the region used for Vertex AI resources.
PROJECT_ID = "jchavezar-demo"
REGION = "us-central1"

# Cloud Storage locations: pipeline root and model export target.
PIPELINE_ROOT_PATH = "gs://vtx-root-path"
MODEL_DIR = "gs://vtx-models/bqmodel"

# BigQuery dataset that receives every table and model built below.
DATASET = "demos"

# Serving container recorded on the imported model artifact.
DEPLOY_IMAGE = 'us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-5:latest'

## Queries

In [127]:
### Create returning_users table
# One row per user_pseudo_id: the first and last 'user_engagement' timestamps,
# calendar features (month, day of year, day of week) derived from the first
# engagement, and the `churned` / `bounced` flags.
# The timestamps are fed to TIMESTAMP_MICROS, so the offsets 86400000000 and
# 600000000 are microsecond counts (24 hours and 10 minutes respectively).
query_return = """
WITH
  firstlasttouch AS (
  SELECT
    user_pseudo_id,
    MIN(event_timestamp) AS user_first_engagement,
    MAX(event_timestamp) AS user_last_engagement
  FROM
    `firebase-public-project.analytics_153293282.events_*`
  WHERE
    event_name='user_engagement'
  GROUP BY
    user_pseudo_id )
SELECT
  user_pseudo_id,
  user_first_engagement,
  user_last_engagement,
  EXTRACT(MONTH
  FROM
    TIMESTAMP_MICROS(user_first_engagement)) AS month,
  EXTRACT(DAYOFYEAR
  FROM
    TIMESTAMP_MICROS(user_first_engagement)) AS julianday,
  EXTRACT(DAYOFWEEK
  FROM
    TIMESTAMP_MICROS(user_first_engagement)) AS dayofweek,
  #add 24 hr to user's first touch
  (user_first_engagement + 86400000000) AS ts_24hr_after_first_engagement,
  #churned = 1 if last_touch within 24 hr of app installation, else 0
IF
  (user_last_engagement < (user_first_engagement + 86400000000), 1, 0 ) AS churned,
  #bounced = 1 if last_touch within 10 min, else 0
IF
  (user_last_engagement <= (user_first_engagement + 600000000), 1, 0 ) AS bounced,
FROM
  firstlasttouch
GROUP BY
  1,
  2,
  3
"""

In [128]:
## Create user demographics table
# One row per user_pseudo_id with country, operating system and language taken
# from a single 'user_engagement' event.
# NOTE: the window is ordered by event_timestamp DESC, so row_num = 1 selects
# the user's most recent engagement event even though the CTE is named
# `first_values`.

query_demographics = """
WITH first_values AS (
      SELECT
          user_pseudo_id,
          geo.country as country,
          device.operating_system as operating_system,
          device.language as language,
          ROW_NUMBER() OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS row_num
      FROM `firebase-public-project.analytics_153293282.events_*`
      WHERE event_name='user_engagement'
      )
  SELECT * EXCEPT (row_num)
  FROM first_values
  WHERE row_num = 1
"""

In [129]:
## Create user aggregate behavior
# Per-user counts of selected event types, restricted to events whose
# timestamp is at or before the user's ts_24hr_after_first_engagement.
#
# This is an f-string so the join reads {DATASET}.returningusers, i.e. the
# table the pipeline writes from `query_return`. The dataset used to be
# hard-coded as `bqmlga4`, which is not the dataset the other queries and the
# pipeline's destination tables use.

query_user_agg = f"""
WITH
  events_first24hr AS (
    #select user data only from first 24 hr of using the app
  SELECT
    e.*
  FROM
    `firebase-public-project.analytics_153293282.events_*` e
  JOIN
    {DATASET}.returningusers r
  ON
    e.user_pseudo_id = r.user_pseudo_id
  WHERE
    e.event_timestamp <= r.ts_24hr_after_first_engagement )
SELECT
  user_pseudo_id,
  SUM(IF(event_name = 'user_engagement', 1, 0)) AS cnt_user_engagement,
  SUM(IF(event_name = 'level_start_quickplay', 1, 0)) AS cnt_level_start_quickplay,
  SUM(IF(event_name = 'level_end_quickplay', 1, 0)) AS cnt_level_end_quickplay,
  SUM(IF(event_name = 'level_complete_quickplay', 1, 0)) AS cnt_level_complete_quickplay,
  SUM(IF(event_name = 'level_reset_quickplay', 1, 0)) AS cnt_level_reset_quickplay,
  SUM(IF(event_name = 'post_score', 1, 0)) AS cnt_post_score,
  SUM(IF(event_name = 'spend_virtual_currency', 1, 0)) AS cnt_spend_virtual_currency,
  SUM(IF(event_name = 'ad_reward', 1, 0)) AS cnt_ad_reward,
  SUM(IF(event_name = 'challenge_a_friend', 1, 0)) AS cnt_challenge_a_friend,
  SUM(IF(event_name = 'completed_5_levels', 1, 0)) AS cnt_completed_5_levels,
  SUM(IF(event_name = 'use_extra_steps', 1, 0)) AS cnt_use_extra_steps,
FROM
  events_first24hr
GROUP BY
  1
"""

In [130]:
## Join and Create Train Dataset
# Builds the training rows: returningusers is the driving table, LEFT JOINed to
# the demographics and aggregate-behaviour tables on user_pseudo_id.
# - Behaviour counts are wrapped in IFNULL(..., 0) so users without a matching
#   behaviour row get zeros instead of NULLs.
# - `dem.*` carries every demographics column through, including
#   user_pseudo_id.
# - Rows with bounced = 1 are filtered out.

query_dataset_train = f"""
  SELECT
    dem.*,
    IFNULL(beh.cnt_user_engagement, 0) AS cnt_user_engagement,
    IFNULL(beh.cnt_level_start_quickplay, 0) AS cnt_level_start_quickplay,
    IFNULL(beh.cnt_level_end_quickplay, 0) AS cnt_level_end_quickplay,
    IFNULL(beh.cnt_level_complete_quickplay, 0) AS cnt_level_complete_quickplay,
    IFNULL(beh.cnt_level_reset_quickplay, 0) AS cnt_level_reset_quickplay,
    IFNULL(beh.cnt_post_score, 0) AS cnt_post_score,
    IFNULL(beh.cnt_spend_virtual_currency, 0) AS cnt_spend_virtual_currency,
    IFNULL(beh.cnt_ad_reward, 0) AS cnt_ad_reward,
    IFNULL(beh.cnt_challenge_a_friend, 0) AS cnt_challenge_a_friend,
    IFNULL(beh.cnt_completed_5_levels, 0) AS cnt_completed_5_levels,
    IFNULL(beh.cnt_use_extra_steps, 0) AS cnt_use_extra_steps,
    ret.user_first_engagement,
    ret.month,
    ret.julianday,
    ret.dayofweek,
    ret.churned
  FROM
    {DATASET}.returningusers ret
  LEFT OUTER JOIN
    {DATASET}.user_demographics dem
  ON 
    ret.user_pseudo_id = dem.user_pseudo_id
  LEFT OUTER JOIN 
    {DATASET}.user_aggregate_behavior beh
  ON
    ret.user_pseudo_id = beh.user_pseudo_id
  WHERE ret.bounced = 0
"""

In [131]:
## Model Query
# BigQuery ML statement that (re)creates the logistic-regression model
# {DATASET}.churn_logreg from every column of {DATASET}.train, with `churned`
# declared as the label column and global explainability enabled.
# NOTE(review): because the SELECT is `*`, any identifier-like column present
# in the train table is presumably also fed to the model - confirm this is
# intended.

create_model_query = f"""

CREATE OR REPLACE MODEL {DATASET}.churn_logreg

OPTIONS(
  MODEL_TYPE='LOGISTIC_REG',
  INPUT_LABEL_COLS=['churned'],
  ENABLE_GLOBAL_EXPLAIN=TRUE
) AS

SELECT
  *
FROM
  {DATASET}.train
  
"""

## Create Pipeline and Components

In [132]:
from kfp.v2.dsl import pipeline
from kfp.v2.components import importer_node
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp, BigqueryQueryJobOp, BigqueryEvaluateModelJobOp, BigqueryPredictModelJobOp, BigqueryExportModelJobOp

def _truncate_into(table_id: str) -> dict:
    """Build the query-job configuration that overwrites ``DATASET.table_id``.

    The dict is assembled from the notebook-level PROJECT_ID and DATASET
    constants, so it contains only plain values when the pipeline is compiled.

    Args:
        table_id: Name of the destination table inside DATASET.

    Returns:
        A ``job_configuration_query`` dict with the destination table and a
        WRITE_TRUNCATE write disposition.
    """
    return {
        "destination_table": {
            "project_id": PROJECT_ID,
            "dataset_id": DATASET,
            "table_id": table_id,
        },
        "write_disposition": "WRITE_TRUNCATE",
    }


@pipeline(name='bqml-mm')
def pipeline(
    project_id: str,
    location: str,
    artifact_uri: str
):
    """Build the churn tables, train and evaluate a BQML model, export and upload it.

    Steps, in dependency order:
      1. Materialise ``returningusers`` and ``user_demographics``.
      2. Materialise ``user_aggregate_behavior`` (its query joins on
         ``returningusers``, so it runs after step 1's returning-users task).
      3. Join the three tables into ``train``.
      4. Create the model from ``create_model_query``.
      5. Evaluate it, write predictions to ``results_1`` and export it to
         ``artifact_uri`` (these three only depend on step 4).
      6. Import the exported files as an unmanaged container model and upload
         it to the Vertex AI Model Registry.

    Args:
        project_id: Project passed to every BigQuery op and to the model upload.
        location: Location passed to every BigQuery op.
        artifact_uri: Destination path of the model export; also the URI
            handed to the importer node.
    """
    returning_users_table = BigqueryQueryJobOp(
        project=project_id,
        location=location,
        query=query_return,
        job_configuration_query=_truncate_into("returningusers"),
    ).set_display_name('returningusers')

    # The destination_table dict used to contain a stray "write_disposition"
    # entry; the disposition belongs next to destination_table only.
    query_demographics_table = BigqueryQueryJobOp(
        project=project_id,
        location=location,
        query=query_demographics,
        job_configuration_query=_truncate_into("user_demographics"),
    ).set_display_name('user_demographics')

    # query_user_agg joins against the returningusers table, so this task must
    # not start before that table has been written.
    user_aggregate_behavior_table = BigqueryQueryJobOp(
        project=project_id,
        location=location,
        query=query_user_agg,
        job_configuration_query=_truncate_into("user_aggregate_behavior"),
    ).set_display_name('user_aggregate_behavior').after(returning_users_table)

    # Here we create the train dataset for BigQuery ML

    query_dataset_train_table = BigqueryQueryJobOp(
        project=project_id,
        location=location,
        query=query_dataset_train,
        job_configuration_query=_truncate_into("train"),
    ).set_display_name('query_dataset_train').after(
        returning_users_table,
        query_demographics_table,
        user_aggregate_behavior_table,
    )

    # We train the model

    create_model_query_job = BigqueryCreateModelJobOp(
        project=project_id,
        location=location,
        query=create_model_query,
    ).after(query_dataset_train_table)

    # Evaluate it, using the same project/location parameters as every other
    # BigQuery op instead of hard-coded values.

    _ = BigqueryEvaluateModelJobOp(
        project=project_id,
        location=location,
        model=create_model_query_job.outputs["model"],
    ).after(create_model_query_job)

    # Make some predictions

    # NOTE(review): this config spells the outer key "destinationTable" while
    # the query ops above use "destination_table"; left unchanged - confirm
    # which spelling the component expects and align them.
    _ = BigqueryPredictModelJobOp(
        project=project_id,
        location=location,
        model=create_model_query_job.outputs["model"],
        table_name=f"`{DATASET}.train`",
        job_configuration_query={
            "destinationTable": {
                "project_id": PROJECT_ID,
                "dataset_id": DATASET,
                "table_id": "results_1",
            },
            "write_disposition": "WRITE_TRUNCATE"
        },
    ).after(create_model_query_job)

    # Export the trained model to artifact_uri

    bq_export = BigqueryExportModelJobOp(
        project=project_id,
        location=location,
        model=create_model_query_job.outputs["model"],
        model_destination_path=artifact_uri,
    ).after(create_model_query_job)

    # Wrap the exported files as an UnmanagedContainerModel artifact tagged
    # with the serving image, so ModelUploadOp can consume it.
    import_unmanaged_model_task = importer_node.importer(
        artifact_uri=artifact_uri,
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": DEPLOY_IMAGE,
            },
        },
    ).after(bq_export)

    # No explanation parameters/metadata are attached at this stage.
    ModelUploadOp(
        project=project_id,
        display_name="bqml_churn_logreg",
        unmanaged_container_model=import_unmanaged_model_task.outputs["artifact"],
    ).after(import_unmanaged_model_task)

## Compile Pipeline

In [133]:
from kfp.v2 import compiler
import warnings
# Mute Python warnings from here on, then compile.
warnings.filterwarnings('ignore')

# Serialise the pipeline definition into the JSON template that the run cell
# submits.
compiler.Compiler().compile(
    package_path='bqml_mm.json',
    pipeline_func=pipeline,
)

## Run Pipeline

In [134]:
import google.cloud.aiplatform as aip

# Submit the compiled template to Vertex AI Pipelines.
# project/location are passed explicitly (as the monitoring job creation
# further down already does) so the run does not depend on whatever defaults
# the SDK picks up from the environment.
job = aip.PipelineJob(
    display_name="bqml_mm",
    template_path="bqml_mm.json",
    pipeline_root=PIPELINE_ROOT_PATH,
    parameter_values={
        'project_id': PROJECT_ID,
        # Location handed to the BigQuery ops; distinct from the Vertex AI
        # REGION used for the pipeline job itself.
        'location': 'US',
        'artifact_uri': MODEL_DIR
    },
    project=PROJECT_ID,
    location=REGION,
)

# run() keeps reporting the job state in the cell output; job.submit() is the
# fire-and-forget alternative.
job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/bqml-mm-20220901153758?project=569083142710
PipelineJob projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/569083142710/locations/us-central1/pipelineJobs/bqml-mm-20220901153758 current state:
PipelineStat

## Extracting Metadata from the Model

In [145]:
# This cell uses `aiplatform` and `saved_model_metadata_builder`; import them
# here so the cell also works on a fresh kernel run top to bottom.
from google.cloud import aiplatform
from google.cloud.aiplatform.explain.metadata.tf.v2 import saved_model_metadata_builder

# SavedModel location inspected to derive the explanation metadata.
# NOTE(review): this differs from the MODEL_DIR the pipeline exports to -
# confirm which location holds the model that should be explained.
MODEL_PATH = "gs://mco-mm/churn"

# Sampled Shapley attribution with 10 feature permutations per explanation.
params = {"sampled_shapley_attribution": {"path_count": 10}}
EXPLAIN_PARAMS = aiplatform.explain.ExplanationParameters(params)

# Build the explanation metadata from the SavedModel, explaining the
# `churned_probs` output.
builder = saved_model_metadata_builder.SavedModelMetadataBuilder(
    model_path=MODEL_PATH, outputs_to_explain=["churned_probs"]
)
EXPLAIN_META = builder.get_metadata_protobuf()

In [146]:
# Artifact location, display name and serving image for the manual upload.
# Note that this rebinds MODEL_DIR, which the configuration cell at the top of
# the notebook also defines.
MODEL_DIR = "gs://mco-mm/churn"
MODEL_NAME = "churn"
IMAGE = "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-5:latest"

# Register the model together with its explanation spec; sync=True waits for
# the upload to finish before the cell returns.
model = aiplatform.Model.upload(
    artifact_uri=MODEL_DIR,
    display_name=MODEL_NAME,
    serving_container_image_uri=IMAGE,
    explanation_metadata=EXPLAIN_META,
    explanation_parameters=EXPLAIN_PARAMS,
    sync=True,
)

# The model id is the last path segment of the resource name.
MODEL_ID = model.resource_name.rpartition("/")[2]

Creating Model
Create Model backing LRO: projects/569083142710/locations/us-central1/models/2461476681102131200/operations/2851258294792617984
Model created. Resource name: projects/569083142710/locations/us-central1/models/2461476681102131200@1
To use this Model in another session:
model = aiplatform.Model('projects/569083142710/locations/us-central1/models/2461476681102131200@1')


In [147]:
# Deploy the uploaded model; deploy() returns the endpoint serving it.
endpoint = model.deploy(machine_type="n1-standard-4")

# Keep the full resource name and its trailing id segment for later cells.
ENDPOINT = endpoint.resource_name
ENDPOINT_ID = ENDPOINT.rpartition("/")[2]

print(f"endpoint display name: {endpoint.display_name}")
print(f"endpoint resource name: {endpoint.resource_name}")

Creating Endpoint
Create Endpoint backing LRO: projects/569083142710/locations/us-central1/endpoints/1640849580641746944/operations/4332942572197511168
Endpoint created. Resource name: projects/569083142710/locations/us-central1/endpoints/1640849580641746944
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/569083142710/locations/us-central1/endpoints/1640849580641746944')
Deploying model to Endpoint : projects/569083142710/locations/us-central1/endpoints/1640849580641746944
Deploy Endpoint model backing LRO: projects/569083142710/locations/us-central1/endpoints/1640849580641746944/operations/5280950293759000576
Endpoint model deployed. Resource name: projects/569083142710/locations/us-central1/endpoints/1640849580641746944
endpoint display name: churn_endpoint
endpoint resource name: projects/569083142710/locations/us-central1/endpoints/1640849580641746944


## Monitoring

In [156]:
# Recipient of monitoring alerts and display name of the monitoring job.
USER_EMAIL = "jesusarguelles@google.com"
JOB_NAME = "churn"

# Fraction of prediction requests that is logged for monitoring.
LOG_SAMPLE_RATE = 0.8
# Monitoring interval in HOURS: model_monitoring.ScheduleConfig expects hours,
# so the previous value of 3600 scheduled a run every 3600 hours, not hourly.
MONITOR_INTERVAL = 1
# Training table used as the skew baseline; must match the `train` table the
# pipeline writes.
DATASET_BQ_URI = "bq://jchavezar-demo.demos.train"
TARGET = "churned"

# Alerting threshold shared by every monitored feature.
DEFAULT_THRESHOLD_VALUE = 0.001

# Features watched for skew/drift, both on raw values and on attributions.
MONITORED_FEATURES = ("country", "cnt_user_engagement")

# Four independent dicts (not aliases), so one can be tuned without touching
# the others.
SKEW_THRESHOLDS = dict.fromkeys(MONITORED_FEATURES, DEFAULT_THRESHOLD_VALUE)
DRIFT_THRESHOLDS = dict.fromkeys(MONITORED_FEATURES, DEFAULT_THRESHOLD_VALUE)
ATTRIB_SKEW_THRESHOLDS = dict.fromkeys(MONITORED_FEATURES, DEFAULT_THRESHOLD_VALUE)
ATTRIB_DRIFT_THRESHOLDS = dict.fromkeys(MONITORED_FEATURES, DEFAULT_THRESHOLD_VALUE)

In [157]:
from google.cloud.aiplatform import model_monitoring

# Skew detection: DATASET_BQ_URI is the data source, TARGET the target field,
# with per-feature thresholds for both feature values and attributions.
skew_config = model_monitoring.SkewDetectionConfig(
    target_field=TARGET,
    data_source=DATASET_BQ_URI,
    attribute_skew_thresholds=ATTRIB_SKEW_THRESHOLDS,
    skew_thresholds=SKEW_THRESHOLDS,
)

In [158]:
# Explanation config with default settings.
explanation_config = model_monitoring.ExplanationConfig()

# Drift detection thresholds for feature values and for attributions.
drift_config = model_monitoring.DriftDetectionConfig(
    attribute_drift_thresholds=ATTRIB_DRIFT_THRESHOLDS,
    drift_thresholds=DRIFT_THRESHOLDS,
)

# Bundle skew, drift and explanation settings (passed positionally, in that
# order) into the single objective handed to the monitoring job.
objective_config = model_monitoring.ObjectiveConfig(
    skew_config,
    drift_config,
    explanation_config,
)

In [159]:
from google.cloud import aiplatform

# Alerting: e-mail the configured recipient, with logging enabled.
emails = [USER_EMAIL]
alerting_config = model_monitoring.EmailAlertConfig(
    user_emails=emails,
    enable_logging=True,
)

# Schedule: how often the monitoring job runs.
schedule_config = model_monitoring.ScheduleConfig(
    monitor_interval=MONITOR_INTERVAL,
)

# Sampling: share of prediction requests that gets logged.
random_sampling = model_monitoring.RandomSampleConfig(
    sample_rate=LOG_SAMPLE_RATE,
)

# Create the monitoring job on the deployed endpoint.
job = aiplatform.ModelDeploymentMonitoringJob.create(
    project=PROJECT_ID,
    location=REGION,
    endpoint=endpoint,
    display_name=JOB_NAME,
    objective_configs=objective_config,
    logging_sampling_strategy=random_sampling,
    schedule_config=schedule_config,
    alert_config=alerting_config,
)

Creating ModelDeploymentMonitoringJob
ModelDeploymentMonitoringJob created. Resource name: projects/569083142710/locations/us-central1/modelDeploymentMonitoringJobs/3277647735813046272
To use this ModelDeploymentMonitoringJob in another session:
mdm_job = aiplatform.ModelDeploymentMonitoringJob('projects/569083142710/locations/us-central1/modelDeploymentMonitoringJobs/3277647735813046272')
View Model Deployment Monitoring Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/model-deployment-monitoring/3277647735813046272?project=569083142710


In [161]:
# Sample prediction instance: one value per feature, keyed by column name.
DEFAULT_INPUT = dict(
    cnt_ad_reward=0,
    cnt_challenge_a_friend=0,
    cnt_completed_5_levels=1,
    cnt_level_complete_quickplay=3,
    cnt_level_end_quickplay=5,
    cnt_level_reset_quickplay=2,
    cnt_level_start_quickplay=6,
    cnt_post_score=34,
    cnt_spend_virtual_currency=0,
    cnt_use_extra_steps=0,
    cnt_user_engagement=120,
    country="Denmark",
    dayofweek=3,
    julianday=254,
    language="da-dk",
    month=9,
    operating_system="IOS",
    user_pseudo_id="104B0770BAE16E8B53DF330C95881893",
)

In [198]:
# Send one request to the endpoint so a request schema exists, if necessary.
# Failures are reported rather than raised, so the notebook keeps running.
try:
    _ = endpoint.predict([DEFAULT_INPUT])
    print("prediction succeeded")
except Exception as err:
    print(f"prediction failed: {err}")

prediction succeeded


In [196]:
# Print one shard (00006 of 00007) of the explanation results found under a
# monitoring job's batch_prediction_output/training folder.
# NOTE(review): the bucket and job id in this path are hard-coded, and the job
# id differs from the monitoring job created above - confirm the path before
# re-running.
!gsutil cat gs://cloud-ai-platform-9b627e36-89f4-4b8c-bf59-1e00936392b5/model_monitoring/job-4451685361231331328/batch_prediction_output/training/2694564350097620992/batch_prediction_output/prediction-churn-2022_06_14T07_07_01_207Z/explanation.results-00006-of-00007

{"instance": {"user_pseudo_id": "434B79A24E73A32E488354A1AAD98495", "country": "United States", "operating_system": "ANDROID", "language": "en-us", "cnt_user_engagement": 46, "cnt_level_start_quickplay": 0, "cnt_level_end_quickplay": 0, "cnt_level_complete_quickplay": 0, "cnt_level_reset_quickplay": 0, "cnt_post_score": 4, "cnt_spend_virtual_currency": 0, "cnt_ad_reward": 0, "cnt_challenge_a_friend": 0, "cnt_completed_5_levels": 0, "cnt_use_extra_steps": 0, "month": 8, "julianday": 220, "dayofweek": 4}, "prediction": {"churned_values": ["1", "0"], "predicted_churned": ["1"], "churned_probs": [0.8249296206859882, 0.17507037931401181]}, "explanation": {"attributions": [{"outputName": "churned_probs", "baselineOutputValue": 0.019908302721557195, "instanceOutputValue": 0.8249296206859882, "outputIndex": [0], "featureAttributions": {"cnt_ad_reward": 1.1102230246251566e-17, "cnt_challenge_a_friend": 0.0, "cnt_completed_5_levels": 0.0, "cnt_level_complete_quickplay": -1.1102230246251566e-17, 