In [2]:
# Code referenced from https://sagemaker-examples.readthedocs.io/en/latest/sagemaker_model_monitor/fairness_and_explainability/SageMaker-Model-Monitor-Fairness-and-Explainability.html
import copy
import json
import random
import time
import pandas as pd

from datetime import datetime, timedelta

from sagemaker import get_execution_role, image_uris, Session
from sagemaker.clarify import (
    BiasConfig,
    DataConfig,
    ModelConfig,
    ModelPredictedLabelConfig,
    SHAPConfig,
)
from sagemaker.model import Model
from sagemaker.model_monitor import (
    BiasAnalysisConfig,
    CronExpressionGenerator,
    DataCaptureConfig,
    EndpointInput,
    ExplainabilityAnalysisConfig,
    ModelBiasMonitor,
    ModelExplainabilityMonitor,
)
from sagemaker.s3 import S3Downloader, S3Uploader

In [3]:
role = get_execution_role()
print(f"RoleArn: {role}")

sagemaker_session = Session()
sagemaker_client = sagemaker_session.sagemaker_client
sagemaker_runtime_client = sagemaker_session.sagemaker_runtime_client

region = sagemaker_session.boto_region_name
print(f"AWS region: {region}")

# A different bucket can be used, but make sure the role for this notebook has
# the s3:PutObject permissions. This is the bucket into which the data is captured
bucket = Session().default_bucket()
print(f"Demo Bucket: {bucket}")
prefix = "sagemaker/Final_Project_Model_Monitor"
s3_key = f"s3://{bucket}/{prefix}"
print(f"S3 key: {s3_key}")

s3_capture_upload_path = f"{s3_key}/datacapture"
ground_truth_upload_path = f"{s3_key}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
s3_report_path = f"{s3_key}/reports"

print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

baseline_results_uri = f"{s3_key}/baselining"
print(f"Baseline results uri: {baseline_results_uri}")

endpoint_instance_count = 1
endpoint_instance_type = "ml.m5.large"


RoleArn: arn:aws:iam::004608622582:role/LabRole
AWS region: us-east-1
Demo Bucket: sagemaker-us-east-1-004608622582
S3 key: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901
Capture path: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/datacapture
Ground truth path: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41
Report path: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports
Baseline results uri: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselining


### Model Files and Data Files

In [4]:
base_path = "/root/AAI-540-Final-Project/"
model_file = f"{base_path}/Models/xgb_regressor_model.tar.gz"
test_dataset = f"{base_path}/Data/test_data_no_head.csv"
validation_dataset = f"{base_path}/Data/validation_data_head.csv"
dataset_type = "text/csv"

with open(validation_dataset) as f:
    headers_line = f.readline().rstrip()
all_headers = headers_line.split(",")
label_header = all_headers[0]

### Deploy model to Amazon SageMaker

In [7]:
model_name = f"Final-Project-xgb-regression-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("Model name: ", model_name)
endpoint_name = f"Final-Project-xgb-regression-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("Endpoint name: ", endpoint_name)

Model name:  DEMO-xgb-churn-pred-model-monitor-2024-06-10-0258
Endpoint name:  DEMO-xgb-churn-model-monitor-2024-06-10-0258


### Invoke Deployed Model

In [8]:
image_uri = image_uris.retrieve("xgboost", region, "0.90-1")
print(f"XGBoost image uri: {image_uri}")
model = Model(
    role=role,
    name=model_name,
    image_uri=image_uri,
    model_data=model_url,
    sagemaker_session=sagemaker_session,
)

data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)
print(f"Deploying model {model_name} to endpoint {endpoint_name}")
model.deploy(
    initial_instance_count=endpoint_instance_count,
    instance_type=endpoint_instance_type,
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

XGBoost image uri: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3
Deploying model DEMO-xgb-churn-pred-model-monitor-2024-06-10-0258 to endpoint DEMO-xgb-churn-model-monitor-2024-06-10-0258
------!

### View Captured Data

In [9]:
print(f"Sending test traffic to the endpoint {endpoint_name}. \nPlease wait", end="")
test_dataset_size = 0  # record the number of rows in data we're sending for inference
with open(test_dataset, "r") as f:
    for row in f:
        if test_dataset_size < 120:
            payload = row.rstrip("\n")
            response = sagemaker_runtime_client.invoke_endpoint(
                EndpointName=endpoint_name,
                Body=payload,
                ContentType=dataset_type,
            )
            prediction = response["Body"].read()
            print(".", end="", flush=True)
            time.sleep(0.5)
        test_dataset_size += 1

print()
print("Done!")

Sending test traffic to the endpoint DEMO-xgb-churn-model-monitor-2024-06-10-0258. 
Please wait........................................................................................................................
Done!


In [10]:
print("Waiting 30 seconds for captures to show up", end="")
for _ in range(30):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        break
    print(".", end="", flush=True)
    time.sleep(1)
print()
print("Found Capture Files:")
print("\n ".join(capture_files[-5:]))

Waiting 30 seconds for captures to show up
Found Capture Files:
s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/datacapture/DEMO-xgb-churn-model-monitor-2024-06-10-0258/AllTraffic/2024/06/10/03/01-44-716-c9d92938-42de-4d60-aca7-cbedcab750bc.jsonl


In [11]:
capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")[-10:-1]
print(capture_file[-1])

{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"68,0,159.5,123,240.8,93,210.3,76,11.4,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0","encoding":"CSV"},"endpointOutput":{"observedContentType":"text/csv; charset=utf-8","mode":"OUTPUT","data":"0.007001264952123165","encoding":"CSV"}},"eventMetadata":{"eventId":"1d2148fa-537e-47ca-9253-7358b22d04fb","inferenceTime":"2024-06-10T03:02:44Z"},"eventVersion":"0"}


In [12]:
print(json.dumps(json.loads(capture_file[-1]), indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "text/csv",
      "mode": "INPUT",
      "data": "68,0,159.5,123,240.8,93,210.3,76,11.4,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0",
      "encoding": "CSV"
    },
    "endpointOutput": {
      "observedContentType": "text/csv; charset=utf-8",
      "mode": "OUTPUT",
      "data": "0.007001264952123165",
      "encoding": "CSV"
    }
  },
  "eventMetadata": {
    "eventId": "1d2148fa-537e-47ca-9253-7358b22d04fb",
    "inferenceTime": "2024-06-10T03:02:44Z"
  },
  "eventVersion": "0"
}


### Creating Artifical Traffic

In [13]:
import threading


class WorkerThread(threading.Thread):
    def __init__(self, do_run, *args, **kwargs):
        super(WorkerThread, self).__init__(*args, **kwargs)
        self.__do_run = do_run
        self.__terminate_event = threading.Event()

    def terminate(self):
        self.__terminate_event.set()

    def run(self):
        while not self.__terminate_event.is_set():
            self.__do_run(self.__terminate_event)

In [14]:
def invoke_endpoint(terminate_event):
    with open(test_dataset, "r") as f:
        i = 0
        for row in f:
            payload = row.rstrip("\n")
            response = sagemaker_runtime_client.invoke_endpoint(
                EndpointName=endpoint_name,
                ContentType="text/csv",
                Body=payload,
                InferenceId=str(i),  # unique ID per row
            )
            i += 1
            response["Body"].read()
            time.sleep(1)
            if terminate_event.is_set():
                break


# Keep invoking the endpoint with test data
invoke_endpoint_thread = WorkerThread(do_run=invoke_endpoint)
invoke_endpoint_thread.start()

### Fake Ground Truth

In [15]:
import random


def ground_truth_with_id(inference_id):
    random.seed(inference_id)  # to get consistent results
    rand = random.random()
    # format required by the merge container
    return {
        "groundTruthData": {
            "data": "1" if rand < 0.7 else "0",  # randomly generate positive labels 70% of the time
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": str(inference_id),
        },
        "eventVersion": "0",
    }


def upload_ground_truth(upload_time):
    records = [ground_truth_with_id(i) for i in range(test_dataset_size)]
    fake_records = [json.dumps(r) for r in records]
    data_to_upload = "\n".join(fake_records)
    target_s3_uri = f"{ground_truth_upload_path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
    print(f"Uploading {len(fake_records)} records to", target_s3_uri)
    S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)

In [16]:
# Generate data for the last hour
upload_ground_truth(datetime.utcnow() - timedelta(minutes=12))

Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/02/5048.jsonl


In [17]:
# Generate data once every 15 minutes
def generate_fake_ground_truth(terminate_event):
    upload_ground_truth(datetime.utcnow())
    for _ in range(0, 15):
        time.sleep(60)
        if terminate_event.is_set():
            break


ground_truth_thread = WorkerThread(do_run=generate_fake_ground_truth)
ground_truth_thread.start()

Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/03/0248.jsonl


### Model Bias Monitor

In [18]:
model_bias_monitor = ModelBiasMonitor(
    role=role,
    sagemaker_session=sagemaker_session,
    max_runtime_in_seconds=1800,
)

In [19]:
model_bias_baselining_job_result_uri = f"{baseline_results_uri}/model_bias"
model_bias_data_config = DataConfig(
    s3_data_input_path=validation_dataset,
    s3_output_path=model_bias_baselining_job_result_uri,
    label=label_header,
    headers=all_headers,
    dataset_type=dataset_type,
)

In [20]:
model_bias_config = BiasConfig(
    label_values_or_threshold=[1],
    facet_name="Account Length",
    facet_values_or_threshold=[100],
)

In [21]:
model_predicted_label_config = ModelPredictedLabelConfig(
    probability_threshold=0.8,
)

In [22]:
model_config = ModelConfig(
    model_name=model_name,
    instance_count=endpoint_instance_count,
    instance_type=endpoint_instance_type,
    content_type=dataset_type,
    accept_type=dataset_type,
)

### Kickoff Baseline Job

In [23]:
model_bias_monitor.suggest_baseline(
    model_config=model_config,
    data_config=model_bias_data_config,
    bias_config=model_bias_config,
    model_predicted_label_config=model_predicted_label_config,
)
print(f"ModelBiasMonitor baselining job: {model_bias_monitor.latest_baselining_job_name}")


Job Name:  baseline-suggestion-job-2024-06-10-03-02-49-291
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/baseline-suggestion-job-2024-06-10-03-02-49-291/input/dataset/validation-dataset-with-header.csv', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselining/model_bias/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselin

In [24]:
model_bias_monitor.latest_baselining_job.wait(logs=False)
model_bias_constraints = model_bias_monitor.suggested_constraints()
print()
print(f"ModelBiasMonitor suggested constraints: {model_bias_constraints.file_s3_uri}")
print(S3Downloader.read_file(model_bias_constraints.file_s3_uri))

.............................................................................................................................!
ModelBiasMonitor suggested constraints: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselining/model_bias/analysis.json
{
    "version": "1.0",
    "post_training_bias_metrics": {
        "label": "Churn",
        "facets": {
            "Account Length": [
                {
                    "value_or_threshold": "(100, 225]",
                    "metrics": [
                        {
                            "name": "AD",
                            "description": "Accuracy Difference (AD)",
                            "value": 0.03416521605801226
                        },
                        {
                            "name": "CDDPL",
                            "description": "Conditional Demographic Disparity in Predicted Labels (CDDPL)",
                            "value": null,
                         

### Schedule Model Bias Monitor

In [54]:
model_bias_analysis_config = None

if not model_bias_monitor.latest_baselining_job:
    model_bias_analysis_config = BiasAnalysisConfig(
        model_bias_config,
        headers=all_headers,
        label=label_header,
    )
model_bias_monitor.create_monitoring_schedule(
    analysis_config=model_bias_analysis_config,
    output_s3_uri=s3_report_path,
    endpoint_input=EndpointInput(
        endpoint_name=endpoint_name,
        destination="/opt/ml/processing/input/endpoint",
        start_time_offset="-PT1H",
        end_time_offset="-PT0H",
        probability_threshold_attribute=0.8,
    ),
    ground_truth_input=ground_truth_upload_path,
    schedule_cron_expression="cron(0 * ? * * *)",
)
print(f"Model bias monitoring schedule: {model_bias_monitor.monitoring_schedule_name}")
# cron(0 \d+(/12)? *|? * *|? *)/NOW

Model bias monitoring schedule: monitoring-schedule-2024-06-10-03-40-56-398


In [55]:
def wait_for_execution_to_start(model_monitor):
    print(
        "A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer)."
    )

    print("Waiting for the first execution to happen", end="")
    schedule_desc = model_monitor.describe_schedule()
    while "LastMonitoringExecutionSummary" not in schedule_desc:
        schedule_desc = model_monitor.describe_schedule()
        print(".", end="", flush=True)
        time.sleep(60)
    print()
    print("Done! Execution has been created")

    print("Now waiting for execution to start", end="")
    while schedule_desc["LastMonitoringExecutionSummary"]["MonitoringExecutionStatus"] in "Pending":
        schedule_desc = model_monitor.describe_schedule()
        print(".", end="", flush=True)
        time.sleep(10)

    print()
    print("Done! Execution has started")

In [56]:
wait_for_execution_to_start(model_bias_monitor)

A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer).
Waiting for the first execution to happen..........Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/03/5053.jsonl
...........
Done! Execution has been created
Now waiting for execution to start.
Done! Execution has started


In [57]:
model_bias_monitor.stop_monitoring_schedule()


Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-10-03-40-56-398


### Wait for execution to finish

In [58]:
# Waits for the schedule to have last execution in a terminal status.
def wait_for_execution_to_finish(model_monitor):
    schedule_desc = model_monitor.describe_schedule()
    execution_summary = schedule_desc.get("LastMonitoringExecutionSummary")
    if execution_summary is not None:
        print("Waiting for execution to finish", end="")
        while execution_summary["MonitoringExecutionStatus"] not in [
            "Completed",
            "CompletedWithViolations",
            "Failed",
            "Stopped",
        ]:
            print(".", end="", flush=True)
            time.sleep(60)
            schedule_desc = model_monitor.describe_schedule()
            execution_summary = schedule_desc["LastMonitoringExecutionSummary"]
        print()
        print("Done! Execution has finished")
    else:
        print("Last execution not found")

In [59]:
wait_for_execution_to_finish(model_bias_monitor)

Waiting for execution to finish.Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/04/0254.jsonl
...........
Done! Execution has finished


### Inspect Execution Results

In [60]:
schedule_desc = model_bias_monitor.describe_schedule()
execution_summary = schedule_desc.get("LastMonitoringExecutionSummary")
if execution_summary and execution_summary["MonitoringExecutionStatus"] in [
    "Completed",
    "CompletedWithViolations",
]:
    last_model_bias_monitor_execution = model_bias_monitor.list_executions()[-1]
    last_model_bias_monitor_execution_report_uri = (
        last_model_bias_monitor_execution.output.destination
    )
    print(f"Report URI: {last_model_bias_monitor_execution_report_uri}")
    last_model_bias_monitor_execution_report_files = sorted(
        S3Downloader.list(last_model_bias_monitor_execution_report_uri)
    )
    print("Found Report Files:")
    print("\n ".join(last_model_bias_monitor_execution_report_files))
else:
    last_model_bias_monitor_execution = None
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )

Report URI: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-03-40-56-398/2024/06/10/04
Found Report Files:
s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-03-40-56-398/2024/06/10/04/analysis.json
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-03-40-56-398/2024/06/10/04/report.html
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-03-40-56-398/2024/06/10/04/report.ipynb
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule

In [61]:
# Check violations
if last_model_bias_monitor_execution:
    model_bias_violations = last_model_bias_monitor_execution.constraint_violations()
    if model_bias_violations:
        print(model_bias_violations.body_dict)


Could not retrieve constraints file at location 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-03-40-56-398/2024/06/10/04/constraint_violations.json'. To manually retrieve ConstraintViolations object from a given uri, use 'my_model_monitor.constraints(my_s3_uri)' or 'ConstraintViolations.from_s3_uri(my_s3_uri)'


### Model Explainability Monitor

In [62]:
model_explainability_monitor = ModelExplainabilityMonitor(
    role=role,
    sagemaker_session=sagemaker_session,
    max_runtime_in_seconds=1800,
)

In [63]:
model_explainability_baselining_job_result_uri = f"{baseline_results_uri}/model_explainability"
model_explainability_data_config = DataConfig(
    s3_data_input_path=validation_dataset,
    s3_output_path=model_explainability_baselining_job_result_uri,
    label=label_header,
    headers=all_headers,
    dataset_type=dataset_type,
)

In [64]:
test_dataframe = pd.read_csv(test_dataset, header=None)
shap_baseline = [list(test_dataframe.mean())]

shap_config = SHAPConfig(
    baseline=shap_baseline,
    num_samples=100,
    agg_method="mean_abs",
    save_local_shap_values=False,
)

### Kick Off Baseline Job

In [65]:
model_explainability_monitor.suggest_baseline(
    data_config=model_explainability_data_config,
    model_config=model_config,
    explainability_config=shap_config,
)
print(
    f"ModelExplainabilityMonitor baselining job: {model_explainability_monitor.latest_baselining_job_name}"
)


Job Name:  baseline-suggestion-job-2024-06-10-04-14-39-892
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/baseline-suggestion-job-2024-06-10-04-14-39-892/input/dataset/validation-dataset-with-header.csv', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselining/model_explainability/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-202009

In [66]:
model_explainability_monitor.latest_baselining_job.wait(logs=False)
model_explainability_constraints = model_explainability_monitor.suggested_constraints()
print()
print(
    f"ModelExplainabilityMonitor suggested constraints: {model_explainability_constraints.file_s3_uri}"
)
print(S3Downloader.read_file(model_explainability_constraints.file_s3_uri))

...Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/04/1455.jsonl
..................................................................................................................!
ModelExplainabilityMonitor suggested constraints: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/baselining/model_explainability/analysis.json
{
    "version": "1.0",
    "explanations": {
        "kernel_shap": {
            "label0": {
                "global_shap_values": {
                    "Account Length": 0.004092584426168822,
                    "VMail Message": 0.020018270866969987,
                    "Day Mins": 0.04022687379373547,
                    "Day Calls": 0.004412040670225618,
                    "Eve Mins": 0.016142587115974254,
                    "Eve Calls": 0.004468387752954899,
                    "Night Mins": 0.007921397767130557,
      

### Schedule model explainability monitor

In [68]:
model_explainability_analysis_config = None
if not model_explainability_monitor.latest_baselining_job:
    # Remove label because only features are required for the analysis
    headers_without_label_header = copy.deepcopy(all_headers)
    headers_without_label_header.remove(label_header)
    model_explainability_analysis_config = ExplainabilityAnalysisConfig(
        explainability_config=shap_config,
        model_config=model_config,
        headers=headers_without_label_header,
    )
model_explainability_monitor.create_monitoring_schedule(
    output_s3_uri=s3_report_path,
    endpoint_input=endpoint_name,
    schedule_cron_expression="cron(0 * ? * * *)",
)

In [69]:
wait_for_execution_to_start(model_explainability_monitor)

A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer).
Waiting for the first execution to happen........Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/04/5058.jsonl
............Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/05/0259.jsonl
........
Done! Execution has been created
Now waiting for execution to start.......................Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/05/1501.jsonl
...
Done! Execution has started


In [70]:
model_explainability_monitor.stop_monitoring_schedule()


Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-10-04-43-08-723


In [71]:
wait_for_execution_to_finish(model_explainability_monitor)

Waiting for execution to finish............Uploading 334 records to s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2024-06-10-02-58-41/2024/06/10/05/2702.jsonl
.
Done! Execution has finished


In [72]:
schedule_desc = model_explainability_monitor.describe_schedule()
execution_summary = schedule_desc.get("LastMonitoringExecutionSummary")
if execution_summary and execution_summary["MonitoringExecutionStatus"] in [
    "Completed",
    "CompletedWithViolations",
]:
    last_model_explainability_monitor_execution = model_explainability_monitor.list_executions()[-1]
    last_model_explainability_monitor_execution_report_uri = (
        last_model_explainability_monitor_execution.output.destination
    )
    print(f"Report URI: {last_model_explainability_monitor_execution_report_uri}")
    last_model_explainability_monitor_execution_report_files = sorted(
        S3Downloader.list(last_model_explainability_monitor_execution_report_uri)
    )
    print("Found Report Files:")
    print("\n ".join(last_model_explainability_monitor_execution_report_files))
else:
    last_model_explainability_monitor_execution = None
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )

Report URI: s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-04-43-08-723/2024/06/10/05
Found Report Files:
s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-04-43-08-723/2024/06/10/05/analysis.json
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-04-43-08-723/2024/06/10/05/report.html
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-04-43-08-723/2024/06/10/05/report.ipynb
 s3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule

In [73]:
if last_model_explainability_monitor_execution:
    model_explainability_violations = (
        last_model_explainability_monitor_execution.constraint_violations()
    )
    if model_explainability_violations:
        print(model_explainability_violations.body_dict)


Could not retrieve constraints file at location 's3://sagemaker-us-east-1-004608622582/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/DEMO-xgb-churn-model-monitor-2024-06-10-0258/monitoring-schedule-2024-06-10-04-43-08-723/2024/06/10/05/constraint_violations.json'. To manually retrieve ConstraintViolations object from a given uri, use 'my_model_monitor.constraints(my_s3_uri)' or 'ConstraintViolations.from_s3_uri(my_s3_uri)'


### Clean Up

In [74]:
invoke_endpoint_thread.terminate()
ground_truth_thread.terminate()

In [75]:
from sagemaker.predictor import Predictor

predictor = Predictor(endpoint_name, sagemaker_session=sagemaker_session)
model_monitors = predictor.list_monitors()
for model_monitor in model_monitors:
    model_monitor.stop_monitoring_schedule()
    wait_for_execution_to_finish(model_monitor)
    model_monitor.delete_monitoring_schedule()


Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-10-04-43-08-723
Waiting for execution to finish
Done! Execution has finished

Deleting Monitoring Schedule with name: monitoring-schedule-2024-06-10-04-43-08-723

Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-10-03-40-56-398
Waiting for execution to finish
Done! Execution has finished

Deleting Monitoring Schedule with name: monitoring-schedule-2024-06-10-03-40-56-398


In [76]:
predictor.delete_endpoint()
predictor.delete_model()

In [2]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>