### Imports

In [None]:
%pip install --upgrade boto3

In [None]:
import copy
import time
import pandas as pd
import threading

from datetime import datetime

from sagemaker import get_execution_role, image_uris, Session
from sagemaker.clarify import (
    DataConfig,
    ModelConfig,
    SHAPConfig,
)
from sagemaker.model import Model
from sagemaker.model_monitor import (
    CronExpressionGenerator,
    DataCaptureConfig,
    ExplainabilityAnalysisConfig,
    ModelExplainabilityMonitor,
)
from sagemaker.predictor import Predictor
from sagemaker.s3 import S3Downloader, S3Uploader

### Basic Configuration

In [None]:
# IAM role of this notebook; it is passed to the Model and to the
# ModelExplainabilityMonitor created further down.
role = get_execution_role()
print(f"Execution Role: {role}")

# A single session provides the control-plane client, the runtime client,
# the region and the default bucket used throughout the notebook.
sagemaker_session = Session()
sagemaker_client = sagemaker_session.sagemaker_client
sagemaker_runtime_client = sagemaker_session.sagemaker_runtime_client

region = sagemaker_session.boto_region_name
print(f"AWS region: {region}")

# A different bucket can be used, but make sure the role for this notebook has
# the s3:PutObject permissions. This is the bucket into which the data is captured.
# The bucket is resolved through the session configured above rather than a
# second, throw-away Session(), so every S3 path agrees with the clients in use.
bucket = sagemaker_session.default_bucket()
print(f"Demo Bucket: {bucket}")
prefix = "sagemaker/shap-observability-promo-planning"
s3_key = f"s3://{bucket}/{prefix}"
print(f"S3 key: {s3_key}")

# Everything the notebook writes lives under s3_key: captured endpoint
# traffic, monitoring reports and baselining results.
s3_capture_upload_path = f"{s3_key}/datacapture"
s3_report_path = f"{s3_key}/reports"

print(f"Capture path: {s3_capture_upload_path}")
print(f"Report path: {s3_report_path}")

baseline_results_uri = f"{s3_key}/baselining"
print(f"Baseline results uri: {baseline_results_uri}")

# Instance settings are shared by the deployed endpoint and the ModelConfig
# handed to the explainability monitor.
endpoint_instance_count = 1
endpoint_instance_type = "ml.m5.large"
# Cron expression for the monitoring schedule: one execution per hour.
schedule_expression = CronExpressionGenerator.hourly()

### Test bucket connectivity

In [None]:
# Connectivity check: write a one-line local file and upload it to the demo
# bucket. The success message is printed only if the upload call returns
# without raising.
test_file = "upload-test-file.txt"
test_upload_uri = f"s3://{bucket}/test_upload"

with open(test_file, "w") as f:
    f.write("Hello world!\n")

S3Uploader.upload(test_file, test_upload_uri)
print("Success! We are all set to proceed.")

### Model and Data Preparation

In [None]:
# Source locations of the inputs; both objects are downloaded locally in the
# next cell.
# CSV dataset that is later split into test and validation samples.
s3_data_uri = "s3://adp-rnd-ml-datasets/promotion-planning/validation/data.csv"
# Model archive produced by a training job (see the job name in the key).
s3_model_uri = "s3://adp-rnd-ml-models/promotion-planning/model/promotion-planning-train-job-2023-01-31-084806/output/model.tar.gz"

In [None]:
# MIME type used for the Clarify DataConfig and as content/accept type of the
# ModelConfig later in the notebook.
dataset_type = "text/csv"

# Local working directories and the expected local path of the model archive
# (its file name matches the last component of s3_model_uri).
model_dir = "model"
dataset_dir = "data"
model_file = f"{model_dir}/model.tar.gz"

# Fetch the model archive first, then the dataset.
S3Downloader.download(s3_model_uri, model_dir)
S3Downloader.download(s3_data_uri, dataset_dir)

In [None]:
# Load the downloaded dataset and report its dimensions.
df = pd.read_csv(f"{dataset_dir}/data.csv")
print(f"SHAPE: {df.shape}")

# Column names in file order; the first column is used as the label.
all_headers = df.columns.tolist()
label_header = all_headers[0]
# Bare trailing expression: the notebook displays the first ten column names.
all_headers[:10]

In [None]:
import os

# Share of rows drawn for each sample (also reused for the SHAP baseline).
fraction = 0.01

# Test sample: a random `fraction` of the rows, kept in original row order.
# It is written without label column and without header because each line is
# sent verbatim to the endpoint as a CSV payload.
test_idx = df.sample(frac=fraction).index
test_data = df[df.index.isin(test_idx)]
print('Test shape:', test_data.shape)
test_dataset_dir = 'test'
test_dataset = f'{test_dataset_dir}/test.csv'
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target directory exists before writing.
os.makedirs(test_dataset_dir, exist_ok=True)
test_data.drop(label_header, axis=1).to_csv(test_dataset, index=False, header=False)


# Validation sample: a random `fraction` of the rows NOT used for testing.
# It keeps the label column and the header row.
val_data = df[~df.index.isin(test_idx)].sample(frac=fraction)
print('Validation shape:', val_data.shape)
validation_dataset_dir = 'validation'
validation_dataset = f'{validation_dataset_dir}/validation.csv'
os.makedirs(validation_dataset_dir, exist_ok=True)
val_data.to_csv(validation_dataset, index=False, header=True)

In [None]:
# Copy the local model archive under this notebook's S3 prefix; the returned
# value is used as `model_data` when the Model is created.
model_url = S3Uploader.upload(model_file, s3_key)
print(f"Model file has been uploaded to {model_url}")

## Create endpoint

In [None]:
from datetime import timezone

# One timezone-aware UTC timestamp shared by both names. Calling the clock
# once guarantees the two suffixes match, and avoids the naive
# datetime.utcnow(), which is deprecated since Python 3.12.
name_timestamp = f"{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

model_name = f"shap-observability-promo-planning-{name_timestamp}"
print("Model name: ", model_name)
endpoint_name = f"shap-observability-promo-planning-{name_timestamp}"
print("Endpoint name: ", endpoint_name)

In [None]:
# Resolve the XGBoost 0.90-1 container image for the current region.
image_uri = image_uris.retrieve("xgboost", region, version="0.90-1")
print(f"XGBoost image uri: {image_uri}")

# Capture 100% of the endpoint traffic into the datacapture prefix.
data_capture_config = DataCaptureConfig(
    destination_s3_uri=s3_capture_upload_path,
    sampling_percentage=100,
    enable_capture=True,
)

# Model definition: container image plus the uploaded model archive.
model = Model(
    image_uri=image_uri,
    model_data=model_url,
    name=model_name,
    role=role,
    sagemaker_session=sagemaker_session,
)

print(f"Deploying model {model_name} to endpoint {endpoint_name}")
model.deploy(
    endpoint_name=endpoint_name,
    instance_type=endpoint_instance_type,
    initial_instance_count=endpoint_instance_count,
    data_capture_config=data_capture_config,
)

### Set up endpoint invocations

In [None]:

class WorkerThread(threading.Thread):
    """Thread that calls ``do_run`` repeatedly until ``terminate()`` is requested.

    ``do_run`` is invoked with the internal termination event as its only
    argument, so the callable can check the event and return early.
    Extra positional and keyword arguments are forwarded to
    ``threading.Thread``.
    """

    def __init__(self, do_run, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__halt_requested = threading.Event()
        self.__job = do_run

    def terminate(self):
        """Request the run loop to stop; does not wait for the thread to exit."""
        self.__halt_requested.set()

    def run(self):
        """Call the job over and over, checking the stop flag before each call."""
        halt_requested = self.__halt_requested
        while True:
            if halt_requested.is_set():
                return
            self.__job(halt_requested)


def invoke_endpoint(terminate_event):
    """Send each row of the test CSV to the endpoint, about one request per second.

    Performs a single pass over ``test_dataset``; the caller (``WorkerThread``)
    calls this function again for the next pass.

    Args:
        terminate_event: ``threading.Event``; once it is set, the pass stops
            after the request currently in flight.
    """
    with open(test_dataset, "r") as f:
        for i, row in enumerate(f):
            payload = row.rstrip("\n")
            response = sagemaker_runtime_client.invoke_endpoint(
                EndpointName=endpoint_name,
                ContentType="text/csv",
                Body=payload,
                InferenceId=str(i),  # row index: unique within one pass over the file
            )
            # Read (and discard) the response body.
            response["Body"].read()
            # Event.wait is both the one-second throttle and the stop check:
            # it returns True as soon as termination is requested instead of
            # always sleeping the full second first.
            if terminate_event.wait(1):
                break


# Background traffic generator: replays the test data against the endpoint
# until invoke_endpoint_thread.terminate() is called in the cleanup section.
invoke_endpoint_thread = WorkerThread(invoke_endpoint)
invoke_endpoint_thread.start()

### Set up monitoring job

In [None]:
# Monitor object that runs the baselining job and owns the schedule.
# Each job is limited to 1800 seconds of runtime.
model_explainability_monitor = ModelExplainabilityMonitor(
    sagemaker_session=sagemaker_session,
    role=role,
    max_runtime_in_seconds=1800,
)

# Model settings for the explainability analysis: the deployed model name,
# the same instance settings as the endpoint, and CSV in/out.
model_config = ModelConfig(
    model_name=model_name,
    content_type=dataset_type,
    accept_type=dataset_type,
    instance_type=endpoint_instance_type,
    instance_count=endpoint_instance_count,
)

# Where the baselining job writes its results.
model_explainability_baselining_job_result_uri = f"{baseline_results_uri}/model_explainability"
print(f'Explainability baseline s3 uri: {model_explainability_baselining_job_result_uri}')

# NOTE(review): validation_dataset is a local file path, not an S3 URI;
# presumably the SDK uploads it when the job is launched - confirm.
model_explainability_data_config = DataConfig(
    s3_data_input_path=validation_dataset,
    s3_output_path=model_explainability_baselining_job_result_uri,
    dataset_type=dataset_type,
    headers=all_headers,
    label=label_header,
)

### Create SHAP baseline

In [None]:

# The SHAP baseline is a random `fraction` of the label-free, header-less
# test file, converted to a list of row lists.
# NOTE(review): sampling a fraction of an already small test set can yield
# very few (or zero) rows - confirm the baseline is non-empty for this data.
test_dataframe = pd.read_csv(test_dataset, header=None)
shap_baseline_rows = test_dataframe.sample(frac=fraction)
shap_baseline = shap_baseline_rows.values.tolist()

shap_config = SHAPConfig(
    baseline=shap_baseline,
    agg_method="mean_abs",
    num_samples=50,
    save_local_shap_values=False,
)

# Launch the baselining job and report its name.
model_explainability_monitor.suggest_baseline(
    explainability_config=shap_config,
    model_config=model_config,
    data_config=model_explainability_data_config,
)
latest_baselining_job_name = model_explainability_monitor.latest_baselining_job_name
print(f"ModelExplainabilityMonitor baselining job: {latest_baselining_job_name}")


#### Run explainability baselining job

In [None]:

# Block until the baselining job finishes (without streaming its logs), then
# fetch and display the constraints it suggested.
model_explainability_monitor.latest_baselining_job.wait(logs=False)
model_explainability_constraints = model_explainability_monitor.suggested_constraints()
suggested_constraints_uri = model_explainability_constraints.file_s3_uri
print()
print(f"ModelExplainabilityMonitor suggested constraints: {suggested_constraints_uri}")
print(S3Downloader.read_file(suggested_constraints_uri))

# Fallback: an explicit analysis config is built only when the monitor has no
# baselining job. The first statement of this cell already dereferences
# latest_baselining_job, so this branch matters only if that attribute is falsy.
model_explainability_analysis_config = None
if not model_explainability_monitor.latest_baselining_job:
    # Remove label because only features are required for the analysis
    headers_without_label_header = all_headers.copy()
    headers_without_label_header.remove(label_header)
    model_explainability_analysis_config = ExplainabilityAnalysisConfig(
        headers=headers_without_label_header,
        model_config=model_config,
        explainability_config=shap_config,
    )
    

### Create Monitoring schedule

In [None]:
def wait_for_execution_to_start(model_monitor):
    """Block until the schedule has an execution that is no longer "Pending".

    Phase one polls every 60 seconds until the schedule description contains
    a ``LastMonitoringExecutionSummary``. Phase two polls every 10 seconds
    until that execution's status is something other than ``"Pending"``.
    Progress dots are printed while waiting.

    Args:
        model_monitor: Object exposing ``describe_schedule()``, which returns
            the schedule description as a dict.
    """
    print(
        "A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer)."
    )

    print("Waiting for the first execution to happen", end="")
    schedule_desc = model_monitor.describe_schedule()
    while "LastMonitoringExecutionSummary" not in schedule_desc:
        print(".", end="", flush=True)
        # Sleep first, then refresh, so the loop condition always tests a
        # fresh description rather than one that is a full interval old.
        time.sleep(60)
        schedule_desc = model_monitor.describe_schedule()
    print()
    print("Done! Execution has been created")

    print("Now waiting for execution to start", end="")
    # Exact comparison: `status in "Pending"` would be a substring test and
    # would also match values such as "" or "Pend".
    while schedule_desc["LastMonitoringExecutionSummary"]["MonitoringExecutionStatus"] == "Pending":
        print(".", end="", flush=True)
        time.sleep(10)
        schedule_desc = model_monitor.describe_schedule()

    print()
    print("Done! Execution has started")



def wait_for_execution_to_finish(model_monitor):
    """Wait until the schedule's last execution reaches a terminal status.

    Terminal statuses are "Completed", "CompletedWithViolations", "Failed"
    and "Stopped". The schedule is polled every 60 seconds and a progress dot
    is printed per poll. If the schedule has no last execution, a message is
    printed and the function returns immediately.

    Args:
        model_monitor: Object exposing ``describe_schedule()``, which returns
            the schedule description as a dict.
    """
    summary = model_monitor.describe_schedule().get("LastMonitoringExecutionSummary")
    if summary is None:
        print("Last execution not found")
        return

    terminal_statuses = ("Completed", "CompletedWithViolations", "Failed", "Stopped")
    print("Waiting for execution to finish", end="")
    while summary["MonitoringExecutionStatus"] not in terminal_statuses:
        print(".", end="", flush=True)
        time.sleep(60)
        summary = model_monitor.describe_schedule()["LastMonitoringExecutionSummary"]
    print()
    print("Done! Execution has finished")


In [None]:
# Create the hourly explainability monitoring schedule for the deployed
# endpoint; reports are written under s3_report_path.
# analysis_config is None unless the fallback branch in the baselining cell
# built an explicit ExplainabilityAnalysisConfig.
model_explainability_monitor.create_monitoring_schedule(
    output_s3_uri=s3_report_path,
    endpoint_input=endpoint_name,
    schedule_cron_expression=schedule_expression,
    analysis_config=model_explainability_analysis_config
)

In [None]:
# Blocks until the schedule reports an execution whose status has moved past
# "Pending"; with an hourly schedule this can take a long time.
wait_for_execution_to_start(model_explainability_monitor)

In [None]:
# Blocks until the last execution is Completed, CompletedWithViolations,
# Failed or Stopped (polling once per minute).
wait_for_execution_to_finish(model_explainability_monitor)

In [None]:

# Look at the most recent execution: if it completed (with or without
# violations), list the report files it produced; otherwise tell the user to
# stop here.
schedule_desc = model_explainability_monitor.describe_schedule()
execution_summary = schedule_desc.get("LastMonitoringExecutionSummary")
if not execution_summary or execution_summary["MonitoringExecutionStatus"] not in [
    "Completed",
    "CompletedWithViolations",
]:
    # Nothing usable yet; the next cell checks this variable before use.
    last_model_explainability_monitor_execution = None
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )
else:
    # The last element of list_executions() is taken as the latest execution.
    last_model_explainability_monitor_execution = model_explainability_monitor.list_executions()[-1]
    last_model_explainability_monitor_execution_report_uri = last_model_explainability_monitor_execution.output.destination
    print(f"Report URI: {last_model_explainability_monitor_execution_report_uri}")
    last_model_explainability_monitor_execution_report_files = sorted(
        S3Downloader.list(last_model_explainability_monitor_execution_report_uri)
    )
    print("Found Report Files:")
    print("\n ".join(last_model_explainability_monitor_execution_report_files))

In [None]:
# Only runs when the previous cell found a completed execution
# (otherwise the variable was set to None there).
if last_model_explainability_monitor_execution:
    model_explainability_violations = (
        last_model_explainability_monitor_execution.constraint_violations()
    )
    # Print the violations report only when one was returned.
    if model_explainability_violations:
        print(model_explainability_violations.body_dict)

## Cleanup

In [None]:

# Tear down monitoring: stop the schedule, then delete it.
# NOTE(review): presumably the schedule has to be stopped before it can be
# deleted - confirm against the SDK documentation.
model_explainability_monitor.stop_monitoring_schedule()
model_explainability_monitor.delete_monitoring_schedule()

In [None]:
# Ask the traffic generator to stop and wait until it has actually exited.
# terminate() only sets a flag; without join() the thread could still be
# sending a request while the endpoint is being deleted below.
invoke_endpoint_thread.terminate()
invoke_endpoint_thread.join()

# Remove the endpoint first, then the model behind it.
predictor = Predictor(endpoint_name, sagemaker_session=sagemaker_session)
predictor.delete_endpoint()
predictor.delete_model()