# Amazon SageMaker Model - Model Runner with Holdout Data

## Section 1 - Setup <a id='setup'></a>

#### 1.1 Import necessary libraries

In [40]:
%%time

from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
from time import sleep
from threading import Thread

import io
import random
import pandas as pd

from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer

from sagemaker.model import Model
from sagemaker.model_monitor import (
    BiasAnalysisConfig,
    CronExpressionGenerator,
    DataCaptureConfig,
    EndpointInput,
    ExplainabilityAnalysisConfig,
    ModelBiasMonitor,
    ModelExplainabilityMonitor,
)

from sagemaker.clarify import (
    BiasConfig,
    DataConfig,
    ModelConfig,
    ModelPredictedLabelConfig,
    SHAPConfig,
)

from threading import Timer

CPU times: user 153 μs, sys: 10 μs, total: 163 μs
Wall time: 180 μs


#### 1.2 AWS Region, IAM Role, SageMaker Clients, and Bucket Details

In [41]:
# Create the SageMaker session and keep handles to the two clients it exposes;
# sagemaker_client is used further down for describe_endpoint.
sagemaker_session = Session()
sagemaker_client = sagemaker_session.sagemaker_client
sagemaker_runtime_client = sagemaker_session.sagemaker_runtime_client

# Retrieve the default Amazon S3 bucket associated with the SageMaker session.
bucket = sagemaker_session.default_bucket()
print("Bucket:", bucket)

# Get the IAM role associated with the current SageMaker notebook or environment.
role = get_execution_role()
print("RoleArn:", role)

# Get the AWS region name from a fresh default boto3 session.
# NOTE(review): presumably the same region sagemaker_session uses — confirm.
region = boto3.Session().region_name
print("Region", region)

# Retrieve the AWS account ID of the caller using the Security Token Service (STS) client.
account_id = boto3.client("sts").get_caller_identity().get("Account")

# Create a Boto3 client for the SageMaker service, specifying the AWS region.
sm = boto3.Session().client(service_name="sagemaker", region_name=region)

# Create an S3 client (used below to download the holdout CSV).
s3 = boto3.client('s3')

Bucket: sagemaker-us-east-1-084375567266
RoleArn: arn:aws:iam::084375567266:role/service-role/SageMaker-ExecutionRole-20250222T162355
Region us-east-1


In [42]:
# Name of the endpoint this notebook sends holdout traffic to.
endpoint_name = "retain-ai-xgb-endpoint"
# Fetch the endpoint description so its status, variants and data-capture
# settings can be inspected before any requests are sent.
response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
display(response)

{'EndpointName': 'retain-ai-xgb-endpoint',
 'EndpointArn': 'arn:aws:sagemaker:us-east-1:084375567266:endpoint/retain-ai-xgb-endpoint',
 'EndpointConfigName': 'retain-ai-xgb-endpoint',
 'ProductionVariants': [{'VariantName': 'AllTraffic',
   'DeployedImages': [{'SpecifiedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3',
     'ResolvedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost@sha256:4814427c3e0a6cf99e637704da3ada04219ac7cd5727ff62284153761d36d7d3',
     'ResolutionTime': datetime.datetime(2025, 2, 24, 3, 30, 46, 86000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 1,
   'DesiredInstanceCount': 1}],
 'DataCaptureConfig': {'EnableCapture': True,
  'CaptureStatus': 'Started',
  'CurrentSamplingPercentage': 100,
  'DestinationS3Uri': 's3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/data/monitoring'},
 'EndpointStatus': 'InService',
 'CreationTime': date

In [43]:
# S3 key of the holdout split inside the session's default bucket.
houldout_file_key = "aai-540-group-3-final-project/data/holdout/holdout.csv"

# Fetch the object and parse its streaming body straight into a DataFrame.
response = s3.get_object(Bucket=bucket, Key=houldout_file_key)
data_df = pd.read_csv(response["Body"])
display(data_df)

# Column names in file order.
all_headers = list(data_df.columns)
print(all_headers)

# The label is taken to be the last column of the file.
label_header = all_headers[-1]
print(label_header)

Unnamed: 0,Employee ID,Age,Gender,Years at Company,Job Role,Monthly Income,Work-Life Balance,Job Satisfaction,Performance Rating,Number of Promotions,...,Number of Dependents,Job Level,Company Size,Company Tenure,Remote Work,Leadership Opportunities,Innovation Opportunities,Company Reputation,Employee Recognition,Attrition
0,44132,38,1,23,1,10351,0,2,1,2,...,4,0,2,54,0,0,0,3,2,1
1,67355,22,1,13,2,8012,1,0,2,0,...,1,2,1,74,0,0,0,2,0,1
2,67290,40,0,32,4,6157,1,1,1,0,...,4,1,2,62,0,0,0,3,1,0
3,25581,33,0,8,2,9281,2,0,0,2,...,0,0,1,25,0,0,0,2,1,0
4,39986,57,1,22,3,6116,3,0,0,2,...,2,1,1,38,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29794,37472,32,1,13,2,8809,1,2,0,2,...,4,1,1,26,0,0,0,2,2,1
29795,16814,45,0,16,4,9907,2,0,0,0,...,1,0,2,79,1,0,0,3,2,0
29796,15541,28,1,3,3,5238,0,0,3,0,...,1,1,1,5,0,0,0,2,0,0
29797,56431,39,1,2,1,4814,2,2,0,2,...,2,1,1,74,0,1,0,2,1,0


['Employee ID', 'Age', 'Gender', 'Years at Company', 'Job Role', 'Monthly Income', 'Work-Life Balance', 'Job Satisfaction', 'Performance Rating', 'Number of Promotions', 'Overtime', 'Distance from Home', 'Education Level', 'Marital Status', 'Number of Dependents', 'Job Level', 'Company Size', 'Company Tenure', 'Remote Work', 'Leadership Opportunities', 'Innovation Opportunities', 'Company Reputation', 'Employee Recognition', 'Attrition']
Attrition


#### 1.3 S3 bucket and prefixes

In [44]:
# Root key prefix under which every artifact of this project is stored in the bucket.
prefix = "aai-540-group-3-final-project"

# Where the endpoint's data-capture files are read from (matches the
# DestinationS3Uri shown in the endpoint description).
data_capture_prefix = f"{prefix}/data/monitoring"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

# Base path for ground-truth uploads. The suffix is the date and hour at which
# this cell runs (datetime.now(), i.e. the kernel's local clock), so re-running
# the cell in a later hour produces a different base path.
ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H}"
)

# Where monitoring reports are written.
reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

# Get the model monitor container image URI for this region.
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

# Echo the resolved locations so they can be checked against the S3 console.
print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/data/monitoring
Ground truth path: s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/ground_truth_data/2025-02-24-05
Report path: s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/reports


## 2. Call Endpoint with Holdout Data & Upload Ground Truth

In [45]:
import matplotlib.pyplot as plt

def print_predictions_graphically(predictions):
    """
    Render a batch of predictions as a single line of dots and dashes.

    Each prediction is converted with ``float()``; values of 0.5 or more are
    shown as ``.`` and lower values as ``-``. A prediction that cannot be
    converted (``ValueError``) is reported on its own line instead.

    Parameters:
    - predictions (iterable): Prediction values, as numbers or numeric strings.
    """
    for raw_value in predictions:
        try:
            score = float(raw_value)
        except ValueError:
            print("Invalid prediction value:", raw_value)
            continue

        # One symbol per prediction, flushed so progress is visible immediately.
        symbol = "." if score >= 0.5 else "-"
        print(symbol, end="", flush=True)

    # Terminate the line of symbols.
    print()

In [46]:
# Function to invoke SageMaker endpoint and track predictions
def invoke_sagemaker_endpoint(data_df=data_df, num_rows=20, endpoint_name="retain-ai-xgb-endpoint"):
    """
    Send randomly sampled rows to a SageMaker endpoint, one request per row.

    Every request is tagged with the row's Employee ID as its ``InferenceId``.
    The ground-truth uploader in this notebook writes ``str(Employee ID)`` as
    the record ``eventId``; without an inference ID the captured records only
    carry a random event ID, so the two could never be matched.
    NOTE(review): the same employee can be sampled in more than one batch and
    would then share an inference ID — confirm this is acceptable for the
    monitor's merge step.

    Parameters:
    - data_df (DataFrame): Source rows; must contain the 'Employee ID' and
      'Attrition' columns.
    - num_rows (int): Number of distinct rows to sample. Capped at
      ``len(data_df)`` so an oversized request does not raise.
    - endpoint_name (str): Name of the endpoint to invoke.

    Returns:
    - tuple(list, list): The raw response body of each request decoded as
      UTF-8, and the Employee ID of each row, in request order.
    """
    # Sample distinct row positions; never ask for more rows than exist.
    sample_size = min(num_rows, len(data_df))
    rows = random.sample(range(len(data_df)), sample_size)
    selected_data = data_df.iloc[rows]

    # Drop the 'Attrition' column as it's the target variable
    input_data = selected_data.drop(columns=['Attrition'])

    # Create a SageMaker runtime client
    runtime_client = boto3.client('sagemaker-runtime')

    predictions = []
    employee_ids = []

    for _, row in input_data.iterrows():
        employee_id = row['Employee ID']

        # Serialize the single row as one header-less CSV line.
        row_payload = row.to_frame().T.to_csv(index=False, header=False)

        response = runtime_client.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='text/csv',  # Assuming the model expects CSV input
            Body=row_payload,
            # Recorded with the captured request so ground truth can be joined to it.
            InferenceId=str(employee_id),
        )

        predictions.append(response['Body'].read().decode('utf-8'))
        employee_ids.append(employee_id)

    return predictions, employee_ids

In [47]:
# Smoke test: send 20 randomly chosen holdout rows to the endpoint and show
# the returned (predictions, employee_ids) tuple.
display(invoke_sagemaker_endpoint(data_df, 20))

(['0.8375929594039917',
  '0.37252160906791687',
  '0.2816898226737976',
  '0.9198864102363586',
  '0.5393272638320923',
  '0.9918322563171387',
  '0.04047931730747223',
  '0.6468871831893921',
  '0.05848420038819313',
  '0.6389176249504089',
  '0.024290157482028008',
  '0.5312730669975281',
  '0.3103310465812683',
  '0.5777894854545593',
  '0.26706746220588684',
  '0.4011916220188141',
  '0.12166334688663483',
  '0.13825473189353943',
  '0.9642210006713867',
  '0.4188932180404663'],
 [25601,
  19788,
  22905,
  384,
  39229,
  36264,
  51615,
  36866,
  20532,
  30905,
  11019,
  66323,
  43251,
  23209,
  60089,
  39637,
  46688,
  32762,
  19205,
  27368])

In [48]:
!aws s3 ls $s3_capture_upload_path  --recursive

2025-02-24 03:58:53      25302 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/03/57-42-336-70986326-735f-4a47-8602-dd3231b1fbb4.jsonl
2025-02-24 04:00:53      42189 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/03/59-43-813-12a87154-813d-4068-ace0-031931b14b48.jsonl
2025-02-24 04:07:23      50611 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/04/06-16-746-09429515-3bc0-4195-b04d-f3a2030d5263.jsonl
2025-02-24 04:08:23      50616 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/04/07-18-811-9fe6d466-52c3-40eb-817c-ef7a51830c10.jsonl
2025-02-24 04:09:23      50611 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/04/08-20-741-f42ecccf-bd18-4ed5-8fb1-c53f48c5a069.jsonl
2025-02-24 04:10:33      50604 aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic

In [49]:
# View Capture data
# Capture files can take a while to appear in S3 after the first requests, so
# poll for a bounded time instead of listing only once.
CAPTURE_POLL_ATTEMPTS = 60
CAPTURE_POLL_INTERVAL_SECONDS = 1

print("Waiting for captures to show up", end="")
capture_files = []
for _ in range(CAPTURE_POLL_ATTEMPTS):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        break
    print(".", end="", flush=True)
    sleep(CAPTURE_POLL_INTERVAL_SECONDS)

if capture_files:
    # Show the first record of the last file in sorted order as a sample.
    capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")
    capture_record = json.loads(capture_file[0])
    display(capture_record)
    print()
    print("Found Capture Files:")
    print("\n ".join(capture_files[-3:]))
else:
    print()
    print(f"No capture files found under {s3_capture_upload_path}/{endpoint_name}")

Waiting for captures to show up

{'captureData': {'endpointInput': {'observedContentType': 'text/csv',
   'mode': 'INPUT',
   'data': '27705,45,1,36,0,3687,2,0,0,0,0,38,1,1,2,2,2,88,0,0,0,3,0\n',
   'encoding': 'CSV'},
  'endpointOutput': {'observedContentType': 'text/csv; charset=utf-8',
   'mode': 'OUTPUT',
   'data': '0.9422573447227478',
   'encoding': 'CSV'}},
 'eventMetadata': {'eventId': '27be67c2-4e26-4dd6-a22e-497c5a610590',
  'inferenceTime': '2025-02-24T05:21:22Z'},
 'eventVersion': '0'}


Found Capture Files:
s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/05/15-10-667-af02f254-8ee0-4d7b-be1d-095f39cc10cf.jsonl
 s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/05/16-12-359-5ed74f10-0bd9-4c15-a771-7790a21e78dc.jsonl
 s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/data/monitoring/retain-ai-xgb-endpoint/AllTraffic/2025/02/24/05/21-22-886-7754714f-9b1f-498f-9255-8045bb0de022.jsonl


In [50]:
# Accumulators filled by the prediction scheduler defined below:
# each entry of predictions_store is one batch of raw endpoint responses, and
# each entry of employee_ids_store is the matching batch of Employee IDs
# (read and cleared by upload_ground_truth).
predictions_store = []
employee_ids_store = []

# Stop flag: both scheduled functions check it at the start of each run and
# return without rescheduling themselves when it is True.
stop_scheduler = False

# Function to invoke SageMaker endpoint on a repeating timer and track predictions
def invoke_sagemaker_endpoint_with_schedule(data_df=data_df, num_rows=20, endpoint_name="retain-ai-xgb-endpoint"):
    """
    Run one prediction batch, record it, and reschedule itself in 10 seconds.

    Returns immediately, without rescheduling, once ``stop_scheduler`` is True.

    Parameters:
    - data_df (DataFrame): Rows to sample from; forwarded to
      ``invoke_sagemaker_endpoint``.
    - num_rows (int): Rows to send per batch.
    - endpoint_name (str): Endpoint to invoke.

    The same arguments are carried over to every rescheduled run.
    """
    if stop_scheduler:
        print("Prediction scheduler stopped.")
        return

    # Forward the caller's arguments rather than hard-coded values.
    predictions, employee_ids = invoke_sagemaker_endpoint(
        data_df=data_df, num_rows=num_rows, endpoint_name=endpoint_name
    )
    print_predictions_graphically(predictions)

    # Store predictions and IDs for later ground truth upload
    predictions_store.append(predictions)
    employee_ids_store.append(employee_ids)

    # Reschedule every 10 secs, keeping the same arguments for the next run
    Timer(
        10,
        invoke_sagemaker_endpoint_with_schedule,
        kwargs={"data_df": data_df, "num_rows": num_rows, "endpoint_name": endpoint_name},
    ).start()

In [51]:
# Function to upload ground truth on a repeating 60-second timer
def upload_ground_truth(upload_time=None):
    """
    Upload the true 'Attrition' labels for all Employee IDs collected so far,
    then reschedule itself in 60 seconds.

    Returns immediately, without rescheduling, once ``stop_scheduler`` is True.

    Parameters:
    - upload_time (datetime, optional): Timestamp used for the
      ``YYYY/MM/DD/HH/MMSS.jsonl`` part of the S3 key. Defaults to the current
      UTC time. Rescheduled runs always use the default.

    Each record's ``eventId`` is ``str(Employee ID)``.
    NOTE(review): Model Monitor joins ground truth to captured requests by
    this ID, so the requests must be sent with the Employee ID as their
    inference ID for the records to match — confirm against the invoking code.
    """
    if stop_scheduler:
        print("Upload scheduler stopped.")
        return

    if upload_time is None:
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        upload_time = datetime.now(timezone.utc)

    # Work on a snapshot: the prediction timer may append new batches while
    # this function is running, and those must survive for the next upload.
    pending_batches = list(employee_ids_store)

    # Flatten the stored Employee IDs into a list of ground truth records
    ground_truth_records = []
    for ids in pending_batches:
        for emp_id in ids:
            # Fetch the actual Attrition label from data_df for the given Employee ID
            actual_attrition = data_df.loc[data_df['Employee ID'] == emp_id, 'Attrition'].values[0]

            ground_truth_records.append({
                "groundTruthData": {
                    "data": str(actual_attrition),  # Actual label, not the prediction
                    "encoding": "CSV",
                },
                "eventMetadata": {
                    "eventId": str(emp_id),
                },
                "eventVersion": "0",
            })

    if ground_truth_records:
        # Convert ground truth records to JSONL (newline-delimited JSON)
        upload_records = [json.dumps(record) for record in ground_truth_records]
        data_to_upload = "\n".join(upload_records)

        # Upload to S3
        target_s3_uri = f"{ground_truth_upload_path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
        print(f"Uploading {len(upload_records)} records to", target_s3_uri)
        S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)
    else:
        # Nothing collected since the last run; skip writing an empty file.
        print("No new ground truth records to upload.")

    # Remove only the batches that were in the snapshot (they sit at the front
    # of the list); anything appended in the meantime is kept. This runs after
    # the upload so a failed upload does not discard the pending IDs.
    del employee_ids_store[:len(pending_batches)]

    # Reschedule every 60 secs
    Timer(60, upload_ground_truth).start()

In [52]:
# Entry point for the prediction scheduler
def start_predictions():
    """
    Clear the stop flag and run the first scheduled prediction batch.

    The first batch runs synchronously in the caller; subsequent batches are
    rescheduled by ``invoke_sagemaker_endpoint_with_schedule`` itself.
    """
    global stop_scheduler
    # Reset the flag so a previous stop() does not end the first run immediately.
    stop_scheduler = False
    invoke_sagemaker_endpoint_with_schedule()

In [53]:
def start_upload():
    """
    Clear the stop flag and run the first ground-truth upload.

    The first upload is stamped one hour before the current UTC time; later
    uploads are rescheduled by ``upload_ground_truth`` and use its default
    timestamp.
    """
    global stop_scheduler
    stop_scheduler = False  # Ensure that the upload scheduler is running
    # datetime.utcnow() is deprecated; an aware UTC datetime formats to the
    # same Y/m/d/H/MS path components.
    upload_ground_truth(datetime.now(timezone.utc) - timedelta(hours=1))

In [54]:
def stop():
    """
    Raise the shared stop flag.

    Both the prediction and the upload scheduler check this flag at the start
    of their next run and return without rescheduling.
    """
    global stop_scheduler
    stop_scheduler = True

In [55]:
start_predictions()  # Run the first prediction batch now; it then repeats every 10 seconds
start_upload()   # Run the first ground-truth upload now; it then repeats every 60 seconds

-----.-.-....----.--
Uploading 20 records to s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/ground_truth_data/2025-02-24-05/2025/02/24/04/3331.jsonl
..----.--....-.-.-.-
-------.-.--.--..-.-
-......-..------....
-.-.-.-.-.--.-.--.-.
.---..-..--.-....--.
Uploading 100 records to s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/ground_truth_data/2025-02-24-05/2025/02/24/05/3431.jsonl
...-...-...-....-.-.
...-....------...-.-
..-..--.-.---.......
--.--.....-..--.----
-.-.-..-.....-.----.
--..---.......--...-
Uploading 120 records to s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/ground_truth_data/2025-02-24-05/2025/02/24/05/3531.jsonl
....--...-.-.-..----
-.--..---...---..-..
.--..-----..--....--
--.--.--..---.--...-
-.....--....-.-...-.
-.--..--......---...
Uploading 120 records to s3://sagemaker-us-east-1-084375567266/aai-540-group-3-final-project/ground_truth_data/2025-02-24-05/2025/02/24/05/3631.jsonl
-.-.-...-.--.-..-..-
..

In [56]:
# Raise the stop flag. Each scheduler notices it at the start of its next
# run, prints its "stopped" message and does not reschedule itself.
stop()
# Discard any Employee IDs that were collected but not yet uploaded.
employee_ids_store.clear()

Prediction scheduler stopped.
Upload scheduler stopped.
