In [1]:
'''
This notebook is created to load, deploy, and monitor our Kmeans model. It utilizes CloudWatch alarms
and Sagemaker endpoints to accomplish these goals, as well as helper scripts as .py files. 
'''

'\nThis notebook is created to load, deploy, and monitor our Kmeans model. It utilizes CloudWatch alarms\nand Sagemaker endpoints to accomplish these goals, as well as helper scripts as .py files. \n'

In [2]:
!pip install awswrangler



In [3]:
# Need to shuffle helper script around due to peculiar issues
!mv code/inference.py inference.py

In [4]:
import os
import boto3
import shutil
import numpy as np
import pandas as pd
import time
import io
import sagemaker
import awswrangler as wr
from datetime import datetime
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer, JSONSerializer
from sagemaker.deserializers import JSONDeserializer
from inference import model_fn, input_fn, predict_fn, output_fn
from sagemaker.model_monitor import ModelQualityMonitor, CronExpressionGenerator, EndpointInput, DataCaptureConfig

# CloudWatch client (for alarms)
cw_client = boto3.client("cloudwatch", region_name="us-east-1")

bucket_name = "arxiv-project-bucket"
role = "arn:aws:iam::221082214706:role/MYLabRole"
region = "us-east-1"

sess = sagemaker.Session(boto_session=boto3.Session(region_name=region))
print("Using bucket:", bucket_name)

# S3 path for the trained model from model training notebook
model_s3_path = f"s3://{bucket_name}/models/model.tar.gz"



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


Using bucket: arxiv-project-bucket


In [5]:
# Need a suitable scikit-learn container image for inference
sklearn_image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region=region,
    version="1.2-1", # Source of many errors, set version to same as in training
    py_version="py3"
)
print("Scikit-Learn container image:", sklearn_image_uri)

Scikit-Learn container image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3


In [6]:
'''
NOTE: 
In model deployment below, when definining model, I set directory to code folder within my local AWS notebook. 
I specialized the directory to include inference.py because calling the local directory where all files are hosted, 
and where inference.py was previously hosted, caused Sagemaekr to package all of those causing permission and size issues, 
crashing our endpoint deployment.
'''

'\nNOTE: \nIn model deployment below, when definining model, I set directory to code folder within my local AWS notebook. \nI specialized the directory to include inference.py because calling the local directory where all files are hosted, \nand where inference.py was previously hosted, caused Sagemaekr to package all of those causing permission and size issues, \ncrashing our endpoint deployment.\n'

In [7]:
target_dir = "code"
# Move inference.py into the code folder
shutil.move("inference.py", os.path.join(target_dir, "inference.py"))
print("Moved inference.py to the 'code' directory.")

Moved inference.py to the 'code' directory.


In [8]:
# SageMaker Model using the SKLearn container and our inference script
model = sagemaker.Model(
    model_data=model_s3_path,
    image_uri=sklearn_image_uri,
    role=role,
    entry_point="inference.py",
    source_dir="code",
    sagemaker_session=sess
)

data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=f"s3://{bucket_name}/data-capture"
)

# Deploy the model as an endpoint and return a predictor object
endpoint_name = "arxiv-clustering-endpoint" + datetime.utcnow().strftime("%Y%m%d%H%M%S")

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config
)

print("Endpoint deployed:", endpoint_name)

------!Endpoint deployed: arxiv-clustering-endpoint20250304010548


In [9]:
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sess,
    serializer=JSONSerializer(), 
    deserializer=JSONDeserializer()
)

In [10]:
status = sess.describe_endpoint(endpoint_name)["EndpointStatus"]
while status != "InService":
    print("Endpoint status:", status)
    time.sleep(10)
    status = sess.describe_endpoint(endpoint_name)["EndpointStatus"]
print("Endpoint is in service!")

Endpoint is in service!


In [11]:
print("Predictor:", predictor)

Predictor: Predictor: {'endpoint_name': 'arxiv-clustering-endpoint20250304010548', 'sagemaker_session': <sagemaker.session.Session object at 0x7f978267ee00>, 'serializer': <sagemaker.base_serializers.JSONSerializer object at 0x7f97806a5750>, 'deserializer': <sagemaker.base_deserializers.JSONDeserializer object at 0x7f9780b94550>}


In [12]:
# Create a CloudWatch Logs client
logs_client = boto3.client("logs", region_name=region)

# Define the log group name based on your endpoint name
log_group = f"/aws/sagemaker/Endpoints/{endpoint_name}"

# Describe log streams in the log group
response = logs_client.describe_log_streams(logGroupName=log_group)
log_streams = response.get("logStreams", [])

if log_streams:
    log_stream_name = log_streams[0]["logStreamName"]
    events_response = logs_client.get_log_events(
        logGroupName=log_group, 
        logStreamName=log_stream_name
    )
    for event in events_response.get("events", []):
        print(event["message"])
else:
    print("No log streams found in log group:", log_group)

2025-03-04 01:08:31,834 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
2025-03-04 01:08:31,836 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
2025-03-04 01:08:31,836 INFO - sagemaker-containers - nginx config: 
worker_processes auto;
daemon off;
pid /tmp/nginx.pid;
error_log  /dev/stderr;
worker_rlimit_nofile 4096;
events {
  worker_connections 2048;
}
http {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_read_timeout 60s;
      proxy_pass http://gunicorn;
    }
    location / {
      return 404 "{}";
    }
  }
}
202

In [13]:
desc = sess.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
print("Endpoint description:", desc)

Endpoint description: {'EndpointName': 'arxiv-clustering-endpoint20250304010548', 'EndpointArn': 'arn:aws:sagemaker:us-east-1:221082214706:endpoint/arxiv-clustering-endpoint20250304010548', 'EndpointConfigName': 'arxiv-clustering-endpoint20250304010548', 'ProductionVariants': [{'VariantName': 'AllTraffic', 'DeployedImages': [{'SpecifiedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3', 'ResolvedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn@sha256:9ff74d76191b625518d00824d43608641042302d446dbb72b01417be51ec4854', 'ResolutionTime': datetime.datetime(2025, 3, 4, 1, 5, 55, 123000, tzinfo=tzlocal())}], 'CurrentWeight': 1.0, 'DesiredWeight': 1.0, 'CurrentInstanceCount': 1, 'DesiredInstanceCount': 1}], 'DataCaptureConfig': {'EnableCapture': True, 'CaptureStatus': 'Started', 'CurrentSamplingPercentage': 100, 'DestinationS3Uri': 's3://arxiv-project-bucket/data-capture'}, 'EndpointStatus': 'InService', 'CreationTime': datet

In [14]:
# We should test the (hopefully) deployed endpoint with a test input
# Create a sample input with 50 dimensional output
# Has to match the n_dim value in our first notebook in SVD
sample_input = np.random.rand(1, 50).tolist()
print("Sending sample input:", sample_input)

# Call the endpoint and predictor should invoke your inference.py code
response = predictor.predict(sample_input)
print("Endpoint response:", response)

Sending sample input: [[0.4390833519394989, 0.6401406333983879, 0.7566341858810585, 0.9720417493628968, 0.5508708528426888, 0.206584335696397, 0.28500506530350733, 0.34108946407854945, 0.17839809359803105, 0.3652364271944426, 0.7174211538274556, 0.9292912230707593, 0.7611759942129664, 0.5822065915242812, 0.023003686486261388, 0.5834347725824839, 0.7220893508009194, 0.9491631862843318, 0.9057384311553117, 0.19428960652030536, 0.10622479636593729, 0.41323025621421017, 0.5685681495961176, 0.8817279084737528, 0.35382308701497667, 0.8449841588969577, 0.8204304623841299, 0.8548246482438826, 0.07444425272495236, 0.30544665928005954, 0.21625248481673676, 0.8420845783481145, 0.9798392133992908, 0.33147000453747677, 0.6956830083464318, 0.2787275444765902, 0.2405942079688408, 0.33782404699678137, 0.6677406036320244, 0.2214544834602633, 0.9101609665680681, 0.46436093772342235, 0.11221768626027973, 0.7592675717438081, 0.45227872018909543, 0.9142770224924576, 0.7266199955867158, 0.9394816450437943, 

In [15]:
# From here we will use the test results to generate an idea of our model and a baseline for our results

In [16]:
# With Model Deployed, going to generate baseline predictions from test data and push it to S3
test_data_prefix = f"s3://{bucket_name}/processed_csv/test/"

# List all test files in our prefix
test_files = wr.s3.list_objects(path=test_data_prefix)
print("Test files:", test_files)

# Concatenate all test CSV files into a single DataFrame
df_test = pd.concat([wr.s3.read_csv(file) for file in test_files], ignore_index=True)
print("Combined test data shape:", df_test.shape)

Test files: ['s3://arxiv-project-bucket/processed_csv/test/arxiv_test_2022.csv', 's3://arxiv-project-bucket/processed_csv/test/arxiv_test_2023.csv']
Combined test data shape: (414530, 59)


In [18]:
# Need to sample because kernel dies, overloads, and crashes AWS account
df_test_sampled = df_test.sample(n=20000, random_state=39)
print("Sampled test data shape:", df_test_sampled.shape)

Sampled test data shape: (20000, 59)


In [19]:
# Extract numeric features for clustering.
# We assume that your preprocessing created SVD features with names like "svd_0", "svd_1", ..., "svd_49".
svd_cols = [col for col in df_test.columns if col.startswith("svd_")]
if not svd_cols:
    raise ValueError("No SVD columns found in test data.")
X_test = df_test_sampled[svd_cols].values
print("Test feature matrix shape:", X_test.shape)

Test feature matrix shape: (20000, 50)


In [20]:
# Set batch size to 1 to avoid overloading and crashing the kernel with 500 error

In [22]:
# Generate predictions on the test data using the deployed endpoint.
# Need to batch in or we will get a 500 error return for overloading server

def batch_predict(predictor, X, batch_size=1):
    all_predictions = []
    n = len(X)
    for i in range(0, n, batch_size):
        batch = X[i:i+batch_size]
        # If the batch is empty, break out of the loop
        if batch.size == 0:
            break
        preds = predictor.predict(batch.tolist())
        # If preds is None or not an array, skip this batch
        if preds is None:
            print(f"Batch {i//batch_size + 1} returned None, skipping.")
            continue
        preds_arr = np.atleast_1d(preds)
        # If the predictions array is empty, skip
        if preds_arr.size == 0:
            print(f"Batch {i//batch_size + 1} returned an empty prediction, skipping.")
            continue
        all_predictions.append(preds_arr)
        print(f"Processed batch {i//batch_size + 1} / {int(np.ceil(n / batch_size))}")
    if len(all_predictions) == 0:
        return np.array([])
    return np.concatenate(all_predictions)

# X_test is our test feature matrix, so input here
predictions = batch_predict(predictor, X_test, batch_size=1)
baseline_df = pd.DataFrame({"prediction": predictions.flatten()})

# Create a baseline DataFrame from the predictions.
baseline_df = pd.DataFrame({"prediction": predictions.flatten()})
# (If you have ground truth values available, you could include them as well.)

Processed batch 1 / 20000
Processed batch 2 / 20000
Processed batch 3 / 20000
Processed batch 4 / 20000
Processed batch 5 / 20000
Processed batch 6 / 20000
Processed batch 7 / 20000
Processed batch 8 / 20000
Processed batch 9 / 20000
Processed batch 10 / 20000
Processed batch 11 / 20000
Processed batch 12 / 20000
Processed batch 13 / 20000
Processed batch 14 / 20000
Processed batch 15 / 20000
Processed batch 16 / 20000
Processed batch 17 / 20000
Processed batch 18 / 20000
Processed batch 19 / 20000
Processed batch 20 / 20000
Processed batch 21 / 20000
Processed batch 22 / 20000
Processed batch 23 / 20000
Processed batch 24 / 20000
Processed batch 25 / 20000
Processed batch 26 / 20000
Processed batch 27 / 20000
Processed batch 28 / 20000
Processed batch 29 / 20000
Processed batch 30 / 20000
Processed batch 31 / 20000
Processed batch 32 / 20000
Processed batch 33 / 20000
Processed batch 34 / 20000
Processed batch 35 / 20000
Processed batch 36 / 20000
Processed batch 37 / 20000
Processed 

In [23]:
# Save the baseline predictions as CSV in-memory and upload directly to S3.
csv_buffer = io.StringIO()
baseline_df.to_csv(csv_buffer, index=False)
baseline_s3_key = "model_monitoring/baseline/baseline_test_predictions.csv"
s3_client = boto3.client('s3', region_name=region)
s3_client.put_object(Bucket=bucket_name, Key=baseline_s3_key, Body=csv_buffer.getvalue())
print(f"Baseline predictions uploaded to: s3://{bucket_name}/{baseline_s3_key}")

Baseline predictions uploaded to: s3://arxiv-project-bucket/model_monitoring/baseline/baseline_test_predictions.csv


In [24]:
#----------------------------- Model Monitoring ----------------------------

In [25]:
# Baseline dataset URIs
# For quality monitoring, the baseline predictions file (created previously)
baseline_dataset_uri = f"s3://{bucket_name}/model_monitoring/baseline/baseline_test_predictions.csv"
# For drift monitoring, we assume a baseline input file is available (created earlier) at:
baseline_input_uri = f"s3://{bucket_name}/model_monitoring/baseline/baseline_input.csv"

# S3 output location where monitoring job results (evaluation JSON files) will be saved
monitoring_output_uri = f"s3://{bucket_name}/model_monitoring/reports/"
print("Monitoring will target endpoint:", endpoint_name)

Monitoring will target endpoint: arxiv-clustering-endpoint20250304010548


In [26]:
# Define Monitoring Schedule Parameters to run every hour
schedule_expression = CronExpressionGenerator.hourly()

# Unique names for each monitoring schedule
quality_monitor_schedule_name = "arxiv-quality-monitor-" + datetime.utcnow().strftime("%Y%m%d%H%M%S")
drift_monitor_schedule_name = "arxiv-drift-monitor-" + datetime.utcnow().strftime("%Y%m%d%H%M%S")

# S3 URI for your custom evaluation scripts
custom_baseline_script_uri = f"s3://{bucket_name}/code/custom_baseline.py"
custom_drift_script_uri = f"s3://{bucket_name}/code/custom_drift.py"

print("Quality script URI:", custom_baseline_script_uri)
print("Drift script URI:", custom_drift_script_uri)

Quality script URI: s3://arxiv-project-bucket/code/custom_baseline.py
Drift script URI: s3://arxiv-project-bucket/code/custom_drift.py


In [27]:
# Create an EndpointInput Object for captured data. Model monitoring processing job will use this to find captured data from endpoint
endpoint_input = EndpointInput(
    endpoint_name=endpoint_name,
    destination="/opt/ml/processing/input_data",
    inference_attribute="prediction"
)

In [28]:
# Quality (Drift) Monitoring Schedule runs a custom_baseline.py script to compute silhouette score on validation data
model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=sess
)

quality_monitor_response = model_quality_monitor.create_monitoring_schedule(
    record_preprocessor_script=custom_baseline_script_uri,
    monitor_schedule_name=quality_monitor_schedule_name,
    endpoint_input=endpoint_input,
    output_s3_uri=monitoring_output_uri,
    problem_type="Regression",           # Silhouette score is continuous
    ground_truth_input=baseline_dataset_uri,
    schedule_cron_expression=schedule_expression
)
print("Quality monitoring schedule created:", quality_monitor_schedule_name)

Quality monitoring schedule created: arxiv-quality-monitor-20250304011445


In [29]:
# Data Drift Monitoring Schedule
# Runs the custom_drift.py script to compute drift metrics on input features and compares against our baseline
drift_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=sess
)

drift_monitor_response = drift_monitor.create_monitoring_schedule(
    record_preprocessor_script=custom_drift_script_uri,
    monitor_schedule_name=drift_monitor_schedule_name,
    endpoint_input=endpoint_input,
    output_s3_uri=monitoring_output_uri,
    problem_type="Regression", # Choose regression because drift metric is continuous
    ground_truth_input=baseline_input_uri,
    schedule_cron_expression=schedule_expression
)
print("Data drift monitoring schedule created:", drift_monitor_schedule_name)

Data drift monitoring schedule created: arxiv-drift-monitor-20250304011445


In [30]:
# CloudWatch Alarms for Endpoint Metrics
# First is an Alarm on Invocation Error Rate (if the 5XX error count rises above threshold)
error_alarm_name = "Endpoint5XXErrorAlarm"
cw_client.put_metric_alarm(
    AlarmName=error_alarm_name,
    AlarmDescription="Alarm if 5XX error count exceeds threshold",
    ActionsEnabled=True,
    MetricName="Invocation5XXErrors",
    Namespace="AWS/SageMaker/Endpoints",
    Statistic="Sum",
    Dimensions=[{"Name": "EndpointName", "Value": endpoint_name}],
    Period=300,  # Evaluate every 5 minutes
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=1,  # Trigger alarm if at least 1 error in a period
    ComparisonOperator="GreaterThanThreshold",
    TreatMissingData="notBreaching"
)
print("CloudWatch alarm created for 5XX error rate:", error_alarm_name)

# Alarm on Endpoint Latency (if average latency exceeds threshold)
latency_alarm_name = "EndpointLatencyAlarm"
cw_client.put_metric_alarm(
    AlarmName=latency_alarm_name,
    AlarmDescription="Alarm if endpoint latency exceeds threshold",
    ActionsEnabled=True,
    MetricName="Latency",
    Namespace="AWS/SageMaker/Endpoints",
    Statistic="Average",
    Dimensions=[{"Name": "EndpointName", "Value": endpoint_name}],
    Period=300,  # Evaluate every 5 minutes
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=1000,  # If avg latency exceeds 1000 milliseconds
    ComparisonOperator="GreaterThanThreshold",
    TreatMissingData="notBreaching"
)
print("CloudWatch alarm created for endpoint latency:", latency_alarm_name)

CloudWatch alarm created for 5XX error rate: Endpoint5XXErrorAlarm
CloudWatch alarm created for endpoint latency: EndpointLatencyAlarm


In [31]:
# Verify Monitoring Schedules
# Wait a bit and then describe the quality and drift schedules to verify they are created.
time.sleep(30)
quality_schedule_description = model_quality_monitor.describe_schedule()
print("Quality monitoring schedule description:")
print(quality_schedule_description)

drift_schedule_description = drift_monitor.describe_schedule()
print("Drift monitoring schedule description:")
print(drift_schedule_description)

Quality monitoring schedule description:
{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:221082214706:monitoring-schedule/arxiv-quality-monitor-20250304011445', 'MonitoringScheduleName': 'arxiv-quality-monitor-20250304011445', 'MonitoringScheduleStatus': 'Scheduled', 'MonitoringType': 'ModelQuality', 'CreationTime': datetime.datetime(2025, 3, 4, 1, 14, 51, 280000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2025, 3, 4, 1, 14, 57, 750000, tzinfo=tzlocal()), 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'}, 'MonitoringJobDefinitionName': 'model-quality-job-definition-2025-03-04-01-14-50-779', 'MonitoringType': 'ModelQuality'}, 'EndpointName': 'arxiv-clustering-endpoint20250304010548', 'ResponseMetadata': {'RequestId': '6b177f9e-6a58-4087-9fc1-0186d4733632', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6b177f9e-6a58-4087-9fc1-0186d4733632', 'content-type': 'application/x-amz-json-1.1', 'content-length': '589', 'd

In [None]:
#-------------------------------------------------------------------------------

In [None]:
# Deletion of all items listed to ensure closing of notebook and resources

In [None]:
'''
# Deletion of MOnitoring Drift Schedule
region = "us-east-1"
schedule_name = "arxiv-drift-monitor-20250228031805"

sm_client = boto3.client("sagemaker", region_name=region)
sm_client.delete_monitoring_schedule(MonitoringScheduleName=schedule_name)
print(f"Deleted monitoring schedule: {schedule_name}")
'''

In [None]:
'''
# Delition of Quality monitor
region = "us-east-1"
schedule_name = "arxiv-quality-monitor-20250228031805"

sm_client = boto3.client("sagemaker", region_name=region)
sm_client.delete_monitoring_schedule(MonitoringScheduleName=schedule_name)
print(f"Deleted monitoring schedule: {schedule_name}")
'''

In [None]:
# Deletion of endpoint
'''
sm_client = boto3.client('sagemaker', region_name=region)
# Leave variable below if used in current session, otherwise hardcode to kill because all are datetime stamped
# endpoint_name = "arxiv-clustering-endpoint20250303231924"
response = sm_client.describe_endpoint(EndpointName=endpoint_name)
print(response['EndpointStatus'])

print(f"Initiating deletion of endpoint: {endpoint_name}")
sm_client.delete_endpoint(EndpointName=endpoint_name)

# Poll until the endpoint is deleted
while True:
    try:
        response = sm_client.describe_endpoint(EndpointName=endpoint_name)
        status = response["EndpointStatus"]
        print("Endpoint status:", status)
        if status in ["InService", "Updating"]:
            time.sleep(10)
        else:
            print("Endpoint status is now", status)
            break
    except sm_client.exceptions.ClientError as e:
        # If endpoint is not found, it has been deleted.
        print("Endpoint deleted.")
        break

# Now delete the endpoint configuration
print(f"Deleting endpoint configuration: {endpoint_name}")
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
print("Endpoint configuration deleted.")
'''