# Assignment 5.1: ML System Observability

**Student name: John Kalaiselvan**

## Setup

In [9]:
!pip install kagglehub

Collecting kagglehub
  Using cached kagglehub-0.4.3-py3-none-any.whl.metadata (40 kB)
Collecting kagglesdk<1.0,>=0.1.14 (from kagglehub)
  Using cached kagglesdk-0.1.15-py3-none-any.whl.metadata (13 kB)
Using cached kagglehub-0.4.3-py3-none-any.whl (70 kB)
Using cached kagglesdk-0.1.15-py3-none-any.whl (160 kB)
Installing collected packages: kagglesdk, kagglehub
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [kagglehub]
[1A[2KSuccessfully installed kagglehub-0.4.3 kagglesdk-0.1.15


### Import required libraries

In [1]:
%%time

from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
from time import sleep
from threading import Thread

import pandas as pd

from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer

from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig

# Rebinds the name `session` (also imported from the sagemaker package above)
# to a Session instance; later cells read the region, default bucket and
# runtime client from this object.
session = Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
CPU times: user 2.64 s, sys: 412 ms, total: 3.05 s
Wall time: 5.07 s


## Region and role

In [2]:
# Look up the execution role via get_execution_role(); it is passed as `role`
# to the Model and ModelQualityMonitor created further down.
role = get_execution_role()
print("RoleArn:", role)

# Region the session is bound to; used when retrieving container image URIs.
region = session.boto_region_name
print("Region:", region)

RoleArn: arn:aws:iam::823460696669:role/LabRole
Region: us-east-1


### S3 bucket and prefixes

In [3]:
# Default bucket of the session; every S3 URI built in this notebook is
# rooted in it.
bucket = session.default_bucket()
print("Bucket:", bucket)

Bucket: sagemaker-us-east-1-823460696669


In [4]:
prefix = "sagemaker/ModelQualityMonitor-20260208"

# Key prefixes (relative to the bucket) for captured traffic and monitor reports
data_capture_prefix = f"{prefix}/datacapture"
reports_prefix = f"{prefix}/reports"

# Full S3 URIs derived from the prefixes above
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

# Ground-truth uploads get their own folder, stamped with the local time at
# which this cell runs (datetime.now() without a timezone).
ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
)

# Container image used by the model monitor jobs in this region
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/datacapture
Ground truth path: s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/ground_truth_data/2026-02-08-18-35-29
Report path: s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/reports


## Deploy pre-trained model with data capture enabled

### Create SageMaker Model entity

In [5]:
# S3 location of the pre-trained XGBoost model artifact.
model_url = "s3://sagemaker-us-east-1-823460696669/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-02-01-10-14-48/xgb-2026-02-01-10-14-48/output/model.tar.gz"

# datetime.utcnow() is deprecated; an aware UTC timestamp formats identically.
# NOTE(review): model_name is not passed to Model(...) below -- confirm whether
# the model should be created under this name.
model_name = f"DEMO-xgb-bcd-pred-model-monitor-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

# XGBoost 1.7-1 inference container for the current region.
image_uri = image_uris.retrieve(framework="xgboost", version="1.7-1", region=region)

model = Model(image_uri=image_uri, model_data=model_url, role=role, sagemaker_session=session)

  model_name = f"DEMO-xgb-bcd-pred-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"


### Deploy the model with data capture

In [6]:
# datetime.utcnow() is deprecated; an aware UTC timestamp formats identically.
endpoint_name = f"DEMO-xgb-bcd-pred-model-quality-monitor-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"
print("EndpointName =", endpoint_name)

# Capture 100% of the endpoint traffic to the capture path defined earlier.
data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
)

# Deploy a single-instance endpoint with data capture enabled.
model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)


  endpoint_name = f"DEMO-xgb-bcd-pred-model-quality-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"


EndpointName = DEMO-xgb-bcd-pred-model-quality-monitor-2026-02-08-1835
------!

### Create the SageMaker Predictor object

In [7]:
from sagemaker.predictor import Predictor

# Client-side handle to the deployed endpoint; request payloads are
# serialized with CSVSerializer.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
    serializer=CSVSerializer(),
)

## Generate a baseline for model quality performance

### Execute predictions using the validation dataset

In [10]:
import kagglehub

# Download the "uciml/breast-cancer-wisconsin-data" dataset through kagglehub;
# `path` is whatever location dataset_download() returns and is walked by the
# upload cell below.
path = kagglehub.dataset_download("uciml/breast-cancer-wisconsin-data")

print("Path to dataset files:", path)

Path to dataset files: /home/sagemaker-user/.cache/kagglehub/datasets/uciml/breast-cancer-wisconsin-data/versions/2


In [11]:
!ls "/home/sagemaker-user/.cache/kagglehub/datasets/uciml/breast-cancer-wisconsin-data/versions/2"

data.csv


In [12]:
import boto3
import os

s3 = boto3.client("s3")
dataset_prefix = "datasets/breast-cancer/raw/"

# Upload every downloaded file under the dataset prefix. The key must be built
# from `dataset_prefix` (not the model-monitor `prefix`), otherwise the files
# land at the wrong key and the read in the next cell cannot find them.
for root, _, files in os.walk(path):
    for file in files:
        local_path = os.path.join(root, file)
        s3_key = dataset_prefix + file

        s3.upload_file(local_path, bucket, s3_key)

print("Uploaded KaggleHub dataset to S3")


Uploaded KaggleHub dataset to S3


In [13]:
import pandas as pd

# Build the URI from the bucket and dataset prefix used by the upload cell
# instead of hard-coding an account-specific bucket name.
s3_uri = f"s3://{bucket}/{dataset_prefix}data.csv"

data = pd.read_csv(s3_uri)
print(data.shape)
print(data.columns)


(569, 33)
Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')


In [14]:
import numpy as np

# Fixed seed so the random split below is reproducible
np.random.seed(42)

# One uniform draw in [0, 1) per row decides which split the row belongs to:
# < 0.8 -> train, [0.8, 0.9) -> validation, >= 0.9 -> batch.
rand_split = np.random.rand(len(data))
train_list = rand_split < 0.8
batch_list = rand_split >= 0.9
val_list = ~(train_list | batch_list)

# Train/validation drop the "id" column; the batch split drops the
# "diagnosis" label instead.
data_train = data.loc[train_list].drop(columns=["id"])
data_val = data.loc[val_list].drop(columns=["id"])
data_batch = data.loc[batch_list].drop(columns=["diagnosis"])

In [15]:
label_col = "diagnosis"

# Put the label first and keep every other column in its existing order
cols = [label_col]
cols.extend(c for c in data_val.columns if c != label_col)
data_val_ordered = data_val[cols]

In [16]:
import os

# Ensure the output folder exists, then write the label-first validation set
# without a header row or index column.
os.makedirs("test_data", exist_ok=True)

data_val_ordered.to_csv("test_data/validation.csv", index=False, header=False)

In [25]:
import os

pred_cutoff = 0.8  # probability above which a record is predicted as class 1
validate_dataset = "validation_with_predictions.csv"
batch_size = 20  # number of rows sent to the endpoint per request

os.makedirs("test_data", exist_ok=True)

# Maps the diagnosis codes found in validation.csv to binary labels.
label_map = {
    "M": 1,
    "B": 0
}


def _score_batch(rows, out_file):
    """Send one batch of rows to the endpoint and append the results to out_file.

    Args:
        rows: list of ``(label, input_cols)`` tuples, where ``input_cols`` is
            the comma-separated feature string of one record.
        out_file: open text file that receives one
            ``probability,prediction,label`` line per record.

    Raises:
        ValueError: if the endpoint returns a different number of predictions
            than rows were sent (``zip`` would otherwise silently drop records).
    """
    payload = "\n".join(input_cols for _, input_cols in rows)

    result = predictor.predict(payload)
    if isinstance(result, bytes):
        result = result.decode("utf-8")

    preds = result.strip().split("\n")
    if len(preds) != len(rows):
        raise ValueError(
            f"Endpoint returned {len(preds)} predictions for {len(rows)} rows"
        )

    for (label, _), prob in zip(rows, preds):
        probability = float(prob)
        prediction = 1 if probability > pred_cutoff else 0

        out_file.write(
            f"{probability},{prediction},{label}\n"
        )


batch = []

with open(f"test_data/{validate_dataset}", "w") as baseline_file:
    baseline_file.write("probability,prediction,label\n")

    with open("test_data/validation.csv", "r") as f:
        for row in f:
            # Trailing commas are stripped before splitting off the label
            # (presumably left by an empty last column -- confirm).
            raw_label, input_cols = row.strip().rstrip(",").split(",", 1)

            batch.append((label_map[raw_label], input_cols))

            if len(batch) == batch_size:
                _score_batch(batch, baseline_file)
                batch = []

        # Handle leftover rows
        if batch:
            _score_batch(batch, baseline_file)


### Examine the predictions from the model

In [26]:
# Show the first lines of the baseline file written by the previous cell
# (header row followed by probability,prediction,label records).
!head test_data/validation_with_predictions.csv

probability,prediction,label
0.9856777787208557,1,1
0.9912415742874146,1,1
0.992909848690033,1,1
0.8963014483451843,1,1
0.9889357686042786,1,1
0.005776811391115189,0,0
0.33277589082717896,0,1
0.04967235028743744,0,0
0.0624757818877697,0,0


### Upload the predictions as a baseline dataset

In [27]:
# Key prefixes for the baselining job: input data and job results
baseline_prefix = f"{prefix}/baselining"
baseline_data_prefix = f"{baseline_prefix}/data"
baseline_results_prefix = f"{baseline_prefix}/results"

# Matching S3 URIs inside the notebook's bucket
baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"

print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

Baseline data uri: s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/baselining/data
Baseline results uri: s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/baselining/results


In [28]:
# Upload the local predictions file to the baseline data location; the value
# returned by S3Uploader.upload is kept and later passed to suggest_baseline
# as the baseline dataset.
baseline_dataset_uri = S3Uploader.upload(f"test_data/{validate_dataset}", baseline_data_uri)
baseline_dataset_uri

's3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/baselining/data/validation_with_predictions.csv'

### Create a baselining job with validation dataset predictions

In [29]:
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor import EndpointInput
from sagemaker.model_monitor.dataset_format import DatasetFormat

In [30]:
# Model quality monitor: one ml.m5.xlarge instance with a 20 GB volume and a
# 1800-second runtime limit per job.
pred_model_quality_monitor = ModelQualityMonitor(
    role=role,
    sagemaker_session=session,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [31]:
# Name of the model quality baseline job.
# datetime.utcnow() is deprecated; an aware UTC timestamp formats identically.
baseline_job_name = f"DEMO-xgb-bcd-pred-model-baseline-job-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

  baseline_job_name = f"DEMO-xgb-bcd-pred-model-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"


In [32]:
# Execute the baseline suggestion job.
# The baseline dataset is the CSV (with header row) uploaded above; the three
# *_attribute arguments name its columns: "prediction", "probability", "label".
job = pred_model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    problem_type="BinaryClassification",
    inference_attribute="prediction",
    probability_attribute="probability",
    ground_truth_attribute="label",
)
# Wait on the job before continuing; logs=False is passed to wait().
job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name DEMO-xgb-bcd-pred-model-baseline-job-2026-02-08-1858


...........................................................!

### Explore the results of the baselining job

In [33]:
# Handle to the monitor's most recent baselining job; the next cells read its
# statistics and suggested constraints.
baseline_job = pred_model_quality_monitor.latest_baselining_job

In [34]:
# Pull the binary-classification section out of the baseline statistics and
# display it flattened, one metric per row.
baseline_statistics = baseline_job.baseline_statistics().body_dict
binary_metrics = baseline_statistics["binary_classification_metrics"]
pd.json_normalize(binary_metrics).T

Unnamed: 0,0
confusion_matrix.0.0,35
confusion_matrix.0.1,0
confusion_matrix.1.0,5
confusion_matrix.1.1,22
recall.value,0.814815
recall.standard_deviation,
precision.value,1.0
precision.standard_deviation,
accuracy.value,0.919355
accuracy.standard_deviation,


In [35]:
# Display the suggested binary-classification constraints, one metric per row.
suggested_constraints = baseline_job.suggested_constraints().body_dict
pd.DataFrame(suggested_constraints["binary_classification_constraints"]).T

Unnamed: 0,threshold,comparison_operator
recall,0.814815,LessThanThreshold
precision,1.0,LessThanThreshold
accuracy,0.919355,LessThanThreshold
true_positive_rate,0.814815,LessThanThreshold
true_negative_rate,1.0,LessThanThreshold
false_positive_rate,0.0,GreaterThanThreshold
false_negative_rate,0.185185,GreaterThanThreshold
auc,1.0,LessThanThreshold
f0_5,0.956522,LessThanThreshold
f1,0.897959,LessThanThreshold


## Setup continuous model monitoring to identify model quality drift

### Generate prediction data for Model Quality Monitoring

In [45]:
# errors="ignore" keeps this cell re-runnable: data_val is reassigned here, so
# on a second run the "Unnamed: 32" column is already gone and a plain drop
# would raise KeyError.
data_val = data_val.drop(columns=["Unnamed: 32"], errors="ignore")
data_val_inputs = data_val.drop(columns=["diagnosis"])

# Feature columns only, no header/index, fixed 6-decimal formatting.
data_val_inputs.to_csv(
    "test_data/test-dataset-input-cols.csv",
    index=False,
    header=False,
    float_format="%.6f"
)



In [46]:
# Sanity check: report the first row that does not split into exactly 30
# comma-separated fields, then stop scanning. Prints nothing when all rows match.
with open("test_data/test-dataset-input-cols.csv") as f:
    for i, line in enumerate(f):
        cols = line.strip().split(",")
        if len(cols) == 30:
            continue
        print(f"Row {i} has {len(cols)} columns")
        break


In [47]:
def invoke_endpoint(ep_name, file_name, delay_seconds=1):
    """Replay a file against an endpoint, one line per request.

    Every line of ``file_name`` is sent as a ``text/csv`` payload (trailing
    newline removed) with its zero-based line index as the ``InferenceId``.

    Args:
        ep_name: name of the endpoint to invoke. The original code ignored
            this argument and always used the global ``endpoint_name``.
        file_name: path of the text file whose lines are the payloads.
        delay_seconds: pause after each request; defaults to 1 second.
    """
    runtime = session.sagemaker_runtime_client
    with open(file_name, "r") as f:
        for i, row in enumerate(f):
            # The response body is read and discarded.
            runtime.invoke_endpoint(
                EndpointName=ep_name,
                ContentType="text/csv",
                Body=row.rstrip("\n"),
                InferenceId=str(i),  # unique ID per row
            )["Body"].read()
            sleep(delay_seconds)


def invoke_endpoint_forever():
    """Replay the input-columns file against the endpoint in an endless loop.

    A ValidationError raised by the runtime client is ignored and the replay
    starts over; any other exception propagates and ends the loop.
    """
    traffic_file = "test_data/test-dataset-input-cols.csv"
    while True:
        try:
            invoke_endpoint(endpoint_name, traffic_file)
        except session.sagemaker_runtime_client.exceptions.ValidationError:
            # Deliberately ignored: restart the replay from the first row.
            continue


# The target loops forever, so run it as a daemon thread: a non-daemon thread
# that never returns would keep the Python process from exiting.
thread = Thread(target=invoke_endpoint_forever, daemon=True)
thread.start()

### View captured data

In [48]:
print("Waiting for captures to show up", end="")
capture_files = []
for _ in range(120):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        # Inspect the first record of the newest capture file; stop once it
        # carries the inferenceId set by invoke_endpoint.
        capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")
        capture_record = json.loads(capture_file[0])
        if "inferenceId" in capture_record["eventMetadata"]:
            break
    print(".", end="", flush=True)
    sleep(1)
else:
    # Loop ran out without a break. With no capture files at all,
    # capture_file/capture_record were never assigned and the following cells
    # would fail with a NameError, so fail here with a clear message instead.
    if not capture_files:
        raise TimeoutError(
            f"No capture files found under {s3_capture_upload_path}/{endpoint_name}"
        )
print()
print("Found Capture Files:")
print("\n ".join(capture_files[-3:]))

Waiting for captures to show up
Found Capture Files:
s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/datacapture/DEMO-xgb-bcd-pred-model-quality-monitor-2026-02-08-1835/AllTraffic/2026/02/08/18/58-01-072-ead4b685-2e06-496b-809a-bc9b2f87734a.jsonl
 s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/datacapture/DEMO-xgb-bcd-pred-model-quality-monitor-2026-02-08-1835/AllTraffic/2026/02/08/19/25-22-572-d00823b7-77d7-4c08-9b7f-7811993d2af1.jsonl
 s3://sagemaker-us-east-1-823460696669/sagemaker/ModelQualityMonitor-20260208/datacapture/DEMO-xgb-bcd-pred-model-quality-monitor-2026-02-08-1835/AllTraffic/2026/02/08/19/30-42-025-4fa9878e-d57c-4737-8c41-310c8b3e0cf4.jsonl


In [49]:
# Print the third- and second-to-last entries of the newest capture file; the
# final entry is skipped (presumably an empty string after the trailing
# newline -- confirm).
print("\n".join(capture_file[-3:-1]))

{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"11.750000,17.560000,75.890000,422.900000,0.107300,0.097130,0.052820,0.044400,0.159800,0.066770,0.438400,1.907000,3.149000,30.660000,0.006587,0.018150,0.017370,0.013160,0.018350,0.002318,13.500000,27.980000,88.520000,552.300000,0.134900,0.185400,0.136600,0.101000,0.247800,0.077570","encoding":"CSV"},"endpointOutput":{"observedContentType":"text/csv; charset=utf-8","mode":"OUTPUT","data":"0.0086357481777668\n","encoding":"CSV"}},"eventMetadata":{"eventId":"629cf792-18d8-4066-920f-b09a9929b6af","inferenceId":"58","inferenceTime":"2026-02-08T19:31:40Z"},"eventVersion":"0"}
{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"14.270000,22.550000,93.770000,629.800000,0.103800,0.115400,0.146300,0.061390,0.192600,0.059820,0.202700,1.851000,1.895000,18.540000,0.006113,0.025830,0.046450,0.012760,0.014510,0.003756,15.290000,34.270000,104.300000,728.300000,0.138000,0.273300,

In [50]:
# Pretty-print the first record of the newest capture file (parsed in the
# wait loop above) with 2-space indentation.
print(json.dumps(capture_record, indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "text/csv",
      "mode": "INPUT",
      "data": "13.710000,20.830000,90.200000,577.900000,0.118900,0.164500,0.093660,0.059850,0.219600,0.074510,0.583500,1.377000,3.856000,50.960000,0.008805,0.030290,0.024880,0.014480,0.014860,0.005412,17.060000,28.140000,110.600000,897.000000,0.165400,0.368200,0.267800,0.155600,0.319600,0.115100",
      "encoding": "CSV"
    },
    "endpointOutput": {
      "observedContentType": "text/csv; charset=utf-8",
      "mode": "OUTPUT",
      "data": "0.9856777787208557\n",
      "encoding": "CSV"
    }
  },
  "eventMetadata": {
    "eventId": "1dd212c5-fa33-4a11-8e28-3963671c5a63",
    "inferenceId": "0",
    "inferenceTime": "2026-02-08T19:30:42Z"
  },
  "eventVersion": "0"
}
