In [1]:
# Import libraries
from datetime import datetime, timedelta, timezone
import json
import uuid
import time
from threading import Thread
import pandas as pd
from sagemaker import get_execution_role, Session
from sagemaker.s3 import S3Uploader
from time import gmtime, strftime, sleep
from tqdm.notebook import tqdm

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Load variables from previous notebooks
sagemaker_session = Session()
role = get_execution_role()
region = sagemaker_session.boto_region_name
%store -r prod_scaled_path
%store -r endpoint_name
%store -r ground_truth_upload_path
prod_scaled_df = pd.read_csv(prod_scaled_path)

In [3]:
endpoint_name

'xgb-nutriscore-monitor-2025-10-17-20-37-47'

In [4]:
ground_truth_upload_path

's3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43'

In [None]:
# !aws sagemaker list-monitoring-schedules
# !aws sagemaker delete-monitoring-schedule --monitoring-schedule-name "nutriscore-data-quality-schedule-2025-10-17-17-57-23"
# !aws sagemaker list-monitoring-executions --monitoring-schedule-name "nutriscore-data-quality-schedule-2025-10-17-17-57-23" --status-equals InProgress
# !aws sagemaker stop-processing-job --processing-job-name "model-monitoring-202510172000-9b5da9b1cb51000db3fa4ea8"

In [6]:
# Helper function for prod data simulation
def upload_ground_truth(records, upload_path, timestamp):
    # Convert records to JSON-lines format
    ground_truth_data_to_upload = "\n".join([json.dumps(r) for r in records])
    
    # Set S3 path based on timestamp
    target_s3_uri = f"{upload_path}/{timestamp:%Y/%m/%d/%H}/ground_truth_{timestamp:%M%S}.jsonl"
    
    print(f"  Uploading {len(records)} ground truth records to {target_s3_uri}")
    S3Uploader.upload_string_as_file_body(ground_truth_data_to_upload, target_s3_uri)

# Define live traffic simulation
def simulate_live_traffic_for_duration(
    endpoint_name,
    sagemaker_session,
    prod_df,
    gt_upload_path,
    duration_hours=2,
    sample_size=500,        
    wait_time_seconds=600 
):
    print(f"--- Starting Traffic Simulation for {duration_hours} hours ---")
    print(f"Sending {sample_size} records every {wait_time_seconds / 60} minutes.")
    
    # Get the start time and calculate the total duration in seconds
    start_time = time.time()
    duration_seconds = duration_hours * 3600

    # Loop until kernel restart or reached max duration
    while time.time() - start_time < duration_seconds:
        try:
            print(f"\n--- Generating new data batch ---")
            
            sample_traffic_df = prod_df.sample(n=sample_size)
            ground_truth_records_for_this_batch = []
            current_timestamp = datetime.utcnow()

            for index, row in tqdm(sample_traffic_df.iterrows(), total=sample_traffic_df.shape[0]):
                 # First column is the label (true score) and the rest are features
                true_label = row.iloc[0]
                features = row.iloc[1:]
                features_payload = ",".join(features.astype(str).values) + "\n"
                inference_id = str(uuid.uuid4()) # generate unique id
                sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
                    EndpointName=endpoint_name,
                    ContentType="text/csv",
                    Body=features_payload,
                    InferenceId=inference_id,
                )
                ground_truth_records_for_this_batch.append({
                    "groundTruthData": {"data": str(true_label), "encoding": "CSV"},
                    "eventMetadata": {"eventId": inference_id},
                    "eventVersion": "0",
                })
            
            print(f"Sent {sample_size} predictions to endpoint.")

            # Wait 5 minutes to give the SageMaker Data Capture service
            # time to write its files to the correct S3 hourly folder.
            print("Waiting 5 minutes for data capture to land in S3...")
            sleep(300) 
            
            upload_ground_truth(
                ground_truth_records_for_this_batch,
                gt_upload_path,
                current_timestamp
            )
            
            # Check if the next wait period would exceed the duration
            if (time.time() - start_time + wait_time_seconds) > duration_seconds:
                print("\nDuration reached. Stopping simulation after this batch.")
                break

            print(f"Batch complete. Waiting {wait_time_seconds / 60} minutes...")
            sleep(wait_time_seconds)

        except Exception as e:
            print(f"Error in simulation loop: {e}")
            print("Restarting loop after a 1-minute wait...")
            sleep(60)
            
    print(f"\n--- Simulation finished after approximately {duration_hours} hours. ---")

In [7]:
# Start the simulation in a background thread
simulation_thread = Thread(
    target=simulate_live_traffic_for_duration,
    args=(
        endpoint_name,
        sagemaker_session,
        prod_scaled_df,
        ground_truth_upload_path,
        2 # hours duration for simulation
    )
)
simulation_thread.start()

--- Starting Traffic Simulation for 2 hours ---
Sending 500 records every 10.0 minutes.

[2025-10-17 22:20:29.619650] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/22/ground_truth_2029.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 22:35:53.485242] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/22/ground_truth_3553.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 22:51:19.538822] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/22/ground_truth_5119.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 23:06:42.529266] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/23/ground_truth_0642.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 23:22:07.935290] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/23/ground_truth_2207.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 23:37:32.397547] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/23/ground_truth_3732.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-17 23:52:56.251426] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/17/23/ground_truth_5256.jsonl
Batch complete. Waiting 10.0 minutes...

[2025-10-18 00:08:21.837767] --- Generating new data batch ---


  current_timestamp = datetime.utcnow()


Sent 500 predictions to endpoint.
Waiting 5 minutes for data capture to land in S3...
  Uploading 500 ground truth records to s3://sagemaker-us-east-1-654654380268/sagemaker/FoodLens-ModelQualityMonitor-2025-10-17-20-37-43/ground_truth_data/2025-10-17-20-37-43/2025/10/18/00/ground_truth_0821.jsonl

Duration reached. Stopping simulation after this batch.

--- Simulation finished after approximately 2 hours. ---
