# Traffic Simulation for Monitoring Dashboard

Send periodic inference requests to the deployed endpoint to simulate
real-world traffic patterns and populate CloudWatch metrics and Model Monitor data.

## Setup

In [1]:
import boto3
import sagemaker
import pandas as pd
import numpy as np
import time
from datetime import datetime
from sqlalchemy import create_engine
from sagemaker.predictor import Predictor
from sklearn.metrics import accuracy_score, roc_auc_score

# AWS / SageMaker handles shared by the rest of the notebook.
sess = sagemaker.Session()
region = boto3.Session().region_name
bucket = sess.default_bucket()

# S3 key prefix; presumably where the model artifacts live (confirm before use).
s3_prefix = "aai540/model/xgboost-binary"

# Columns selected from the production data as model inputs, in this order.
FEATURE_COLS = [
    "duration",
    "pkt_total",
    "bytes_total",
    "pkt_fwd",
    "pkt_bwd",
    "bytes_fwd",
    "bytes_bwd",
    "pkt_rate",
    "byte_rate",
    "bytes_per_pkt",
    "pkt_ratio",
    "byte_ratio",
]
# Column holding the ground-truth label for each row.
LABEL_COL = "label"

# Name of the deployed endpoint that receives the simulated traffic.
ENDPOINT_NAME = "ids-xgboost-binary-monitor"

# Echo the resolved settings so the run is self-describing.
print(
    f"Region   : {region}\n"
    f"Endpoint : {ENDPOINT_NAME}"
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Region   : us-east-1
Endpoint : ids-xgboost-binary-monitor


## Load Production Data

This notebook uses the production holdout dataset (40% split), which represents truly
unseen data that was not used for training, validation, or testing. Replaying it against
the endpoint simulates real production inference traffic.

In [2]:
# Location of the exported production holdout CSV inside the default bucket.
prod_s3_path = f"s3://{bucket}/aai540/production_holdout/production_data.csv"

print("Loading production data from S3...")
df_prod = pd.read_csv(prod_s3_path)
n_prod_rows = len(df_prod)
print(f"Production rows loaded: {n_prod_rows:,}")

# Split the frame into the feature matrix and the label vector.
# NOTE: the names `X_test` / `y_true` are kept because later cells use them,
# even though the rows come from the production holdout.
X_test = df_prod.loc[:, FEATURE_COLS].to_numpy()
y_true = df_prod.loc[:, LABEL_COL].to_numpy()

Loading production data from S3...
Production rows loaded: 200,308


## Connect to Endpoint

In [3]:
# CSV in both directions: request payloads are serialized to CSV and the
# endpoint's responses are parsed from CSV.
csv_serializer = sagemaker.serializers.CSVSerializer()
csv_deserializer = sagemaker.deserializers.CSVDeserializer()

# Client-side handle used to invoke the endpoint named by ENDPOINT_NAME.
predictor = Predictor(
    endpoint_name=ENDPOINT_NAME,
    sagemaker_session=sess,
    serializer=csv_serializer,
    deserializer=csv_deserializer,
)
print("Connected to endpoint: {}".format(ENDPOINT_NAME))

Connected to endpoint: ids-xgboost-binary-monitor


## Traffic Simulation Configuration

In [4]:
# --- Simulation knobs (edit these to reshape the run) ---
DURATION_MINUTES = 60   # total length of the simulation, in minutes
INTERVAL_SECONDS = 60   # pause between consecutive batches, in seconds
BATCH_SIZE_MIN = 50     # lower clamp on samples per batch
BATCH_SIZE_MAX = 200    # upper clamp on samples per batch
BATCH_SIZE_AVG = 100    # nominal batch size; also used for the request estimate

# Number of batches that fit into the configured duration.
total_iterations = (DURATION_MINUTES * 60) // INTERVAL_SECONDS

# Derived values shown in the summary below.
duration_hours = DURATION_MINUTES / 60
estimated_requests = total_iterations * BATCH_SIZE_AVG
started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

summary_lines = [
    "Traffic Simulation Configuration",
    f"  Duration        : {DURATION_MINUTES} minutes ({duration_hours:.1f} hours)",
    f"  Interval        : {INTERVAL_SECONDS} seconds between batches",
    f"  Batch size      : {BATCH_SIZE_MIN}-{BATCH_SIZE_MAX} samples (avg ~{BATCH_SIZE_AVG})",
    f"  Total batches   : {total_iterations}",
    f"  Est. requests   : ~{estimated_requests:,} (variable load)",
    f"\nStart time: {started_at}",
]
print("\n".join(summary_lines))

Traffic Simulation Configuration
  Duration        : 60 minutes (1.0 hours)
  Interval        : 60 seconds between batches
  Batch size      : 50-200 samples (avg ~100)
  Total batches   : 60
  Est. requests   : ~6,000 (variable load)

Start time: 2026-02-20 20:06:51


## Run Simulation

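This cell runs for the configured duration (`DURATION_MINUTES`). Interrupt the kernel to stop early; a summary of the batches sent so far is still printed.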

In [5]:
# Replay randomly sampled production rows against the endpoint, one batch per
# interval, and track batch / cumulative accuracy against the known labels.
#
# NOTE(review): the recorded run of this cell shows accuracy well below 0.5 for
# a binary classifier. Verify that LABEL_COL uses the same 0/1 encoding (and the
# features the same column order) that the deployed model was trained with.
all_predictions = []
all_actuals = []
iteration = 0       # 1-based index of the batch currently being processed
batches_sent = 0    # batches whose predictions were received and recorded

# Seeded generator so Restart & Run All replays the same traffic pattern.
rng = np.random.default_rng(42)

# Probability at or above which a row is classified as the positive class.
decision_threshold = 0.5

if len(X_test) == 0:
    raise ValueError("X_test is empty; there are no rows to send to the endpoint")

try:
    for i in range(total_iterations):
        iteration = i + 1

        # Variable batch size to simulate realistic traffic patterns.
        # Elapsed fraction of the run expressed in hours; the sine below
        # completes one full cycle per simulated hour.
        hour_position = (i / total_iterations) * DURATION_MINUTES / 60

        # Sine wave pattern for natural variation + random noise
        traffic_cycle = np.sin(hour_position * 2 * np.pi) * 0.3 + 0.7  # 0.4 to 1.0
        random_variation = rng.uniform(0.85, 1.15)  # ±15% random noise

        batch_size = int(BATCH_SIZE_AVG * traffic_cycle * random_variation)
        batch_size = max(BATCH_SIZE_MIN, min(BATCH_SIZE_MAX, batch_size))  # Clamp
        # Sampling without replacement cannot draw more rows than exist.
        batch_size = min(batch_size, len(X_test))

        # Random sample (without replacement) from the loaded rows
        indices = rng.choice(len(X_test), size=batch_size, replace=False)
        batch_X = X_test[indices]
        batch_y = y_true[indices]

        # Send predictions; perf_counter is monotonic, so the measured latency
        # is not affected by system clock adjustments.
        start_time = time.perf_counter()
        response = predictor.predict(batch_X)
        latency = (time.perf_counter() - start_time) * 1000  # ms

        # Parse predictions: the first field of each response row is the score
        y_prob = np.array([float(row[0]) for row in response])
        if len(y_prob) != len(batch_y):
            # Fail before touching the accumulators so they stay aligned.
            raise ValueError(
                f"Endpoint returned {len(y_prob)} predictions for "
                f"{len(batch_y)} samples in batch {iteration}"
            )
        y_pred = (y_prob >= decision_threshold).astype(int)

        # Track results
        all_predictions.extend(y_pred)
        all_actuals.extend(batch_y)
        batches_sent = iteration

        # Compute running metrics
        batch_accuracy = accuracy_score(batch_y, y_pred)
        cumulative_accuracy = accuracy_score(all_actuals, all_predictions)

        # Progress update
        timestamp = datetime.now().strftime('%H:%M:%S')
        remaining = (total_iterations - iteration) * INTERVAL_SECONDS

        print(
            f"[{timestamp}] Batch {iteration}/{total_iterations} | "
            f"Samples: {batch_size:3d} | Latency: {latency:.0f}ms | "
            f"Batch Acc: {batch_accuracy:.3f} | "
            f"Cumulative Acc: {cumulative_accuracy:.4f} | "
            f"Remaining: {remaining//60}m {remaining%60}s"
        )

        # Wait before next batch (unless last iteration)
        if i < total_iterations - 1:
            time.sleep(INTERVAL_SECONDS)

except KeyboardInterrupt:
    print(f"\nSimulation interrupted at batch {iteration}/{total_iterations}")

# Final summary (also printed after an interrupt)
print("Simulation Complete")
print(f"  Batches sent    : {batches_sent}/{total_iterations}")
print(f"  Total samples   : {len(all_predictions):,}")
if all_predictions:
    print(f"  Overall accuracy: {accuracy_score(all_actuals, all_predictions):.4f}")
else:
    # Nothing was recorded (interrupted before the first response, or zero
    # iterations configured), so there is no accuracy to report.
    print("  Overall accuracy: n/a (no predictions recorded)")
print(f"  End time        : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

[20:06:58] Batch 1/60 | Samples:  70 | Latency: 73ms | Batch Acc: 0.343 | Cumulative Acc: 0.3429 | Remaining: 59m 0s
[20:07:58] Batch 2/60 | Samples:  64 | Latency: 14ms | Batch Acc: 0.156 | Cumulative Acc: 0.2537 | Remaining: 58m 0s
[20:08:59] Batch 3/60 | Samples:  74 | Latency: 15ms | Batch Acc: 0.270 | Cumulative Acc: 0.2596 | Remaining: 57m 0s
[20:09:59] Batch 4/60 | Samples:  79 | Latency: 15ms | Batch Acc: 0.203 | Cumulative Acc: 0.2439 | Remaining: 56m 0s
[20:10:59] Batch 5/60 | Samples:  79 | Latency: 15ms | Batch Acc: 0.215 | Cumulative Acc: 0.2377 | Remaining: 55m 0s
[20:11:59] Batch 6/60 | Samples:  79 | Latency: 75ms | Batch Acc: 0.266 | Cumulative Acc: 0.2427 | Remaining: 54m 0s
[20:12:59] Batch 7/60 | Samples:  99 | Latency: 15ms | Batch Acc: 0.202 | Cumulative Acc: 0.2353 | Remaining: 53m 0s
[20:13:59] Batch 8/60 | Samples:  88 | Latency: 15ms | Batch Acc: 0.250 | Cumulative Acc: 0.2373 | Remaining: 52m 0s
[20:14:59] Batch 9/60 | Samples:  84 | Latency: 15ms | Batch Acc

## Quick Dashboard Link

In [6]:
# Build a console deep link to the monitoring dashboard in the configured region.
dashboard_name = "IDS-XGBoost-Monitoring"
console_base = f"https://{region}.console.aws.amazon.com/cloudwatch/home"
dashboard_url = f"{console_base}?region={region}#dashboards/dashboard/{dashboard_name}"

print("CloudWatch Dashboard:", dashboard_url, sep="\n")

CloudWatch Dashboard:
https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#dashboards/dashboard/IDS-XGBoost-Monitoring
