In [1]:
# Quick Titanic data prep (if you don't have it already)
import pandas as pd
from sklearn.model_selection import train_test_split
import os

import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput


import boto3
from datetime import datetime



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Training Data

In [2]:
# Initialize SageMaker session
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Download Titanic dataset
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)

# Basic preprocessing
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

# Prepare for XGBoost (label first, then features)
df_xgb = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]

# Split and save
train_data, test_data = train_test_split(df_xgb, test_size=0.2, random_state=42)

train_data.to_csv('train.csv', header=False, index=False)

# Upload to S3
train_path = f's3://{bucket}/titanic-data/train/train.csv'
boto3.Session().resource('s3').Bucket(bucket).Object('titanic-data/train/train.csv').upload_file('train.csv')

print(f"‚úÖ Training data uploaded to: {train_path}")

‚úÖ Training data uploaded to: s3://sagemaker-us-east-2-854757836160/titanic-data/train/train.csv


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)


## Model Training

In [3]:
print(f"Using bucket: {bucket}")
print(f"Using role: {role}")

# Get XGBoost container
container = image_uris.retrieve('xgboost', sess.boto_region_name, '1.5-1')

# Path to your training data (assuming you have it from Week 2)
train_path = f's3://{bucket}/titanic-data/train/'

print("\n" + "="*60)
print("TRAINING MODEL A (Conservative)")
print("="*60)

# Model A: Conservative hyperparameters
xgb_model_a = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=f's3://{bucket}/ab-test/model-a/',
    sagemaker_session=sess
)

# Conservative hyperparameters (faster, simpler)
xgb_model_a.set_hyperparameters(
    objective='binary:logistic',
    num_round=50,         # Fewer rounds
    max_depth=3,          # Shallower trees
    eta=0.3,              # Higher learning rate
    subsample=1.0         # Use all data
)

# Train Model A
xgb_model_a.fit({'train': TrainingInput(train_path, content_type='text/csv')})

print(f"\n‚úÖ Model A trained successfully!")
print(f"Model A artifact: {xgb_model_a.model_data}")

print("\n" + "="*60)
print("TRAINING MODEL B (Aggressive - Better Accuracy)")
print("="*60)

# Model B: Aggressive hyperparameters
xgb_model_b = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=f's3://{bucket}/ab-test/model-b/',
    sagemaker_session=sess
)

# Aggressive hyperparameters (more accurate, slower)
xgb_model_b.set_hyperparameters(
    objective='binary:logistic',
    num_round=100,        # More rounds
    max_depth=6,          # Deeper trees
    eta=0.1,              # Lower learning rate
    subsample=0.8         # Use 80% of data per tree
)

# Train Model B
xgb_model_b.fit({'train': TrainingInput(train_path, content_type='text/csv')})

print(f"\n‚úÖ Model B trained successfully!")
print(f"Model B artifact: {xgb_model_b.model_data}")

print("\n" + "="*60)
print("PHASE 1 COMPLETE!")
print("="*60)
print("\nBoth models trained and saved to S3:")
print(f"  Model A: {xgb_model_a.model_data}")
print(f"  Model B: {xgb_model_b.model_data}")
print("\nReady for Phase 2: Deployment!")

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-11-17-19-57-50-238


Using bucket: sagemaker-us-east-2-854757836160
Using role: arn:aws:iam::854757836160:role/service-role/AmazonSageMaker-ExecutionRole-20251026T175451

TRAINING MODEL A (Conservative)
2025-11-17 19:57:52 Starting - Starting the training job...
2025-11-17 19:58:25 Downloading - Downloading input data...
2025-11-17 19:58:50 Downloading - Downloading the training image......
2025-11-17 19:59:51 Training - Training image download completed. Training in progress.
  from pandas import MultiIndex, Int64Index[0m
[34m[2025-11-17 19:59:47.040 ip-10-0-154-248.us-east-2.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2025-11-17 19:59:47.062 ip-10-0-154-248.us-east-2.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2025-11-17:19:59:47:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2025-11-17:19:59:47:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning 

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-11-17-20-00-37-185


Training seconds: 99
Billable seconds: 99

‚úÖ Model A trained successfully!
Model A artifact: s3://sagemaker-us-east-2-854757836160/ab-test/model-a/sagemaker-xgboost-2025-11-17-19-57-50-238/output/model.tar.gz

TRAINING MODEL B (Aggressive - Better Accuracy)
2025-11-17 20:00:37 Starting - Starting the training job...
2025-11-17 20:01:02 Starting - Preparing the instances for training...
2025-11-17 20:01:19 Downloading - Downloading input data...
2025-11-17 20:01:45 Downloading - Downloading the training image...
  from pandas import MultiIndex, Int64Index[0m
[34m[2025-11-17 20:02:37.562 ip-10-0-78-198.us-east-2.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2025-11-17 20:02:37.585 ip-10-0-78-198.us-east-2.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2025-11-17:20:02:37:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2025-11-17:20:02:37:INFO] Failed to parse hyperparameter obje

## Multi-Variant Endpoint

In [9]:
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from datetime import datetime
import boto3
import time

print("\n" + "="*60)
print("PHASE 2: DEPLOY MULTI-VARIANT ENDPOINT")
print("="*60)

# Get SageMaker client
client = boto3.client('sagemaker')

# ============================================================
# First, check for and delete any existing endpoints
# ============================================================

print("\n" + "-"*60)
print("Checking for existing endpoints to clean up...")
print("-"*60)

try:
    response = client.list_endpoints(
        StatusEquals='InService',
        MaxResults=100
    )
    
    for endpoint in response['Endpoints']:
        if 'titanic-ab-test' in endpoint['EndpointName']:
            print(f"Found existing endpoint: {endpoint['EndpointName']}")
            print(f"  Deleting to free up resources...")
            client.delete_endpoint(EndpointName=endpoint['EndpointName'])
            print(f"  ‚úÖ Deleted")
except Exception as e:
    print(f"Note: {e}")

print("\nWaiting 30 seconds for resources to be released...")
time.sleep(30)

# ============================================================
# Create and Register Both Models
# ============================================================

print("\n" + "-"*60)
print("Creating Model A and Model B")
print("-"*60)

# Create Model A
model_a_name = f'model-a-{datetime.now().strftime("%Y%m%d%H%M%S")}'
print(f"Creating Model A: {model_a_name}")

client.create_model(
    ModelName=model_a_name,
    PrimaryContainer={
        'Image': container,
        'ModelDataUrl': xgb_model_a.model_data
    },
    ExecutionRoleArn=role
)
print("‚úÖ Model A created successfully")

# Create Model B
model_b_name = f'model-b-{datetime.now().strftime("%Y%m%d%H%M%S")}'
print(f"Creating Model B: {model_b_name}")

client.create_model(
    ModelName=model_b_name,
    PrimaryContainer={
        'Image': container,
        'ModelDataUrl': xgb_model_b.model_data
    },
    ExecutionRoleArn=role
)
print("‚úÖ Model B created successfully")

# ============================================================
# Deploy Both Models with Traffic Split (using ml.t2.medium)
# ============================================================

print("\n" + "-"*60)
print("Deploying both models with 80/20 traffic split")
print("Using ml.t2.medium instances (within quota)")
print("-"*60)

# Create endpoint configuration
endpoint_config_name = f'ab-config-{datetime.now().strftime("%Y%m%d%H%M%S")}'
endpoint_name = f'titanic-ab-test-{datetime.now().strftime("%Y%m%d-%H%M%S")}'

print(f"Creating endpoint config: {endpoint_config_name}")

client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            'VariantName': 'VariantA',
            'ModelName': model_a_name,
            'InitialInstanceCount': 1,
            'InstanceType': 'ml.t2.medium',  # Smaller instance
            'InitialVariantWeight': 80
        },
        {
            'VariantName': 'VariantB',
            'ModelName': model_b_name,
            'InitialInstanceCount': 1,
            'InstanceType': 'ml.t2.medium',  # Smaller instance
            'InitialVariantWeight': 20
        }
    ]
)
print("‚úÖ Endpoint config created")

# Create endpoint
print(f"\nCreating endpoint: {endpoint_name}")
print("(This takes ~8-10 minutes...)")

client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name
)

# Wait for endpoint to be in service
print("Waiting for endpoint to be in service...")
waiter = client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=endpoint_name)

print(f"\n‚úÖ Endpoint deployed successfully!")
print(f"\n" + "="*60)
print("PHASE 2 COMPLETE!")
print("="*60)
print(f"\nEndpoint: {endpoint_name}")
print(f"Traffic split:")
print(f"  VariantA (Conservative): 80%")
print(f"  VariantB (Aggressive):   20%")
print(f"\nBoth variants are now live and receiving traffic!")

# Create predictor for Phase 3
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sess,
    serializer=CSVSerializer()
)

print(f"\nüíæ Save this endpoint name: {endpoint_name}")


PHASE 2: DEPLOY MULTI-VARIANT ENDPOINT

------------------------------------------------------------
Checking for existing endpoints to clean up...
------------------------------------------------------------
Found existing endpoint: titanic-ab-test-20251117-200652
  Deleting to free up resources...
  ‚úÖ Deleted

Waiting 30 seconds for resources to be released...

------------------------------------------------------------
Creating Model A and Model B
------------------------------------------------------------
Creating Model A: model-a-20251117201745
‚úÖ Model A created successfully
Creating Model B: model-b-20251117201746
‚úÖ Model B created successfully

------------------------------------------------------------
Deploying both models with 80/20 traffic split
Using ml.t2.medium instances (within quota)
------------------------------------------------------------
Creating endpoint config: ab-config-20251117201747
‚úÖ Endpoint config created

Creating endpoint: titanic-ab-test-202

## Testing the Traffic Split

In [12]:
import numpy as np
import pandas as pd
from collections import Counter

print("\n" + "="*60)
print("PHASE 3: TEST TRAFFIC SPLIT")
print("="*60)

# ============================================================
# STEP 1: Send test predictions and track which variant responds
# ============================================================

print("\n" + "-"*60)
print("STEP 1: Sending 100 test predictions")
print("-"*60)

# Sample test data (passenger features)
# Format: Pclass, Sex, Age, SibSp, Parch, Fare, Embarked
test_samples = [
    '3,0,22,1,0,7.25,0',      # Young male, 3rd class
    '1,1,38,1,0,71.28,1',     # Female, 1st class
    '3,1,26,0,0,7.92,0',      # Young female, 3rd class
    '1,0,35,1,0,53.10,0',     # Male, 1st class
    '3,0,35,0,0,8.05,0',      # Male, 3rd class
]

# Send predictions and track variants
variant_counts = []

print("\nSending predictions and tracking variants...")
print("(This will take ~30 seconds)\n")

runtime_client = boto3.client('sagemaker-runtime')

for i in range(100):
    # Cycle through test samples
    sample = test_samples[i % len(test_samples)]
    
    # Send prediction and get full response
    full_response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=sample,
        ContentType='text/csv'
    )
    
    # Get the variant that served this request
    variant_name = full_response['InvokedProductionVariant']
    variant_counts.append(variant_name)
    
    # Progress indicator
    if (i + 1) % 20 == 0:
        print(f"  Sent {i + 1}/100 predictions...")

# ============================================================
# STEP 2: Analyze traffic distribution
# ============================================================

print("\n" + "-"*60)
print("STEP 2: Traffic Distribution Analysis")
print("-"*60)

# Count variant responses
variant_distribution = Counter(variant_counts)

print(f"\nResults from 100 predictions:")
print(f"  VariantA (Conservative): {variant_distribution.get('VariantA', 0)} requests ({variant_distribution.get('VariantA', 0)}%)")
print(f"  VariantB (Aggressive):   {variant_distribution.get('VariantB', 0)} requests ({variant_distribution.get('VariantB', 0)}%)")

# Check if distribution is close to expected (80/20)
variant_a_pct = variant_distribution.get('VariantA', 0)
variant_b_pct = variant_distribution.get('VariantB', 0)

print("\nExpected distribution:")
print("  VariantA: 80%")
print("  VariantB: 20%")

# Tolerance check (allow ¬±10% variance due to randomness)
if 70 <= variant_a_pct <= 90 and 10 <= variant_b_pct <= 30:
    print("\n‚úÖ Traffic split is working correctly!")
else:
    print("\n‚ö†Ô∏è  Traffic split differs from expected (this can happen with small samples)")

# ============================================================
# STEP 3: Test targeting specific variants
# ============================================================

print("\n" + "-"*60)
print("STEP 3: Testing Variant Targeting")
print("-"*60)

# Send requests to specific variants
sample = test_samples[0]

print("\nSending prediction to VariantA specifically:")
response_a = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=sample,
    ContentType='text/csv',
    TargetVariant='VariantA'
)
prediction_a = response_a['Body'].read().decode('utf-8')
print(f"  Variant: {response_a['InvokedProductionVariant']}")
print(f"  Prediction: {prediction_a}")

print("\nSending prediction to VariantB specifically:")
response_b = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=sample,
    ContentType='text/csv',
    TargetVariant='VariantB'
)
prediction_b = response_b['Body'].read().decode('utf-8')
print(f"  Variant: {response_b['InvokedProductionVariant']}")
print(f"  Prediction: {prediction_b}")

print("\n‚úÖ Both variants are responding correctly!")

# ============================================================
# STEP 4: Check endpoint status
# ============================================================

print("\n" + "-"*60)
print("STEP 4: Endpoint Status Check")
print("-"*60)

endpoint_desc = client.describe_endpoint(EndpointName=endpoint_name)

print(f"\nEndpoint: {endpoint_name}")
print(f"Status: {endpoint_desc['EndpointStatus']}")
print(f"\nProduction Variants:")

for variant in endpoint_desc['ProductionVariants']:
    print(f"\n  {variant['VariantName']}:")
    print(f"    Current Weight: {variant['CurrentWeight']}")
    print(f"    Desired Weight: {variant['DesiredWeight']}")
    print(f"    Current Instance Count: {variant['CurrentInstanceCount']}")

print("\n" + "="*60)
print("PHASE 3 COMPLETE!")
print("="*60)
print("\n‚úÖ Traffic split verified and working!")
print("‚úÖ Both variants tested successfully!")
print("\nReady for Phase 4: Monitoring metrics in CloudWatch")


PHASE 3: TEST TRAFFIC SPLIT

------------------------------------------------------------
STEP 1: Sending 100 test predictions
------------------------------------------------------------

Sending predictions and tracking variants...
(This will take ~30 seconds)

  Sent 20/100 predictions...
  Sent 40/100 predictions...
  Sent 60/100 predictions...
  Sent 80/100 predictions...
  Sent 100/100 predictions...

------------------------------------------------------------
STEP 2: Traffic Distribution Analysis
------------------------------------------------------------

Results from 100 predictions:
  VariantA (Conservative): 86 requests (86%)
  VariantB (Aggressive):   14 requests (14%)

Expected distribution:
  VariantA: 80%
  VariantB: 20%

‚úÖ Traffic split is working correctly!

------------------------------------------------------------
STEP 3: Testing Variant Targeting
------------------------------------------------------------

Sending prediction to VariantA specifically:
  Varia

## Monitoring with Cloudwatch

In [14]:
import boto3
from datetime import datetime, timedelta
import time

print("\n" + "="*60)
print("PHASE 4: MONITOR METRICS IN CLOUDWATCH")
print("="*60)

cloudwatch = boto3.client('cloudwatch')

# ============================================================
# STEP 1: Generate some traffic for metrics
# ============================================================

print("\n" + "-"*60)
print("STEP 1: Generating traffic for metrics")
print("-"*60)

print("\nSending 50 more predictions to generate CloudWatch data...")

runtime_client = boto3.client('sagemaker-runtime')
test_sample = '3,0,22,1,0,7.25,0'

for i in range(50):
    runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=test_sample,
        ContentType='text/csv'
    )
    if (i + 1) % 10 == 0:
        print(f"  Sent {i + 1}/50 predictions...")

print("\n‚úÖ Traffic generated")
print("Waiting 2 minutes for CloudWatch metrics to populate...")
time.sleep(120)  # CloudWatch has a delay

# ============================================================
# STEP 2: Retrieve invocation metrics
# ============================================================

print("\n" + "-"*60)
print("STEP 2: Retrieving Invocation Metrics")
print("-"*60)

# Time range: last 30 minutes
end_time = datetime.utcnow()
start_time = end_time - timedelta(minutes=30)

print(f"\nQuerying metrics from {start_time.strftime('%H:%M')} to {end_time.strftime('%H:%M')} UTC")

# Get invocation counts for each variant
for variant in ['VariantA', 'VariantB']:
    print(f"\n{variant}:")
    
    try:
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/SageMaker',
            MetricName='Invocations',
            Dimensions=[
                {'Name': 'EndpointName', 'Value': endpoint_name},
                {'Name': 'VariantName', 'Value': variant}
            ],
            StartTime=start_time,
            EndTime=end_time,
            Period=300,  # 5-minute periods
            Statistics=['Sum']
        )
        
        if response['Datapoints']:
            datapoints = sorted(response['Datapoints'], key=lambda x: x['Timestamp'])
            total_invocations = sum([dp['Sum'] for dp in datapoints])
            print(f"  Total Invocations: {int(total_invocations)}")
            
            print(f"  Invocations by period:")
            for dp in datapoints:
                print(f"    {dp['Timestamp'].strftime('%H:%M')}: {int(dp['Sum'])} invocations")
        else:
            print(f"  No data available yet (metrics may still be processing)")
            
    except Exception as e:
        print(f"  Error retrieving metrics: {e}")

# ============================================================
# STEP 3: Retrieve latency metrics
# ============================================================

print("\n" + "-"*60)
print("STEP 3: Retrieving Latency Metrics")
print("-"*60)

for variant in ['VariantA', 'VariantB']:
    print(f"\n{variant}:")
    
    try:
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/SageMaker',
            MetricName='ModelLatency',
            Dimensions=[
                {'Name': 'EndpointName', 'Value': endpoint_name},
                {'Name': 'VariantName', 'Value': variant}
            ],
            StartTime=start_time,
            EndTime=end_time,
            Period=300,
            Statistics=['Average', 'Maximum', 'Minimum']
        )
        
        if response['Datapoints']:
            datapoints = sorted(response['Datapoints'], key=lambda x: x['Timestamp'])
            
            # Get overall statistics
            avg_latencies = [dp['Average'] for dp in datapoints]
            overall_avg = sum(avg_latencies) / len(avg_latencies)
            
            print(f"  Average Latency: {overall_avg:.2f} ms")
            print(f"  Min Latency: {min([dp['Minimum'] for dp in datapoints]):.2f} ms")
            print(f"  Max Latency: {max([dp['Maximum'] for dp in datapoints]):.2f} ms")
        else:
            print(f"  No latency data available yet")
            
    except Exception as e:
        print(f"  Error retrieving metrics: {e}")

# ============================================================
# STEP 4: Compare variant performance
# ============================================================

print("\n" + "-"*60)
print("STEP 4: Variant Performance Comparison")
print("-"*60)

print("\nSummary:")
print("="*50)

# Get invocation counts
variant_invocations = {}
for variant in ['VariantA', 'VariantB']:
    try:
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/SageMaker',
            MetricName='Invocations',
            Dimensions=[
                {'Name': 'EndpointName', 'Value': endpoint_name},
                {'Name': 'VariantName', 'Value': variant}
            ],
            StartTime=start_time,
            EndTime=end_time,
            Period=1800,  # 30-minute period
            Statistics=['Sum']
        )
        
        if response['Datapoints']:
            total = sum([dp['Sum'] for dp in response['Datapoints']])
            variant_invocations[variant] = int(total)
        else:
            variant_invocations[variant] = 0
    except:
        variant_invocations[variant] = 0

total_invocations = sum(variant_invocations.values())

if total_invocations > 0:
    print(f"\nTotal Invocations: {total_invocations}")
    print(f"\nVariantA (Conservative):")
    print(f"  Invocations: {variant_invocations['VariantA']} ({variant_invocations['VariantA']/total_invocations*100:.1f}%)")
    print(f"\nVariantB (Aggressive):")
    print(f"  Invocations: {variant_invocations['VariantB']} ({variant_invocations['VariantB']/total_invocations*100:.1f}%)")
    
    print("\n" + "="*50)
    print("\nüìä In a real scenario, you would:")
    print("  1. Monitor these metrics over days/weeks")
    print("  2. Compare model accuracy on production data")
    print("  3. Track business metrics (conversions, etc.)")
    print("  4. Gradually shift traffic if VariantB performs better")
    print("  5. Eventually promote winning variant to 100%")
else:
    print("\n‚ö†Ô∏è  Not enough CloudWatch data yet.")
    print("   Metrics can take 5-15 minutes to appear.")
    print("   In production, you'd monitor over days/weeks.")

print("\n" + "="*60)
print("PHASE 4 COMPLETE!")
print("="*60)
print("\n‚úÖ CloudWatch monitoring configured and tested!")
print("\nNext: Phase 5 (Cleanup) to delete resources and avoid charges")


PHASE 4: MONITOR METRICS IN CLOUDWATCH

------------------------------------------------------------
STEP 1: Generating traffic for metrics
------------------------------------------------------------

Sending 50 more predictions to generate CloudWatch data...
  Sent 10/50 predictions...
  Sent 20/50 predictions...
  Sent 30/50 predictions...
  Sent 40/50 predictions...
  Sent 50/50 predictions...

‚úÖ Traffic generated
Waiting 2 minutes for CloudWatch metrics to populate...

------------------------------------------------------------
STEP 2: Retrieving Invocation Metrics
------------------------------------------------------------

Querying metrics from 20:03 to 20:33 UTC

VariantA:
  Total Invocations: 129
  Invocations by period:
    20:23: 87 invocations
    20:28: 42 invocations

VariantB:
  Total Invocations: 23
  Invocations by period:
    20:23: 15 invocations
    20:28: 8 invocations

------------------------------------------------------------
STEP 3: Retrieving Latency Met

  end_time = datetime.utcnow()


## Cleanup

In [16]:
import boto3

print("\n" + "="*60)
print("PHASE 5: CLEANUP")
print("="*60)

client = boto3.client('sagemaker')

# ============================================================
# STEP 1: Delete Endpoint
# ============================================================

print("\n" + "-"*60)
print("STEP 1: Deleting Endpoint")
print("-"*60)

print(f"\nDeleting endpoint: {endpoint_name}")
print("(This stops all billing for instances)")

try:
    client.delete_endpoint(EndpointName=endpoint_name)
    print(f"‚úÖ Endpoint '{endpoint_name}' deleted successfully")
except Exception as e:
    print(f"‚ö†Ô∏è  Error deleting endpoint: {e}")

# ============================================================
# STEP 2: Delete Endpoint Configuration
# ============================================================

print("\n" + "-"*60)
print("STEP 2: Deleting Endpoint Configuration")
print("-"*60)

print(f"\nDeleting endpoint config: {endpoint_config_name}")

try:
    client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
    print(f"‚úÖ Endpoint config '{endpoint_config_name}' deleted successfully")
except Exception as e:
    print(f"‚ö†Ô∏è  Error deleting endpoint config: {e}")

# ============================================================
# STEP 3: Delete Models (Optional - saves clutter)
# ============================================================

print("\n" + "-"*60)
print("STEP 3: Deleting Models (optional)")
print("-"*60)

for model_name in [model_a_name, model_b_name]:
    print(f"\nDeleting model: {model_name}")
    try:
        client.delete_model(ModelName=model_name)
        print(f"‚úÖ Model '{model_name}' deleted")
    except Exception as e:
        print(f"‚ö†Ô∏è  Error deleting model: {e}")

# ============================================================
# STEP 4: Summary
# ============================================================

print("\n" + "="*60)
print("CLEANUP COMPLETE!")
print("="*60)

print("\n‚úÖ Endpoint deleted (billing stopped)")
print("‚úÖ Endpoint config deleted")
print("‚úÖ Models deleted")

print("\nüì¶ What's still in S3 (minimal cost):")
print(f"  - Training data: s3://{bucket}/titanic-data/")
print(f"  - Model A artifact: {xgb_model_a.model_data}")
print(f"  - Model B artifact: {xgb_model_b.model_data}")
print("\nüí° These S3 files cost ~$0.023/GB/month (pennies)")
print("   You can delete them later if needed.")

print("\n" + "="*60)
print("A/B TESTING PROJECT COMPLETE! üéâ")
print("="*60)

print("\nüéì What you accomplished:")
print("  ‚úÖ Trained two XGBoost models with different hyperparameters")
print("  ‚úÖ Deployed multi-variant endpoint with traffic splitting")
print("  ‚úÖ Verified 80/20 traffic distribution")
print("  ‚úÖ Monitored performance with CloudWatch")
print("  ‚úÖ Compared latency between variants")
print("  ‚úÖ Cleaned up resources to avoid charges")

print("\nüìù Key learnings:")
print("  ‚Ä¢ Multi-variant endpoints enable A/B testing")
print("  ‚Ä¢ Traffic weights control distribution")
print("  ‚Ä¢ CloudWatch provides production monitoring")
print("  ‚Ä¢ Trade-offs exist between accuracy and latency")
print("  ‚Ä¢ Proper cleanup prevents unexpected AWS bills")

print("\nüöÄ Ready for your AWS ML certification exam!")


PHASE 5: CLEANUP

------------------------------------------------------------
STEP 1: Deleting Endpoint
------------------------------------------------------------

Deleting endpoint: titanic-ab-test-20251117-201747
(This stops all billing for instances)
‚úÖ Endpoint 'titanic-ab-test-20251117-201747' deleted successfully

------------------------------------------------------------
STEP 2: Deleting Endpoint Configuration
------------------------------------------------------------

Deleting endpoint config: ab-config-20251117201747
‚úÖ Endpoint config 'ab-config-20251117201747' deleted successfully

------------------------------------------------------------
STEP 3: Deleting Models (optional)
------------------------------------------------------------

Deleting model: model-a-20251117201745
‚úÖ Model 'model-a-20251117201745' deleted

Deleting model: model-b-20251117201746
‚úÖ Model 'model-b-20251117201746' deleted

CLEANUP COMPLETE!

‚úÖ Endpoint deleted (billing stopped)
‚úÖ Endp