In [5]:
%%time

from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
from time import sleep
from threading import Thread

import pandas as pd

from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer

from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig

session = Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
CPU times: user 902 ms, sys: 181 ms, total: 1.08 s
Wall time: 2.59 s


### AWS Region and IAM Role + Setup

In [6]:
role = get_execution_role()
print(f"RoleArn: {role}")

sagemaker_session = Session()
sagemaker_client = sagemaker_session.sagemaker_client
sagemaker_runtime_client = sagemaker_session.sagemaker_runtime_client

region = sagemaker_session.boto_region_name
print(f"AWS region: {region}")

# A different bucket can be used, but make sure the role for this notebook has
# the s3:PutObject permissions. This is the bucket into which the data is captured
bucket = Session().default_bucket()
print(f"Demo Bucket: {bucket}")
prefix = "sagemaker/Final_Project_Model_Monitor"
s3_key = f"s3://{bucket}/{prefix}"
print(f"S3 key: {s3_key}")

s3_capture_upload_path = f"{s3_key}/datacapture/final_project"
ground_truth_upload_path = f"{s3_key}/ground_truth_data/final_project/{datetime.now():%Y-%m-%d-%H-%M-%S}"
s3_report_path = f"{s3_key}/reports"

print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

baseline_results_uri = f"{s3_key}/baselining/final_project"
print(f"Baseline results uri: {baseline_results_uri}")

endpoint_instance_count = 1
endpoint_instance_type = "ml.m5.large"


RoleArn: arn:aws:iam::904981812149:role/LabRole
AWS region: us-east-1
Demo Bucket: sagemaker-us-east-1-904981812149
S3 key: s3://sagemaker-us-east-1-904981812149/sagemaker/Final_Project_Model_Monitor
Capture path: s3://sagemaker-us-east-1-904981812149/sagemaker/Final_Project_Model_Monitor/datacapture/final_project
Ground truth path: s3://sagemaker-us-east-1-904981812149/sagemaker/Final_Project_Model_Monitor/ground_truth_data/final_project/2024-06-19-22-51-37
Report path: s3://sagemaker-us-east-1-904981812149/sagemaker/Final_Project_Model_Monitor/reports
Baseline results uri: s3://sagemaker-us-east-1-904981812149/sagemaker/Final_Project_Model_Monitor/baselining/final_project


### Model Files and Data Files

In [7]:
base_path = "/root/AAI-540-Final-Project/"
model_file = f"{base_path}/Models/xgb_regressor_model.tar.gz"
test_dataset = f"{base_path}/Data/test_data_no_head.csv"
validation_dataset = f"{base_path}/Data/validation_data_head.csv"
dataset_type = "text/csv"

with open(validation_dataset) as f:
    headers_line = f.readline().rstrip()
all_headers = headers_line.split(",")
label_header = all_headers[0]

### Deploy model to Amazon SageMaker

In [8]:
model_name = f"Final-Project-xgb-regression-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("Model name: ", model_name)
endpoint_name = f"Final-Project-xgb-regression-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("Endpoint name: ", endpoint_name)

Model name:  Final-Project-xgb-regression-model-monitor-2024-06-19-2251
Endpoint name:  Final-Project-xgb-regression-model-monitor-2024-06-19-2251


In [9]:
#  Read in the modle URL from setup
%store -r model_url

no stored variable or alias model_url


In [10]:
model_url

NameError: name 'model_url' is not defined

### Invoke Deployed Model

In [11]:
image_uri = image_uris.retrieve("xgboost", region, "0.90-1")
print(f"XGBoost image uri: {image_uri}")
model = Model(
    role=role,
    name=model_name,
    image_uri=image_uri,
    model_data=model_url,
    sagemaker_session=sagemaker_session,
)

data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)
print(f"Deploying model {model_name} to endpoint {endpoint_name}")
model.deploy(
    initial_instance_count=endpoint_instance_count,
    instance_type=endpoint_instance_type,
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

XGBoost image uri: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3


NameError: name 'model_url' is not defined

### Create Predictor

In [12]:
from sagemaker.predictor import Predictor

predictor = Predictor(
    endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, serializer=CSVSerializer()
)

In [None]:
### Generate Baseline

In [13]:
import pandas as pd
df = pd.read_csv("Data/val_with_target.csv")
df.columns

Index(['AdjSquareFeet', 'DistancetoCoast', 'DistancetoSinkhole',
       'DistancetoFireDepartment', 'LocationWindSpeed', 'Terrain',
       'ValueofHome', 'NumberOfBuildings', 'NumberOfUnits', 'Age'],
      dtype='object')

In [15]:
from time import sleep

# 'predictor' is  regression model object
validate_dataset = "validation_with_predictions.csv"

limit = 200  # To control the number of predictions made
i = 0
with open(f"Data/{validate_dataset}", "w") as baseline_file:
    # Adjust the header for regression output
    baseline_file.write("actual_value,predicted_value\n")  # our header
    with open("Data/val_with_target.csv", "r") as f:
        header = next(f).strip().split(',')  # Get header to find target column index
        target_index = header.index('ValueofHome')  # Find the index of the target column
        
        for row in f:
            if i >= limit:
                break
            split_row = row.strip().split(',')
            
            # Extract the actual target value using the target_index
            actual_value = split_row[target_index]
            
            # Prepare the input columns by excluding the target column
            input_cols = split_row[:target_index] + split_row[target_index + 1:]
            
            # Convert the list back to a comma-separated string
            input_cols_string = ','.join(input_cols)
            
            # Predict the value
            predicted_value = predictor.predict(input_cols_string)
            
            # Write the actual and predicted values to the file
            baseline_file.write(f"{actual_value},{predicted_value}\n")
            i += 1
            print(".", end="", flush=True)
            sleep(0.5)
print()
print("Done!")


ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint Final-Project-xgb-regression-model-monitor-2024-06-19-2251 of account 904981812149 not found.

### Clean Up

In [9]:
invoke_endpoint_thread.terminate()
ground_truth_thread.terminate()

NameError: name 'invoke_endpoint_thread' is not defined

In [None]:
from sagemaker.predictor import Predictor

predictor = Predictor(endpoint_name, sagemaker_session=sagemaker_session)
model_monitors = predictor.list_monitors()
for model_monitor in model_monitors:
    model_monitor.stop_monitoring_schedule()
    wait_for_execution_to_finish(model_monitor)
    model_monitor.delete_monitoring_schedule()

In [10]:
predictor.delete_endpoint()
predictor.delete_model()

ClientError: An error occurred (ValidationException) when calling the DescribeEndpointConfig operation: Could not find endpoint configuration "Final-Project-xgb-regression-model-monitor-2024-06-19-0541".

In [11]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>