In [1]:
# Importing the necessary library
import boto3
import sagemaker
import pandas as pd
from joblib import dump, load
import s3fs


In [2]:
# Initialising new sagemaker session as "sess".
sess = sagemaker.Session()
# Bucket variable is used for storing the location of the bucket
bucket = 'sagemaker-studio-009676737623-l4vs7j0o0ib'
# Assigning the prefix variable 
prefix = 'mlops-level1-data' 
# Check for necessary permission needed for training and deploying models. 
role = sagemaker.get_execution_role()
# To understand where this session is configured to operate.
region = boto3.Session().region_name


In [7]:
endpoint_name = 'HumanActivity-InferenceEndpoint-final-2023-09-24-0840'

### Model Prediction

In [11]:
data_capture_prefix = "{}/datacapture".format(prefix)
s3_capture_upload_path = "s3://{}/{}".format(bucket, data_capture_prefix)
reports_prefix = "{}/reports".format(prefix)
s3_report_path = "s3://{}/{}".format(bucket, reports_prefix)

print("Capture path: {}".format(s3_capture_upload_path))
print("Report path: {}".format(s3_report_path))

Capture path: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/datacapture
Report path: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/reports


In [12]:
!aws s3 ls {s3_capture_upload_path}/input/ --recursive


2023-09-21 17:27:17        103 mlops-level1-data/datacapture/input/2023/09/21/17/4cd90f84-650d-4233-99c5-83ffb84d8364.json
2023-09-23 07:28:39        119 mlops-level1-data/datacapture/input/2023/09/23/07/1431e7d4-96b3-4767-a45d-5c6204bf5280.json


In [13]:
s3 = boto3.client("s3")

captured_input_s3_key = [
    k["Key"]
    for k in s3.list_objects_v2(Bucket=bucket, Prefix=f"{data_capture_prefix}/input/")["Contents"]
]
assert len(captured_input_s3_key) > 0


In [14]:
import json
sample_input_body = s3.get_object(Bucket=bucket, Key=captured_input_s3_key[0])["Body"]
sample_input_content = json.loads(sample_input_body.read())


In [15]:
sample_input_content

[{'prefix': 's3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/baseline/data/v1.csv'},
 '']

In [16]:
!aws s3 ls {s3_capture_upload_path}/output/ --recursive


2023-09-21 17:27:17        114 mlops-level1-data/datacapture/output/2023/09/21/17/b66aed65-49f5-4a4d-a6ee-243ccc0860a9.json
2023-09-23 07:28:39        119 mlops-level1-data/datacapture/output/2023/09/23/07/571f3b71-0d2a-46d5-922f-8fdfa070f008.json


In [17]:
captured_input_s3_key = [
    k["Key"]
    for k in s3.list_objects_v2(Bucket=bucket, Prefix=f"{data_capture_prefix}/output/")["Contents"]
]
assert len(captured_input_s3_key) > 0
sample_output_body = s3.get_object(Bucket=bucket, Key=captured_input_s3_key[0])["Body"]
sample_output_content = json.loads(sample_output_body.read())


In [18]:
sample_output_content


[{'prefix': 's3://sagemaker-ap-south-1-009676737623/sagemaker-scikit-learn-2023-09-21-17-23-05-878/'},
 'v1.csv.out']

In [106]:
df = pd.read_csv( baseline_data_uri + "/training-inputs-with-header.csv")

## Model Baseline


In [71]:
from sagemaker.inputs import BatchDataCaptureConfig


In [19]:
# copy over the training dataset to Amazon S3 (if you already have it in Amazon S3, you could reuse it)
baseline_prefix = prefix + "/baseline"
baseline_data_prefix = baseline_prefix + "/data"
baseline_results_prefix = baseline_prefix + "/results"

baseline_data_uri = "s3://{}/{}".format(bucket, baseline_data_prefix)
baseline_results_uri = "s3://{}/{}".format(bucket, baseline_results_prefix)
print("Baseline data uri: {}".format(baseline_data_uri))
print("Baseline results uri: {}".format(baseline_results_uri))

Baseline data uri: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/baseline/data
Baseline results uri: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/baseline/results


In [21]:
training_data_path = baseline_data_uri + "/training-inputs-with-header.csv"
#feature_v1_data.to_csv(training_data_path,index = False)

In [22]:
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

my_default_monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

my_default_monitor.suggest_baseline(
    baseline_dataset=training_data_path,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    wait=True,
)

INFO:sagemaker:Creating processing-job with name baseline-suggestion-job-2023-09-24-09-53-53-668


.......................[34m2023-09-24 09:57:45.324399: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory[0m
[34m2023-09-24 09:57:45.324432: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2023-09-24 09:57:46.835755: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory[0m
[34m2023-09-24 09:57:46.835783: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)[0m
[34m2023-09-24 09:57:46.835802: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-0-93-246.ap-south-1.compute.internal): /proc/driver/

<sagemaker.processing.ProcessingJob at 0x7f5f8b9a5ff0>

In [23]:
s3_client = boto3.Session().client("s3")
result = s3_client.list_objects(Bucket=bucket, Prefix=baseline_results_prefix)
report_files = [report_file.get("Key") for report_file in result.get("Contents")]
print("Found Files:")
print("\n ".join(report_files))

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


Found Files:
mlops-level1-data/baseline/results/constraints.json
 mlops-level1-data/baseline/results/statistics.json


## Monitor Schedule

In [45]:
from sagemaker.model_monitor import CronExpressionGenerator
from sagemaker.model_monitor import EndpointInput
from time import gmtime, strftime

mon_schedule_name = "model-monitor-schedule-realtime" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=EndpointInput(
        endpoint_name=endpoint_name,
        destination="/opt/ml/processing/input/endpoint"
    ),
    output_s3_uri=s3_report_path,
    statistics=statistics_path,
    constraints=constraints_path,
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: model-monitor-schedule-realtime2023-09-24-10-30-51


In [40]:
"""
from sagemaker.model_monitor import CronExpressionGenerator
from sagemaker.model_monitor import BatchTransformInput
from sagemaker.model_monitor import MonitoringDatasetFormat
from time import gmtime, strftime

statistics_path = "{}/statistics.json".format(baseline_results_uri)
constraints_path = "{}/constraints.json".format(baseline_results_uri)

mon_schedule_name = "DEMO-mlops1-model-monitor-schedule-" + strftime(
    "%Y-%m-%d-%H-%M-%S", gmtime()
)
my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    batch_transform_input=BatchTransformInput(
        data_captured_destination_s3_uri=s3_capture_upload_path,
        destination="/opt/ml/processing/input",
        dataset_format=MonitoringDatasetFormat.csv(header=False),
    ),
    output_s3_uri=s3_report_path,
    statistics=statistics_path,
    constraints=constraints_path,
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)
"""

'\nfrom sagemaker.model_monitor import CronExpressionGenerator\nfrom sagemaker.model_monitor import BatchTransformInput\nfrom sagemaker.model_monitor import MonitoringDatasetFormat\nfrom time import gmtime, strftime\n\nstatistics_path = "{}/statistics.json".format(baseline_results_uri)\nconstraints_path = "{}/constraints.json".format(baseline_results_uri)\n\nmon_schedule_name = "DEMO-mlops1-model-monitor-schedule-" + strftime(\n    "%Y-%m-%d-%H-%M-%S", gmtime()\n)\nmy_default_monitor.create_monitoring_schedule(\n    monitor_schedule_name=mon_schedule_name,\n    batch_transform_input=BatchTransformInput(\n        data_captured_destination_s3_uri=s3_capture_upload_path,\n        destination="/opt/ml/processing/input",\n        dataset_format=MonitoringDatasetFormat.csv(header=False),\n    ),\n    output_s3_uri=s3_report_path,\n    statistics=statistics_path,\n    constraints=constraints_path,\n    schedule_cron_expression=CronExpressionGenerator.hourly(),\n    enable_cloudwatch_metrics=T

In [47]:
desc_schedule_result = my_default_monitor.describe_schedule()
print("Schedule status: {}".format(desc_schedule_result["MonitoringScheduleStatus"]))

Schedule status: Scheduled


In [62]:
import time

mon_executions = my_default_monitor.list_executions()
print(
    "We created a hourly schedule above and it will kick off executions ON the hour (plus 0 - 20 min buffer.\nWe will have to wait till we hit the hour..."
)

while len(mon_executions) == 0:
    print("Waiting for the 1st execution to happen...")
    time.sleep(60)
    mon_executions = my_default_monitor.list_executions()

We created a hourly schedule above and it will kick off executions ON the hour (plus 0 - 20 min buffer.
We will have to wait till we hit the hour...


In [55]:
latest_execution = mon_executions[
    -1
]  # latest execution's index is -1, second to last is -2 and so on..
# time.sleep(60)
latest_execution.wait(logs=False)

print("Latest execution status: {}".format(latest_execution.describe()["ProcessingJobStatus"]))
print("Latest execution result: {}".format(latest_execution.describe()["ExitMessage"]))

latest_job = latest_execution.describe()
if latest_job["ProcessingJobStatus"] != "Completed":
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )

*

UnexpectedStatusException: Error for Processing job model-monitoring-202309241100-3ebbbf6e34f2080a3c63bb75: Failed. Reason: AlgorithmError: See job logs for more information

In [66]:
report_uri = latest_execution.output.destination
print("Report Uri: {}".format(report_uri))

Report Uri: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/reports/DEMO-xgb-churn-pred-model-monitor-schedule-2023-09-21-17-54-46/2023/09/21/18


In [67]:
from urllib.parse import urlparse

s3uri = urlparse(report_uri)
report_bucket = s3uri.netloc
report_key = s3uri.path.lstrip("/")
print("Report bucket: {}".format(report_bucket))
print("Report key: {}".format(report_key))

s3_client = boto3.Session().client("s3")
result = s3_client.list_objects(Bucket=report_bucket, Prefix=report_key)
report_files = [report_file.get("Key") for report_file in result.get("Contents")]
print("Found Report Files:")
print("\n ".join(report_files))

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


Report bucket: sagemaker-studio-009676737623-l4vs7j0o0ib
Report key: mlops-level1-data/reports/DEMO-xgb-churn-pred-model-monitor-schedule-2023-09-21-17-54-46/2023/09/21/18
Found Report Files:
mlops-level1-data/reports/DEMO-xgb-churn-pred-model-monitor-schedule-2023-09-21-17-54-46/2023/09/21/18/constraint_violations.json


In [70]:
violations = my_default_monitor.latest_monitoring_constraint_violations()
#pd.set_option("display.max_colwidth", -1)
constraints_df = json_normalize(violations.body_dict["violations"])
constraints_df.head(10)

Unnamed: 0,feature_name,constraint_check_type,description
0,Missing columns,missing_column_check,There are missing columns in current dataset. ...
