# Install Required Packages

In [25]:
!pip install pyathena
!pip3 install -U sagemaker

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Import Required Libraries

In [26]:
import json # json encoding and decoding
import os # operating system interfaces
import re # regular expressions
from time import gmtime, strftime, sleep # time-related functions

import boto3 # aws sdk for python
import numpy as np # array manipulation
import pandas as pd # python data analysis
import sagemaker # machine learning platform
from pyathena import connect # athena client
from sagemaker.pytorch.estimator import PyTorch # PyTorch estimator

# Perform Prerequisites

In [27]:
# establish sagemaker session, provide permissions
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# create one boto3 session pinned to the region; every client below is built from it
boto_session = boto3.Session(region_name=region)

# client for the sagemaker service
sm = boto_session.client(service_name='sagemaker', region_name=region)

# client to make featurestore record calls
featurestore_runtime = boto_session.client(
    service_name="sagemaker-featurestore-runtime", region_name=region
)

# create featurestore session
feature_store_session = sagemaker.Session(
    boto_session=boto_session,
    sagemaker_client=sm,
    sagemaker_featurestore_runtime_client=featurestore_runtime,
)

# define prefixes for the safety data directory and featurestore
prefix_data = 'safety/data'
prefix_featurestore = 'safety-featurestore'

# s3 locations reused by the monitoring cells further down
# (no trailing newline: these values are passed to AWS APIs as S3 URIs)
s3_capture_upload_path = f"s3://{bucket}/{prefix_data}"
s3_report_path = f"s3://{bucket}/{prefix_featurestore}"

# print s3 locations (the blank line after the second one is added here, not stored in the URI)
print('Data directory location:', s3_capture_upload_path)
print('FeatureStore directory location:', s3_report_path + "\n")

# print current IAM role for notebook instance
print('Execution Role:', role)

Data directory location: s3://sagemaker-us-east-1-752648173624/safety/data
FeatureStore directory location: s3://sagemaker-us-east-1-752648173624/safety-featurestore

Execution Role: arn:aws:iam::752648173624:role/LabRole


# Data Preparation

## Query Catalog Data

In [28]:
# athena database and table holding the catalog
database_name = 'safetydb'
table_name_csv = 'catalog_csv'

# temporary s3 staging directory for athena query results
s3_staging_dir = f"s3://{bucket}/athena/staging"

# open the athena connection
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

# sql statement: catalog rows that reference a .jpg image and a .txt label file
statement = f"""SELECT * FROM {database_name}.{table_name_csv}
    WHERE img_filename like '%.jpg'
    AND label_filename like '%.txt'
    LIMIT 25000"""

# show the statement for review before it is executed
print('SQL query SELECT statement:\n', statement)

SQL query SELECT statement:
 SELECT * FROM safetydb.catalog_csv
    WHERE img_filename like '%.jpg'
    AND label_filename like '%.txt'
    LIMIT 25000


In [29]:
# run the statement through the athena connection and load the result into a dataframe
df_catalog_query = pd.read_sql(sql=statement, con=conn)

# preview the first ten catalog rows
df_catalog_query.head(10)

  df_catalog_query = pd.read_sql(statement, conn)


Unnamed: 0,sample_id,img_filename,label_filename
0,1,000001.jpg,000001.txt
1,2,000002.jpg,000002.txt
2,3,000003.jpg,000003.txt
3,4,000004.jpg,000004.txt
4,5,000005.jpg,000005.txt
5,6,000006.jpg,000006.txt
6,7,000007.jpg,000007.txt
7,8,000008.jpg,000008.txt
8,9,000009.jpg,000009.txt
9,10,000010.jpg,000010.txt


## Combine with FeatureGroup Data

### Get Batch Records from FeatureGroups

In [30]:
# define featuregroup name patterns
image_feature_group_name_pattern = 'image-feature-group-'
label_feature_group_name_pattern = 'label-feature-group-'

# list matching featuregroups, newest first, so index 0 is the latest version
all_image_feature_groups = sm.list_feature_groups(NameContains=image_feature_group_name_pattern, SortBy='CreationTime', SortOrder='Descending')
all_label_feature_groups = sm.list_feature_groups(NameContains=label_feature_group_name_pattern, SortBy='CreationTime', SortOrder='Descending')

# fail with an explicit message (instead of a bare "list index out of range") when nothing matches
for _pattern, _response in (
    (image_feature_group_name_pattern, all_image_feature_groups),
    (label_feature_group_name_pattern, all_label_feature_groups),
):
    if not _response['FeatureGroupSummaries']:
        raise IndexError(f"No feature group found whose name contains '{_pattern}'")

# obtain latest version of featuregroup names, if multiples exist
image_feature_group_name = all_image_feature_groups['FeatureGroupSummaries'][0]['FeatureGroupName']
label_feature_group_name = all_label_feature_groups['FeatureGroupSummaries'][0]['FeatureGroupName']

# print featuregroup names
print('Image FeatureGroup Name:', image_feature_group_name)
print('Label FeatureGroup Name:', label_feature_group_name)

Image FeatureGroup Name: image-feature-group-07-13-08-39
Label FeatureGroup Name: label-feature-group-07-13-08-39


In [31]:
# specify record identifiers (i.e., sample ids) from previous catalog query
record_identifiers_value = df_catalog_query['sample_id'].values.astype(str).tolist()

# BatchGetRecord accepts a limited number of record identifiers per request,
# so the identifiers are sent in chunks of this size
BATCH_GET_RECORD_CHUNK_SIZE = 100


def _batch_get_all_records(feature_group_name, identifiers):
    """Fetch records for all `identifiers` from one feature group, chunk by chunk.

    Parameters
    ----------
    feature_group_name : str
        Name of the feature group to query.
    identifiers : list of str
        Record identifier values to look up.

    Returns
    -------
    dict
        Dictionary with the keys 'Records', 'Errors' and 'UnprocessedIdentifiers',
        each holding the concatenation of that key across all chunked responses.
    """
    combined = {'Records': [], 'Errors': [], 'UnprocessedIdentifiers': []}

    for start in range(0, len(identifiers), BATCH_GET_RECORD_CHUNK_SIZE):
        response = featurestore_runtime.batch_get_record(
            Identifiers=[
                {
                    'FeatureGroupName': feature_group_name,
                    'RecordIdentifiersValueAsString': identifiers[start:start + BATCH_GET_RECORD_CHUNK_SIZE],
                }
            ]
        )

        # merge this chunk's response into the combined result
        for key in combined:
            combined[key].extend(response.get(key, []))

    return combined


# query image featuregroup by using record_id as primary key
df_image_feature_group = _batch_get_all_records(image_feature_group_name, record_identifiers_value)

# query label featuregroup by using record_id as primary key
df_label_feature_group = _batch_get_all_records(label_feature_group_name, record_identifiers_value)

### Display Image FeatureGroup Records

In [32]:
# create list of image records
image_records = [sample['Record'] for sample in df_image_feature_group['Records']]

# turn every record into a {feature name: value} mapping, excluding eventtime;
# keying by name keeps values under the right column even if a record
# lists its features in a different order or leaves one out
image_rows = [
    {feature['FeatureName']: feature['ValueAsString'] for feature in record if feature['FeatureName'] != 'EventTime'}
    for record in image_records
]

# feature names in order of first appearance across all records
image_feature_names = list(dict.fromkeys(name for row in image_rows for name in row))

# header row followed by one aligned row per record (missing features become None)
image_data = [image_feature_names] + [
    [row.get(name) for name in image_feature_names] for row in image_rows
]

# create images dataframe
df_images = pd.DataFrame(image_data[1:], columns=image_data[0])

# display dataframe head
df_images.head()

Unnamed: 0,sample_id,img_format,img_mode,img_height,img_width
0,12,JPEG,RGB,640,640
1,6,JPEG,RGB,640,640
2,16,JPEG,RGB,640,640
3,2,JPEG,RGB,640,640
4,11,JPEG,RGB,640,640


### Display Label FeatureGroup Records

In [33]:
# create list of label records
label_records = [sample['Record'] for sample in df_label_feature_group['Records']]

# turn every record into a {feature name: value} mapping, excluding eventtime;
# keying by name keeps values under the right column even if a record
# lists its features in a different order or leaves one out
label_rows = [
    {feature['FeatureName']: feature['ValueAsString'] for feature in record if feature['FeatureName'] != 'EventTime'}
    for record in label_records
]

# feature names in order of first appearance across all records
label_feature_names = list(dict.fromkeys(name for row in label_rows for name in row))

# header row followed by one aligned row per record (missing features become None)
label_data = [label_feature_names] + [
    [row.get(name) for name in label_feature_names] for row in label_rows
]

# create labels dataframe
df_labels = pd.DataFrame(label_data[1:], columns=label_data[0])

# display dataframe head
df_labels.head()

Unnamed: 0,sample_id,count_helmet,count_vest,count_head
0,12,4,0,0
1,6,0,0,5
2,16,1,0,0
3,2,2,1,0
4,11,13,0,0


### Join Records

In [34]:
# join the catalog rows with their image features, then with their label features,
# matching on sample_id at each step
df_combined = (
    df_catalog_query
    .merge(df_images, on='sample_id')
    .merge(df_labels, on='sample_id')
)

# display resulting dataframe
df_combined.head()

Unnamed: 0,sample_id,img_filename,label_filename,img_format,img_mode,img_height,img_width,count_helmet,count_vest,count_head
0,1,000001.jpg,000001.txt,JPEG,RGB,640,640,2,0,0
1,2,000002.jpg,000002.txt,JPEG,RGB,640,640,2,1,0
2,3,000003.jpg,000003.txt,JPEG,RGB,640,640,4,0,0
3,4,000004.jpg,000004.txt,JPEG,RGB,640,640,0,0,5
4,5,000005.jpg,000005.txt,JPEG,RGB,640,640,2,0,0


## Split Data

In [35]:
# use a copy of dataframe, can manipulate if desired for experimentation
data = df_combined.copy()

# fixed seed so the same samples land in the same split on every Restart & Run All
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

# draw one uniform number in [0, 1) per sample and bucket it:
# 40% training, 10% validation, 10% test, 40% batch inference
rand_split = split_rng.random(len(data))
train_list = rand_split < 0.4
val_list = (rand_split >= 0.4) & (rand_split < 0.5)
test_list = (rand_split >= 0.5) & (rand_split < 0.6)
batch_list = rand_split >= 0.6 # "production" data

# print data splits
print('Data Splits:')
print('------------')
print(f"Train :   {train_list.sum()} samples")
print(f"Val   :   {val_list.sum()} samples")
print(f"Test  :   {test_list.sum()} samples")
print(f"Batch :   {batch_list.sum()} samples")

Data Splits:
------------
Train :   9 samples
Val   :   3 samples
Test  :   0 samples
Batch :   8 samples


## Create Split Datasets in S3

In [36]:
# source folders holding the original images and labels
s3_images_source = "s3://" + bucket + "/" + prefix_data + "/images/"
s3_labels_source = "s3://" + bucket + "/" + prefix_data + "/labels/"

# destination folder under which each data split is written
s3_split_dest = "s3://" + bucket + "/" + prefix_data + "/split/"

# report the locations
print('Images source directory location:', s3_images_source)
print('Labels source directory location:', s3_labels_source, '\n')
print('Split destination directory location:', s3_split_dest)

Images source directory location: s3://sagemaker-us-east-1-752648173624/safety/data/images/
Labels source directory location: s3://sagemaker-us-east-1-752648173624/safety/data/labels/ 

Split destination directory location: s3://sagemaker-us-east-1-752648173624/safety/data/split/


In [37]:
# define function to copy files for respective data splits to corresponding s3 destinations
# provide 'split_name' as either 'train', 'val', 'test', or 'batch'
# provide 'split_list' as either 'train_list', 'val_list', 'test_list', or 'batch_list'
def split_dataset(split_name, split_list):

    # iterate through each sample in split
    for index, sample in data[split_list].iterrows():

        # source/destination variables for individual sample
        cp_image_source = f"{s3_images_source}{sample['img_filename']}"
        cp_image_dest = f"{s3_split_dest}{split_name}/images/"
        cp_label_source = f"{s3_labels_source}{sample['label_filename']}"
        cp_label_dest = f"{s3_split_dest}{split_name}/labels/"

        # copy from source to destination
        !aws s3 cp $cp_image_source $cp_image_dest
        !aws s3 cp $cp_label_source $cp_label_dest

In [38]:
# perform data copies, one split at a time, in the order train -> val -> test -> batch
split_masks = [
    ('train', train_list),
    ('val', val_list),
    ('test', test_list),
    ('batch', batch_list),
]

for position, (split_name, split_mask) in enumerate(split_masks):
    split_label = split_name.upper()

    print(f'Beginning {split_label} data split copies.')
    split_dataset(split_name=split_name, split_list=split_mask)

    # a blank line separates consecutive splits; nothing extra after the last one
    separator = '\n' if position < len(split_masks) - 1 else ''
    print(f'Completed {split_label} data split copies.{separator}')

Beginning TRAIN data split copies.
copy: s3://sagemaker-us-east-1-752648173624/safety/data/images/000002.jpg to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/images/000002.jpg
copy: s3://sagemaker-us-east-1-752648173624/safety/data/labels/000002.txt to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/labels/000002.txt
copy: s3://sagemaker-us-east-1-752648173624/safety/data/images/000003.jpg to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/images/000003.jpg
copy: s3://sagemaker-us-east-1-752648173624/safety/data/labels/000003.txt to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/labels/000003.txt
copy: s3://sagemaker-us-east-1-752648173624/safety/data/images/000004.jpg to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/images/000004.jpg
copy: s3://sagemaker-us-east-1-752648173624/safety/data/labels/000004.txt to s3://sagemaker-us-east-1-752648173624/safety/data/split/train/labels/000004.txt
copy: s3://sagemaker-us

# Training Job and Model Creation

## Create and Run Training Job

In [39]:
%%time

# define job name and output location
job_name = 'yolov8-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
output_location = "s3://{}/{}/output/{}".format(bucket, prefix_data, job_name)

# build a PyTorch estimator
pytorch_estimator = PyTorch(
    role=role,
    entry_point='train.py', # custom training script, locate in code directory
    framework_version='2.0.1', # training - CPU - Python 3.10
    py_version='py310',
    source_dir='./code',
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=output_location,
    sagemaker_session=sess,
    hyperparameters = {'data': 'data.yaml', # yaml config file for custom dataset
                       'epochs': 5, # number of training epochs
                       'yolo_model': 'yolov8n.pt', # pretrained base model
                       'saved_model_name': 'benchmark_model.pt' # name for model export
                      }
)

# s3 location where training data is saved
inputs = s3_split_dest[:-1]

# begin training job
pytorch_estimator.fit(inputs=inputs, job_name=job_name, logs='All')

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: yolov8-2024-02-11-19-09-42


2024-02-11 19:09:43 Starting - Starting the training job...
2024-02-11 19:09:58 Starting - Preparing the instances for training......
2024-02-11 19:11:08 Downloading - Downloading input data...
2024-02-11 19:11:38 Downloading - Downloading the training image......
2024-02-11 19:12:23 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2024-02-11 19:12:30,423 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2024-02-11 19:12:30,424 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-02-11 19:12:30,424 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-02-11 19:12:30,434 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2024-02-11 19:12:30,436 sagem

## Batch Transform

In [None]:
# create a transform job...

In [43]:
%%time
from sagemaker.pytorch.model import PyTorchModel # PyTorch model

# define paths
model_data = f"{output_location}/{job_name}/output/model.tar.gz" # trained model artifacts
transformer_input = f"{s3_split_dest}batch/images" # batch directory for input data
transformer_output = f"{output_location}/{job_name}/output/transformer" # transformer job results

# create a PyTorch model
pytorch_model = PyTorchModel(
    model_data=model_data, # trained model artifacts
    role=role,
    entry_point='inference.py', # custom inference script, locate in code directory
    framework_version='2.1.0', # CPU - Python 3.10
    py_version='py310',
    source_dir='code',
    sagemaker_session=sess
)

# create a transformer from the PyTorch model
transformer = pytorch_model.transformer(
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=transformer_output,
    accept='application/json',
    max_payload=10
)

# begin batch transform job
transformer.transform(
    data=transformer_input,
    content_type='image/jpeg'
)

# wait for job to complete
transformer.wait()

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-752648173624/safety/data/output/yolov8-2024-02-11-19-09-42/yolov8-2024-02-11-19-09-42/output/model.tar.gz), script artifact (code), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-east-1-752648173624/pytorch-inference-2024-02-11-19-16-12-669/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: pytorch-inference-2024-02-11-19-16-19-361
INFO:sagemaker:Creating transform job with name: pytorch-inference-2024-02-11-19-16-20-093


..............................[34mCollecting ultralytics==8.1.9 (from -r /opt/ml/model/code/requirements.txt (line 1))
  Downloading ultralytics-8.1.9-py3-none-any.whl.metadata (40 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.2/40.2 kB 2.8 MB/s eta 0:00:00[0m
[34mCollecting py-cpuinfo (from ultralytics==8.1.9->-r /opt/ml/model/code/requirements.txt (line 1))
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)[0m
[34mCollecting thop>=0.1.1 (from ultralytics==8.1.9->-r /opt/ml/model/code/requirements.txt (line 1))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)[0m
[34mCollecting seaborn>=0.11.0 (from ultralytics==8.1.9->-r /opt/ml/model/code/requirements.txt (line 1))
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)[0m
[35mCollecting ultralytics==8.1.9 (from -r /opt/ml/model/code/requirements.txt (line 1))
  Downloading ultralytics-8.1.9-py3-none-any.whl.metadata (40 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.2/40.2 kB 2.8 MB

KeyboardInterrupt: 

In [None]:
# define s3 client
s3_client = boto3.client("s3", region_name=region)

# define prefix for transformer job output (the s3 key without the bucket part)
transformer_output_s3_prefix = transformer_output.replace(
    f"s3://{bucket}/", ""
)

# get job output files from bucket; a single list call returns only a limited
# number of keys, so walk every page of the listing
_listing_pages = s3_client.get_paginator("list_objects_v2").paginate(
    Bucket=bucket, Prefix=transformer_output_s3_prefix
)

# keep the {'Contents': [...]} shape of a single list response for the code that consumes it
objects_in_bucket = {
    'Contents': [obj for page in _listing_pages for obj in page.get('Contents', [])]
}

# define function to convert an s3 file to dataframe
def s3_file_to_df(key):
    """Download one S3 object from `bucket` and return its contents as a dataframe.

    The object body is decoded as UTF-8 and parsed as a JSON array. Each element
    of the array becomes one row; elements that are JSON-encoded strings are
    decoded first, elements that are already decoded objects are used as they are.

    Parameters
    ----------
    key : str
        S3 key of the object to read.

    Returns
    -------
    pandas.DataFrame
        One row per array element, with a leading 'sample_id' column holding the
        object's file name up to its first '.'.
    """
    # get single s3 file
    s3_file = s3_client.get_object(Bucket=bucket, Key=key)

    # decode and parse the body into a list of elements
    body = s3_file['Body'].read().decode('utf-8')
    elements = json.loads(body)

    # decode string elements, pass through anything that is already an object
    data_list = [
        json.loads(element) if isinstance(element, str) else element
        for element in elements
    ]

    # create dataframe
    df = pd.DataFrame(data_list)

    # insert column for sample id, obtained from s3 filename
    df.insert(0, 'sample_id', key.split("/")[-1].split(".")[0])
    return df

# create list to store dataframes for concatenation
df_list = []

# iterate through each object in s3 bucket
for obj in objects_in_bucket.get('Contents', []):

    # get filename
    s3_filename = obj['Key']

    # skip over any files without expected '.out' file extension
    if not s3_filename.endswith('.out'):
        continue

    # call function to create dataframe of current file contents
    df_current_file = s3_file_to_df(s3_filename)

    # insert column for timestamp
    df_current_file.insert(0, 'timestamp', obj['LastModified'])

    # append to dataframe list for concatenation
    df_list.append(df_current_file)

# stop with an actionable message when the listing contained no result files
if not df_list:
    raise ValueError(
        f"No '.out' files found under s3://{bucket}/{transformer_output_s3_prefix}"
    )

# concatenate dataframes to create end result
df_batch_transform = pd.concat(df_list, ignore_index=True)

# convert class id to int
df_batch_transform['class_id'] = df_batch_transform['class_id'].astype(int)

# display dataframe
df_batch_transform

# Example Business Problem Query

Query the dataframe to obtain the number of detections of each class we are interested in.

In [None]:
# count detections per date / class, leaving out class id 1 ('vest')
df_business_query = (
    df_batch_transform
    # date column derived from the timestamp
    .assign(date=df_batch_transform['timestamp'].dt.date)
    # one group per date and class, sized into a 'detections' column
    .groupby(['date', 'class_id', 'class_name'])
    .size()
    .reset_index(name='detections')
    # filter out class id of 1 for 'vest'
    .query('class_id != 1')
)

# display dataframe
df_business_query

# Monitors

In [None]:
# imports for the monitoring cells below, including the names those cells use
# (BatchTransformInput, MonitoringDatasetFormat, S3Uploader)
from sagemaker import get_execution_role, session, Session
from sagemaker.model_monitor import (
    BatchTransformInput,
    CronExpressionGenerator,
    DefaultModelMonitor,
    ModelQualityMonitor,
)
from sagemaker.model_monitor.dataset_format import DatasetFormat, MonitoringDatasetFormat
from sagemaker.s3 import S3Uploader

### Create Baseline
Create a baseline job that compares your model predictions with ground truth labels in a baseline dataset that you have stored in Amazon S3.

In [None]:
# images of the batch split, under the split destination
baseline_data_uri = s3_split_dest + "batch/images"

# transformer folder under the training job output
baseline_results_uri = "{}/{}/output/transformer".format(output_location, job_name)

print("Baseline data uri: " + baseline_data_uri)
print("Baseline results uri: " + baseline_results_uri)

In [None]:
# combination of validation and predicted data, uploaded to the baseline data location
# NOTE(review): `validate_dataset` is not defined in any cell visible in this notebook,
# and `S3Uploader` is not imported in any visible cell - confirm both before running
baseline_dataset_uri = S3Uploader.upload(f"test_data/{validate_dataset}", baseline_data_uri)
baseline_dataset_uri

In [None]:
# name passed as job_name to suggest_baseline() in the next cell (that cell assigns the same value again)
baseline_job_name = "MyBaseLineJob"

In [None]:
# execution role and a sagemaker session for the monitoring jobs
role = get_execution_role()
session = Session()

# model quality monitor: this is the object the baseline job is launched from,
# and the one the following cell reads `latest_baselining_job` from
model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=session
)

# Execute the baseline suggestion job.
baseline_job_name = "MyBaseLineJob"
job = model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri, # The S3 location of the validation dataset.
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri, # The S3 location to store the results.
    # model quality monitoring expects 'Regression', 'BinaryClassification'
    # or 'MulticlassClassification'; the predictions here are class names
    problem_type='MulticlassClassification',
    inference_attribute="class_name", # The column in the dataset that contains predictions.
    probability_attribute="confidence", # The column in the dataset that contains probabilities.
    # TODO: replace the placeholder with the column that contains ground truth labels
    ground_truth_attribute="??????"
)

# block until the baseline job has finished, without streaming its logs
job.wait(logs=False)

In [None]:
# keep a handle on the monitor's latest baselining job; the monitoring schedule cells
# below call baseline_job.suggested_constraints() to obtain the constraints it generated
baseline_job = model_quality_monitor.latest_baselining_job

### Implement model monitors on your ML system
Quality

In [None]:
# monitor object used to create the hourly model quality monitoring schedule
model_quality_model_monitor = ModelQualityMonitor(
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
)

# NOTE(review): `mon_schedule_name` and `gt_s3_uri` are not defined in any cell visible
# in this notebook, and `BatchTransformInput` / `MonitoringDatasetFormat` are not
# imported in any visible cell - confirm before running
# NOTE(review): problem_type "Classification" does not look like one of the values model
# quality monitoring accepts ('Regression', 'BinaryClassification',
# 'MulticlassClassification') - confirm against the SDK documentation
schedule = model_quality_model_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    batch_transform_input=BatchTransformInput(
        data_captured_destination_s3_uri=s3_capture_upload_path,
        destination="/opt/ml/processing/input", 
        dataset_format=MonitoringDatasetFormat.csv(header=False),
        # the column index of the output representing the inference probability
        probability_attribute="0",
        # the threshold to classify the inference probability to class 0 or 1 in
        # binary classification problem
        probability_threshold_attribute=0.5,
        # look back 6 hours for transform job outputs.
        start_time_offset="-PT6H",
        end_time_offset="-PT0H"
    ),
    ground_truth_input=gt_s3_uri,
    output_s3_uri=s3_report_path,
    problem_type="Classification",
    # constraints suggested by the baseline job
    constraints = baseline_job.suggested_constraints(),
    # run the monitor once per hour
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

### Implement data monitors on your ML system.
Quality

In [None]:
# monitor object used to create the hourly data quality monitoring schedule
data_quality_model_monitor = DefaultModelMonitor(
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
)

# NOTE(review): `mon_schedule_name` and `statistics_path` are not defined in any cell
# visible in this notebook, and `BatchTransformInput` / `MonitoringDatasetFormat` are
# not imported in any visible cell - confirm before running
# NOTE(review): this reuses the same `mon_schedule_name` as the model quality schedule
# and takes its constraints from `baseline_job`, which comes from the model quality
# monitor - confirm both are intended
schedule = data_quality_model_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    batch_transform_input=BatchTransformInput(
        data_captured_destination_s3_uri=s3_capture_upload_path,
        # TODO: placeholder value; the trailing comment suggests /opt/ml/processing/input
        destination="?????", #/opt/ml/processing/input
        dataset_format=MonitoringDatasetFormat.csv(header=False),
    ),
    output_s3_uri=s3_report_path,
    statistics= statistics_path,
    constraints = baseline_job.suggested_constraints(),
    # run the monitor once per hour
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

### Implement infrastructure monitors on your ML system.

### Create a monitoring dashboard for your ML endpoint/job on CloudWatch.

In [None]:
# CloudWatch client, built from a fresh boto3 session
cloudwatch_session = boto3.Session()
cw_client = cloudwatch_session.client("cloudwatch")

# metric namespace
namespace = "aws/sagemaker/Endpoints/model-metrics"

# metric dimensions (none selected yet)
cw_dimensions = []

In [None]:
# Create a CloudWatch alarm on the model quality "f2" metric
alarm_name = "MODEL_QUALITY_F2_SCORE"
alarm_desc = (
    "Trigger an CloudWatch alarm when the f2 score drifts away from the baseline constraints"
)
mdoel_quality_f2_drift_threshold = (
    0.625  ##Setting this threshold purposefully low to see the alarm quickly.
)
metric_name = "f2"
# TODO: placeholder value; note that this assignment replaces the `namespace`
# set in the previous cell ("aws/sagemaker/Endpoints/model-metrics")
namespace = "?????"

# the alarm fires when the 600-second average of the metric is at or below the
# threshold for one evaluation period; missing data is treated as breaching
# NOTE(review): Dimensions is empty, so no specific job/schedule is selected - confirm
cw_client.put_metric_alarm(
    AlarmName=alarm_name,
    AlarmDescription=alarm_desc,
    ActionsEnabled=True,
    MetricName=metric_name,
    Namespace=namespace,
    Statistic="Average",
    Dimensions=[

    ],
    Period=600,
    EvaluationPeriods=1,
    DatapointsToAlarm=1,
    Threshold=mdoel_quality_f2_drift_threshold,
    ComparisonOperator="LessThanOrEqualToThreshold",
    TreatMissingData="breaching",
)

### Generate model and data reports on SageMaker.

from the frontend