# CloudWatch

In [12]:
%%time
import boto3
import pandas as pd
from datetime import datetime, timedelta, timezone


import sagemaker
from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.huggingface.model import HuggingFaceModel

CPU times: user 4.73 ms, sys: 0 ns, total: 4.73 ms
Wall time: 8.43 ms


In [5]:
## S3 locations and container image used by the model-quality monitor
prefix = "sagemaker/AIEmotions-ModelQualityMonitor"
bucket_name = 'aai-540-final-data'
region_name = 'us-west-2'

# Destination for the endpoint's data capture (used when deploying the endpoint).
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket_name}/{data_capture_prefix}"

# Ground-truth data goes to a per-run folder named by a timestamp.
# The stamp is taken in UTC (timezone-aware) so it lines up with the model and
# endpoint names, which are also stamped in UTC, regardless of the kernel's
# local timezone. Note: re-running this cell produces a new folder name.
ground_truth_upload_path = (
    f"s3://{bucket_name}/{prefix}/ground_truth_data/{datetime.now(timezone.utc):%Y-%m-%d-%H-%M-%S}"
)

# Destination for monitoring reports.
reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket_name}/{reports_prefix}"

## Get the model monitor image for the configured region
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region_name)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Image URI: 159807026194.dkr.ecr.us-west-2.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://aai-540-final-data/sagemaker/AIEmotions-ModelQualityMonitor/datacapture
Ground truth path: s3://aai-540-final-data/sagemaker/AIEmotions-ModelQualityMonitor/ground_truth_data/2024-02-09-14-34-05
Report path: s3://aai-540-final-data/sagemaker/AIEmotions-ModelQualityMonitor/reports


In [13]:
# Retrieve the packaged model from S3 and wrap it in a Hugging Face model object
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()
s3 = boto3.client('s3')
model_dir = 'models/'
tar_file = 'model.tar.gz'

# Timestamped model name (UTC, minute resolution). datetime.utcnow() is
# deprecated since Python 3.12; datetime.now(timezone.utc) formats to the same string.
model_name = f"AIEmotion-base-model-monitor-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

model_data = f's3://{bucket_name}/{model_dir}{tar_file}'
tensorflow_version = '2.6.3'
transformers_version = '4.17.0'
py_version = 'py38'

# `name` and `sagemaker_session` are passed explicitly: previously both values
# were computed in this cell but never handed to the model, so the timestamped
# name was not applied and the session created above went unused.
huggingface_model = HuggingFaceModel(
    model_data=model_data,
    role=role,
    transformers_version=transformers_version,
    tensorflow_version=tensorflow_version,
    py_version=py_version,
    entry_point='inference.py',
    name=model_name,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Deploy the model to a real-time endpoint with data capture switched on.
# The endpoint name carries a UTC timestamp (minute resolution);
# datetime.utcnow() is deprecated since Python 3.12, and
# datetime.now(timezone.utc) formats to the same string.
endpoint_name = f"AIEmotion-base-model-quality-monitor-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"
print("EndpointName =", endpoint_name)

# Capture 100% of traffic and write it to the data-capture S3 location.
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)

# NOTE: every run of this cell creates a new endpoint on an ml.g4dn.xlarge instance.
huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.g4dn.xlarge',
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

In [None]:
from sagemaker.predictor import Predictor

# Attach a client to the deployed endpoint; requests are serialized as CSV.
# `sagemaker_session` must be the Session *instance* created in the model cell.
# The bare name `session` refers to the `sagemaker.session` module pulled in by
# the imports cell, which is not a usable session object.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=CSVSerializer(),
)

In [None]:
# S3 layout for the baseline: the predictions dataset goes under ".../data",
# and ".../results" is set aside for baselining results.
baseline_prefix = f"{prefix}/baselining"
baseline_data_prefix = f"{baseline_prefix}/data"
baseline_results_prefix = f"{baseline_prefix}/results"

# Full S3 URIs built from the bucket and the prefixes above.
baseline_data_uri = f"s3://{bucket_name}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket_name}/{baseline_results_prefix}"

print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

In [15]:
import io

# Download the pre-processed, tab-separated dataset from S3 and parse it in memory.
s3_path = 'data/pre_processed_data.tsv'
data_obj = s3.get_object(Bucket=bucket_name, Key=s3_path)
df = pd.read_csv(io.BytesIO(data_obj['Body'].read()), delimiter='\t')

# Preview the first rows only. A trailing `df.to_csv()` would serialize the
# entire frame into one string and dump it as the cell output.
# NOTE(review): the captured output shows an "Unnamed: 0" column, which looks
# like a saved index; consider `index_col=0` if it is not needed downstream.
df.head()

Unnamed: 0,text,emotions,id
0,"He isn't as big, but he's still quite popular....",0,eczuekb
1,that's adorable asf,0,ef961hv
2,"I have, and now that you mention it, I think t...",27,ed9w1hm
3,"I wanted to downvote this, but it's not your f...",27,ee52cjs
4,Build a wall? /jk,27,edsqvyx


In [None]:
# Upload the local baseline file to the baseline data location and show the resulting URI.
# NOTE(review): `validate_dataset` is not defined in any cell visible here, so
# this cell raises NameError on a fresh kernel unless it is set elsewhere;
# confirm where it comes from and that the file exists under test_data/.
local_baseline_file = f"test_data/{validate_dataset}"
baseline_dataset_uri = S3Uploader.upload(local_baseline_file, baseline_data_uri)
baseline_dataset_uri