# Now let's monitor the training/deploying process

In [None]:
!pip install tqdm

In [None]:
import boto3
import json
import ipywidgets as widgets
import time

from IPython.display import display

## Helper functions

In [None]:
def get_actions():
    """Flatten the current pipeline state into one record per action.

    The first stage that reports a ``latestExecution`` fixes the execution id
    being tracked. Any later stage whose latest execution carries a different
    id gets the stage status ``'Old'``. An action is reported as ``'Old'`` when
    it has no ``latestExecution`` or when its stage is ``'Old'``; otherwise its
    own latest status is used.

    Returns:
        list of dicts with the keys ``stageName``, ``stageStatus``,
        ``actionName`` and ``actionStatus``, in pipeline order.
    """
    pipeline_state = codepipeline.get_pipeline_state(name=pipeline_name)
    tracked_execution_id = None
    records = []

    for stage in pipeline_state['stageStates']:
        stage_name = stage['stageName']
        stage_execution = stage.get('latestExecution')
        stage_status = None

        if stage_execution is not None:
            stage_status = stage_execution['status']
            stage_execution_id = stage_execution['pipelineExecutionId']
            if tracked_execution_id is None:
                # First stage with an execution defines the run being tracked.
                tracked_execution_id = stage_execution_id
            elif stage_execution_id != tracked_execution_id:
                # This stage still shows a different execution.
                stage_status = 'Old'

        stage_is_current = stage_status != 'Old'
        for action in stage['actionStates']:
            action_execution = action.get('latestExecution')
            if action_execution is not None and stage_is_current:
                action_status = action_execution['status']
            else:
                action_status = 'Old'
            records.append({
                'stageName': stage_name,
                'stageStatus': stage_status,
                'actionName': action['actionName'],
                'actionStatus': action_status,
            })

    return records

In [None]:
def get_approval_token():
    """Return the token of the pending manual approval, if there is one.

    Looks at the ``ApproveDeploy`` action of the ``DeployDev`` stage. The token
    is returned only while that action's latest execution is ``InProgress``.

    Returns:
        The approval token, or ``None`` when the action has not run yet or is
        not currently in progress.
    """
    pipeline_state = codepipeline.get_pipeline_state(name=pipeline_name)
    pending_token = None

    for stage in pipeline_state['stageStates']:
        if stage['stageName'] != 'DeployDev':
            continue
        for action in stage['actionStates']:
            if action['actionName'] != 'ApproveDeploy':
                continue
            execution = action.get('latestExecution')
            if execution is None:
                # The approval action has never been executed.
                return None
            if execution['status'] == 'InProgress':
                pending_token = execution['token']

    return pending_token

In [None]:
def approval(token, result):
    """Submit an approval decision for the ``DeployDev``/``ApproveDeploy`` action.

    Args:
        token: approval token as returned by ``get_approval_token()``; when it
            is ``None`` nothing is sent.
        result: dict with the ``summary`` and ``status`` of the decision.
    """
    if token is not None:
        request = {
            'pipelineName': pipeline_name,
            'stageName': 'DeployDev',
            'actionName': 'ApproveDeploy',
            'result': result,
            'token': token,
        }
        codepipeline.put_approval_result(**request)

In [None]:
def approve(b):
    """Click handler of the "Approve" button.

    Sends an ``Approved`` decision for the pending approval, closes the button
    box and resumes monitoring. ``b`` is the clicked button and is not used.
    """
    decision = {
        'summary': 'This is a great model! Put into production.',
        'status': 'Approved',
    }
    pending_token = get_approval_token()
    approval(pending_token, decision)
    button_box.close()
    start_monitoring()

In [None]:
def reject(b):
    """Click handler of the "Reject" button.

    Sends a ``Rejected`` decision for the pending approval, closes the button
    box and resumes monitoring. ``b`` is the clicked button and is not used.
    """
    decision = {
        'summary': 'This is a rubbish model. Discard it',
        'status': 'Rejected',
    }
    pending_token = get_approval_token()
    approval(pending_token, decision)
    button_box.close()
    start_monitoring()

In [None]:
def start_monitoring():
    """Poll the pipeline and mirror its progress in the ``label``/``bar`` widgets.

    Every pass walks the list returned by ``get_actions()`` in order:

    * ``Failed``     -> mark the bar as ``danger``, report the action and stop.
    * ``InProgress`` -> stop scanning; when ``get_approval_token()`` returns a
      token, show ``button_box`` and stop polling so the user can decide.
    * ``Old``        -> stop scanning for this pass.
    * ``Succeeded``  -> count the action as done.

    Polling ends once ``max_actions`` actions have succeeded; otherwise the
    pipeline is queried again after two seconds.

    Relies on the notebook globals ``bar``, ``label``, ``button_box`` and
    ``max_actions``.
    """
    while True:
        steps_ok = 0
        position = 0            # 1-based index of the last action inspected
        current = None          # last action inspected, None if there was none
        awaiting_approval = False

        for position, action in enumerate(get_actions(), start=1):
            current = action
            status = action['actionStatus']
            if status == 'Failed':
                bar.bar_style = 'danger'
                label.value = 'Ops! Something went wrong Stage[{}] Action[{}]'.format(
                    action['stageName'], action['actionName'])
                return
            if status == 'InProgress':
                if get_approval_token() is not None:
                    display(button_box)
                    awaiting_approval = True
                break
            if status == 'Old':
                break
            if status == 'Succeeded':
                steps_ok += 1

        # An empty action list leaves nothing to report; keep the previous label
        # instead of failing on unbound loop variables.
        if current is not None:
            label.value = "Actions {}/{} - Current: Stage[{}] Action[{}]".format(
                position, max_actions, current['stageName'], current['actionName'])
        bar.value = steps_ok

        # Stop right away when finished or when the user has to decide; the
        # approve/reject handlers restart the monitoring themselves.
        if awaiting_approval or steps_ok == max_actions:
            return
        time.sleep(2)

## Job monitoring

In [None]:
import os

# CodePipeline client shared by the helper functions defined above.
codepipeline = boto3.client('codepipeline')
# Both names are read from the environment; a missing variable raises KeyError.
pipeline_name = os.environ['PIPELINE_NAME']
model_name = os.environ['MODEL_NAME']

# Echo the configuration so the reader can confirm what is being monitored.
print('pipeline: {}'.format(pipeline_name))
print('model name: {}'.format(model_name))

In [None]:
# Approve / Reject buttons; they are only displayed by start_monitoring() once
# an approval token is available.
approve_btn = widgets.Button(description="Approve", button_style='success', icon='check')
reject_btn = widgets.Button(description="Reject", button_style='danger', icon='close')
approve_btn.on_click(approve)
reject_btn.on_click(reject)
button_box = widgets.HBox([approve_btn, reject_btn])

# The progress bar is sized by the number of actions currently in the pipeline.
max_actions = len(get_actions())
label = widgets.Label(value="Loading...")
bar = widgets.IntProgress( value=0, min=0, max=max_actions, step=1, bar_style='info' )
info_box = widgets.VBox([label, bar])

# Show the status widgets, then poll the pipeline (this call loops until it
# finishes, fails or needs an approval).
display(info_box)
start_monitoring()

## Now, if everything went fine, we can test our models

In [None]:
# Get the current execution id and derive the names of the production resources.
# The id is taken from the latest execution of the LAST stage of the pipeline.
# NOTE(review): this raises KeyError if the last stage has no 'latestExecution' yet.
response = codepipeline.get_pipeline_state( name=pipeline_name )
executionId = response['stageStates'][-1]['latestExecution']['pipelineExecutionId']

# Resource names combine the model name and the execution id.
endpoint_name='mlops-{}-prd-{}'.format(model_name, executionId)
processing_job_name='mlops-{}-pbl-{}'.format(model_name, executionId)
schedule_name='mlops-{}-pms-{}'.format(model_name, executionId)

print('execution id: {}'.format(executionId))

In [None]:
# Optional override: uncomment the next line and set an execution id to inspect
# a different pipeline execution; the names below are then rebuilt from it.
#executionId = '46d4a3c0-517c-4e77-a2b7-3f84cb6e4738'
endpoint_name='mlops-{}-prd-{}'.format(model_name, executionId)
processing_job_name='mlops-{}-pbl-{}'.format(model_name, executionId)
schedule_name='mlops-{}-pms-{}'.format(model_name, executionId)

Call the endpoint with some expected and some unexpected data.

In [None]:
# SageMaker runtime client used by test_endpoint() to invoke the endpoint.
sm_runtime = boto3.client('sagemaker-runtime')

def test_endpoint(endpoint_name, payload, content_type='text/csv', custom_attributes=''):
    """Invoke a SageMaker endpoint and return the raw response body.

    Args:
        endpoint_name: name of the endpoint to call.
        payload: request body passed through as ``Body``.
        content_type: value sent as ``ContentType`` (defaults to ``text/csv``).
        custom_attributes: value sent as ``CustomAttributes``.

    Returns:
        Whatever ``read()`` on the response ``Body`` yields.
    """
    request = {
        'EndpointName': endpoint_name,
        'Body': payload,
        'ContentType': content_type,
        'CustomAttributes': custom_attributes,
    }
    response = sm_runtime.invoke_endpoint(**request)
    body_stream = response['Body']
    return body_stream.read()

In [None]:
# Validate that we can send some traffic to the endpoint: a CSV payload with a
# `text` header line followed by a single sample row.
test_endpoint(endpoint_name, 'text\nthis is a test'.encode('utf-8'))

In [None]:
import pandas as pd

# Load sample data
monitor_sample = [
# Define some typical data
    'cool asian food pty melbourne au', # eating out
    'woolworths 3188 brunswick au', # groceries
    'airbnb * blaa surry hills au', # travel
    'lido cinemas hawthorn au', # entertainment
    'northcote indoor spo thornbury au', # health
# Define some data which is out of bounds of the usual sample
    'one',
    '1',
    'fdkslfjkdlsjfkdsfkldjsklfmdskfjkdlsjfkldsjfkldsjfkldsjklfjsdkjfklds',
    'this is a very long sentance that should skew the character and word count',
]

# Send off a series of individual requests for each sample: 100 rounds over the
# whole list. The decoded response is discarded; only the traffic matters here.
from tqdm import tqdm
for i in tqdm(range(100)):
    for sample in monitor_sample:
        # Same CSV shape as the smoke test: `text` header line, then the sample.
        payload = 'text\n{}'.format(sample).encode('utf-8')
        test_endpoint(endpoint_name, payload).decode('utf-8')

## Load baseline

Load baseline processing job

In [None]:
import boto3
import sagemaker
from sagemaker.model_monitor import BaseliningJob
from sagemaker.model_monitor import MonitoringExecution
from sagemaker.s3 import S3Downloader
import pandas as pd

# Low-level clients used by the cells below (S3 listing, monitoring executions).
s3 = boto3.client('s3')
sm = boto3.client('sagemaker')

# SageMaker SDK session shared by the baselining / monitoring helpers.
sagemaker_session = sagemaker.Session()

In [None]:
# Attach to the baselining processing job that belongs to this execution.
baseline_job = BaseliningJob.from_processing_name(sagemaker_session, processing_job_name)
status = baseline_job.describe()['ProcessingJobStatus']
# A processing job that finished successfully reports 'Completed'; 'Stopped'
# means it was halted before finishing, so it must not be treated as done.
if status != 'Completed':
    raise Exception('Processing job not complete, status: {}'.format(status))

# The first output of the job holds the generated statistics and constraints.
baseline_results_uri = baseline_job.outputs[0].destination
print('baseline results uri: {}'.format(baseline_results_uri))

### Explore the generated constraints and statistics

In [None]:
# Flatten the per-feature baseline statistics into a table.
# pd.json_normalize is the supported entry point (pandas >= 1.0); the old
# pd.io.json.json_normalize path is deprecated.
schema_df = pd.json_normalize(baseline_job.baseline_statistics().body_dict["features"])
schema_df

In [None]:
# Flatten the per-feature suggested constraints into a table.
# pd.json_normalize is the supported entry point (pandas >= 1.0); the old
# pd.io.json.json_normalize path is deprecated.
constraints_df = pd.json_normalize(baseline_job.suggested_constraints().body_dict["features"])
constraints_df

### View Data Capture

In [None]:
# Captured requests are looked up in the session's default bucket under
# <model_name>/datacapture/<endpoint_name>/AllTraffic/.
bucket = sagemaker_session.default_bucket()
data_capture_prefix = '{}/datacapture/{}/AllTraffic/'.format(model_name, endpoint_name)
print('data capture prefix: {}'.format(data_capture_prefix))

In [None]:
# Get capture files for this new endpoint
result = s3.list_objects(Bucket=bucket, Prefix=data_capture_prefix)
if 'Contents' not in result:
    # Report the prefix that was actually searched (the previous message used
    # an undefined name and failed with NameError instead).
    raise Exception('No results available yet for location: {}'.format(data_capture_prefix))

# Build full S3 URIs and reverse the listing order so the last keys come first.
capture_files = ['s3://{0}/{1}'.format(bucket, capture_file['Key'])
                 for capture_file in result['Contents']][::-1]
print("Captured Files: {}, top 3:".format(len(capture_files)))
print("\n ".join(capture_files[:3]))

In [None]:
# Download one capture file for local inspection.
# NOTE(review): index 1 is the second entry of capture_files, so at least two
# capture files must exist — confirm that [0] was not intended.
!mkdir -p output/datacapture
!aws s3 cp {capture_files[1]} output/datacapture/captured_data_example.jsonl

In [None]:
import json

# Parse and print the first line of the downloaded capture file as JSON.
with open('output/datacapture/captured_data_example.jsonl', 'r') as f:
    lines = f.read().split('\n')
    event = json.loads(lines[0])
    print(event)

### Monitoring Schedule

The functions for plotting and rendering distribution statistics or constraint violations are implemented in a `utils` file so let's grab that.

In [None]:
# Download the visualization helpers into the working directory and import them as `mu`.
# NOTE(review): re-running this cell downloads the file again — confirm the import
# still picks up the intended copy.
!wget https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker_model_monitor/visualization/utils.py
import utils as mu

Load the most recent successful execution of the monitoring schedule

In [None]:
# List the executions of the monitoring schedule and keep the ones that ran to
# completion. A finished execution reports 'Completed' or
# 'CompletedWithViolations'; 'Stopped' means it was halted before finishing.
response = sm.list_monitoring_executions(MonitoringScheduleName=schedule_name)
completed_statuses = ('Completed', 'CompletedWithViolations')
schedules = [m for m in response['MonitoringExecutionSummaries']
             if m['MonitoringExecutionStatus'] in completed_statuses]
if not schedules:
    raise Exception('No completed schedules')

# Use the first matching summary in the order returned by the API.
schedule = schedules[0]
print('Schedule status: {}'.format(schedule['MonitoringExecutionStatus']))

Inspect the underlying processing job

In [167]:
from sagemaker.processing import ProcessingJob

# Resolve the ARN from the selected execution summary here, so this cell does
# not depend on a variable that is only assigned by a later cell.
processing_job_arn = schedule['ProcessingJobArn']
schedule_processing_job = ProcessingJob.from_processing_arn(sagemaker_session, processing_job_arn)
schedule_processing_job.describe()

{'ProcessingInputs': [{'InputName': 'input_1',
   'S3Input': {'S3Uri': 's3://sagemaker-ap-southeast-2-691313291965/text-multiclass/datacapture/mlops-text-multiclass-prd-b4e2720b-3d39-41c3-8fb3-52e05fcfbbe4/AllTraffic/2020/03/03/11',
    'LocalPath': '/opt/ml/processing/endpointdata/mlops-text-multiclass-prd-b4e2720b-3d39-41c3-8fb3-52e05fcfbbe4/AllTraffic/2020/03/03/11',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}},
  {'InputName': 'baseline',
   'S3Input': {'S3Uri': 's3://sagemaker-ap-southeast-2-691313291965/text-multiclass/baselining/results/statistics.json',
    'LocalPath': '/opt/ml/processing/baseline/stats',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated'}},
  {'InputName': 'constraints',
   'S3Input': {'S3Uri': 's3://sagemaker-ap-southeast-2-691313291965/text-multiclass/baselining/results/constraints.json',
    'LocalPath': '/opt

Load monitoring execution and list the results

In [168]:
# Attach to the processing job behind the selected monitoring execution,
# reusing the notebook's existing SageMaker session instead of creating a new one.
processing_job_arn = schedule['ProcessingJobArn']
execution = MonitoringExecution.from_processing_arn(
    sagemaker_session=sagemaker_session,
    processing_job_arn=processing_job_arn,
)
# Index the job inputs by name and remember where the results were written.
exec_inputs = {inp['InputName']: inp for inp in execution.describe()['ProcessingInputs']}
exec_results = execution.output.destination

In [169]:
# List what is stored at the execution's output location.
!aws s3 ls $exec_results

## Overview

The code below shows the violations and constraint checks across all features in a simple table.

In [None]:
# Define every name that this cell and the distribution cells below use;
# `baseline_statistics` and `execution_statistics` were previously never assigned.
# NOTE(review): the `utils` helpers are expected to take the parsed JSON
# documents (dict / list), hence `body_dict` — confirm against the downloaded utils.py.
baseline_statistics = baseline_job.baseline_statistics().body_dict
execution_statistics = execution.statistics().body_dict
violations = execution.constraint_violations().body_dict['violations']
latest_statistics = execution_statistics  # original name kept as an alias

mu.show_violation_df(baseline_statistics=baseline_statistics, latest_statistics=execution_statistics, violations=violations)

## Distributions

This section visualizes the distributions and renders the distribution statistics for all features.

In [None]:
# Extract the features from the execution statistics and from the baseline.
# Both `execution_statistics` and `baseline_statistics` must already be defined
# by an earlier cell.
features = mu.get_features(execution_statistics)
feature_baselines = mu.get_features(baseline_statistics)

In [None]:
# Render the distributions of the features observed in this execution.
mu.show_distributions(features)

### Execution Stats vs Baseline

In [None]:
# Same view, with the baseline features passed in for comparison.
mu.show_distributions(features, feature_baselines)