Installing required libraries for stepfunctions

In [75]:
#  import sys
#  !{sys.executable} -m pip install --upgrade pip
#  !{sys.executable} -m pip install -qU awscli boto3 "sagemaker>=2.0.0"
#  !{sys.executable} -m pip install -qU "stepfunctions>=2.0.0"
#  !{sys.executable} -m pip show sagemaker stepfunctions

# 1. Setting up notebook with parameters and libraries

In [119]:
# Importing Python libraries.

# NOTE(review): the wildcard import is kept first on purpose. It supplies `Chain`
# (used when the workflow is built) and may bind other names as well; every
# explicit import below runs after it, so the explicit bindings always win.
# Prefer replacing it with explicit names once the set of used names is confirmed.
from stepfunctions.steps import *

# Standard library
import calendar
import logging
import time

# AWS SDKs
import boto3
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.network import NetworkConfig
from sagemaker.processing import Processor, ProcessingInput, ProcessingOutput

# AWS Step Functions Data Science SDK
import stepfunctions
from stepfunctions import steps
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow

# Show the SDK's log messages at INFO level in the notebook output.
stepfunctions.set_stream_logger(level=logging.INFO)



### Defining parameters

These values need to be changed when the notebook is moved to a different environment.

In [120]:
# --- IAM roles ---
# Step Functions IAM role ARN.
v_workflow_execution_role = "arn:aws:iam::014257795134:role/ds-mlops-stepfunction-role"
# IAM role for the preprocessing container.
v_preprocessing_iam_role = "arn:aws:iam::014257795134:role/ds-mlops-sagemaker-role"

# --- Compute ---
# Instance type for the preprocessing container; adjust it to the workload.
v_preprocessing_instance_type = "ml.m5.xlarge"

# --- Data locations ---
v_s3_input_bucket = "ds-mlops-s3"  # S3 bucket for input and output data
v_prefix_for_input_data = "data/input/train_baseline.csv"  # key under which the data is stored
config_bucket = "ds-mlops-s3"  # bucket used for the processing output destination

# --- Region and networking ---
v_region = "us-east-1"  # AWS region
sec_groups = ["sg-01d629a900f9b4d92"]
subnets = [
    "subnet-07bd1dfe6aee76227",
    "subnet-076950ecc89d4340b",
    "subnet-0c5a462cb45a14bab",
]


## 2. Defining preprocessing jobs

In [121]:
# Environment settings passed to the baseline job's container.
environment = dict(
    # JSON text describing the input dataset format (CSV with a header row).
    dataset_format='{"csv":{"header": true,"output_columns_position": "START"}}',
    # Container-local path of the dataset and of the job output.
    dataset_source="/opt/ml/processing/input/baseline_dataset_input",
    output_path="/opt/ml/processing/output",
    publish_cloudwatch_metrics="Disabled",
)


In [122]:
# Create the baseline processor.
# The analyzer image URI is looked up for v_region instead of being hard-coded
# for us-east-1, so changing the region parameter is enough when the notebook
# is moved to another environment.
baseline_processor = Processor(
    image_uri=sagemaker.image_uris.retrieve(framework="model-monitor", region=v_region),
    role=v_preprocessing_iam_role,
    instance_count=1,
    instance_type=v_preprocessing_instance_type,
    # Run the job inside the configured security groups and subnets.
    network_config=NetworkConfig(security_group_ids=sec_groups, subnets=subnets),
    env=environment,
)

In [123]:
# S3 URI of the baseline dataset, built from the bucket and key parameters.
input_data = f"s3://{v_s3_input_bucket}/{v_prefix_for_input_data}"

# Single input: the dataset is copied to the container path given as destination.
inputs = [
    ProcessingInput(
        input_name="input_data",
        source=input_data,
        destination="/opt/ml/processing/input/baseline_dataset_input",
    ),
]

# Single output: the container's output directory is uploaded under customonitor/.
outputs = [
    ProcessingOutput(
        output_name="tr_data",
        source="/opt/ml/processing/output",
        destination=f"s3://{config_bucket}/customonitor/",
    ),
]

# Job name suffixed with the current UTC time in epoch seconds.
gmt = time.gmtime()
ts = calendar.timegm(gmt)
baseline_name = f"baseline-{ts}"



In [124]:
# Show the S3 prefix under the config bucket (printed without a trailing slash).
print(f"s3://{config_bucket}/customonitor")


s3://ds-mlops-s3/customonitor


In [125]:
# Show the S3 URI of the baseline input dataset.
print(input_data)

s3://ds-mlops-s3/data/input/train_baseline.csv


In [126]:
# Workflow state "Baseline": runs the baseline processor with the inputs and
# outputs defined above and waits for the job to complete.
# NOTE(review): job_name is computed once when the earlier cell runs, so the same
# name is stored in the workflow definition — confirm whether repeated
# executions of the workflow need a fresh job name.
baseline_preprocessing_step = steps.ProcessingStep(
    state_id="Baseline",
    job_name=baseline_name,
    processor=baseline_processor,
    inputs=inputs,
    outputs=outputs,
    wait_for_completion=True,
    experiment_config=None,
)

## 3. Step Function

In [127]:
# Chain the workflow states; this chain contains only the baseline preprocessing step.
basic_path=Chain([baseline_preprocessing_step])

In [128]:
# Define the workflow from the chained states and the execution role.
basic_workflow = Workflow(
    name="ds-mlops-baseline-job-v10",
    role=v_workflow_execution_role,
    definition=basic_path,
)

# Render the workflow graph as this cell's output.
basic_workflow.render_graph()

## 3.1 Create the workflow on AWS Step Functions

Create the workflow in AWS Step Functions with [create](https://aws-step-functions-data-science-sdk.readthedocs.io/en/latest/workflow.html#stepfunctions.workflow.Workflow.create).

In [129]:
# Create the workflow on AWS Step Functions; the cell output shows the state machine ARN.
basic_workflow.create()

[32m[INFO] Workflow created successfully on AWS Step Functions.[0m


'arn:aws:states:us-east-1:014257795134:stateMachine:ds-mlops-baseline-job-v10'

In [130]:
# Push the workflow's current definition and role to the existing state machine.
basic_workflow.update(
    role=basic_workflow.role,
    definition=basic_workflow.definition,
)

[32m[INFO] Workflow updated successfully on AWS Step Functions. All execute() calls will use the updated definition and role within a few seconds. [0m


'arn:aws:states:us-east-1:014257795134:stateMachine:ds-mlops-baseline-job-v10'

In [131]:
# Start an execution of the workflow with an empty input document.
basic_workflow_execution = basic_workflow.execute(inputs={})

[32m[INFO] Workflow execution started successfully on AWS Step Functions.[0m


## 3.2 Review the execution progress

Render workflow progress with the [render_progress](https://aws-step-functions-data-science-sdk.readthedocs.io/en/latest/workflow.html#stepfunctions.workflow.Execution.render_progress).

This generates a snapshot of the current state of your workflow as it executes. This is a static image. Run the cell again to check progress. 

In [132]:
# Render a static snapshot of the execution's current state; re-run the cell to refresh it.
basic_workflow_execution.render_progress()

## 4 Downloading generated report on notebook

In [73]:
# Download the constraints file for evaluation.
# NOTE(review): this reads from the monitoring/ prefix, while the workflow's
# ProcessingOutput destination is customonitor/ — confirm which prefix is intended.
!aws s3 cp s3://$config_bucket/monitoring/constraints.json .

download: s3://ds-mlops-s3/monitoring/constraints.json to ./constraints.json


In [74]:
# Download the statistics file for evaluation.
# NOTE(review): the recorded run of this cell failed with a 404 because
# monitoring/statistics.json did not exist; the workflow's ProcessingOutput
# destination is customonitor/ — confirm which prefix is intended.
!aws s3 cp s3://$config_bucket/monitoring/statistics.json .

fatal error: An error occurred (404) when calling the HeadObject operation: Key "monitoring/statistics.json" does not exist


We can analyze these files and commit them to GitHub.

**Note:**
For the monitoring-schedule Lambda function to refer to the modified location, the baseline statistics location must be changed from the `monitoring/` prefix to the `custom_monitoring/` prefix.


In [99]:
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

# Derive the S3 locations and the instance type from the notebook parameters
# instead of repeating their literal values, so a change of environment only
# has to be made in the parameters cell.
input_s3_path = "s3://{}/{}".format(v_s3_input_bucket, v_prefix_for_input_data)
output_s3_path = "s3://{}/monitoring/".format(config_bucket)

my_default_monitor = DefaultModelMonitor(
    role=v_preprocessing_iam_role,
    instance_count=1,
    instance_type=v_preprocessing_instance_type,
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

# Run the baseline suggestion job on the CSV dataset (with header row) and
# block until it finishes; the returned job object is this cell's output.
my_default_monitor.suggest_baseline(
    baseline_dataset=input_s3_path,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=output_s3_path,
    wait=True,
)


Job Name:  baseline-suggestion-job-2021-08-06-17-00-40-570
Inputs:  [{'InputName': 'baseline_dataset_input', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://ds-mlops-s3/data/input/train_baseline.csv', 'LocalPath': '/opt/ml/processing/input/baseline_dataset_input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'monitoring_output', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://vw-cred-dsna-dev-s3-mlops-config/monitoring/', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]
...........................[34m2021-08-06 17:04:54,784 - __main__ - INFO - All params:{'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:014257795134:processing-job/baseline-suggestion-job-2021-08-06-17-00-40-570', 'ProcessingJobName': 'baseline-suggestion-job-2021-08-06-17-00-40-570', 'Environment': {'dataset_format': '{"csv": {"header": true, "output_columns_position": "START"}}', 'dataset

<sagemaker.processing.ProcessingJob at 0x7fa6b6ac7eb8>

## Clean-up steps

https://docs.aws.amazon.com/sagemaker/latest/dg/ex1-cleanup.html

In [None]:
# # Clean up end point
# client = boto3.client("sagemaker", region_name=region)
# response=client.delete_endpoint(EndpointName=endpoint_name)