# Demo: Clarify and dense format JSONLines

In [6]:
# Download the "bring your own container" (BYOC) archive into the working directory
# (-O saves it under the remote file name, -L follows redirects).
!curl -OL https://w.amazon.com/bin/download/Deep-engine/Thundera/Onboarding/MinimumBringYourOwnContainer/WebHome/byoc.tar.gz

    
# Print the downloaded file verbatim to inspect what the server actually returned.
# NOTE(review): the recorded output of this cell is an "Unauthenticated" JSON error
# rather than an archive — confirm authentication before relying on this file.
!cat byoc.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   165  100   165    0     0   2323      0 --:--:-- --:--:-- --:--:--  2323
100   298    0   298    0     0   2422      0 --:--:-- --:--:-- --:--:--  2422
{"status":"error","message":"Unauthenticated","desc":"You should authenticate (may use mwinit)","step_up_methods":[{"cap_name":"Midway Authentication","cap_guid":"com.amazon.aea.midway.LDAPSuccess","cap_url":"https://midway-auth.amazon.com/login","cap_display_string":"Please login with Midway."}]}

In [1]:
# All imports for the setup step, grouped: standard library, third-party, SageMaker.
import os
import urllib

import numpy as np
import pandas as pd
from sagemaker import Session, get_execution_role
from sagemaker.s3 import S3Uploader

# SageMaker session plus the S3 bucket/prefix shared by the uploads and reports below.
session = Session()
bucket = session.default_bucket()
prefix = 'sagemaker/DEMO-sagemaker-clarify'
region = session.boto_region_name

# IAM role passed to the SageMaker estimator, model and Clarify processor.
role = get_execution_role()

# Model

Please make sure that you have the [simple-star-rating image](https://w.amazon.com/bin/view/Deep-engine/Thundera/Onboarding/MinimumBringYourOwnContainer/#HBYOC) built & pushed to the same region/account that runs this notebook.

Train a dummy model:

In [None]:
import sagemaker

# Execution role and session used for the training job.
role = sagemaker.get_execution_role()
session = sagemaker.Session()

# Build the ECR image URI from the caller's account ID and the session's region.
boto_session = session.boto_session
account = boto_session.client("sts").get_caller_identity()["Account"]
region = boto_session.region_name
image = f"{account}.dkr.ecr.{region}.amazonaws.com/simple-star-rating:latest"

# Single-instance estimator backed by the custom container image.
estimator = sagemaker.estimator.Estimator(
    sagemaker_session=session,
    role=role,
    image_uri=image,
    instance_type="ml.c4.xlarge",
    instance_count=1,
)

# No input channels are passed to the training job.
estimator.fit()

Create SageMaker model

In [None]:
# Name under which the model is registered; the Clarify ModelConfig refers to it later.
model_name = 'simple-start-rating-model'

# Derive the container definition from the trained estimator...
model = estimator.create_model(name=model_name)
container_def = model.prepare_container_def()

# ...and register the model with SageMaker through the session.
session.create_model(
    name=model_name,
    role=role,
    container_defs=container_def,
)

## Bias analysis

In [54]:
from sagemaker import clarify

# One processor instance is reused for both the bias and the explainability job.
clarify_processor = clarify.SageMakerClarifyProcessor(
    sagemaker_session=session,
    role=role,
    instance_type='ml.c4.xlarge',
    instance_count=1,
)

This is a workaround. TODO: Remove this cell and the next cell once [the JSONLines fix](https://sim.amazon.com/issues/P43801935) is released (tracking SIM: https://sim.amazon.com/issues/P43584479)

In [55]:
# Workaround: override the processor's container image with the patched build hosted in
# the Thundera devo account. It is the initial launch image plus the JSONLines bug fix.
clarify_processor.image_uri = "678264136642.dkr.ecr.us-east-1.amazonaws.com/sagemaker-clarify-processing:1.0_jsonlines_patch"

# Newer image that includes the JSONLines bug fix, to be released soon; the line below is
# kept commented out until then.
#clarify_processor.image_uri = "678264136642.dkr.ecr.us-east-1.amazonaws.com/sagemaker-clarify-processing:f988900c"

In [67]:
# Stage the local bias dataset under the demo prefix; the bare last line shows the S3 URI.
bias_dataset_uri = S3Uploader.upload(
    local_path='test_data_for_bias_analysis.jsonl',
    desired_s3_uri='s3://{}/{}'.format(bucket, prefix),
)
bias_dataset_uri

's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/test_data_for_bias_analysis.jsonl'

NOTE: The order of headers is important
* The header of label must be the last one
* The order of feature headers should be the same as the order of features

In [57]:
# S3 location that receives the bias report.
bias_report_output_path = 's3://{}/{}/clarify-bias'.format(bucket, prefix)

# JSON Lines dataset description. Header order matters: feature headers in feature
# order, with the label header last.
bias_data_config = clarify.DataConfig(
    s3_data_input_path=bias_dataset_uri,
    s3_output_path=bias_report_output_path,
    dataset_type='application/jsonlines',
    headers=['review_body', 'product_category', 'star_rating'],
    features='features',
    label='star_rating',
)

# How the model is invoked: JSON Lines in and out, with each request body produced
# from the content template.
model_config = clarify.ModelConfig(
    model_name=model_name,
    instance_count=1,
    instance_type='ml.c5.xlarge',
    content_type='application/jsonlines',
    accept_type='application/jsonlines',
    content_template='{"features":$features}',
)

# Presumably the key of the predicted label in the model's response — confirm against
# the container's output format.
predictions_config = clarify.ModelPredictedLabelConfig(label="predicted_label")

In [58]:
# Bias is analysed on the product_category facet, with Digital_Software as the facet
# value and 3 as the label value/threshold.
bias_config = clarify.BiasConfig(
    facet_name='product_category',
    facet_values_or_threshold=['Digital_Software'],
    label_values_or_threshold=[3],
)

In [59]:
# Launch the bias job, requesting every pre-training and post-training method.
clarify_processor.run_bias(
    data_config=bias_data_config,
    bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predictions_config,
    pre_training_methods='all',
    post_training_methods='all',
)


Job Name:  Clarify-Bias-2021-01-27-11-32-23-573
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/test_data_for_bias_analysis.jsonl', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/Clarify-Bias-2021-01-27-11-32-23-573/input/analysis_config/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/clarify-bias', 'LocalPath': '/opt/ml/processing/output', 'S3Upload

## Explainability analysis

In [68]:
# Stage the local explainability dataset under the demo prefix; the bare last line shows
# the S3 URI.
explainability_dataset_uri = S3Uploader.upload(
    local_path='test_data_for_explainability_analysis.jsonl',
    desired_s3_uri='s3://{}/{}'.format(bucket, prefix),
)
explainability_dataset_uri

's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/test_data_for_explainability_analysis.jsonl'

NOTE: The format of the baseline samples should be the same as that of the dataset samples. By definition, the baseline should be either an S3 URI to a data file that includes a list of samples, or an in-place list of samples. In this case we chose the latter and put only a single sample in the list.

In [61]:
# In-place baseline: a list holding a single sample, in the same format as the dataset rows.
shap_config = clarify.SHAPConfig(
    baseline=[{"features": ["ok", "Digital_Software"]}],
    num_samples=10,
    agg_method='mean_abs',
)

# S3 location that receives the explainability report.
explainability_output_path = 's3://{}/{}/clarify-explainability'.format(bucket, prefix)

# JSON Lines dataset description; no label is configured here, so only feature headers
# are listed.
explainability_data_config = clarify.DataConfig(
    s3_data_input_path=explainability_dataset_uri,
    s3_output_path=explainability_output_path,
    dataset_type='application/jsonlines',
    headers=['review_body', 'product_category'],
    features='features',
)

In [62]:
# Launch the explainability job with the SHAP configuration, reusing the model
# configuration from the bias analysis.
clarify_processor.run_explainability(
    data_config=explainability_data_config,
    model_config=model_config,
    explainability_config=shap_config,
    model_scores="predicted_label",
)


Job Name:  Clarify-Explainability-2021-01-27-11-45-44-131
Inputs:  [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/test_data_for_explainability_analysis.jsonl', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/Clarify-Explainability-2021-01-27-11-45-44-131/input/analysis_config/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-1-617993403527/sagemaker/DEMO-sagemaker-clarify/clarify-explainability', 'LocalPath'