In [3]:
# cell 01
from io import StringIO
import os
import time
import sys
import IPython
from time import gmtime, strftime

import boto3
import numpy as np
import pandas as pd
import urllib

import sagemaker
from sagemaker.s3 import S3Uploader
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost import XGBoost
from sagemaker.s3 import S3Downloader
from sagemaker.s3 import S3Uploader
from sagemaker import Session
from sagemaker import get_execution_role
from sagemaker.xgboost import XGBoostModel
from sagemaker.sklearn import SKLearnModel
from sagemaker.pipeline import PipelineModel


# SageMaker session: source of the default bucket and the active region.
session = Session()
region = session.boto_region_name
bucket = session.default_bucket()

# Key prefix used for everything this notebook writes to the bucket.
prefix = "hybrid_approach"

# IAM role passed to the SageMaker models and the Clarify processor below.
role = get_execution_role()

Captured inference data merged with the ground-truth label. The label column is "credit_risk".

In [34]:
# Upload the captured inference data (features merged with the ground-truth
# "credit_risk" label) to S3 and print the resulting S3 URI.
captured_data = "dataset/captured_data.csv"
captured_inference_features_and_groundtruth = S3Uploader.upload(
    captured_data,
    f"s3://{bucket}/{prefix}/data/test",
)
print(captured_inference_features_and_groundtruth)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


s3://sagemaker-us-east-1-259508681668/hybrid_approach/data/test/captured_data.csv


Predicted labels captured from the endpoint after running inference on the data above (with the ground-truth label excluded).

In [35]:
# Upload the predicted labels captured from model inference (column
# "credit_risk") to S3 and print the resulting S3 URI.
predictions = "dataset/prediction.csv"
captured_predicted_label = S3Uploader.upload(
    predictions,
    f"s3://{bucket}/{prefix}/data/test",
)
print(captured_predicted_label)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


s3://sagemaker-us-east-1-259508681668/hybrid_approach/data/test/prediction.csv


In [33]:
# Upload the training data to S3 and print the resulting S3 URI.
train = "dataset/train.csv"
train_dataset = S3Uploader.upload(
    train,
    f"s3://{bucket}/{prefix}/data/train",
)
print(train_dataset)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


s3://sagemaker-us-east-1-259508681668/hybrid_approach/data/train/train.csv


In [15]:
# This example uses a pipeline model: one model preprocesses the inference
# request and passes the result to the other model for prediction.
# Upload both model artifacts to S3.
preprocessor_model_data = S3Uploader.upload(
    "model_sklearn/model.tar.gz",
    f"s3://{bucket}/{prefix}/data/train",
)
xgboost_model_data = S3Uploader.upload(
    "model_xgboost/xgb_model.tar.gz",
    f"s3://{bucket}/{prefix}/data/train",
)

In [48]:
# SageMaker model object for the preprocessing (scikit-learn) model.
# The model name carries the current Unix timestamp as a suffix.
sklearn_inference_code_location = f"s3://{bucket}/{prefix}/sklearn/code"

sklearn_model = SKLearnModel(
    name=f"sklearn-model-{int(time.time())}",
    role=role,
    sagemaker_session=session,
    model_data=preprocessor_model_data,
    entry_point="inference.py",
    source_dir="inference/sklearn/",
    code_location=sklearn_inference_code_location,
    framework_version="0.20.0",
    py_version="py3",
)

In [49]:
# SageMaker model object for the prediction (XGBoost) model.
# The model name carries the current Unix timestamp as a suffix.
xgboost_inference_code_location = f"s3://{bucket}/{prefix}/xgb_model/code"

xgboost_model = XGBoostModel(
    name=f"xgb-model-{int(time.time())}",
    role=role,
    sagemaker_session=session,
    model_data=xgboost_model_data,
    entry_point="inference.py",
    source_dir="inference/xgboost/",
    code_location=xgboost_inference_code_location,
    framework_version="0.90-2",
    py_version="py3",
)

In [50]:
# Chain the two models above into a single SageMaker pipeline model:
# the scikit-learn model runs first, then the XGBoost model.
pipeline_model_name = f"credit-risk-inference-pipeline-{int(time.time())}"

pipeline_model = PipelineModel(
    models=[sklearn_model, xgboost_model],
    name=pipeline_model_name,
    role=role,
    sagemaker_session=session,
)
# Register the pipeline model with SageMaker so it can be referenced by name later.
pipeline_model.create(instance_type="ml.m5.xlarge")

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker:Creating model with name: credit-risk-inference-pipeline-1705694102


# We create a SageMaker Clarify processor that will run our bias jobs

In [51]:
# cell 25
from sagemaker import clarify

# Processor that runs the Clarify bias jobs in this notebook
# (a single ml.c4.xlarge instance).
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_count=1,
    instance_type="ml.c4.xlarge",
    sagemaker_session=session,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.0.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


### Detect data bias with Amazon SageMaker Clarify
#### Amazon Science: [How Clarify helps machine learning developers detect unintended bias](https://www.amazon.science/latest-news/how-clarify-helps-machine-learning-developers-detect-unintended-bias)

#### [Clarify Terms for Bias and Fairness](https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-detect-data-bias.html) 

#### [Pre-training bias metrics](https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-measure-data-bias.html)   

#### [Post-training bias metrics](https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-measure-post-training-bias.html)

#### Calculate pre-training and post-training Bias metrics

Note: You can also execute pre-training and post-training bias detection jobs separately

A DataConfig object communicates some basic information about data I/O to Clarify. We specify where to find the input dataset, where to store the output, the target column (label), the header names, and the dataset type.

Similarly, the ModelConfig object (created below) communicates information about your trained model, and ModelPredictedLabelConfig provides information on the format of your predictions.

## First, we get the metrics from the training data to use as a baseline for comparison with our captured inference data bias job

In [32]:
# Column names of the training CSV, in file order.
train_headers = list(pd.read_csv("dataset/train.csv").columns)
train_headers

['status',
 'duration',
 'credit_history',
 'purpose',
 'amount',
 'savings',
 'employment_duration',
 'installment_rate',
 'personal_status_sex',
 'other_debtors',
 'present_residence',
 'property',
 'age',
 'other_installment_plans',
 'housing',
 'number_credits',
 'job',
 'people_liable',
 'telephone',
 'foreign_worker',
 'credit_risk']

In [52]:
# Data configuration for the bias job on the TRAINING data (the baseline run).
train_bias_report_prefix = "{}/clarify-bias/train".format(prefix)
train_bias_report_output_path = "s3://{}/{}".format(bucket, train_bias_report_prefix)
bias_data_config = clarify.DataConfig(
    s3_data_input_path=train_dataset,  # S3 path of the train dataset
    s3_output_path=train_bias_report_output_path,
    label="credit_risk",  # label column of the train dataset
    # Headers come from train.csv (train_headers). captured_data_headers is not
    # defined until a later cell, so using it here fails on a fresh kernel.
    headers=train_headers,
    dataset_type="text/csv",
)
predictions_config = clarify.ModelPredictedLabelConfig(
    probability_threshold=0.7  # predictions above this threshold are treated as 1 (positive)
)

SageMaker Clarify also needs the sensitive columns (facets) and the desirable outcomes (facet_values_or_threshold).

We specify this information in the BiasConfig API. Here `age` is the facet that we analyze and 40 is the threshold. The group `personal_status_sex` is used to form subgroups for the measurement of the Conditional Demographic Disparity (CDD) metric only.

In [53]:

bias_config = clarify.BiasConfig(
    # Sensitive attribute column name (or index in the input data) used when
    # computing the bias metrics.
    facet_name="age",
    # Value(s) or threshold that select the sensitive group within the facet.
    facet_values_or_threshold=[40],
    # Label value(s) or threshold that indicate the positive outcome.
    label_values_or_threshold=[1],
    # Optional group column used for Conditional Demographic Disparity in
    # Labels (CDDL) and in Predicted Labels (CDDPL).
    group_name="personal_status_sex",
)

**ModelConfig**: specifies information about the trained model. Here we re-use the name of the pipeline model created earlier.

In [54]:
# There are no stored predictions for the training data, so a model config is
# supplied; SageMaker Clarify uses this model to spin up an endpoint and run
# predictions on the training data.
# The inference pipeline model name is used here. If you are not using a
# pipeline model, the XGBoost model name (`xgboost_model.name`) can be given instead.
model_config = clarify.ModelConfig(
    model_name=pipeline_model.name,
    accept_type="text/csv",
    instance_count=1,
    instance_type="ml.c5.xlarge",
)

In [55]:
# Run the bias job on the training data, computing every pre-training and
# post-training metric; predictions come from the model in model_config.
clarify_processor.run_bias(
    pre_training_methods="all",
    post_training_methods="all",
    data_config=bias_data_config,
    bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predictions_config,
)

INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['status', 'duration', 'credit_history', 'purpose', 'amount', 'savings', 'employment_duration', 'installment_rate', 'personal_status_sex', 'other_debtors', 'present_residence', 'property', 'age', 'other_installment_plans', 'housing', 'number_credits', 'job', 'people_liable', 'telephone', 'foreign_worker', 'credit_risk'], 'label': 'credit_risk', 'label_values_or_threshold': [1], 'facet': [{'name_or_index': 'age', 'value_or_threshold': [40]}], 'group_variable': 'personal_status_sex', 'methods': {'report': {'name': 'report', 'title': 'Analysis Report'}, 'pre_training_bias': {'methods': 'all'}, 'post_training_bias': {'methods': 'all'}}, 'predictor': {'model_name': 'credit-risk-inference-pipeline-1705694102', 'instance_type': 'ml.c5.xlarge', 'initial_instance_count': 1, 'accept_type': 'text/csv'}, 'probability_threshold': 0.7}
INFO:sagemaker:Creating processing-job with name Clarify-Bias-2024-01-19-19-55-17-038


...................................................[34m2024-01-19 20:03:50,130 logging.conf not found when configuring logging, using default logging configuration.[0m
[34m2024-01-19 20:03:50,130 Starting SageMaker Clarify Processing job[0m
[34m2024-01-19 20:03:50,131 Analysis config path: /opt/ml/processing/input/config/analysis_config.json[0m
[34m2024-01-19 20:03:50,131 Analysis result path: /opt/ml/processing/output[0m
[34m2024-01-19 20:03:50,132 This host is algo-1.[0m
[34m2024-01-19 20:03:50,132 This host is the leader.[0m
[34m2024-01-19 20:03:50,132 Number of hosts in the cluster is 1.[0m
[34m2024-01-19 20:03:50,429 Running Python / Pandas based analyzer.[0m
[34m2024-01-19 20:03:50,429 Dataset type: text/csv uri: /opt/ml/processing/input/data[0m
[34m2024-01-19 20:03:50,435 Loading dataset...[0m
  df = df.append(df_tmp, ignore_index=True)[0m
[34m2024-01-19 20:03:50,447 Loaded dataset. Dataset info:[0m
[34m<class 'pandas.core.frame.DataFrame'>[0m
[34mRange

In [57]:
# List the files the bias job wrote to the training-data report location on S3
!aws s3 ls $train_bias_report_output_path/

2024-01-19 20:08:22       5548 analysis.json
2024-01-19 19:55:18        873 analysis_config.json
2024-01-19 20:08:22     774760 report.html
2024-01-19 20:08:22     503844 report.ipynb
2024-01-19 20:08:22     475454 report.pdf


In [60]:
# Copy every file under the training-data report location from S3 into ./train_data_bias_report
!aws s3 cp --recursive $train_bias_report_output_path "./train_data_bias_report"

download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/analysis_config.json to train_data_bias_report/analysis_config.json
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.ipynb to train_data_bias_report/report.ipynb
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.html to train_data_bias_report/report.html
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/analysis.json to train_data_bias_report/analysis.json
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.pdf to train_data_bias_report/report.pdf


In [61]:
# Show a link to the HTML bias report (pre-training and post-training bias metrics) for the training data.
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display emits a deprecation warning.
from IPython.display import display, HTML

# target="_blank" is the reserved name that opens the link in a new tab.
display(HTML('<b>Review <a target="_blank" href="./train_data_bias_report/report.html">Bias Report</a></b>'))

  from IPython.core.display import display, HTML


In [62]:
# Load the analysis file with the bias metrics computed on the training data and print some of them
import json

with open("train_data_bias_report/analysis.json") as json_file:
    data = json.load(json_file)

# Index the "age" facet's metrics by name. List positions can shift when the
# set of computed metrics changes, so positional lookups are fragile.
# NOTE(review): "CI", "DPL", "DPPL" and "DI" are the short metric names expected
# in the Clarify report -- confirm against analysis.json.
train_pre_training_metrics = {
    metric["name"]: metric.get("value")
    for metric in data["pre_training_bias_metrics"]["facets"]["age"][0]["metrics"]
}
train_post_training_metrics = {
    metric["name"]: metric.get("value")
    for metric in data["post_training_bias_metrics"]["facets"]["age"][0]["metrics"]
}

print("pre-training bias metrics")
class_imbalance = train_pre_training_metrics["CI"]
print("class imbalance: ", class_imbalance)
DPL = train_pre_training_metrics["DPL"]
print("DPL: ", DPL)
print("\n")
print("post training bias metrics")
DPPL = train_post_training_metrics["DPPL"]
print("DPPL: ", DPPL)
DI = train_post_training_metrics["DI"]
print("DI: ", DI)

pre-training bias metrics
class imbalance:  0.456
DPL:  -0.04848093083387206


post training bias metrics
DPPL:  -0.08403361344537819
DI:  1.13840830449827


## BIAS FOR CAPTURED INFERENCE DATA

In [63]:
# Column names of the captured inference CSV, in file order.
captured_data_headers = list(pd.read_csv("dataset/captured_data.csv").columns)
captured_data_headers

['status',
 'duration',
 'credit_history',
 'purpose',
 'amount',
 'savings',
 'employment_duration',
 'installment_rate',
 'personal_status_sex',
 'other_debtors',
 'present_residence',
 'property',
 'age',
 'other_installment_plans',
 'housing',
 'number_credits',
 'job',
 'people_liable',
 'telephone',
 'foreign_worker',
 'credit_risk']

In [64]:
# Data configuration for the bias job on the CAPTURED INFERENCE data.
# Predicted labels are read from the captured prediction file on S3.
bias_report_prefix = f"{prefix}/clarify-bias"
bias_report_output_path = f"s3://{bucket}/{bias_report_prefix}"
bias_data_config = clarify.DataConfig(
    # Captured inference dataset: S3 location, column headers and label column.
    s3_data_input_path=captured_inference_features_and_groundtruth,
    headers=captured_data_headers,
    label="credit_risk",
    dataset_type="text/csv",
    # Where the bias report is written.
    s3_output_path=bias_report_output_path,
    # Captured predictions: S3 location, column headers and label column.
    predicted_label_dataset_uri=captured_predicted_label,
    predicted_label_headers=["credit_risk"],
    predicted_label="credit_risk",
)
predictions_config = clarify.ModelPredictedLabelConfig(
    # Predictions above this threshold are treated as 1 (positive).
    probability_threshold=0.7
)

In [65]:

bias_config = clarify.BiasConfig(
    # Sensitive attribute column name (or index in the input data) used when
    # computing the bias metrics.
    facet_name="age",
    # Value(s) or threshold that select the sensitive group within the facet.
    facet_values_or_threshold=[40],
    # Label value(s) or threshold that indicate the positive outcome.
    label_values_or_threshold=[1],
    # Optional group column used for Conditional Demographic Disparity in
    # Labels (CDDL) and in Predicted Labels (CDDPL).
    group_name="personal_status_sex",
)

In [None]:
# Run the bias job on the captured inference data, computing every
# pre-training and post-training metric.
# No model_config is passed: the predicted labels were already captured and
# are supplied through the data config.
clarify_processor.run_bias(
    pre_training_methods="all",
    post_training_methods="all",
    data_config=bias_data_config,
    bias_config=bias_config,
    model_predicted_label_config=predictions_config,
)

INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['status', 'duration', 'credit_history', 'purpose', 'amount', 'savings', 'employment_duration', 'installment_rate', 'personal_status_sex', 'other_debtors', 'present_residence', 'property', 'age', 'other_installment_plans', 'housing', 'number_credits', 'job', 'people_liable', 'telephone', 'foreign_worker', 'credit_risk'], 'label': 'credit_risk', 'predicted_label_dataset_uri': 's3://sagemaker-us-east-1-259508681668/hybrid_approach/data/test/prediction.csv', 'predicted_label_headers': ['credit_risk'], 'predicted_label': 'credit_risk', 'label_values_or_threshold': [1], 'facet': [{'name_or_index': 'age', 'value_or_threshold': [40]}], 'group_variable': 'personal_status_sex', 'methods': {'report': {'name': 'report', 'title': 'Analysis Report'}, 'pre_training_bias': {'methods': 'all'}, 'post_training_bias': {'methods': 'all'}}, 'probability_threshold': 0.7}
INFO:sagemaker:Creating processing-job with name Clarify-B

.......................

In [25]:
# List the files the bias job wrote to the inference-data report location on S3
!aws s3 ls $bias_report_output_path/

2024-01-19 18:00:23       5548 analysis.json
2024-01-19 17:51:43       1065 analysis_config.json
2024-01-19 18:00:23     774174 report.html
2024-01-19 18:00:23     503315 report.ipynb
2024-01-19 18:00:23     475915 report.pdf


In [27]:
# Copy every file under the inference-data report location from S3 into ./inference_data_bias_report
!aws s3 cp --recursive $bias_report_output_path "./inference_data_bias_report"

download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/analysis.json to inference_data_bias_report/analysis.json
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/analysis_config.json to inference_data_bias_report/analysis_config.json
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.pdf to inference_data_bias_report/report.pdf
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.ipynb to inference_data_bias_report/report.ipynb
download: s3://sagemaker-us-east-1-259508681668/hybrid_approach/clarify-bias/report.html to inference_data_bias_report/report.html


In [28]:
# Show a link to the HTML bias report (pre-training and post-training bias metrics) for the inference data.
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display emits a deprecation warning.
from IPython.display import display, HTML

# target="_blank" is the reserved name that opens the link in a new tab.
display(HTML('<b>Review <a target="_blank" href="./inference_data_bias_report/report.html">Bias Report</a></b>'))

  from IPython.core.display import display, HTML


In [29]:
# Load the analysis file with the bias metrics computed on the inference data and print some of them
import json

with open("inference_data_bias_report/analysis.json") as json_file:
    data = json.load(json_file)

# Index the "age" facet's metrics by name. List positions can shift when the
# set of computed metrics changes, so positional lookups are fragile.
# NOTE(review): "CI", "DPL", "DPPL" and "DI" are the short metric names expected
# in the Clarify report -- confirm against analysis.json.
inference_pre_training_metrics = {
    metric["name"]: metric.get("value")
    for metric in data["pre_training_bias_metrics"]["facets"]["age"][0]["metrics"]
}
inference_post_training_metrics = {
    metric["name"]: metric.get("value")
    for metric in data["post_training_bias_metrics"]["facets"]["age"][0]["metrics"]
}

print("pre-training bias metrics")
class_imbalance = inference_pre_training_metrics["CI"]
print("class imbalance: ", class_imbalance)
DPL = inference_pre_training_metrics["DPL"]
print("DPL: ", DPL)
print("\n")
print("post training bias metrics")
DPPL = inference_post_training_metrics["DPPL"]
print("DPPL: ", DPPL)
DI = inference_post_training_metrics["DI"]
print("DI: ", DI)

pre-training bias metrics
class imbalance:  0.456
DPL:  -0.04848093083387206


post training bias metrics
DPPL:  -0.08403361344537819
DI:  1.13840830449827


## Compare the bias metrics of the captured inference data to the baseline train data metrics using a 10% threshold

In [None]:
import pandas as pd

def compare_metrics(dictA, dictB, threshold=0.1, facet_name="age"):
    """Compare the bias metrics of a baseline analysis with another analysis.

    Both arguments are parsed analysis dictionaries that hold facet metrics
    under ``[training_type]["facets"][facet_name]``, where ``training_type``
    is ``"pre_training_bias_metrics"`` or ``"post_training_bias_metrics"``.
    Each facet entry has a ``"value_or_threshold"`` and a ``"metrics"`` list
    of ``{"name": ..., "value": ...}`` items (optionally with ``"error"``).

    For every baseline facet, each metric is matched by name against the facet
    in ``dictB`` with the same ``"value_or_threshold"``. A metric is compared
    only when both sides have a value and the ``dictB`` side reports no error.

    Args:
        dictA: Baseline analysis dictionary.
        dictB: Analysis dictionary to compare against the baseline.
        threshold: Allowed relative deviation from the baseline value. A metric
            is ``'safe'`` when ``abs(A - B) <= threshold * abs(A)``, otherwise
            ``'alarm'``.
        facet_name: Name of the facet whose metrics are compared.

    Returns:
        A DataFrame with columns ``Metrics``, ``Baseline``, ``Compared``,
        ``threshold`` and ``Assessment``. If nothing could be compared, a
        message is printed and ``None`` is returned.
    """
    results = []

    for training_type in ('pre_training_bias_metrics', 'post_training_bias_metrics'):
        # A section that is absent (e.g. an analysis with only one kind of
        # metrics) simply contributes no rows.
        facets_A = dictA.get(training_type, {}).get('facets', {}).get(facet_name, [])
        facets_B = dictB.get(training_type, {}).get('facets', {}).get(facet_name, [])

        for facet in facets_A:
            # Baseline metrics by name; the first occurrence of a name wins.
            # .get('value') tolerates metrics that carry only an 'error'.
            baseline_values = {}
            for metric in facet.get('metrics', []):
                baseline_values.setdefault(metric['name'], metric.get('value'))

            matching_facets_B = [
                facet_B for facet_B in facets_B
                if facet_B.get('value_or_threshold') == facet.get('value_or_threshold')
            ]

            for metric_name in sorted(baseline_values):
                value_A = baseline_values[metric_name]
                value_B = None
                error_B = None

                # Find the corresponding metric in dictB.
                for facet_B in matching_facets_B:
                    for metric_B in facet_B.get('metrics', []):
                        if metric_B['name'] == metric_name:
                            value_B = metric_B.get('value')
                            error_B = metric_B.get('error')
                            break

                # Compare only when both values exist and dictB reported no error.
                if value_A is None or value_B is None or error_B is not None:
                    continue

                within_tolerance = abs(value_A - value_B) <= threshold * abs(value_A)
                assessment = 'safe' if within_tolerance else 'alarm'
                results.append([metric_name, value_A, value_B, threshold, assessment])

    if results:
        return pd.DataFrame(
            results,
            columns=["Metrics", "Baseline", "Compared", "threshold", "Assessment"],
        )

    print("No metrics found for comparison.")
    return None

# Compare the captured-inference bias metrics against the training baseline.
# Each report is loaded under its own name: `data` was last loaded from the
# inference report, so passing it for both arguments would compare that report
# with itself and mark every metric as 'safe'.
import json

with open("train_data_bias_report/analysis.json") as baseline_file:
    baseline_bias_metrics = json.load(baseline_file)
with open("inference_data_bias_report/analysis.json") as captured_file:
    captured_bias_metrics = json.load(captured_file)

comparison_df = compare_metrics(baseline_bias_metrics, captured_bias_metrics)
comparison_df