# Run Ad-Hoc Model Bias Analysis

## Run Bias Analysis in the Notebook Using `smclarify`
https://github.com/aws/amazon-sagemaker-clarify

In [None]:
!pip install -q smclarify==0.1

In [None]:
from smclarify.bias.report import *
from smclarify.util.dataset import Datasets, german_lending_readable_values
from typing import Dict
from collections import defaultdict
import pandas as pd

In [None]:
# Load the reviews CSV used for the pre-training analysis (gift cards,
# software and video games, balanced, per its file name).
pre_training_csv = './data-clarify/amazon_reviews_us_giftcards_software_videogames_balanced.csv'
df_pre_training = pd.read_csv(pre_training_csv)

# Trailing expression: the notebook renders the (rows, columns) tuple.
df_pre_training.shape

In [None]:
# Preview the first rows of the loaded dataset; as the cell's trailing
# expression, the result is rendered by the notebook.
df_pre_training.head()

# Pre-Training Bias Analysis

In [None]:
# Facet column for the pre-training analysis; no sensitive values are listed here.
facet_column = FacetColumn(name='product_category')

# Label column: the observed star ratings, with 5 and 4 given as the
# positive label values.
label_column = LabelColumn(
    name='star_rating',
    data=df_pre_training['star_rating'],
    positive_label_values=[5, 4],
)

# Series handed to bias_report through its `group_variable` argument.
group_variable = df_pre_training['product_category']

In [None]:
# Run the bias report at the PRE_TRAINING stage, i.e. on the dataset alone,
# without a predicted-label column.
pre_training_report = bias_report(
    df_pre_training,
    facet_column=facet_column,
    label_column=label_column,
    stage_type=StageType.PRE_TRAINING,
    group_variable=group_variable,
)

In [None]:
# Display the value returned by bias_report for the pre-training stage.
pre_training_report

# Post-Training Bias Analysis

## _TODO: Implement Batch Prediction (until then, the hand-written sample data below stands in for real model predictions)_

In [None]:
# Hand-written sample records, stored column-wise. Each list has one entry per
# review; `star_rating_predicted` holds the stand-in model output for the
# same review.
data = dict(
    star_rating=[1, 2, 3, 4, 5],
    review_body=['Worst ever', 'Expected more', 'Its ok', 'I like it', 'I love it'],
    product_category=['Gift Card', 'Gift Card', 'Gift Card', 'Digital_Software', 'Digital_Software'],
    star_rating_predicted=[1, 2, 3, 4, 5],
)

In [None]:
# Build the frame with an explicit column order, then print it for inspection.
column_order = ['star_rating', 'review_body', 'product_category', 'star_rating_predicted']
df = pd.DataFrame(data, columns=column_order)
print(df)

# Convert data columns to the `category` data type required by Clarify

In [None]:
# Cast the observed label column to the pandas `category` dtype.
df['star_rating'] = df['star_rating'].astype(dtype='category')

In [None]:
# Cast the predicted label column to the pandas `category` dtype.
df['star_rating_predicted'] = df['star_rating_predicted'].astype(dtype='category')

In [None]:
# Cast the facet column to the pandas `category` dtype.
df['product_category'] = df['product_category'].astype(dtype='category')

# Configure Clarify

In [None]:
# Facet for the post-training analysis: the product category, with
# 'Gift Card' given as the sensitive value.
facet_column = FacetColumn(name='product_category', sensitive_values=['Gift Card'])

In [None]:
# Observed label: the `star_rating` column, with 5 and 4 given as the
# positive label values.
label_column = LabelColumn(
    name='star_rating',
    data=df['star_rating'],
    positive_label_values=[5, 4],
)

In [None]:
# Predicted label: the `star_rating_predicted` column, using the same
# positive label values (5 and 4) as the observed label.
predicted_label_column = LabelColumn(
    name='star_rating_predicted',
    data=df['star_rating_predicted'],
    positive_label_values=[5, 4],
)

In [None]:
# Series handed to bias_report through its `group_variable` argument.
# NOTE(review): this is the same column that is used as the facet — confirm
# that grouping by the facet itself is intended.
group_variable = df['product_category']

In [None]:
# Short names of the metrics requested from the post-training report.
post_training_metrics = [
    'DPPL', 'DI', 'DCA', 'DCR', 'RD',
    'DAR', 'DRR', 'AD', 'CDDPL', 'TE',
]

# Run the bias report at the POST_TRAINING stage, comparing the observed
# labels against the predicted labels.
post_training_report = bias_report(
    df,
    facet_column=facet_column,
    label_column=label_column,
    stage_type=StageType.POST_TRAINING,
    predicted_label_column=predicted_label_column,
    metrics=post_training_metrics,
    group_variable=group_variable,
)

# Show Post-Training Bias Report

In [None]:
# Pretty-print the value returned by bias_report for the post-training stage.
from pprint import pprint
pprint(post_training_report)

# Release Resources

In [None]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

<script>
// Programmatically click the hidden button above as soon as this output is
// rendered. The button carries the `kernelmenu:shutdown` command attribute;
// presumably front ends that honor `data-commandlinker-command` run that
// command on click.
try {
    // Declared with `const` so the lookup result stays block-scoped instead
    // of becoming an implicit global on `window`.
    const els = document.getElementsByClassName("sm-command-button");
    // Only click when a matching button actually exists in the page.
    if (els.length > 0) {
        els[0].click();
    }
}
catch(err) {
    // NoOp: shutdown is best effort, so any error raised here is ignored.
}
</script>

In [None]:
%%javascript

// Save a checkpoint of the notebook, then delete its session.
// NOTE(review): presumably targets the classic Jupyter Notebook front end,
// where a global `Jupyter` object is available — confirm.
try {
    // Order matters: the checkpoint call is issued before the session is deleted.
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp: any error raised by the calls above is deliberately ignored.
}