<a href="https://colab.research.google.com/github/ckaarle/class/blob/training/analysis/AIF360.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf
import pandas as pd
import io

from google.colab import files

# Load model

In [5]:
# Prompt for a browser upload of the zipped model (credit_model.zip in the recorded run).
# The upload also saves the file into the working directory, which the unzip cell relies on.
model_uploaded = files.upload()

Saving credit_model.zip to credit_model.zip


In [6]:
!unzip credit_model.zip

Archive:  credit_model.zip
   creating: content/credit_model/
  inflating: content/credit_model/saved_model.pb  
  inflating: content/credit_model/keras_metadata.pb  
   creating: content/credit_model/variables/
  inflating: content/credit_model/variables/variables.data-00000-of-00001  
  inflating: content/credit_model/variables/variables.index  
   creating: content/credit_model/assets/


In [9]:
# Load the Keras model from the directory extracted above
# (the archive unpacks under content/credit_model/).
model = tf.keras.models.load_model('./content/credit_model')

# Load data set

In [10]:
# Prompt for a browser upload of train.csv and test.csv; the returned mapping is
# indexed by file name in the next cell to get each file's bytes.
data_uploaded = files.upload()

Saving test.csv to test.csv
Saving train.csv to train.csv


In [12]:
# Parse the uploaded CSV bytes into DataFrames (train first, then test).
train, test = (
    pd.read_csv(io.BytesIO(data_uploaded[csv_name]))
    for csv_name in ('train.csv', 'test.csv')
)

# Configure AIF360

In [13]:
pip install 'aif360[all]'

Collecting aif360[all]
  Downloading aif360-0.4.0-py3-none-any.whl (175 kB)
[K     |████████████████████████████████| 175 kB 7.3 MB/s 
Collecting tempeh
  Downloading tempeh-0.1.12-py3-none-any.whl (39 kB)
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[K     |████████████████████████████████| 275 kB 53.9 MB/s 
Collecting fairlearn==0.4.6
  Downloading fairlearn-0.4.6-py3-none-any.whl (21.2 MB)
[K     |████████████████████████████████| 21.2 MB 1.5 MB/s 
[?25hCollecting BlackBoxAuditing
  Downloading BlackBoxAuditing-0.1.54.tar.gz (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 85.7 MB/s 
[?25hCollecting sphinx-rtd-theme
  Downloading sphinx_rtd_theme-1.0.0-py2.py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 63.2 MB/s 
Collecting adversarial-robustness-toolbox>=1.0.0
  Downloading adversarial_robustness_toolbox-1.8.1-py3-none-any.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 65.1 MB/s 
Collecting numba>=0.53.1
  Do

In [44]:
from aif360.datasets import StandardDataset

from aif360.metrics import BinaryLabelDatasetMetric

from aif360.algorithms.preprocessing import Reweighing, DisparateImpactRemover

# Preprocessing

In [30]:
# Wrap the training frame as an AIF360 dataset:
#   - 'Risk' is the label column and value 0 is treated as the favorable outcome;
#   - 'Sex_male' and 'Age_18-30' are the protected attributes, with privileged
#     values 0 and 1 respectively (same order as the attribute names).
data_orig = StandardDataset(
    df=train,
    label_name='Risk',
    favorable_classes=[0],
    protected_attribute_names=['Sex_male', 'Age_18-30'],
    privileged_classes=[[0], [1]],
)

In [31]:
# Group definitions passed to every metric and algorithm below. The dataset above only
# records which values count as privileged per attribute; these dicts say which
# combinations of attribute values form the two groups being compared.
# NOTE(review): per the AIF360 docs, the keys inside one dict are ANDed, so these are
# intersectional groups (Sex_male == 0 AND Age_18-30 == 1 vs. Sex_male == 1 AND
# Age_18-30 == 0) and rows matching neither dict are left out of group metrics.
# Small or empty groups would explain the 0.0 / nan results and divide-by-zero
# warnings further down; confirm this is the intended comparison.
privileged_groups = [{'Sex_male': 0, 'Age_18-30': 1}]
unprivileged_groups = [{'Sex_male': 1, 'Age_18-30': 0}]

In [32]:
# Baseline fairness metrics on the untouched training labels; the reweighed and
# repaired datasets below are evaluated with the same group definitions.
metric_train_orig = BinaryLabelDatasetMetric(
    data_orig,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [33]:
# Baseline mean difference. NOTE(review): per the AIF360 docs this is the unprivileged
# minus the privileged favorable-outcome rate, so a negative value means the
# unprivileged group gets the favorable label less often; confirm.
print("Difference in mean outcomes between unprivileged and privileged groups ", metric_train_orig.mean_difference())

Difference in mean outcomes between unprivileged and privileged groups  -0.5833333333333334


In [34]:
# Baseline consistency; the recorded output shows it comes back as a one-element array.
print('Similarity of labels for similar instances ', metric_train_orig.consistency())

Similarity of labels for similar instances  [0.80225]


In [35]:
# The label text is corrected: disparate impact is the ratio of favorable-outcome
# rates (unprivileged / privileged). 1.0 is parity, and a value below 0.8 is the
# usual warning sign (four-fifths rule), not the target.
print('Disparate impact (ratio of favorable-outcome rates, unprivileged / privileged; 1.0 means parity, values < 0.8 are commonly flagged as adverse impact)', metric_train_orig.disparate_impact())

Disparate impact (ratio of unprivileged with negative outcome compared to privileged with negative outcome, should often be < 0.8) 0.0


In [38]:
# Reweighing pre-processor, configured with the same group definitions as the metrics
# so that it adjusts weights for exactly the groups being measured.
rw = Reweighing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [53]:
# Fit the reweighing on the training data and return a reweighted copy.
# NOTE(review): the recorded run emitted "divide by zero encountered" here,
# presumably because some (group, label) combination has no instances; that would be
# consistent with the disparate impact of 0.0 above. Confirm the group sizes.
data_orig_trans_rw = rw.fit_transform(data_orig)

divide by zero encountered in double_scalars


In [54]:
# Same dataset metrics as the baseline, now computed on the reweighted training data.
metric_train_rw = BinaryLabelDatasetMetric(
    data_orig_trans_rw,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [55]:
# Mean difference after reweighing (recorded run: -0.36875, versus -0.5833 before).
print("Difference in mean outcomes between unprivileged and privileged groups ", metric_train_rw.mean_difference())

Difference in mean outcomes between unprivileged and privileged groups  -0.36875


In [56]:
# Consistency after reweighing. The recorded value is identical to the baseline
# (0.80225); presumably this metric does not take instance weights into account,
# and weights are the only thing reweighing changes. TODO confirm.
print('Similarity of labels for similar instances ', metric_train_rw.consistency())

Similarity of labels for similar instances  [0.80225]


In [57]:
# The label text is corrected: disparate impact is the ratio of favorable-outcome
# rates (unprivileged / privileged). 1.0 is parity, and a value below 0.8 is the
# usual warning sign (four-fifths rule), not the target.
print('Disparate impact (ratio of favorable-outcome rates, unprivileged / privileged; 1.0 means parity, values < 0.8 are commonly flagged as adverse impact)', metric_train_rw.disparate_impact())

Disparate impact (ratio of unprivileged with negative outcome compared to privileged with negative outcome, should often be < 0.8) 0.0


In [58]:
# Only 'Sex_male' is repaired: the remover is configured through a single
# `sensitive_attribute` name, so 'Age_18-30' is left as-is by this step.
# NOTE(review): the variable name `dir` shadows Python's builtin dir() for the rest of
# the session; consider renaming it here and in the fit_transform cell below.
dir = DisparateImpactRemover(sensitive_attribute='Sex_male')

In [59]:
# Apply the disparate impact remover to the original (not the reweighted) training data.
data_orig_trans_dir = dir.fit_transform(data_orig)

In [60]:
# Same dataset metrics as the baseline, now computed on the repaired training data.
metric_train_dir = BinaryLabelDatasetMetric(
    data_orig_trans_dir,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [61]:
# Mean difference after the repair. The recorded value equals the baseline (-0.5833),
# i.e. the labels and the group membership used by this metric did not change.
print("Difference in mean outcomes between unprivileged and privileged groups ", metric_train_dir.mean_difference())

Difference in mean outcomes between unprivileged and privileged groups  -0.5833333333333334


In [62]:
# Consistency after the repair (recorded run: 0.784, versus 0.80225 before).
print('Similarity of labels for similar instances ', metric_train_dir.consistency())

Similarity of labels for similar instances  [0.784]


In [63]:
# The label text is corrected: disparate impact is the ratio of favorable-outcome
# rates (unprivileged / privileged). 1.0 is parity, and a value below 0.8 is the
# usual warning sign (four-fifths rule), not the target.
print('Disparate impact (ratio of favorable-outcome rates, unprivileged / privileged; 1.0 means parity, values < 0.8 are commonly flagged as adverse impact)', metric_train_dir.disparate_impact())

Disparate impact (ratio of unprivileged with negative outcome compared to privileged with negative outcome, should often be < 0.8) 0.0


In [63]:
# what actually changed in the data sets?

In [64]:
# Inspect the reweighted dataset. In the recorded output the "instance weights" column
# is no longer uniformly 1.0 (values such as 1.515 and 0.63125 appear).
data_orig_trans_rw

               instance weights  features  ...           labels
                                           ...                 
                                      Job  ... Age_61-80       
instance names                             ...                 
0                       1.51500  0.129128  ...       0.0    1.0
1                       0.63125  0.129128  ...       0.0    1.0
2                       0.63125  0.129128  ...       0.0    1.0
3                       1.00000  0.129128  ...       0.0    0.0
4                       1.00000 -1.436061  ...       0.0    0.0
...                         ...       ...  ...       ...    ...
795                     0.63125  0.129128  ...       0.0    1.0
796                     1.51500  0.129128  ...       0.0    1.0
797                     0.63125  0.129128  ...       0.0    1.0
798                     0.63125 -1.436061  ...       0.0    1.0
799                     0.63125  1.694317  ...       0.0    1.0

[800 rows x 30 columns]

In [65]:
# Inspect the repaired dataset. In the recorded output the instance weights all stay at
# 1.0 and the visible columns match the reweighted view; any repaired feature values
# would be in the columns elided by "...".
data_orig_trans_dir

               instance weights  features  ...           labels
                                           ...                 
                                      Job  ... Age_61-80       
instance names                             ...                 
0                           1.0  0.129128  ...       0.0    1.0
1                           1.0  0.129128  ...       0.0    1.0
2                           1.0  0.129128  ...       0.0    1.0
3                           1.0  0.129128  ...       0.0    0.0
4                           1.0 -1.436061  ...       0.0    0.0
...                         ...       ...  ...       ...    ...
795                         1.0  0.129128  ...       0.0    1.0
796                         1.0  0.129128  ...       0.0    1.0
797                         1.0  0.129128  ...       0.0    1.0
798                         1.0 -1.436061  ...       0.0    1.0
799                         1.0  1.694317  ...       0.0    1.0

[800 rows x 30 columns]

# Postprocessing

In [99]:
from aif360.metrics import ClassificationMetric

from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing

In [66]:
# Separate the held-out frame into model inputs (everything except 'Risk')
# and the true 'Risk' labels.
X_test, y_test = test.drop(columns='Risk'), test['Risk']

In [70]:
# Raw model outputs for the test features; thresholded into hard labels in the next cell.
# NOTE(review): the name `pred` is reused later for the training-set predictions, so
# re-running the test cells after that point would pick up the wrong array.
pred = model.predict(X_test)

In [81]:
# Turn model outputs into hard 0/1 labels with a 0.5 cut-off. A 1 is a predicted
# Risk == 1, which is the unfavorable class given favorable_classes=[0].
pred = (pred >= 0.5).astype(int)

In [83]:
# Copy of the test frame without the true labels; the predicted labels are written
# into it in the next cell.
test_pred = test.drop(columns='Risk')

In [84]:
# Store the thresholded predictions under 'Risk' so this frame has the same columns as
# `test`, but with predicted instead of true labels.
test_pred['Risk'] = pred

In [88]:
# Ground-truth test set as an AIF360 dataset, with the same label and
# protected-attribute configuration as the training dataset.
data_post_orig = StandardDataset(
    df=test,
    label_name='Risk',
    favorable_classes=[0],
    protected_attribute_names=['Sex_male', 'Age_18-30'],
    privileged_classes=[[0], [1]],
)

In [89]:
# Test set with the model's predicted labels in place of the true 'Risk' column,
# configured identically to the ground-truth dataset so the two can be compared.
data_post_orig_pred = StandardDataset(
    df=test_pred,
    label_name='Risk',
    favorable_classes=[0],
    protected_attribute_names=['Sex_male', 'Age_18-30'],
    privileged_classes=[[0], [1]],
)

In [90]:
# Classifier metrics before post-processing: true test labels vs. the model's predictions.
# NOTE(review): `class_metric` is reassigned further down for the post-processed
# predictions, so the printed values depend on which of the two cells ran last.
class_metric = ClassificationMetric(
    dataset=data_post_orig,
    classified_dataset=data_post_orig_pred,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [91]:
# Overall accuracy of the unmodified model on the test set (recorded run: 0.87).
print('Accuracy ', class_metric.accuracy())  # sanity check

Accuracy  0.87


In [92]:
# NOTE(review): the recorded value (0.794) is identical before and after
# post-processing, which suggests this is computed on the ground-truth `dataset`
# rather than on the predictions; confirm against the AIF360 docs.
print('Similarity of labels for similar instances ', class_metric.consistency())

Similarity of labels for similar instances  [0.794]


In [93]:
# The label text is corrected: disparate impact is the ratio of favorable-outcome
# rates (unprivileged / privileged). 1.0 is parity, and a value below 0.8 is the
# usual warning sign (four-fifths rule), not the target.
print('Disparate impact (ratio of favorable-outcome rates, unprivileged / privileged; 1.0 means parity, values < 0.8 are commonly flagged as adverse impact)', class_metric.disparate_impact())

Disparate impact (ratio of unprivileged with negative outcome compared to privileged with negative outcome, should often be < 0.8) 0.0


In [94]:
# Gap in error rate between the two groups before post-processing (recorded run: -0.423).
print('Difference in error rate for privileged and unprivileged groups ', class_metric.error_rate_difference())

Difference in error rate for privileged and unprivileged groups  -0.42307692307692313


invalid value encountered in double_scalars
invalid value encountered in double_scalars


In [95]:
# NOTE(review): the recorded result is nan, preceded by "invalid value encountered"
# warnings; presumably one group has no actual positives or negatives, making a
# rate 0/0. Confirm the per-group label counts before interpreting this metric.
print('Difference in FPR and TPR for privileged and unprivileged groups ', class_metric.average_abs_odds_difference())

Difference in FPR and TPR for privileged and unprivileged groups  nan


In [98]:
# Mean difference between the groups before post-processing (recorded run: -0.346).
print("Difference in mean outcomes between unprivileged and privileged groups ", class_metric.mean_difference())

Difference in mean outcomes between unprivileged and privileged groups  -0.34615384615384615


In [107]:
# Calibrated equalized-odds post-processor for the same two groups used by the metrics.
# The seed is fixed because the post-processor randomises which predictions it changes;
# without it the post-processed metrics differ from run to run.
eqp = CalibratedEqOddsPostprocessing(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
    seed=42,
)

In [100]:
# Separate the training frame into model inputs (everything except 'Risk')
# and the true 'Risk' labels.
X_train, y_train = train.drop(columns='Risk'), train['Risk']

In [101]:
# Raw model outputs for the training features, used to fit the post-processor.
# NOTE(review): this overwrites `pred`, which previously held the test-set predictions.
pred = model.predict(X_train)

In [102]:
# Same 0.5 cut-off as for the test predictions: hard 0/1 labels, where 1 is a
# predicted Risk == 1.
pred = (pred >= 0.5).astype(int)

In [103]:
# Copy of the training frame without the true labels; the predicted labels are written
# into it in the next cell.
train_pred = train.drop(columns='Risk')

In [104]:
# Store the thresholded training predictions under 'Risk' so this frame has the same
# columns as `train`.
train_pred['Risk'] = pred

In [105]:
# Training set with predicted labels, configured identically to `data_orig` so the
# post-processor can compare true and predicted outcomes per group.
data_orig_pred = StandardDataset(
    df=train_pred,
    label_name='Risk',
    favorable_classes=[0],
    protected_attribute_names=['Sex_male', 'Age_18-30'],
    privileged_classes=[[0], [1]],
)

In [108]:
# Fit the post-processor on the training data: true labels vs. model predictions.
# NOTE(review): `data_orig_pred` was built from thresholded 0/1 predictions (see the
# `pred >= 0.5` cell), so no probability scores are passed here. Confirm that hard
# labels are acceptable input for CalibratedEqOddsPostprocessing.
eqp.fit(data_orig, data_orig_pred)

invalid value encountered in double_scalars
invalid value encountered in double_scalars


<aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing.CalibratedEqOddsPostprocessing at 0x7f490145cc90>

In [110]:
# Apply the fitted post-processor to the model's test-set predictions.
data_post_trans_pred = eqp.predict(data_post_orig_pred)

In [111]:
# Classifier metrics after post-processing: true test labels vs. adjusted predictions.
# NOTE(review): this reuses the name `class_metric` from the pre-processing
# evaluation, so the earlier metric object is no longer reachable after this cell.
class_metric = ClassificationMetric(
    dataset=data_post_orig,
    classified_dataset=data_post_trans_pred,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

In [112]:
# Accuracy after post-processing (recorded run: 0.715, down from 0.87 before).
print('Accuracy ', class_metric.accuracy())  # sanity check

Accuracy  0.715


In [113]:
# NOTE(review): the recorded value (0.794) matches the pre-processing value exactly,
# which suggests this is computed on the ground-truth `dataset` rather than on the
# adjusted predictions; confirm against the AIF360 docs.
print('Similarity of labels for similar instances ', class_metric.consistency())

Similarity of labels for similar instances  [0.794]


In [114]:
# The label text is corrected: disparate impact is the ratio of favorable-outcome
# rates (unprivileged / privileged). 1.0 is parity, and a value below 0.8 is the
# usual warning sign (four-fifths rule), not the target.
print('Disparate impact (ratio of favorable-outcome rates, unprivileged / privileged; 1.0 means parity, values < 0.8 are commonly flagged as adverse impact)', class_metric.disparate_impact())

Disparate impact (ratio of unprivileged with negative outcome compared to privileged with negative outcome, should often be < 0.8) 0.0


In [115]:
# Gap in error rate between the two groups after post-processing. The recorded value
# (-0.423) is unchanged from before, although overall accuracy dropped.
print('Difference in error rate for privileged and unprivileged groups ', class_metric.error_rate_difference())

Difference in error rate for privileged and unprivileged groups  -0.42307692307692313


invalid value encountered in double_scalars
invalid value encountered in double_scalars


In [116]:
# NOTE(review): still nan in the recorded run, with the same "invalid value
# encountered" warnings as before post-processing; presumably a 0/0 rate for one of
# the groups. Confirm the per-group label counts.
print('Difference in FPR and TPR for privileged and unprivileged groups ', class_metric.average_abs_odds_difference())

Difference in FPR and TPR for privileged and unprivileged groups  nan


In [117]:
# Mean difference between the groups after post-processing. The recorded value
# (-0.346) is unchanged from before post-processing.
print("Difference in mean outcomes between unprivileged and privileged groups ", class_metric.mean_difference())

Difference in mean outcomes between unprivileged and privileged groups  -0.34615384615384615
