### Company X Biased Predictor Analysis

#### Imports

In [412]:
#Standard
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

#AIF360 Dataset Classes 
from aif360.datasets import StandardDataset
from aif360.datasets import BinaryLabelDataset

#AIF360 Metrics Classes  
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
from aif360.metrics import ClassificationMetric

#AIF360 Algorithms 
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.inprocessing import AdversarialDebiasing
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
# AI360 Explainers
from aif360.explainers import MetricTextExplainer

#Sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV,train_test_split

#Tensorflow -- install version before 2.0
import tensorflow as tf 

#For Markdown
from IPython.display import Markdown, display

#Helper Functions
from src.classifier_functions import *

#### Load Data


In [335]:
data = pd.read_csv('../company_x.csv', index_col='employee_id')

In [336]:
data.head()


Unnamed: 0_level_0,signing_bonus,salary,degree_level,sex,yrs_experience,dept,is_manager,direct_reports,boss_id,total_reports,company_level
employee_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
138719,0,273000.0,Master,M,2,engineering,False,0.0,43602,0,0
3192,0,301000.0,Bachelor,F,1,sales,False,0.0,87847,0,0
114657,0,261000.0,Master,F,2,sales,False,0.0,180854,0,0
29039,0,86000.0,High_School,F,4,HR,False,0.0,88370,0,0
118607,0,126000.0,Bachelor,F,3,sales,False,0.0,23565,0,0


In [337]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10000 entries, 138719 to 72227
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   signing_bonus   10000 non-null  int64  
 1   salary          10000 non-null  float64
 2   degree_level    10000 non-null  object 
 3   sex             10000 non-null  object 
 4   yrs_experience  10000 non-null  int64  
 5   dept            10000 non-null  object 
 6   is_manager      10000 non-null  bool   
 7   direct_reports  10000 non-null  float64
 8   boss_id         10000 non-null  int64  
 9   total_reports   10000 non-null  int64  
 10  company_level   10000 non-null  int64  
dtypes: bool(1), float64(2), int64(5), object(3)
memory usage: 869.1+ KB


In [338]:
## Create Label 
data_with_label = data.copy()
data_with_label['salary'] = data_with_label['salary'].transform(lambda x: x > 200000).astype(int)
data_with_label['sex'] = data_with_label['sex'].transform(lambda x: x == 'M').astype(int)
data_with_label.head()

Unnamed: 0_level_0,signing_bonus,salary,degree_level,sex,yrs_experience,dept,is_manager,direct_reports,boss_id,total_reports,company_level
employee_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
138719,0,1,Master,1,2,engineering,False,0.0,43602,0,0
3192,0,1,Bachelor,0,1,sales,False,0.0,87847,0,0
114657,0,1,Master,0,2,sales,False,0.0,180854,0,0
29039,0,0,High_School,0,4,HR,False,0.0,88370,0,0
118607,0,0,Bachelor,0,3,sales,False,0.0,23565,0,0


In [339]:
### Create StandardDataset

std_data = StandardDataset(df=data_with_label,   
                         label_name='salary',
                         favorable_classes =[1],
                        protected_attribute_names=['sex'], 
                         privileged_classes=[[1]],
                        categorical_features=['degree_level', 'dept'], 
                          features_to_drop=['boss_id', 'is_manager'])



In [340]:
df_data = std_data.convert_to_dataframe()
binary_data = BinaryLabelDataset(favorable_label=1, unfavorable_label=0, df=df_data[0], label_names=['salary'],
              protected_attribute_names=['sex'])

privileged_groups= [{'sex':1}]
unprivileged_groups= [{'sex': 0}]

In [341]:
## Splitting the Data
data_train, data_vt = binary_data.split([0.7], shuffle=True)

data_val, data_test = data_vt.split([0.5], shuffle=True)

In [342]:
### First Metrics
metric_train = BinaryLabelDatasetMetric(data_train, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)
#Explainer 
ex_metric_train = MetricTextExplainer(metric_train)

print(ex_metric_train.mean_difference())
print('\n')
print(ex_metric_train.disparate_impact())

Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.1415906427075146


Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.7114938286109229


### Random Forest Classifier 

In [343]:
#Creating X,y
X_train = data_train.features
y_train = data_train.labels.ravel()
X_test = data_val.features
y_test = data_val.labels.ravel()

In [431]:
# Creating Random Forest Classifier
rfc = RandomForestClassifier()
#rf_random = RandomizedSearchCV(estimator = rfc, 
                               param_distributions = random_grid, 
                               n_iter = 100, cv = 5, verbose=5, 
                               random_state=42, n_jobs = -1)

In [432]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 5)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(3,10,7)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}
print(random_grid)

{'n_estimators': [200, 650, 1100, 1550, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [3, 4, 5, 6, 7, 8, 10, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}


In [433]:
# Fit the random search modelrfc.fit(X_train, y_train)
#rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   53.5s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed: 19.4min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed: 25.4min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed: 27.6min finished


RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators='warn',
                                                    n_jobs=None

In [438]:
rf_random.best_params_

{'n_estimators': 650,
 'min_samples_split': 10,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': 5}

In [441]:
rfc = RandomForestClassifier(n_estimators=600, 
                             min_samples_split=10,
                             min_samples_leaf=1, 
                             max_features='sqrt',
                            max_depth =5)

In [442]:
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=5, max_features='sqrt', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=600,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [443]:

#Creating Predicted Dataframe
y_pred = rfc.predict(X_test)
data_val_pred = data_val.copy()
data_val_pred.labels = y_pred.ravel()

In [444]:
#Prep dataset to test out Aequitas 
# df = data_val.convert_to_dataframe()[0]
# df.head()
# aeq_df = pd.DataFrame()
# aeq_df['score'] = y_pred
# aeq_df['label_value'] = df['salary'].values
# aeq_df['sex'] = df['sex'].values
# aeq_df['sex'] = aeq_df['sex'].transform(lambda x: 'Male' if x == 1 else 'Female') 
# aeq_df.to_csv('../aeq.csv', index=False)
# aeq_df.head()

In [445]:
rfc.score(X_test, y_test)

0.6706666666666666

In [446]:
rfc.predict_proba(X_test)[:10]

array([[0.57417926, 0.42582074],
       [0.89741028, 0.10258972],
       [0.53269292, 0.46730708],
       [0.36710488, 0.63289512],
       [0.36780655, 0.63219345],
       [0.41904331, 0.58095669],
       [0.57663863, 0.42336137],
       [0.53109794, 0.46890206],
       [0.57417926, 0.42582074],
       [0.53128872, 0.46871128]])

In [447]:
#Getting the Metrics 
metric_preds = BinaryLabelDatasetMetric(data_val_pred, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)

ex_metric_preds = MetricTextExplainer(metric_preds)

print(ex_metric_preds.mean_difference())
print('\n')
print(ex_metric_preds.disparate_impact())


model_metric = ClassificationMetric(data_val, data_val_pred, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)

ex_model_metric= MetricTextExplainer(model_metric)
print('\n')
print(ex_model_metric.recall())
print('\n')
print(ex_model_metric.precision())
print('\n')
print(ex_model_metric.average_odds_difference())
print('\n')
print(ex_model_metric.equal_opportunity_difference())
print('\n')
print(ex_model_metric.theil_index())

Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.11742472165991902


Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.6547154017857143


True positive rate (TPR, recall, sensitivity = TP / (TP + FN)): 0.46540880503144655


Positive predictive value (PPV, precision = TP / (TP + FP)): 0.6577777777777778


Average odds difference (average of TPR difference and FPR difference, 0 = equality of odds): -0.10906119633597297


True positive rate difference (true positive rate on unprivileged instances - true positive rate on privileged instances): -0.10986115333941421


Theil index (generalized entropy index with alpha = 1): 0.29486204315416514


In [448]:
scorecard(y_test, y_pred)

The Accuracy score is 0.671.

The Precision score is 0.658.

The Recall score is 0.465.

      Confusion Matrix


Unnamed: 0,Actual True,Actual False
Predicted True,296,154
Predicted False,340,710


In [207]:
data_val.labels.shape

(1500, 1)

In [208]:
data_val_pred.labels.shape

(1500,)

## Debiasing through Preprocessing 

#### Reweighing

In [351]:
RW = Reweighing(unprivileged_groups=unprivileged_groups, 
               privileged_groups=privileged_groups)

#Splitting the Data
data_rw_train = RW.fit_transform(data_train)

data_rw_val = RW.fit_transform(data_val)

In [406]:
### Reweigh Metrics
metric_rw_train = BinaryLabelDatasetMetric(data_rw_train, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)
#Explainer 
ex_metric_rw_train = MetricTextExplainer(metric_rw_train)

print(ex_metric_rw_train.mean_difference())
print('\n')
print(ex_metric_rw_train.disparate_impact())

Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -5.551115123125783e-17


Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.9999999999999999


### Random Forest Classifier with Reweighing 

In [407]:
#Creating X,y
X_rw_train = data_rw_train.features
y_rw_train = data_rw_train.labels.ravel()
X_rw_test = data_rw_val.features
y_rw_test = data_rw_val.labels.ravel()

In [408]:
# Creating Random Forest Classifier
rfc_rw = RandomForestClassifier(n_estimators=1000, max_depth=5)
rfc_rw.fit(X_rw_train, y_rw_train, sample_weight=data_rw_train.instance_weights)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=5, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [409]:
#Creating Predicted Dataframe
y_rw_pred = rfc_rw.predict(X_test)
data_val_rw_pred = data_rw_val.copy()
data_val_rw_pred.labels = y_rw_pred

In [410]:
#Getting the Metrics 
display(Markdown("#### Model - with reweighing - dataset metrics"))


metric_rw_preds = BinaryLabelDatasetMetric(data_val_rw_pred, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)

ex_metric_rw_preds = MetricTextExplainer(metric_rw_preds)

print(ex_metric_rw_preds.mean_difference())
print('\n')
print(ex_metric_rw_preds.disparate_impact())

display(Markdown("#### Model - with reweighing - classification metrics"))
model_rw_metric = ClassificationMetric(data_rw_val, data_val_rw_pred, 
                            unprivileged_groups=unprivileged_groups, 
                            privileged_groups=privileged_groups)

ex_model_rw_metric= MetricTextExplainer(model_rw_metric)

print(ex_model_rw_metric.recall())
print(ex_model_rw_metric.precision())
print(ex_model_rw_metric.average_odds_difference())
print(ex_model_rw_metric.equal_opportunity_difference())
print(ex_model_rw_metric.theil_index())
print(ex_model_rw_metric.accuracy(privileged=True))
print(ex_model_rw_metric.accuracy(privileged=False))

#### Model - with reweighing - dataset metrics

Mean difference (mean label value on privileged instances - mean label value on unprivileged instances): -0.07808464862761144


Disparate impact (probability of favorable outcome for unprivileged instances / probability of favorable outcome for privileged instances): 0.7649101450933115


#### Model - with reweighing - classification metrics

True positive rate (TPR, recall, sensitivity = TP / (TP + FN)): 0.47225179329527156
Positive predictive value (PPV, precision = TP / (TP + FP)): 0.6554432149997058
Average odds difference (average of TPR difference and FPR difference, 0 = equality of odds): -0.07625539126475658
True positive rate difference (true positive rate on unprivileged instances - true positive rate on privileged instances): -0.0642208033512382
Theil index (generalized entropy index with alpha = 1): 0.290954929988672
Classification accuracy on privileged instances: 0.6629101750962215
Classification accuracy on unprivileged instances: 0.6865355824819831


In [411]:
scorecard(y_rw_test, y_rw_pred)

The Accuracy score is 0.672.

The Precision score is 0.657.

The Recall score is 0.473.

      Confusion Matrix


Unnamed: 0,Actual True,Actual False
Predicted True,301,157
Predicted False,335,707


### Inprocessing with Adversarial Debiaising



In [360]:
sess.close()
tf.reset_default_graph()
sess = tf.Session()

In [361]:
##Creating Tensorflow Session - must not use Tensorflow 2.0 
sess = tf.Session()
db_model = AdversarialDebiasing(privileged_groups = privileged_groups,
                          unprivileged_groups = unprivileged_groups,
                          scope_name='debiased_classifier',
                            num_epochs  = 100,
                            batch_size = 100,
                            adversary_loss_weight = .1,     
                            debias=True,
                          sess=sess)

In [362]:
db_model.fit(data_train) 

epoch 0; iter: 0; batch classifier loss: 0.751847; batch adversarial loss: 0.680444
epoch 1; iter: 0; batch classifier loss: 0.647049; batch adversarial loss: 0.659144
epoch 2; iter: 0; batch classifier loss: 0.622300; batch adversarial loss: 0.672310
epoch 3; iter: 0; batch classifier loss: 0.569649; batch adversarial loss: 0.667673
epoch 4; iter: 0; batch classifier loss: 0.608396; batch adversarial loss: 0.652698
epoch 5; iter: 0; batch classifier loss: 0.844618; batch adversarial loss: 0.655446
epoch 6; iter: 0; batch classifier loss: 0.464263; batch adversarial loss: 0.658076
epoch 7; iter: 0; batch classifier loss: 0.550507; batch adversarial loss: 0.689431
epoch 8; iter: 0; batch classifier loss: 0.575595; batch adversarial loss: 0.656772
epoch 9; iter: 0; batch classifier loss: 0.577544; batch adversarial loss: 0.620880
epoch 10; iter: 0; batch classifier loss: 0.574099; batch adversarial loss: 0.624721
epoch 11; iter: 0; batch classifier loss: 0.596727; batch adversarial loss:

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x1503aa780>

In [363]:
dataset_debiasing_train = db_model.predict(data_train)
dataset_debiasing_test = db_model.predict(data_val)

In [364]:

# Metrics for the dataset from model with debiasing
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_train.mean_difference())

metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_test.mean_difference())



display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test = ClassificationMetric(data_val, 
                                                 dataset_debiasing_test,
                                                    unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_debiasing_test.accuracy())
TPR = classified_metric_debiasing_test.true_positive_rate()
TNR = classified_metric_debiasing_test.true_negative_rate()
bal_acc_debiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_debiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_debiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_debiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_debiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_debiasing_test.theil_index())
                                                        
                                                        

#### Model - with debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = 0.139711
Test set: Difference in mean outcomes between unprivileged and privileged groups = 0.163841


#### Model - with debiasing - classification metrics

Test set: Classification accuracy = 0.670000
Test set: Balanced classification accuracy = 0.662092
Test set: Disparate impact = 1.445937
Test set: Equal opportunity difference = 0.277288
Test set: Average odds difference = 0.188830
Test set: Theil_index = 0.229096


In [370]:
scorecard(data_val.labels, dataset_debiasing_test.labels)

The Accuracy score is 0.670.

The Precision score is 0.611.

The Recall score is 0.610.

      Confusion Matrix


Unnamed: 0,Actual True,Actual False
Predicted True,388,247
Predicted False,248,617


### Postprocessing with Equalized Odds Postprocessing

In [459]:

# Learn parameters to equalize odds and apply to create a new dataset
cpp = CalibratedEqOddsPostprocessing(privileged_groups = privileged_groups,
                                     unprivileged_groups = unprivileged_groups,
                                     cost_constraint='fpr')
cpp = cpp.fit_predict(data_val, data_val_pred)

In [460]:
cpp_metric = ClassificationMetric(data_val, cpp,
                             unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)

In [461]:
print(cpp_metric.generalized_false_positive_rate())

0.0


In [462]:
print(cpp_metric.generalized_false_negative_rate())

0.0


In [458]:
scorecard(data_val.labels, cpp.labels)

The Accuracy score is 1.000.

The Precision score is 1.000.

The Recall score is 1.000.

      Confusion Matrix


Unnamed: 0,Actual True,Actual False
Predicted True,636,0
Predicted False,0,864
