# Imbalanced Learning with SMOTE Variants (K-means, Borderline etc.)

In [1]:
'''
If you wish to download these packages into the same virtual environment as aif360, you can use the following commands in 
Jupyter notebook:
!pip install -U imbalanced-learn
!pip install smote_variants
'''

!pip install --upgrade kmodes

Collecting kmodes
  Downloading kmodes-0.11.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: kmodes
Successfully installed kmodes-0.11.0


In [2]:
# Write the list of currently installed packages (as reported by `pip freeze`)
# to requirements.txt in the working directory.
!pip freeze > requirements.txt

In [2]:
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose
'''Because of version conflicts between sklearn and imblearn, I had to import six separately and register it as
sklearn.externals.six (sklearn deprecated its bundled six module by version 0.23, and I am using 0.24).
Imblearn gives a lot of conflicts with sklearn if you install any version lower than 0.8.'''

import numpy as np
import pandas as pd
from imblearn.datasets import fetch_datasets
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, KMeansSMOTE, SVMSMOTE
from imblearn.pipeline import make_pipeline
from imblearn.over_sampling import RandomOverSampler
from imblearn import FunctionSampler  # to use a idendity sampler

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import StructuredDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
#from aif360.metrics.utils import compute_boolean_conditioning_vector

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import Markdown, display

sns.set_context("poster")

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
    import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas

#### Example imbalanced dataset label count

In [3]:
# Fetch the 'oil' benchmark dataset from imbalanced-learn as an example of an
# imbalanced problem and report how many instances each class has.
datasets = fetch_datasets(filter_data=['oil'])
X, y = datasets['oil']['data'], datasets['oil']['target']

# A plain loop (instead of a list comprehension used only for its side
# effects) avoids echoing a meaningless `[None, None]` as the cell output.
for label, count in zip(*np.unique(y, return_counts=True)):
    print('Class {} has {} instances'.format(label, count))

Class -1 has 896 instances
Class 1 has 41 instances


[None, None]

### Using fairness dataset with imbalanced learning

In [4]:
## Select the dataset and the protected attribute to analyse.
## Defines: dataset_orig, privileged_groups, unprivileged_groups.
dataset_used = "adult" # "adult", "german", "compas"
protected_attribute_used = 1 # 1, 2

if dataset_used == "adult":
    # Pre-processed loader (alternative: AdultDataset())
    dataset_orig = load_preproc_data_adult()
    if protected_attribute_used == 1:
        privileged_groups = [{'sex': 1}]
        unprivileged_groups = [{'sex': 0}]
    else:
        privileged_groups = [{'race': 1}]
        unprivileged_groups = [{'race': 0}]

elif dataset_used == "german":
    dataset_orig = GermanDataset()
    if protected_attribute_used == 1:
        privileged_groups = [{'sex': 1}]
        unprivileged_groups = [{'sex': 0}]
    else:
        privileged_groups = [{'age': 1}]
        unprivileged_groups = [{'age': 0}]

    # Recode the labels to {0, 1}: entries equal to 2.0 become 0, everything
    # else becomes 1. The assignment is vectorised and in place, so it covers
    # every row regardless of dataset size (no hard-coded row count) and keeps
    # the existing array and its dtype.
    dataset_orig.labels[:] = np.where(dataset_orig.labels == 2.0, 0, 1)

    dataset_orig.favorable_label = 1
    dataset_orig.unfavorable_label = 0

elif dataset_used == "compas":
    # Pre-processed loader (alternative: CompasDataset())
    dataset_orig = load_preproc_data_compas()
    if protected_attribute_used == 1:
        privileged_groups = [{'sex': 1}]
        unprivileged_groups = [{'sex': 0}]
    else:
        privileged_groups = [{'race': 1}]
        unprivileged_groups = [{'race': 0}]

else:
    # Fail here with a clear message instead of a NameError on `dataset_orig`
    # in a later cell.
    raise ValueError(
        "Unknown dataset_used=%r; expected 'adult', 'german' or 'compas'" % dataset_used
    )


In [5]:
# Inspect the feature (column) names of the loaded dataset.
dataset_orig.feature_names

['race',
 'sex',
 'Age (decade)=10',
 'Age (decade)=20',
 'Age (decade)=30',
 'Age (decade)=40',
 'Age (decade)=50',
 'Age (decade)=60',
 'Age (decade)=>=70',
 'Education Years=6',
 'Education Years=7',
 'Education Years=8',
 'Education Years=9',
 'Education Years=10',
 'Education Years=11',
 'Education Years=12',
 'Education Years=<6',
 'Education Years=>12']

In [6]:
# Disparities already present in the labels of the dataset as loaded,
# before any model is trained or any resampling is applied.
metric_orig = BinaryLabelDatasetMetric(
    dataset_orig,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### %s original training dataset" % dataset_used))

# Each metric is evaluated immediately before its own line is printed.
for template, compute in (
    ("Disparate impact (of original labels) between unprivileged and privileged groups = %f",
     metric_orig.disparate_impact),
    ("Difference in statistical parity (of original labels) between unprivileged and privileged groups = %f",
     metric_orig.statistical_parity_difference),
    ("Individual fairness metric from Zemel et.al. that measures how similar the labels are for similar instances = %f",
     metric_orig.consistency),
):
    print(template % compute())

#### adult original training dataset

Disparate impact (of original labels) between unprivileged and privileged groups = 0.359655
Difference in statistical parity (of original labels) between unprivileged and privileged groups = -0.194516




Individual fairness metric from Zemel et.al. that measures how similar the labels are for similar instances = 0.756660


In [7]:
# Feature matrix and flattened (1-D) label vector of the full dataset.
dataset_X, dataset_y = dataset_orig.features, dataset_orig.labels.ravel()
y_pandas = pd.Series(dataset_y)

# Relative frequency of each class label (class-imbalance ratio)
y_pandas.value_counts(normalize=True)

0.0    0.760718
1.0    0.239282
dtype: float64

In [8]:
# Absolute class counts of the full dataset.
# A plain loop (instead of a list comprehension used only for its side
# effects) avoids echoing a meaningless `[None, None]` as the cell output.
for label, count in zip(*np.unique(dataset_y, return_counts=True)):
    print('Class {} has {} instances'.format(label, count))

Class 0.0 has 37155 instances
Class 1.0 has 11687 instances


[None, None]

In [9]:
# Train-test split: the first 70% of the shuffled instances form the training
# set, the remainder the test set.
# A fixed seed makes the shuffle reproducible, so the split (and every metric
# computed from it further down) is the same after Restart & Run All.
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True, seed=42)

# Feature matrices
X_train = dataset_orig_train.features
X_test = dataset_orig_test.features

# Flattened (1-D) label vectors
y_train = dataset_orig_train.labels.ravel()
y_test = dataset_orig_test.labels.ravel()

### Training classifiers to check the predicted performance with imbalance

In [10]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
# NOTE: from here on `make_pipeline` refers to scikit-learn's version.
from sklearn.pipeline import make_pipeline

# Preprocessing shared by the cross-validated models: standardise the features,
# then mean-impute missing values (adding missing-value indicator columns).
num_pipe = make_pipeline(
    StandardScaler(),
    SimpleImputer(add_indicator=True, strategy="mean"),
)

# Cross-validation scoreboard: `index` holds the row labels, `scores` the columns.
index, scores = [], {"Accuracy": [], "Balanced accuracy": []}

# Test-set scoreboard: `ind` holds the row labels, `results` one list per metric column.
ind = []
results = {
    column: []
    for column in (
        "AEO Difference",
        "Disparate Impact Ratio",
        "Dem Parity Difference",
        "Predictive Parity Difference",
        "Consistency",
        "Accuracy",
        "Balanced accuracy",
        "F1-Score",
    )
}


In [11]:
# Baseline: logistic regression, cross-validated on the full dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

scoring = ["accuracy", "balanced_accuracy"]
lr_clf = make_pipeline(num_pipe, LogisticRegression(max_iter=1000))

index.append("Logistic regression")
cv_result = cross_validate(
    lr_clf, dataset_X, dataset_y, scoring=scoring, return_estimator=True
)
# Store the mean of each score across the folds.
for column, key in (
    ("Accuracy", "test_accuracy"),
    ("Balanced accuracy", "test_balanced_accuracy"),
):
    scores[column].append(cv_result[key].mean())

df_scores = pd.DataFrame(scores, index=index)
df_scores

Unnamed: 0,Accuracy,Balanced accuracy
Logistic regression,0.804042,0.660557


In [12]:
# Show the raw output of the latest cross_validate call (fit/score timings and,
# because return_estimator=True was passed, the fitted estimators).
cv_result

{'fit_time': array([0.21156502, 0.1884532 , 0.2089653 , 0.21659517, 0.23473287]),
 'score_time': array([0.03127265, 0.03241754, 0.02500296, 0.04905057, 0.0469532 ]),
 'estimator': [Pipeline(steps=[('pipeline',
                   Pipeline(steps=[('standardscaler', StandardScaler()),
                                   ('simpleimputer',
                                    SimpleImputer(add_indicator=True))])),
                  ('logisticregression', LogisticRegression(max_iter=1000))]),
  Pipeline(steps=[('pipeline',
                   Pipeline(steps=[('standardscaler', StandardScaler()),
                                   ('simpleimputer',
                                    SimpleImputer(add_indicator=True))])),
                  ('logisticregression', LogisticRegression(max_iter=1000))]),
  Pipeline(steps=[('pipeline',
                   Pipeline(steps=[('standardscaler', StandardScaler()),
                                   ('simpleimputer',
                                    Simple

In [13]:
#Logistic Regression Training for each dataset
log_reg = LogisticRegression() 

#Fitting the training set
log_reg.fit(X_train, y_train)
y_test_pred = log_reg.predict(X_test)

display(Markdown("#### LR predictions of %s Test Set: Fairness Performance Results"%dataset_used))

#Create a new version of the test set with predicted class labels
testset_pred = dataset_orig_test.copy()
testset_pred.labels = y_test_pred

#Construction 1
#to construct this metric function, the predicted labels should be united with the test fetures to make a new datas
metric_pred_test = BinaryLabelDatasetMetric(testset_pred, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
#Construction 2
#both original test dataset with actual labels and the test dataset combined with predicted class labels need to be given to this function
classified_metric = ClassificationMetric(dataset_orig_test, 
                                                 testset_pred,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)


#Checking Equalized Odds: average odds differecence, which is the avg. of differences in FPR&TPR for privileged and unprivileged groups.
aeo = classified_metric.average_odds_difference()

#Disparate Impact ratio between privileged and unprivileged groups.
di = classified_metric.disparate_impact()

#Demographic parity difference between privileged and unprivileged groups.
spd = classified_metric.statistical_parity_difference()

#Predictive parity difference: PPV difference between privileged and unprivileged groups.
ppd = classified_metric.positive_predictive_value(privileged=False) - classified_metric.positive_predictive_value(privileged=True)

#Individual Fairness: 1)Consistency, 2) Euclidean Distance between individuals.
consistency = metric_pred_test.consistency()

TPR = classified_metric.true_positive_rate() #recall
TNR = classified_metric.true_negative_rate() #specificity
PPV = classified_metric.positive_predictive_value() #precision
bal_acc = (TPR+TNR)/2
f1 = 2*((PPV*TPR)/(PPV+TPR))
acc = classified_metric.accuracy()

ind += ["Log reg imb test set"]
results["AEO Difference"].append(aeo)
results["Disparate Impact Ratio"].append(di)
results["Dem Parity Difference"].append(spd)
results["Predictive Parity Difference"].append(ppd)
results["Consistency"].append(consistency)
results["Accuracy"].append(acc)
results["Balanced accuracy"].append(bal_acc)
results["F1-Score"].append(f1)
          
df_results = pd.DataFrame(results, index=ind)
    
display(Markdown("#### LR Prediction Performance on %s Test Set"%dataset_used))

print("For %s dataset"%dataset_used)
print("Precision (PPV): %f" %PPV)
print("Recall (TPR): %f" %TPR)
print("Specificity (TNR): %f" %TNR)

#### LR predictions of adult Test Set: Fairness Performance Results



#### LR Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.650163
Recall (TPR): 0.402196
Specificity (TNR): 0.933077


In [14]:
# Results table accumulated so far (one row per evaluated model).
df_results

Unnamed: 0,AEO Difference,Disparate Impact Ratio,Dem Parity Difference,Predictive Parity Difference,Consistency,Accuracy,Balanced accuracy,F1-Score
Log reg imb test set,-0.288414,0.0,-0.21714,-0.650163,1.0,0.807684,0.667637,0.496965


In [15]:
# Baseline: random forest, cross-validated on the full dataset
from sklearn.ensemble import RandomForestClassifier

rf_clf = make_pipeline(num_pipe, RandomForestClassifier(n_jobs=2, random_state=42))

index.append("Random forest")
cv_result = cross_validate(rf_clf, dataset_X, dataset_y, scoring=scoring)
# Store the mean of each score across the folds.
for column, key in (
    ("Accuracy", "test_accuracy"),
    ("Balanced accuracy", "test_balanced_accuracy"),
):
    scores[column].append(cv_result[key].mean())

df_scores = pd.DataFrame(scores, index=index)
df_scores

Unnamed: 0,Accuracy,Balanced accuracy
Logistic regression,0.804042,0.660557
Random forest,0.803284,0.663226


In [16]:
# Random forest fitted on the (imbalanced) training split, evaluated on the test split
rfc = RandomForestClassifier(n_jobs=2, random_state=42)
rfc.fit(X_train, y_train)
y_test_pred = rfc.predict(X_test)

display(Markdown("#### RFC predictions of %s Test Set: Fairness Performance Results" % dataset_used))

# Copy of the test set whose labels are replaced by the predicted class labels
testset_pred = dataset_orig_test.copy()
testset_pred.labels = y_test_pred

# Metric object 1: built from the test features combined with the predicted labels only
metric_pred_test = BinaryLabelDatasetMetric(
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
# Metric object 2: needs both the test set with actual labels and the copy with predicted labels
classified_metric = ClassificationMetric(
    dataset_orig_test,
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# --- Group fairness ---
# Equalized odds check: average of the FPR and TPR differences between the groups
aeo = classified_metric.average_odds_difference()
# Disparate impact ratio between the groups
di = classified_metric.disparate_impact()
# Demographic (statistical) parity difference between the groups
spd = classified_metric.statistical_parity_difference()
# Predictive parity difference: PPV of the unprivileged group minus PPV of the privileged group
ppd = (
    classified_metric.positive_predictive_value(privileged=False)
    - classified_metric.positive_predictive_value(privileged=True)
)

# --- Individual fairness ---
consistency = metric_pred_test.consistency()

# --- Predictive performance ---
TPR = classified_metric.true_positive_rate()  # recall
TNR = classified_metric.true_negative_rate()  # specificity
PPV = classified_metric.positive_predictive_value()  # precision
bal_acc = (TPR + TNR) / 2
f1 = 2 * ((PPV * TPR) / (PPV + TPR))
acc = classified_metric.accuracy()

# Append one row to the running results table
ind.append("Random F imb test set")
row = {
    "AEO Difference": aeo,
    "Disparate Impact Ratio": di,
    "Dem Parity Difference": spd,
    "Predictive Parity Difference": ppd,
    "Consistency": consistency,
    "Accuracy": acc,
    "Balanced accuracy": bal_acc,
    "F1-Score": f1,
}
for column, value in row.items():
    results[column].append(value)

df_results = pd.DataFrame(results, index=ind)

display(Markdown("#### RF Prediction Performance on %s Test Set" % dataset_used))

print("For %s dataset" % dataset_used)
for template, value in (
    ("Precision (PPV): %f", PPV),
    ("Recall (TPR): %f", TPR),
    ("Specificity (TNR): %f", TNR),
):
    print(template % value)

#### RFC predictions of adult Test Set: Fairness Performance Results



#### RF Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.628918
Recall (TPR): 0.434845
Specificity (TNR): 0.920658


In [17]:
# Results table accumulated so far (one row per evaluated model).
df_results

Unnamed: 0,AEO Difference,Disparate Impact Ratio,Dem Parity Difference,Predictive Parity Difference,Consistency,Accuracy,Balanced accuracy,F1-Score
Log reg imb test set,-0.288414,0.0,-0.21714,-0.650163,1.0,0.807684,0.667637,0.496965
Random F imb test set,-0.317505,0.0,-0.242698,-0.628918,0.999877,0.80591,0.677752,0.514178


In [18]:
# Re-run the logistic-regression pipeline with balanced class weights.
# Note: this reconfigures the existing `lr_clf` pipeline object in place.
lr_clf.set_params(**{"logisticregression__class_weight": "balanced"})

index.append("Logistic regression with balanced class weights")
cv_result = cross_validate(lr_clf, dataset_X, dataset_y, scoring=scoring)
# Store the mean of each score across the folds.
for column, key in (
    ("Accuracy", "test_accuracy"),
    ("Balanced accuracy", "test_balanced_accuracy"),
):
    scores[column].append(cv_result[key].mean())

df_scores = pd.DataFrame(scores, index=index)
df_scores

Unnamed: 0,Accuracy,Balanced accuracy
Logistic regression,0.804042,0.660557
Random forest,0.803284,0.663226
Logistic regression with balanced class weights,0.731993,0.744375


In [19]:
# Class-weighted logistic regression fitted on the training split, evaluated on the test split
log_reg_b = LogisticRegression(class_weight='balanced')
log_reg_b.fit(X_train, y_train)
y_test_pred = log_reg_b.predict(X_test)

display(Markdown("#### Weighted LR predictions of %s Test Set: Fairness Performance Results" % dataset_used))

# Copy of the test set whose labels are replaced by the predicted class labels
testset_pred = dataset_orig_test.copy()
testset_pred.labels = y_test_pred

# Metric object 1: built from the test features combined with the predicted labels only
metric_pred_test = BinaryLabelDatasetMetric(
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
# Metric object 2: needs both the test set with actual labels and the copy with predicted labels
classified_metric = ClassificationMetric(
    dataset_orig_test,
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# --- Group fairness ---
# Equalized odds check: average of the FPR and TPR differences between the groups
aeo = classified_metric.average_odds_difference()
# Disparate impact ratio between the groups
di = classified_metric.disparate_impact()
# Demographic (statistical) parity difference between the groups
spd = classified_metric.statistical_parity_difference()
# Predictive parity difference: PPV of the unprivileged group minus PPV of the privileged group
ppd = (
    classified_metric.positive_predictive_value(privileged=False)
    - classified_metric.positive_predictive_value(privileged=True)
)

# --- Individual fairness ---
consistency = metric_pred_test.consistency()

# --- Predictive performance ---
TPR = classified_metric.true_positive_rate()  # recall
TNR = classified_metric.true_negative_rate()  # specificity
PPV = classified_metric.positive_predictive_value()  # precision
bal_acc = (TPR + TNR) / 2
f1 = 2 * ((PPV * TPR) / (PPV + TPR))
acc = classified_metric.accuracy()

# Append one row to the running results table
ind.append("Weighted LR imb test set")
row = {
    "AEO Difference": aeo,
    "Disparate Impact Ratio": di,
    "Dem Parity Difference": spd,
    "Predictive Parity Difference": ppd,
    "Consistency": consistency,
    "Accuracy": acc,
    "Balanced accuracy": bal_acc,
    "F1-Score": f1,
}
for column, value in row.items():
    results[column].append(value)

df_results = pd.DataFrame(results, index=ind)

display(Markdown("#### Weighted LR Prediction Performance on %s Test Set" % dataset_used))

print("For %s dataset" % dataset_used)
for template, value in (
    ("Precision (PPV): %f", PPV),
    ("Recall (TPR): %f", TPR),
    ("Specificity (TNR): %f", TNR),
):
    print(template % value)

#### Weighted LR predictions of adult Test Set: Fairness Performance Results



#### Weighted LR Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.457665
Recall (TPR): 0.774632
Specificity (TNR): 0.716137


In [20]:
# Re-run the random-forest pipeline with balanced class weights.
# Note: this reconfigures the existing `rf_clf` pipeline object in place.
rf_clf.set_params(**{"randomforestclassifier__class_weight": "balanced"})

index.append("Random forest with balanced class weights")
cv_result = cross_validate(rf_clf, dataset_X, dataset_y, scoring=scoring)
# Store the mean of each score across the folds.
for column, key in (
    ("Accuracy", "test_accuracy"),
    ("Balanced accuracy", "test_balanced_accuracy"),
):
    scores[column].append(cv_result[key].mean())

df_scores = pd.DataFrame(scores, index=index)
df_scores

Unnamed: 0,Accuracy,Balanced accuracy
Logistic regression,0.804042,0.660557
Random forest,0.803284,0.663226
Logistic regression with balanced class weights,0.731993,0.744375
Random forest with balanced class weights,0.728042,0.74339


In [21]:
# Class-weighted random forest fitted on the training split, evaluated on the test split
rfc_b = RandomForestClassifier(class_weight='balanced', n_jobs=2, random_state=42)
rfc_b.fit(X_train, y_train)
y_test_pred = rfc_b.predict(X_test)

display(Markdown("#### Weighted RF predictions of %s Test Set: Fairness Performance Results" % dataset_used))

# Copy of the test set whose labels are replaced by the predicted class labels
testset_pred = dataset_orig_test.copy()
testset_pred.labels = y_test_pred

# Metric object 1: built from the test features combined with the predicted labels only
metric_pred_test = BinaryLabelDatasetMetric(
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
# Metric object 2: needs both the test set with actual labels and the copy with predicted labels
classified_metric = ClassificationMetric(
    dataset_orig_test,
    testset_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# --- Group fairness ---
# Equalized odds check: average of the FPR and TPR differences between the groups
aeo = classified_metric.average_odds_difference()
# Disparate impact ratio between the groups
di = classified_metric.disparate_impact()
# Demographic (statistical) parity difference between the groups
spd = classified_metric.statistical_parity_difference()
# Predictive parity difference: PPV of the unprivileged group minus PPV of the privileged group
ppd = (
    classified_metric.positive_predictive_value(privileged=False)
    - classified_metric.positive_predictive_value(privileged=True)
)

# --- Individual fairness ---
consistency = metric_pred_test.consistency()

# --- Predictive performance ---
TPR = classified_metric.true_positive_rate()  # recall
TNR = classified_metric.true_negative_rate()  # specificity
PPV = classified_metric.positive_predictive_value()  # precision
bal_acc = (TPR + TNR) / 2
f1 = 2 * ((PPV * TPR) / (PPV + TPR))
acc = classified_metric.accuracy()

# Append one row to the running results table
ind.append("Weighted RF imb test set")
row = {
    "AEO Difference": aeo,
    "Disparate Impact Ratio": di,
    "Dem Parity Difference": spd,
    "Predictive Parity Difference": ppd,
    "Consistency": consistency,
    "Accuracy": acc,
    "Balanced accuracy": bal_acc,
    "F1-Score": f1,
}
for column, value in row.items():
    results[column].append(value)

df_results = pd.DataFrame(results, index=ind)

display(Markdown("#### Weighted RF Prediction Performance on %s Test Set" % dataset_used))

print("For %s dataset" % dataset_used)
for template, value in (
    ("Precision (PPV): %f", PPV),
    ("Recall (TPR): %f", TPR),
    ("Specificity (TNR): %f", TNR),
):
    print(template % value)

#### Weighted RF predictions of adult Test Set: Fairness Performance Results



#### Weighted RF Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.453183
Recall (TPR): 0.777521
Specificity (TNR): 0.709882


In [22]:
# Results table accumulated so far (one row per evaluated model).
df_results

Unnamed: 0,AEO Difference,Disparate Impact Ratio,Dem Parity Difference,Predictive Parity Difference,Consistency,Accuracy,Balanced accuracy,F1-Score
Log reg imb test set,-0.288414,0.0,-0.21714,-0.650163,1.0,0.807684,0.667637,0.496965
Random F imb test set,-0.317505,0.0,-0.242698,-0.628918,0.999877,0.80591,0.677752,0.514178
Weighted LR imb test set,-0.337787,0.270157,-0.383279,-0.161733,0.999481,0.729953,0.745384,0.575384
Weighted RF imb test set,-0.309959,0.302852,-0.365967,-0.16911,0.999563,0.725858,0.743702,0.572614


In [23]:
# Number of positive-outcome (label == 1) instances in the training split,
# used to gauge the class imbalance. Counted with a vectorised comparison
# instead of a Python-level loop with a no-op `else` branch.
k = int(np.count_nonzero(y_train == 1))
print(k)

8226


In [24]:
# Oversampling is applied to the training split only: it must happen AFTER the
# train/test split, never before it.
# Author's note: the sampler could not provide cluster balance with a
# threshold of 0.5, hence cluster_balance_threshold=0.30.
sm = KMeansSMOTE(cluster_balance_threshold=0.30, k_neighbors=2, random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

# Class counts after resampling
for label, count in zip(*np.unique(y_train_res, return_counts=True)):
    print('Class {} has {} instances'.format(label, count))

# Show the sampler configuration that was actually used
sm.get_params()

Class 0.0 has 25963 instances
Class 1.0 has 25965 instances


{'cluster_balance_threshold': 0.3,
 'density_exponent': 'auto',
 'k_neighbors': 2,
 'kmeans_estimator': None,
 'n_jobs': None,
 'random_state': 42,
 'sampling_strategy': 'auto'}

In [25]:
# Wrap the resampled feature matrix in a DataFrame carrying the original
# feature names, then append the resampled labels as a 'labels' column.
X_res_p = pd.DataFrame(X_train_res)
X_res_pd = pd.DataFrame(X_res_p.to_numpy(), columns=dataset_orig.feature_names)
X_res_pd['labels'] = y_train_res

feature_names = X_res_pd.columns
print(feature_names)

# Class proportions after oversampling
y_res_p = pd.DataFrame(y_train_res)
y_res_p.value_counts(normalize=True)

Index(['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30',
       'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60',
       'Age (decade)=>=70', 'Education Years=6', 'Education Years=7',
       'Education Years=8', 'Education Years=9', 'Education Years=10',
       'Education Years=11', 'Education Years=12', 'Education Years=<6',
       'Education Years=>12', 'labels'],
      dtype='object')


1.0    0.500019
0.0    0.499981
dtype: float64

In [26]:
# Transform the oversampled training data into AIF360 dataset objects.
#
# The protected attribute name(s) are derived from the group definitions
# chosen when the dataset was loaded instead of being hard-coded to 'sex'.
# The metrics computed on `aif_binary` condition on `privileged_groups` /
# `unprivileged_groups`, so the two must agree when the analysis is switched
# to another protected attribute (e.g. 'race' or 'age').
# NOTE(review): SMOTE interpolates feature values, so synthetic rows may hold
# non-binary values in the protected column -- confirm the group conditions
# (== 0 / == 1) still select the rows you expect.
protected_names = sorted(
    {name for group in privileged_groups + unprivileged_groups for name in group}
)

aif_data = StructuredDataset(df=X_res_pd, label_names=['labels'], protected_attribute_names=protected_names)

print(type(aif_data))

aif_binary = BinaryLabelDataset(df=X_res_pd, label_names=['labels'], protected_attribute_names=protected_names)
print(type(aif_binary))
print(aif_binary.protected_attribute_names)

<class 'aif360.datasets.structured_dataset.StructuredDataset'>
<class 'aif360.datasets.binary_label_dataset.BinaryLabelDataset'>
['sex']


In [27]:
# Display the BinaryLabelDataset built from the oversampled training data.
aif_binary

               instance weights features                                      \
                                         protected attribute                   
                                    race                 sex Age (decade)=10   
instance names                                                                 
0                           1.0      1.0                 1.0             0.0   
1                           1.0      1.0                 1.0             0.0   
2                           1.0      0.0                 1.0             0.0   
3                           1.0      1.0                 1.0             0.0   
4                           1.0      1.0                 1.0             0.0   
...                         ...      ...                 ...             ...   
51923                       1.0      1.0                 1.0             0.0   
51924                       1.0      1.0                 1.0             0.0   
51925                       1.0      1.0

In [28]:
# Disparities present in the labels of the oversampled training dataset,
# before any model is trained on it.
metric_transf = BinaryLabelDatasetMetric(
    aif_binary,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### Transformed %s train set" % dataset_used))

# Each metric is evaluated immediately before its own line is printed.
for template, compute in (
    ("Disparate impact ratio (of transformed labels) between unprivileged and privileged groups = %f",
     metric_transf.disparate_impact),
    ("Difference in statistical parity (of transformed labels) between unprivileged and privileged groups = %f",
     metric_transf.statistical_parity_difference),
    ("Individual fairness metric 'consistency' that measures how similar the labels are for similar instances = %f",
     metric_transf.consistency),
):
    print(template % compute())

#### Transformed adult train set

Disparate impact ratio (of transformed labels) between unprivileged and privileged groups = 0.810589
Difference in statistical parity (of transformed labels) between unprivileged and privileged groups = -0.101305




Individual fairness metric 'consistency' that measures how similar the labels are for similar instances = 0.697254


In [29]:
# Training inputs taken from the oversampled AIF360 dataset:
# feature matrix and flattened (1-D) label vector.
X_transf_train, y_transf_train = aif_binary.features, aif_binary.labels.ravel()

In [30]:
# Logistic regression fitted on the oversampled training data, evaluated on the original test split
log_reg_t = LogisticRegression()
log_reg_t.fit(X_transf_train, y_transf_train)
y_transf_test_pred = log_reg_t.predict(X_test)

display(Markdown("#### LR predictions of %s Test Set: Fairness Performance Results" % dataset_used))

# Copy of the test set whose labels are replaced by the predicted class labels
testset_transf_pred = dataset_orig_test.copy()
testset_transf_pred.labels = y_transf_test_pred

# Metric object 1: built from the test features combined with the predicted labels only
metric_pred_test = BinaryLabelDatasetMetric(
    testset_transf_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
# Metric object 2: needs both the test set with actual labels and the copy with predicted labels
classified_metric = ClassificationMetric(
    dataset_orig_test,
    testset_transf_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# --- Group fairness ---
# Equalized odds check: average of the FPR and TPR differences between the groups
aeo = classified_metric.average_odds_difference()
# Disparate impact ratio between the groups
di = classified_metric.disparate_impact()
# Demographic (statistical) parity difference between the groups
spd = classified_metric.statistical_parity_difference()
# Predictive parity difference: PPV of the unprivileged group minus PPV of the privileged group
ppd = (
    classified_metric.positive_predictive_value(privileged=False)
    - classified_metric.positive_predictive_value(privileged=True)
)

# --- Individual fairness ---
consistency = metric_pred_test.consistency()

# --- Predictive performance ---
TPR = classified_metric.true_positive_rate()  # recall
TNR = classified_metric.true_negative_rate()  # specificity
PPV = classified_metric.positive_predictive_value()  # precision
bal_acc = (TPR + TNR) / 2
f1 = 2 * ((PPV * TPR) / (PPV + TPR))
acc = classified_metric.accuracy()

# Append one row to the running results table
ind.append("Log reg (oversampled) test set")
row = {
    "AEO Difference": aeo,
    "Disparate Impact Ratio": di,
    "Dem Parity Difference": spd,
    "Predictive Parity Difference": ppd,
    "Consistency": consistency,
    "Accuracy": acc,
    "Balanced accuracy": bal_acc,
    "F1-Score": f1,
}
for column, value in row.items():
    results[column].append(value)

df_results = pd.DataFrame(results, index=ind)

display(Markdown("#### LR Prediction Performance on %s Test Set" % dataset_used))

print("For %s dataset" % dataset_used)
for template, value in (
    ("Precision (PPV): %f", PPV),
    ("Recall (TPR): %f", TPR),
    ("Specificity (TNR): %f", TNR),
):
    print(template % value)

#### LR predictions of adult Test Set: Fairness Performance Results



#### LR Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.553329
Recall (TPR): 0.518636
Specificity (TNR): 0.870533


In [33]:
# Training the oversampled dataset with random forest
# (the classifier construction used to be fused into the comment line above,
# so `rfc_t` was never defined in this cell and a fresh kernel raised NameError)
rfc_t = RandomForestClassifier(random_state=42, n_jobs=2)
rfc_t.fit(X_transf_train, y_transf_train)
y_test_transf_pred = rfc_t.predict(X_test)

display(Markdown("#### RFC predictions of %s Test Set: Fairness Performance Results" % dataset_used))

# Copy of the test set whose labels are replaced by the predicted class labels
testset_transf_pred = dataset_orig_test.copy()
testset_transf_pred.labels = y_test_transf_pred

# Metric object 1: built from the test features combined with the predicted labels only
metric_pred_test = BinaryLabelDatasetMetric(
    testset_transf_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
# Metric object 2: needs both the test set with actual labels and the copy with predicted labels
classified_metric = ClassificationMetric(
    dataset_orig_test,
    testset_transf_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# --- Group fairness ---
# Equalized odds check: average of the FPR and TPR differences between the groups
aeo = classified_metric.average_odds_difference()
# Disparate impact ratio between the groups
di = classified_metric.disparate_impact()
# Demographic (statistical) parity difference between the groups
spd = classified_metric.statistical_parity_difference()
# Predictive parity difference: PPV of the unprivileged group minus PPV of the privileged group
ppd = (
    classified_metric.positive_predictive_value(privileged=False)
    - classified_metric.positive_predictive_value(privileged=True)
)

# --- Individual fairness ---
consistency = metric_pred_test.consistency()

# --- Predictive performance ---
TPR = classified_metric.true_positive_rate()  # recall
TNR = classified_metric.true_negative_rate()  # specificity
PPV = classified_metric.positive_predictive_value()  # precision
bal_acc = (TPR + TNR) / 2
f1 = 2 * ((PPV * TPR) / (PPV + TPR))
acc = classified_metric.accuracy()

# Append one row to the running results table
ind.append("Random F (oversampled) test set")
row = {
    "AEO Difference": aeo,
    "Disparate Impact Ratio": di,
    "Dem Parity Difference": spd,
    "Predictive Parity Difference": ppd,
    "Consistency": consistency,
    "Accuracy": acc,
    "Balanced accuracy": bal_acc,
    "F1-Score": f1,
}
for column, value in row.items():
    results[column].append(value)

df_results = pd.DataFrame(results, index=ind)

display(Markdown("#### RF Prediction Performance on %s Test Set" % dataset_used))

print("For %s dataset" % dataset_used)
for template, value in (
    ("Precision (PPV): %f", PPV),
    ("Recall (TPR): %f", TPR),
    ("Specificity (TNR): %f", TNR),
):
    print(template % value)

#### RFC predictions of adult Test Set: Fairness Performance Results



#### RF Prediction Performance on adult Test Set

For adult dataset
Precision (PPV): 0.551077
Recall (TPR): 0.517480
Specificity (TNR): 0.869639


In [34]:
# Final results table: one row per evaluated model (imbalanced, class-weighted, oversampled).
df_results

Unnamed: 0,AEO Difference,Disparate Impact Ratio,Dem Parity Difference,Predictive Parity Difference,Consistency,Accuracy,Balanced accuracy,F1-Score
Log reg imb test set,-0.288414,0.0,-0.21714,-0.650163,1.0,0.807684,0.667637,0.496965
Random F imb test set,-0.317505,0.0,-0.242698,-0.628918,0.999877,0.80591,0.677752,0.514178
Weighted LR imb test set,-0.337787,0.270157,-0.383279,-0.161733,0.999481,0.729953,0.745384,0.575384
Weighted RF imb test set,-0.309959,0.302852,-0.365967,-0.16911,0.999563,0.725858,0.743702,0.572614
Log reg (oversampled) test set,-0.040338,0.605822,-0.100184,-0.319864,0.999959,0.787416,0.694584,0.535421
Random F (oversampled) test set,-0.040392,0.604375,-0.100792,-0.316776,0.999864,0.78646,0.69356,0.533751
