In [17]:
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix
from sklearn.ensemble import AdaBoostClassifier
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from AdaFair import AdaFair
import sys

sys.path.append('../')

from loaders.load_adult import load_adult
from loaders.load_bank import load_bank
from loaders.load_compas_data import load_compas

## Metrics

In [3]:
def dFNR(y_true, y_pred, X, sa_index, sa_label):
    """Signed difference in false-negative rate between the two groups.

    Rows are split on ``X[:, sa_index] == sa_label``. Inside each group only
    the samples whose true label equals ``1`` are considered, and the
    fraction of those that were predicted incorrectly is computed.

    Parameters
    ----------
    y_true : array of true labels; positives are the entries equal to 1.
    y_pred : array of predicted labels, aligned with ``y_true``.
    X : 2-D feature array, indexed as ``X[:, sa_index]``.
    sa_index : column of ``X`` holding the sensitive attribute.
    sa_label : value of that column that identifies the first group.

    Returns
    -------
    The miss rate on positives of the ``sa_label`` group minus the miss rate
    on positives of all remaining rows (not an absolute value).

    Notes
    -----
    There is no guard for a group without positive samples; the division is
    then 0 / 0.
    """
    is_positive = y_true == 1
    sa_pos = (X[:, sa_index] == sa_label) & is_positive
    nonsa_pos = (X[:, sa_index] != sa_label) & is_positive

    # Only the positive-label masks are needed for a false-negative rate.
    fnr_sa = np.sum(y_pred[sa_pos] != y_true[sa_pos]) / np.sum(sa_pos)
    fnr_nonsa = np.sum(y_pred[nonsa_pos] != y_true[nonsa_pos]) / np.sum(nonsa_pos)
    return fnr_sa - fnr_nonsa

In [4]:
def dFPR(y_true, y_pred, X, sa_index, sa_label):
    """Signed difference in false-positive rate between the two groups.

    Rows are split on ``X[:, sa_index] == sa_label``. Inside each group only
    the samples whose true label equals ``-1`` are considered, and the
    fraction of those that were predicted incorrectly is computed.

    Parameters
    ----------
    y_true : array of true labels; negatives are the entries equal to -1.
    y_pred : array of predicted labels, aligned with ``y_true``.
    X : 2-D feature array, indexed as ``X[:, sa_index]``.
    sa_index : column of ``X`` holding the sensitive attribute.
    sa_label : value of that column that identifies the first group.

    Returns
    -------
    The error rate on negatives of the ``sa_label`` group minus the error
    rate on negatives of all remaining rows (not an absolute value).

    Notes
    -----
    There is no guard for a group without negative samples; the division is
    then 0 / 0.
    """
    is_negative = y_true == -1
    sa_neg = (X[:, sa_index] == sa_label) & is_negative
    nonsa_neg = (X[:, sa_index] != sa_label) & is_negative

    # Only the negative-label masks are needed for a false-positive rate.
    fpr_sa = np.sum(y_pred[sa_neg] != y_true[sa_neg]) / np.sum(sa_neg)
    fpr_nonsa = np.sum(y_pred[nonsa_neg] != y_true[nonsa_neg]) / np.sum(nonsa_neg)
    return fpr_sa - fpr_nonsa

In [5]:
def TPR(y_true, y_pred, X, sa_index, sa_label, agg):
    """True-positive rate restricted to one sensitive-attribute group.

    Parameters
    ----------
    y_true : array of true labels; positives are the entries equal to 1.
    y_pred : array of predicted labels, aligned with ``y_true``.
    X : 2-D feature array, indexed as ``X[:, sa_index]``.
    sa_index : column of ``X`` holding the sensitive attribute.
    sa_label : reference value of the sensitive attribute.
    agg : which group to score.
        ``'prot'``     -> rows where ``X[:, sa_index] != sa_label``
        ``'non-prot'`` -> rows where ``X[:, sa_index] == sa_label``
        (i.e. ``sa_label`` marks the non-protected group here).

    Returns
    -------
    Fraction of the selected group's positive samples that were predicted
    correctly. No guard for a group without positives (0 / 0).

    Raises
    ------
    ValueError
        If ``agg`` is not one of the two supported values. Previously such a
        call fell through every branch and silently returned ``None``.
    """
    if agg == 'prot':
        in_group = X[:, sa_index] != sa_label
    elif agg == 'non-prot':
        in_group = X[:, sa_index] == sa_label
    else:
        raise ValueError(f"agg must be 'prot' or 'non-prot', got {agg!r}")

    group_pos = in_group & (y_true == 1)
    return np.sum(y_pred[group_pos] == y_true[group_pos]) / np.sum(group_pos)

In [6]:
def TNR(y_true, y_pred, X, sa_index, sa_label, agg='diff'):
    """True-negative rate per sensitive-attribute group, or their difference.

    Parameters
    ----------
    y_true : array of true labels; negatives are the entries equal to -1.
    y_pred : array of predicted labels, aligned with ``y_true``.
    X : 2-D feature array, indexed as ``X[:, sa_index]``.
    sa_index : column of ``X`` holding the sensitive attribute.
    sa_label : reference value of the sensitive attribute.
    agg : what to return.
        ``'prot'``     -> rate over rows where ``X[:, sa_index] != sa_label``
        ``'non-prot'`` -> rate over rows where ``X[:, sa_index] == sa_label``
        ``'diff'``     -> rate of the ``== sa_label`` rows minus the rate of
                          the ``!= sa_label`` rows (signed).

    Returns
    -------
    Fraction of the selected negatives predicted correctly, or the signed
    difference of the two fractions. No guard for a group without negatives
    (0 / 0).

    Raises
    ------
    ValueError
        If ``agg`` is none of the supported values.

    Notes
    -----
    The default ``agg='diff'`` used to match no branch, so a call relying on
    the default returned ``None``. NOTE(review): the subtraction order for
    'diff' follows the "sa_label group minus the rest" ordering of
    dFPR/dFNR; confirm this is the intended sign.
    """
    is_negative = y_true == -1

    def _rate(in_group):
        # Share of this group's true negatives that were predicted correctly.
        group_neg = in_group & is_negative
        return np.sum(y_pred[group_neg] == y_true[group_neg]) / np.sum(group_neg)

    if agg == 'prot':
        return _rate(X[:, sa_index] != sa_label)
    if agg == 'non-prot':
        return _rate(X[:, sa_index] == sa_label)
    if agg == 'diff':
        return _rate(X[:, sa_index] == sa_label) - _rate(X[:, sa_index] != sa_label)
    raise ValueError(f"agg must be 'prot', 'non-prot' or 'diff', got {agg!r}")

In [7]:
def model_evaluate(y_true, y_pred, X, sa_index, sa_label):
    """Collect accuracy and group-wise fairness scores for one set of predictions.

    Returns a dict whose keys, in insertion order, are 'Accuracy',
    'Bal. Acc.', 'Eq.Odds', 'TPR Prot', 'TPR Non-Prot', 'TNR Prot' and
    'TNR Non-Prot'. 'Eq.Odds' is |dFPR| + |dFNR|; the TPR/TNR entries come
    from the helpers of the same name with agg='prot' / agg='non-prot'.
    """
    # Positional arguments shared by every group-aware helper.
    group_args = (y_true, y_pred, X, sa_index, sa_label)

    metrics = {}
    metrics['Accuracy'] = accuracy_score(y_true, y_pred)
    metrics['Bal. Acc.'] = balanced_accuracy_score(y_true, y_pred)
    metrics['Eq.Odds'] = abs(dFPR(*group_args)) + abs(dFNR(*group_args))
    metrics['TPR Prot'] = TPR(*group_args, agg='prot')
    metrics['TPR Non-Prot'] = TPR(*group_args, agg='non-prot')
    metrics['TNR Prot'] = TNR(*group_args, agg='prot')
    metrics['TNR Non-Prot'] = TNR(*group_args, agg='non-prot')
    return metrics

# Adult

In [8]:
# Load the Adult dataset through the project loader, passing "sex" as its argument
# (presumably the sensitive attribute to use — confirm in loaders/load_adult.py).
# p_Group and x_control are returned by the loader but not used by the visible cells.
X, y, sa_index, p_Group, x_control = load_adult("sex")
# Reference value of column sa_index: rows equal to it are treated as the
# non-protected group by the masks and metric helpers in this notebook.
sa_label = 1

Counter({"' Male'": 30495, "' Female'": 14680})


In [9]:
# Boolean row masks over X: rows whose sensitive-attribute column equals sa_label
# are labelled non-protected, every other row protected.
# The active (uncommented) code of the validation cell below does not read them.
nonprot = X[:, sa_index]==sa_label
prot = X[:, sa_index]!=sa_label

## Validation

In [10]:
# Metric names, in the order they are recorded for every model and split.
# They are also the keys of the dict returned by model_evaluate.
names = ['Accuracy', 'Bal. Acc.', 'Eq.Odds', 'TPR Prot', 'TPR Non-Prot', 'TNR Prot', 'TNR Non-Prot']
# Long-format results: one (name, value, model) triple per metric, model and split.
metrics = {'name':[], 'value':[], 'model':[]}

for i in range(10):
    # Plain 50/50 random split (no per-group stratification).
    # Seeding with the iteration index gives ten different splits that are
    # identical on every re-run of the notebook.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=True, random_state=i)

    adaboost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, random_state=i)
    # AdaFair is left unseeded: its constructor signature is not visible here.
    adafair = AdaFair(sa_index=sa_index, sa_label=sa_label)

    for model_name, model in (('AdaBoost', adaboost), ('AdaFair', adafair)):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # model_evaluate computes exactly the seven scores listed in `names`;
        # indexing by name keeps the recorded order and fails loudly on drift.
        scores = model_evaluate(y_test, y_pred, X_test, sa_index, sa_label)
        for name in names:
            metrics['name'].append(name)
            metrics['value'].append(scores[name])
            metrics['model'].append(model_name)

In [12]:
# Dump the collected metrics dict as its plain-text str() form.
# The context manager closes the file even if the write raises.
with open('metrics_adult.txt','w') as f:
    f.write(str(metrics))

# Bank

In [14]:
# Load the Bank dataset through the project loader (called without arguments).
# p_Group and x_control are returned by the loader but not used by the visible cells.
X, y, sa_index, p_Group, x_control = load_bank()
# Reference value of column sa_index: rows equal to it are treated as the
# non-protected group by the masks and metric helpers in this notebook.
sa_label=1

Counter({'married': 27214, 'single': 12790})


In [15]:
# Boolean row masks over X: rows whose sensitive-attribute column equals sa_label
# are labelled non-protected, every other row protected.
# The active (uncommented) code of the validation cell below does not read them.
nonprot = X[:, sa_index]==sa_label
prot = X[:, sa_index]!=sa_label

## Validation

In [16]:
# Metric names, in the order they are recorded for every model and split.
# They are also the keys of the dict returned by model_evaluate.
names = ['Accuracy', 'Bal. Acc.', 'Eq.Odds', 'TPR Prot', 'TPR Non-Prot', 'TNR Prot', 'TNR Non-Prot']
# Long-format results: one (name, value, model) triple per metric, model and split.
metrics = {'name':[], 'value':[], 'model':[]}

for i in range(10):
    # Plain 50/50 random split (no per-group stratification).
    # Seeding with the iteration index gives ten different splits that are
    # identical on every re-run of the notebook.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=True, random_state=i)

    adaboost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, random_state=i)
    # AdaFair is left unseeded: its constructor signature is not visible here.
    adafair = AdaFair(sa_index=sa_index, sa_label=sa_label)

    for model_name, model in (('AdaBoost', adaboost), ('AdaFair', adafair)):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # model_evaluate computes exactly the seven scores listed in `names`;
        # indexing by name keeps the recorded order and fails loudly on drift.
        scores = model_evaluate(y_test, y_pred, X_test, sa_index, sa_label)
        for name in names:
            metrics['name'].append(name)
            metrics['value'].append(scores[name])
            metrics['model'].append(model_name)

In [17]:
# Dump the collected metrics dict as its plain-text str() form.
# The context manager closes the file even if the write raises.
with open('metrics_bank.txt','w') as f:
    f.write(str(metrics))

# COMPAS

In [18]:
# Load the COMPAS dataset through the project loader, passing "sex" as its argument
# (presumably the sensitive attribute to use — confirm in loaders/load_compas_data.py).
# p_Group and x_control are returned by the loader but not used by the visible cells.
X, y, sa_index, p_Group, x_control = load_compas("sex")
# Reference value of column sa_index: rows equal to it are treated as the
# non-protected group by the masks and metric helpers in this notebook.
sa_label=1

Counter({'Male': 4247, 'Female': 1031})
Features we will be using for classification are: ['age_cat_25 - 45', 'age_cat_Greater than 45', 'age_cat_Less than 25', 'race', 'sex', 'priors_count', 'c_charge_degree', 'target'] 



In [19]:
# Boolean row masks over X: rows whose sensitive-attribute column equals sa_label
# are labelled non-protected, every other row protected.
# The active (uncommented) code of the validation cell below does not read them.
nonprot = X[:, sa_index]==sa_label
prot = X[:, sa_index]!=sa_label

## Validation

In [23]:
# Metric names, in the order they are recorded for every model and split.
# They are also the keys of the dict returned by model_evaluate.
names = ['Accuracy', 'Bal. Acc.', 'Eq.Odds', 'TPR Prot', 'TPR Non-Prot', 'TNR Prot', 'TNR Non-Prot']
# Long-format results: one (name, value, model) triple per metric, model and split.
metrics = {'name':[], 'value':[], 'model':[]}

for i in range(10):
    # Plain 50/50 random split (no per-group stratification).
    # Seeding with the iteration index gives ten different splits that are
    # identical on every re-run of the notebook.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=True, random_state=i)

    adaboost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200, random_state=i)
    # AdaFair is left unseeded: its constructor signature is not visible here.
    adafair = AdaFair(sa_index=sa_index, sa_label=sa_label)

    for model_name, model in (('AdaBoost', adaboost), ('AdaFair', adafair)):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # model_evaluate computes exactly the seven scores listed in `names`;
        # indexing by name keeps the recorded order and fails loudly on drift.
        scores = model_evaluate(y_test, y_pred, X_test, sa_index, sa_label)
        for name in names:
            metrics['name'].append(name)
            metrics['value'].append(scores[name])
            metrics['model'].append(model_name)

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

In [24]:
# Dump the collected metrics dict as its plain-text str() form.
# The context manager closes the file even if the write raises.
with open('metrics_compass.txt','w') as f:
    f.write(str(metrics))