In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from fairdata import FairData

In [2]:
# Load the COMPAS two-year recidivism table and keep only the columns used below.
df_raw = pd.read_csv('data/COMPAS/compas-scores-two-years.csv')
df_raw = df_raw.loc[:, [
    'sex', 'race', 'age', 'juv_fel_count', 'juv_misd_count',
    'juv_other_count', 'priors_count', 'two_year_recid'
]]
# Restrict the analysis to the three race groups listed here.
# The explicit .copy() makes df_raw an independent frame: later cells assign
# columns on it, which would otherwise raise SettingWithCopyWarning because a
# boolean-mask selection is tracked by pandas as a possible copy of its parent.
df_raw = df_raw.loc[
    df_raw['race'].isin(['African-American', 'Caucasian', 'Hispanic']), :
].copy()

In [3]:
# Replace each categorical column of df_raw with integer codes (in place),
# then carve the frame into the three pieces handed to the model:
#   a - the remaining feature columns (everything except sex, race, target)
#   s - a single combined 'race-sex' code
#   y - the 'two_year_recid' target
categorical = ['sex', 'race']
for feature in categorical:
    le = LabelEncoder().fit(df_raw[feature])
    df_raw[feature] = le.transform(df_raw[feature])

a = df_raw.drop(columns=['sex', 'race', 'two_year_recid'])
# race * 2 + sex gives one code per (race, sex) pair.
# NOTE(review): this is only collision-free if sex encodes to {0, 1} - confirm.
s = (df_raw['race'] * 2 + df_raw['sex']).to_frame(name='race-sex')
y = df_raw[['two_year_recid']].copy()

In [4]:
# Standardise the feature columns, then rotate them with a full-rank PCA.
#
# NOTE(review): both transformers are fitted on the complete data set, before
# the train/test split performed in a later cell, so test rows influence the
# fitted scaling and rotation. Consider fitting on the training rows only.
# NOTE(review): the PCA output is labelled with the original feature names,
# but each column is a principal component, not the feature it is named after.
scaler = StandardScaler()
# Carry the row labels through: `s` and `y` still use df_raw's filtered
# (non-contiguous) index, and rebuilding `a` from a bare ndarray would give it
# a fresh RangeIndex, so any label-based alignment downstream would mismatch.
a = pd.DataFrame(scaler.fit_transform(a), index=a.index, columns=a.columns)
pca = PCA()
a = pd.DataFrame(pca.fit_transform(a), index=a.index, columns=a.columns)
a.head()

Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count
0,-0.596519,0.429056,0.061913,0.000674,0.308648
1,0.962933,1.163002,-0.443991,0.970232,-0.761187
2,0.929383,1.03114,-0.217031,-1.391831,0.936699
3,0.54807,-1.669156,-0.304386,-0.308827,-1.258837
4,-0.686701,0.121587,-0.060447,0.156054,0.494724


In [5]:
# Hold out 25% of the rows as a test set; features (a), sensitive attribute (s)
# and target (y) are split together, with a fixed seed for repeatability.
a_train, a_test, s_train, s_test, y_train, y_test = train_test_split(
    a, s, y, test_size=0.25, random_state=0
)

In [6]:
# Build two FairData objects from the same training split, differing only in
# the preprocessing method flag.
# NOTE(review): 'o' / 'm' presumably select the orthogonalisation and "mdm"
# preprocessing variants suggested by the variable names - confirm in fairdata.
train_inputs = (s_train, a_train, y_train)
fairdata_ortho = FairData(*train_inputs, preprocess_method='o')
fairdata_mdm = FairData(*train_inputs, preprocess_method='m')

In [7]:
# Evaluate the 'm'-preprocessed FairData object on the held-out split with
# p_range=0, b=1, and show the result as a labelled table (rows = metrics).
# NOTE(review): the column labels are hard-coded and assumed to match the
# order in which FairData.evaluate reports its methods - confirm.
metric_names = ('cfb', 'cfbm', 'mae')
fairdata_mdm_eval = fairdata_mdm.evaluate(
    a_test, s_test, y_test, metrics=list(metric_names), p_range=0, b=1
)
pd.DataFrame(
    fairdata_mdm_eval,
    index=[name.upper() for name in metric_names],
    columns=['ML', 'FTU', 'FL', 'AA', 'FLAP-1', 'FLAP-2'],
)

Unnamed: 0,ML,FTU,FL,AA,FLAP-1,FLAP-2
CFB,0.576973,0.504339,0.458728,0.462043,0.770888,0.776318
CFBM,0.576973,0.504339,0.458728,0.462043,0.770888,0.776318
MAE,0.42557,0.427358,0.440161,0.43906,0.439288,0.439338


In [8]:
# Same evaluation of the 'm'-preprocessed FairData object, now with
# p_range=0.025 and b=25; the result overwrites fairdata_mdm_eval.
# NOTE(review): the column labels are hard-coded and assumed to match the
# order in which FairData.evaluate reports its methods - confirm.
metric_names = ('cfb', 'cfbm', 'mae')
fairdata_mdm_eval = fairdata_mdm.evaluate(
    a_test, s_test, y_test, metrics=list(metric_names), p_range=0.025, b=25
)
pd.DataFrame(
    fairdata_mdm_eval,
    index=[name.upper() for name in metric_names],
    columns=['ML', 'FTU', 'FL', 'AA', 'FLAP-1', 'FLAP-2'],
)

Unnamed: 0,ML,FTU,FL,AA,FLAP-1,FLAP-2
CFB,0.911392,0.915102,0.860088,0.871446,0.905296,0.903086
CFBM,0.621006,0.552642,0.451626,0.457825,0.373454,0.383937
MAE,0.42557,0.427358,0.440161,0.43906,0.439288,0.439338


In [9]:
# Same evaluation of the 'm'-preprocessed FairData object, now with
# p_range=0.05 and b=50; the result overwrites fairdata_mdm_eval.
# NOTE(review): the column labels are hard-coded and assumed to match the
# order in which FairData.evaluate reports its methods - confirm.
metric_names = ('cfb', 'cfbm', 'mae')
fairdata_mdm_eval = fairdata_mdm.evaluate(
    a_test, s_test, y_test, metrics=list(metric_names), p_range=0.05, b=50
)
pd.DataFrame(
    fairdata_mdm_eval,
    index=[name.upper() for name in metric_names],
    columns=['ML', 'FTU', 'FL', 'AA', 'FLAP-1', 'FLAP-2'],
)

Unnamed: 0,ML,FTU,FL,AA,FLAP-1,FLAP-2
CFB,0.946242,0.947885,0.916195,0.926402,0.970183,0.969174
CFBM,0.608744,0.589239,0.495641,0.496085,0.401167,0.400746
MAE,0.42557,0.427358,0.440161,0.43906,0.439288,0.439338


In [10]:
# Same evaluation of the 'm'-preprocessed FairData object, now with
# p_range=0.1 and b=100; the result overwrites fairdata_mdm_eval.
# NOTE(review): the column labels are hard-coded and assumed to match the
# order in which FairData.evaluate reports its methods - confirm.
metric_names = ('cfb', 'cfbm', 'mae')
fairdata_mdm_eval = fairdata_mdm.evaluate(
    a_test, s_test, y_test, metrics=list(metric_names), p_range=0.1, b=100
)
pd.DataFrame(
    fairdata_mdm_eval,
    index=[name.upper() for name in metric_names],
    columns=['ML', 'FTU', 'FL', 'AA', 'FLAP-1', 'FLAP-2'],
)

Unnamed: 0,ML,FTU,FL,AA,FLAP-1,FLAP-2
CFB,0.962516,0.96342,0.937074,0.945559,0.97754,0.976625
CFBM,0.691826,0.633226,0.524853,0.534814,0.4484,0.447273
MAE,0.42557,0.427358,0.440161,0.43906,0.439288,0.439338


In [7]:
# Evaluate the 'o'-preprocessed FairData object on the held-out split with
# p_range=0.05, b=50, and show the result as a labelled table (rows = metrics).
# NOTE(review): the column labels are hard-coded and assumed to match the
# order in which FairData.evaluate reports its methods - confirm.
metric_names = ('cfb', 'cfbm', 'mae')
fairdata_ortho_eval = fairdata_ortho.evaluate(
    a_test, s_test, y_test, metrics=list(metric_names), p_range=0.05, b=50
)
pd.DataFrame(
    fairdata_ortho_eval,
    index=[name.upper() for name in metric_names],
    columns=['ML', 'FTU', 'FL', 'AA', 'FLAP-1', 'FLAP-2'],
)

Unnamed: 0,ML,FTU,FL,AA,FLAP-1,FLAP-2
CFB,0.942896,0.947516,0.907288,0.917727,0.916436,0.907288
CFBM,0.68636,0.627237,0.539475,0.54021,0.539615,0.539475
MAE,0.42557,0.427358,0.440161,0.43906,0.439498,0.440161
