In [2]:
from autosklearn.regression import AutoSklearnRegressor
from sklearn import preprocessing, metrics, model_selection
import pandas as pd
import numpy as np

In [16]:
# Sensitive features: one-hot race indicator columns
sensitive = [
    'African-American',
    'Asian',
    'Caucasian',
    'Hispanic',
    'Native American',
    'Other Race'
]

# Full demographic / criminal-history feature set. Not used by the models
# below: it was previously assigned to `features` and then immediately
# overwritten, so it is kept under its own name instead of as dead code.
full_features = [
    'male',
    'age',
    'juv_fel_count',
    'juv_misd_count',
    'juv_other_count',
    'priors_count',
    'felony',
    *sensitive,
    'Divorced',
    'Married',
    'Separated',
    'Significant Other',
    'Single',
    'Unknown Relationship',
    'Widowed',
    'supervision_High',
    'supervision_Low',
    'supervision_Medium',
    'supervision_Medium with Override Consideration',
    'two_year_recid'
]

# Features used in recidivism prediction: race indicators, the raw score,
# and the target column (dropped from X before fitting).
features = sensitive + ['RawScore', 'two_year_recid']

In [17]:
# Import raw data, keeping only the 'Risk of Recidivism' rows.
# .copy() detaches the filtered frame from the one read from disk so the
# column assignments below operate on a real frame rather than a slice
# (avoids pandas' SettingWithCopyWarning).
raw_df = pd.read_csv('compas/compas-scores-raw.csv')
raw_df = raw_df[raw_df['DisplayText'] == 'Risk of Recidivism'].copy()
# Normalise the join keys.
# NOTE(review): presumably the two-year file stores lower-case names — confirm.
raw_df['LastName'] = raw_df['LastName'].str.lower()
raw_df['FirstName'] = raw_df['FirstName'].str.lower()
raw_df['DateOfBirth'] = pd.to_datetime(raw_df['DateOfBirth'])

# Import two-year recidivism data (includes criminal history)
twoyear_df = pd.read_csv('compas/compas-scores-two-years.csv')
twoyear_df['dob'] = pd.to_datetime(twoyear_df['dob'])

# Preprocessing: join the two sources on (last name, first name, date of birth).
# NOTE(review): this is pd.merge's default inner join; if either file holds
# several rows for the same person, every matching pair is kept — verify that
# the merged frame has no unintended duplicates.
df = pd.merge(
    raw_df,
    twoyear_df,
    left_on=['LastName', 'FirstName', 'DateOfBirth'],
    right_on=['last', 'first', 'dob']
)
df['male'] = df['sex'] == 'Male'
df['felony'] = df['c_charge_degree'] == 'F'
# One-hot encode the categorical columns; ambiguous labels are renamed so the
# resulting column names match the names used in the feature lists.
race_dummies = pd.get_dummies(df['race']).rename(columns={'Other':'Other Race'})
relationship_dummies = pd.get_dummies(df['MaritalStatus']).rename(columns={'Unknown':'Unknown Relationship'})
supervision_dummies = pd.get_dummies(df['RecSupervisionLevelText'], prefix='supervision')
df = pd.concat((df, race_dummies, relationship_dummies, supervision_dummies), axis=1)
# Keep only the modelling columns (including the target)
df = df[features]

In [18]:
# Split into train and test.
# random_state is fixed so that Restart & Run All reproduces the same split;
# the value 1 mirrors the seed=1 shown in the fitted AutoSklearnRegressor repr.
X = df.drop('two_year_recid', axis=1)
y = df['two_year_recid']
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=1
)

# Remove race
X_train_rm = X_train.drop(sensitive, axis=1)
X_test_rm = X_test.drop(sensitive, axis=1)

# Blind X_test: overwrite every sensitive column with its test-set mean so
# all rows carry the same race values at prediction time.
# NOTE(review): the means come from X_test, not X_train — confirm intended.
X_test_blind = X_test.copy()
X_sensitive_means = X_test[sensitive].mean()
for column, column_mean in X_sensitive_means.items():
    X_test_blind[column] = column_mean

In [19]:
# Time limits shared by both AutoML searches, passed as constructor keywords
automl_budget = {
    'time_left_for_this_task': 60,
    'per_run_time_limit': 30,
}

# Full feature benchmark: trained on every column, race indicators included
benchmark_cls = AutoSklearnRegressor(**automl_budget)
benchmark_cls.fit(X_train, y_train)

# Removal benchmark: trained with the sensitive columns dropped
removal_cls = AutoSklearnRegressor(**automl_budget)
removal_cls.fit(X_train_rm, y_train)









AutoSklearnRegressor(delete_output_folder_after_terminate=True,
                     delete_tmp_folder_after_terminate=True,
                     disable_evaluator_output=False, ensemble_memory_limit=1024,
                     ensemble_nbest=50, ensemble_size=50,
                     exclude_estimators=None, exclude_preprocessors=None,
                     get_smac_object_callback=None, include_estimators=None,
                     include_preprocessors=None,
                     initial_configurations_via_metalearning=25,
                     logging_config=None, metadata_directory=None,
                     ml_memory_limit=3072, n_jobs=None, output_folder=None,
                     per_run_time_limit=30, resampling_strategy='holdout',
                     resampling_strategy_arguments=None, seed=1,
                     shared_mode=False, smac_scenario_args=None,
                     time_left_for_this_task=60, tmp_folder=None)

In [25]:
# Predict test sample: threshold each regressor's output into a boolean
# prediction for the three setups (full features, race removed, race blinded).
test_df = pd.concat((X_test, y_test), axis=1)
test_df['two_year_recid_ff'] = benchmark_cls.predict(X_test)>.5
test_df['two_year_recid_rm'] = removal_cls.predict(X_test_rm)>.5
# NOTE(review): the blinded variant uses a .42 cutoff while the other two use
# .5 — confirm this is intentional and document the reason.
test_df['two_year_recid_blind'] = benchmark_cls.predict(X_test_blind)>.42

# Report the overall positive rate per setup, then per sensitive group:
# positive rate, normalised confusion matrix and accuracy.
for sfx in ['ff', 'rm', 'blind']:
    print('\n'+sfx)
    var = 'two_year_recid_'+sfx
    print(test_df[var].mean())
    for s in sensitive:
        print(s)
        in_group = test_df[s]==1
        # Some groups are tiny, so a split can leave none of them in the test
        # set; the metrics below are undefined for an empty sample.
        if not in_group.any():
            print('no test rows in this group')
            continue
        pred = test_df.loc[in_group, var]
        true = y_test[in_group]
        print(pred.mean())
        # labels=[0, 1] keeps the matrix 2x2 even when a group contains only
        # one class (assumes two_year_recid is coded 0/1 — TODO confirm).
        print(metrics.confusion_matrix(
            true, pred.astype(int), labels=[0, 1], normalize='all'
        ))
        print(metrics.accuracy_score(true, pred))


ff
0.512453300124533
African-American
0.6363636363636364
[[0.2020202  0.20763187]
 [0.16161616 0.42873176]]
0.6307519640852974
Asian
0.25
[[0.375 0.   ]
 [0.375 0.25 ]]
0.625
Caucasian
0.41739130434782606
[[0.33913043 0.15869565]
 [0.24347826 0.25869565]]
0.5978260869565217
Hispanic
0.3096774193548387
[[0.49032258 0.10967742]
 [0.2        0.2       ]]
0.6903225806451613
Native American
0.3333333333333333
[[0.66666667 0.        ]
 [0.         0.33333333]]
1.0
Other Race
0.14606741573033707
[[0.59550562 0.04494382]
 [0.25842697 0.1011236 ]]
0.6966292134831461

rm
0.537359900373599
African-American
0.6509539842873177
[[0.19416386 0.21548822]
 [0.15488215 0.43546577]]
0.6296296296296297
Asian
0.5
[[0.375 0.   ]
 [0.125 0.5  ]]
0.875
Caucasian
0.45217391304347826
[[0.32391304 0.17391304]
 [0.22391304 0.27826087]]
0.6021739130434782
Hispanic
0.3096774193548387
[[0.49032258 0.10967742]
 [0.2        0.2       ]]
0.6903225806451613
Native American
0.3333333333333333
[[0.66666667 0.        ]
 [

In [None]:
# TODO: look at scipy's Wasserstein distance: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wasserstein_distance.html
# TODO: clean up this notebook
# TODO: modify the Wasserstein distance to use it as a discrimination metric (scipy source: https://github.com/scipy/scipy/blob/v1.4.1/scipy/stats/stats.py#L6934-L7008)
# TODO: try LIME and SHAP

# Next step: move straight to two-year recidivism predictions.
# Hypothesis: COMPAS takes into account things that correlate with race (e.g. priors) but aren't predictive of future risk.
# When priors are in the model, race no longer predicts the COMPAS score.

# COMPAS discriminates against people with prior criminal