In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import roc_curve

# Column-name templates; the placeholder is filled with the `sep` value that
# read_csv receives.
truth_col = "Y_truth_%s"

# One score column per model. Every template shares the "y_pred_%d_" prefix;
# read_csv strips that prefix to obtain the per-model dictionary key.
pred_col = ["y_pred_%d_" + model for model in ("lgb", "rf", "log_reg", "l1")]

def read_csv(sep):
    """Load one test-set export and binarize each model's scores.

    Reads ``./Output/test_impute_{sep}.csv``, takes the ground-truth column
    named by ``truth_col`` and the score columns named by ``pred_col``, and
    turns every score column into 0/1 labels using the ROC threshold that
    maximizes ``tpr - fpr`` (Youden's J statistic).

    Parameters
    ----------
    sep : int
        Value substituted into the file name and into the column-name
        templates (the score templates use ``%d``, so it must be numeric).

    Returns
    -------
    df : pandas.DataFrame
        The loaded frame without the truth and score columns.
    y_true : pandas.Series
        The ground-truth column.
    y_pred : dict
        Maps each template's model suffix (the text after ``"y_pred_%d_"``)
        to a float array of 0.0/1.0 labels.
    """
    df = pd.read_csv(f"./Output/test_impute_{sep}.csv")

    truth_name = truth_col % sep
    score_names = [col % sep for col in pred_col]
    # Length of the shared template prefix; everything after it is the model key.
    prefix_len = len("y_pred_%d_")

    y_true = df[truth_name]
    y_pred = {}
    for col, score_name in zip(pred_col, score_names):
        y = df[score_name]
        fpr, tpr, thresholds = roc_curve(y_true, y)
        threshold = thresholds[np.argmax(tpr - fpr)]
        # roc_curve reports tpr[i]/fpr[i] for the decision rule
        # `score >= thresholds[i]`, so the comparison has to be inclusive;
        # a strict `>` would drop the samples sitting exactly on the chosen
        # cutoff and land on a different (non-optimal) operating point.
        y_pred[col[prefix_len:]] = np.array(y >= threshold, dtype=float)

    # Return only the remaining columns; drop without mutating in place.
    df = df.drop(columns=[truth_name, *score_names])

    return df, y_true, y_pred

In [2]:
from sklearn.metrics import roc_curve
from fairlearn import metrics
from fairlearn.reductions import DemographicParity

import warnings
# Blanket filter: from this point on every warning category is ignored.
warnings.filterwarnings('ignore')

# Per-group rate functions handed to MetricFrame in `cal`; the keys become the
# labels of the resulting metrics.
metricsDi = dict([
    ("demographic_parity", metrics.selection_rate),
    ('equalized_odds(FPR)', metrics.false_positive_rate),
    ('equalized_odds(TPR)', metrics.true_positive_rate),
])

# Scalar disparity functions, called in `cal` with
# (y_true, y_pred, sensitive_features); grouped by the fairness notion
# they belong to.
metricsFunc = (
    [metrics.demographic_parity_difference, metrics.demographic_parity_ratio]
    + [metrics.equalized_odds_difference, metrics.equalized_odds_ratio]
)

def cal(y_test, y_test_pred, f):
    """Compute fairness metrics for one set of predictions.

    Parameters
    ----------
    y_test :
        Ground-truth labels.
    y_test_pred :
        Predicted labels to evaluate.
    f :
        Sensitive-feature values, passed as ``sensitive_features``.

    Returns
    -------
    tuple
        ``(overall, by_group, disparities)`` where ``overall`` and
        ``by_group`` are the attributes of a ``MetricFrame`` built from
        ``metricsDi``, and ``disparities`` is a ``pd.Series`` holding the
        result of every function in ``metricsFunc``, indexed by the
        function's ``__name__``.
    """
    # The same three keyword arguments feed both MetricFrame and the
    # disparity functions.
    shared = dict(y_true=y_test, y_pred=y_test_pred, sensitive_features=f)

    frame = metrics.MetricFrame(metrics=metricsDi, **shared)
    overall = frame.overall
    by_group = frame.by_group

    disparities = {}
    for func in metricsFunc:
        disparities[func.__name__] = func(**shared)

    return overall, by_group, pd.Series(disparities)

In [4]:
# For every data split and every sensitive feature, evaluate all models and
# write one Excel workbook (`output_{sep}_{featureName}.xlsx`) with four sheets.
for sep in [30, 60, 90]:
    X_test, true_data, pred_data = read_csv(sep)

    # Cast the sensitive attributes to str so the group labels are plain strings.
    payer_type = [str(x) for x in X_test['payer_type']]
    patient_race = [str(x) for x in X_test['patient_race']]
    for featureName, f in {'payer_type': payer_type, 'patient_race': patient_race}.items():
        di1 = {}  # model name -> MetricFrame.overall
        di2 = {}  # model name -> MetricFrame.by_group
        di3 = {}  # model name -> Series of disparity differences / ratios
        di4 = {}  # model name -> DataFrame of DemographicParity gamma values

        # The constraint objects depend only on the data and the sensitive
        # feature, not on the model, so they are built and loaded once per
        # feature instead of once per model. Fresh instances are still
        # created for each feature because load_data is called on each
        # instance exactly once.
        dp1 = DemographicParity(difference_bound=0.05)
        dp1.load_data(X_test, true_data, sensitive_features=f)

        dp2 = DemographicParity(ratio_bound=0.9, ratio_bound_slack=0.01)
        dp2.load_data(X_test, true_data, sensitive_features=f)

        for k, v in pred_data.items():
            di1[k], di2[k], di3[k] = cal(true_data, v, f)

            # gamma() expects a predictor callable; the lambda ignores X and
            # returns this model's precomputed labels. It is invoked right
            # away, so capturing the loop variable `v` is safe here.
            di4[k] = pd.DataFrame(
                {'DemographicParity_difference_bound': dp1.gamma(lambda X: v),
                 'DemographicParity_ratio_bound': dp2.gamma(lambda X: v)})

        overall = pd.DataFrame(di1)
        by_group = pd.concat(di2, axis=1)
        diff_ration = pd.DataFrame(di3)
        parity = pd.concat(di4, axis=1)

        with pd.ExcelWriter(f'output_{sep}_{featureName}.xlsx') as writer:
            overall.to_excel(writer, sheet_name='overall')
            by_group.to_excel(writer, sheet_name='by_group')
            diff_ration.to_excel(writer, sheet_name='diff_ration')
            parity.to_excel(writer, sheet_name='parity')
