In [None]:
%run preprocessing.ipynb

import math
from pathlib import Path

import matplotlib.patches as mpatches
import seaborn as sns
# Plot with sklearn
from sklearn.metrics import roc_curve
from sklearn.metrics import RocCurveDisplay

In [None]:
# Manual ROC plotting adapted from https://towardsdatascience.com/interpreting-roc-curve-and-roc-auc-for-classification-evaluation-28ec3983f077
def calculate_tpr_fpr(y_real, y_pred):
    """Compute the true positive rate and false positive rate of binary predictions.

    Parameters
    ----------
    y_real : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels (the caller in this notebook passes a boolean mask).

    Returns
    -------
    tuple
        ``(tpr, fpr)`` where ``tpr = TP / (TP + FN)`` (sensitivity) and
        ``fpr = 1 - TN / (TN + FP)`` (1 - specificity). A rate whose
        denominator is zero is undefined and is returned as NaN.
    """
    # NOTE(review): `confusion_matrix` is not defined in this notebook; it is
    # presumably sklearn.metrics.confusion_matrix brought in by
    # preprocessing.ipynb - confirm.
    cm = confusion_matrix(y_real, y_pred)
    if cm.shape == (1, 1):
        # Only one class occurs across y_real and y_pred, so the matrix
        # collapsed to 1x1 and cm[0, 1] would raise IndexError. Rebuild it
        # with both binary classes spelled out so the 2x2 layout is kept.
        cm = confusion_matrix(y_real, y_pred, labels=[0, 1])

    # Rows are the true class, columns the predicted class.
    TN, FP = cm[0, 0], cm[0, 1]
    FN, TP = cm[1, 0], cm[1, 1]

    positives = TP + FN
    negatives = TN + FP
    undefined = float("nan")

    # Guard the divisions explicitly: same NaN result as 0/0, without the
    # RuntimeWarning numpy would emit.
    tpr = TP / positives if positives else undefined  # sensitivity - true positive rate
    fpr = 1 - TN / negatives if negatives else undefined  # 1-specificity - false positive rate

    return tpr, fpr

def get_n_roc_coordinates(y_real, y_proba, n = 99):
    """Collect ROC coordinates by sweeping ``n`` evenly spaced thresholds.

    The thresholds are ``0/n, 1/n, ..., (n-1)/n`` in ascending order. For each
    one, column 1 of ``y_proba`` is compared with ``>`` against the threshold
    and the resulting mask is scored with ``calculate_tpr_fpr``.

    Returns ``(tpr_list, fpr_list)``. Both lists start with a literal ``0``
    (the origin of the ROC plot) followed by one entry per threshold.
    """
    # One (tpr, fpr) pair per threshold, evaluated lowest threshold first.
    points = [
        calculate_tpr_fpr(y_real, y_proba[:, 1] > step / n)
        for step in range(n)
    ]

    # Prepend the (0, 0) anchor to both coordinate lists.
    tpr_list = [0] + [tpr for tpr, _ in points]
    fpr_list = [0] + [fpr for _, fpr in points]
    return tpr_list, fpr_list

def RMSE(array):
    """Row-wise root-mean-square deviation from the row mean.

    For every row, the mean over axis 1 is subtracted from each entry, the
    differences are squared and averaged over axis 1, and the square root is
    taken.

    Parameters
    ----------
    array : array-like
        2-D input of shape ``(n_rows, n_columns)``. A 1-D input is treated as
        a single column, so each row's deviation from itself is 0.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_rows``.
    """
    values = np.asarray(array)
    if values.ndim == 1:
        # A lone curve has no second axis to average over; make it one column
        # instead of letting the axis=1 reductions raise.
        values = values.reshape(-1, 1)

    # keepdims lets the row means broadcast back across the columns.
    deviations = values - np.mean(values, axis=1, keepdims=True)
    mean_squared = np.mean(deviations ** 2, axis=1)

    return np.sqrt(mean_squared)



In [None]:
def _rmse_mean_std(curves):
    """Stack per-category curves as columns and return (mean, std) of their per-threshold RMSE."""
    per_threshold = RMSE(np.column_stack(curves))
    return np.mean(per_threshold), np.std(per_threshold)


def roc_curve_protected(X_train, y_train, X_test, y_test, group_train, group_test, PROTECTED_FEATURES, feature_names=None):
    """Plot per-group ROC curves and the spread between groups, before and after debiasing.

    Two logistic regressions are fitted via ``fit_logistic_regression``: one on
    the raw features and one on features passed through ``correlation_remover``
    with ``lambda_=0``. Both helpers are defined outside this notebook cell.
    For every protected feature and every category of it, ROC coordinates are
    computed with ``get_n_roc_coordinates`` and drawn (top row: baseline,
    bottom row: debiased). A second figure shows, per feature, the mean and
    standard deviation over thresholds of ``RMSE`` across the categories'
    TPR and FPR curves.

    Parameters
    ----------
    X_train, y_train, X_test, y_test
        Train/test features and labels, forwarded to the helpers above.
    group_train, group_test
        Protected-attribute data. ``group_test`` is decoded with
        ``pd.from_dummies(group_test, sep='_')``.
    PROTECTED_FEATURES : sequence
        Column names of the decoded ``group_test`` frame to analyse.
    feature_names : sequence of str, optional
        Panel titles, one per protected feature. When omitted, the titles
        ``"Age", "Race", "Gender"`` are used if exactly three features are
        given; otherwise the feature names themselves are used.

    Returns
    -------
    tuple
        ``((roc_figure, roc_axes), (bar_figure, bar_axes))`` where ``roc_axes``
        has shape ``(2, n_features)`` and ``bar_axes`` has shape
        ``(n_features,)``.

    Raises
    ------
    ValueError
        If ``feature_names`` is given with a length different from
        ``PROTECTED_FEATURES``.
    """
    n_features = len(PROTECTED_FEATURES)
    if feature_names is None:
        default_titles = ["Age", "Race", "Gender"]
        if n_features == len(default_titles):
            feature_names = default_titles
        else:
            feature_names = [str(feature) for feature in PROTECTED_FEATURES]
    elif len(feature_names) != n_features:
        raise ValueError("feature_names must have one entry per protected feature")

    # set seaborn color palette
    sns_colors = sns.color_palette("muted")

    # debias data
    X_train_decorrelated = correlation_remover(X_train, group_train, lambda_=0)
    X_test_decorrelated = correlation_remover(X_test, group_test, lambda_=0)

    # get probas for baseline model and for the model trained on debiased data;
    # the first and third return values are not needed here
    _, base_probas, _ = fit_logistic_regression(X_train, y_train, X_test)
    _, debiased_probas, _ = fit_logistic_regression(X_train_decorrelated, y_train, X_test_decorrelated)

    # compile df for plotting
    plot_data = pd.from_dummies(group_test, sep='_')
    plot_data["baseline_proba_0"] = base_probas[:, 0]
    plot_data["baseline_proba_1"] = base_probas[:, 1]
    plot_data["debiased_proba_0"] = debiased_probas[:, 0]
    plot_data["debiased_proba_1"] = debiased_probas[:, 1]
    plot_data["true"] = y_test

    # squeeze=False keeps the axes arrays indexable the same way for any
    # number of protected features (including one).
    fig_bar, axes_bar = plt.subplots(1, n_features, figsize=(8, 4), sharey=True, squeeze=False)
    axes_bar = axes_bar[0]
    fig_roc, axes = plt.subplots(2, n_features, figsize=(12, 8), sharex='col', sharey='row', squeeze=False)

    # x is shared per column and y per row only, so set the limits on every panel.
    for roc_ax in axes.flat:
        roc_ax.set_xlim(-0.05, 1.05)
        roc_ax.set_ylim(-0.05, 1.05)
    axes[0, 0].set_ylabel("True Positive Rate", fontsize=15)
    axes[1, 0].set_ylabel("True Positive Rate", fontsize=15)

    proba_columns = ["true", "baseline_proba_0", "baseline_proba_1", "debiased_proba_0", "debiased_proba_1"]
    bar_labels = ["Baseline (TPR)", "Debiased (TPR)", "Baseline (FPR)", "Debiased (FPR)"]
    color_index = 0  # runs across features so every category gets its own color

    # Iterate through each protected feature
    for i, feature in enumerate(PROTECTED_FEATURES):
        # chance diagonal on both rows
        sns.lineplot(x=[0, 1], y=[0, 1], color='black', ax=axes[0, i], linestyle='--')
        sns.lineplot(x=[0, 1], y=[0, 1], color='black', ax=axes[1, i], linestyle='--')
        axes[1, i].set_xlabel("False Positive Rate", fontsize=15)
        axes[0, i].set_title(str(feature_names[i]), fontsize=15)

        # One curve per category; stacked column-wise once the loop is done.
        curves = {"baseline_tpr": [], "debiased_tpr": [], "baseline_fpr": [], "debiased_fpr": []}

        # Iterate through each category of the protected feature
        for category in np.unique(plot_data[feature]):
            subset = plot_data[plot_data[feature] == category]
            cat_true, base_0, base_1, debiased_0, debiased_1 = subset[proba_columns].to_numpy().T

            # calculate roc curve with baseline and debiased predictions
            tpr_baseline, fpr_baseline = get_n_roc_coordinates(cat_true, np.column_stack((base_0, base_1)))
            tpr_debiased, fpr_debiased = get_n_roc_coordinates(cat_true, np.column_stack((debiased_0, debiased_1)))

            # Wrap around the palette instead of raising IndexError once the
            # total number of categories exceeds the number of palette colors.
            color = sns_colors[color_index % len(sns_colors)]
            color_index += 1

            # plot roc curve for baseline and debiased model
            # NOTE(review): seaborn's lineplot aggregates repeated x values by
            # default (estimator='mean'); confirm that is intended for ROC
            # points that share the same FPR.
            sns.lineplot(x=fpr_baseline, y=tpr_baseline, ax=axes[0, i], label=category, color=color)
            sns.lineplot(x=fpr_debiased, y=tpr_debiased, ax=axes[1, i], label=category, color=color)

            curves["baseline_tpr"].append(np.array(tpr_baseline))
            curves["debiased_tpr"].append(np.array(tpr_debiased))
            curves["baseline_fpr"].append(np.array(fpr_baseline))
            curves["debiased_fpr"].append(np.array(fpr_debiased))

        # RMSE across categories at every threshold, summarised as mean/std
        # over thresholds, for TPR and FPR before and after debiasing.
        tpr_base_mean, tpr_base_std = _rmse_mean_std(curves["baseline_tpr"])
        tpr_debiased_mean, tpr_debiased_std = _rmse_mean_std(curves["debiased_tpr"])
        fpr_base_mean, fpr_base_std = _rmse_mean_std(curves["baseline_fpr"])
        fpr_debiased_mean, fpr_debiased_std = _rmse_mean_std(curves["debiased_fpr"])

        # Plot bar plot for RMSE for TPR and FPR to show the difference before and after debiasing
        bars = axes_bar[i].bar(
            x=[0, 1, 2, 3],
            tick_label=bar_labels,
            height=[tpr_base_mean, tpr_debiased_mean, fpr_base_mean, fpr_debiased_mean],
            align="center",
            yerr=[tpr_base_std, tpr_debiased_std, fpr_base_std, fpr_debiased_std],
            capsize=5,
            color=['tab:blue', 'tab:blue', 'tab:red', 'tab:red'],
        )

        axes_bar[i].set_title(str(feature_names[i]))
        # tick labels are blanked; the custom legend below identifies the bars
        axes_bar[i].set_xticklabels(labels=[" ", " ", " ", " "], rotation=45, ha="right")
        for bar, label in zip(bars, bar_labels):
            if 'Debiased' in label:
                bar.set_hatch('//')

    # Custom legend
    baseline_tpr_patch = mpatches.Patch(color='tab:blue', label='Baseline (TPR)')
    debiased_tpr_patch = mpatches.Patch(facecolor='tab:blue', hatch='//', label='Debiased (TPR)')
    baseline_fpr_patch = mpatches.Patch(color='tab:red', label='Baseline (FPR)')
    debiased_fpr_patch = mpatches.Patch(facecolor='tab:red', hatch='//', label='Debiased (FPR)')

    # Add the legend to the last bar panel (the third one in the three-feature setup)
    axes_bar[-1].legend(
        handles=[baseline_tpr_patch, debiased_tpr_patch, baseline_fpr_patch, debiased_fpr_patch],
        loc='upper right',
    )
    axes_bar[0].set_ylabel("Aggregated RMSE evaluated at all thresholds")
    return (fig_roc, axes), (fig_bar, axes_bar)

In [None]:
# roc_curve_protected returns two (figure, axes) pairs: ROC curves first, RMSE bars second.
ax, axes_bar = roc_curve_protected(X_train, y_train, X_test, y_test, group_train, group_test, PROTECTED_FEATURES)
roc_fig, rmse_fig = ax[0], axes_bar[0]

rmse_fig.tight_layout()
rmse_fig.subplots_adjust(wspace=0.1, hspace=0)
roc_fig.tight_layout()

# Create the output folder first so saving also works on a fresh checkout.
figures_dir = Path("figures")
figures_dir.mkdir(parents=True, exist_ok=True)
rmse_fig.savefig(figures_dir / "rmse.svg", format="svg")
roc_fig.savefig(figures_dir / "roc_curve.svg", format="svg")