In [102]:
import pandas as pd
from sklearn.calibration import LabelEncoder
from sklearn.model_selection import train_test_split
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.datasets import BinaryLabelDataset
from scipy.stats import wasserstein_distance
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from plot_fairness_metrics import *

In [103]:
from sklearn.preprocessing import QuantileTransformer
import pandas as pd
import numpy as np

def remove_disparate_impact(df, features, protected_attr, repair_level=0.5, random_state=42):
    """Add ``<feature>_repaired`` columns built from per-group quantile transforms.

    For each feature and each distinct non-missing value of ``protected_attr``,
    the group's values are mapped to a normal output distribution by a
    ``QuantileTransformer`` fitted on that group only, then blended with the
    original values as
    ``(1 - repair_level) * original + repair_level * transformed``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is copied; the caller's frame is not modified.
    features : iterable of str
        Names of the columns to repair.
    protected_attr : str
        Column whose distinct values define the groups.
    repair_level : float, default 0.5
        Blend weight of the transformed values (0 keeps the original values,
        1 uses only the transformed values).
    random_state : int or None, default 42
        Seed forwarded to ``QuantileTransformer`` so that its internal
        subsampling of large groups is reproducible between runs.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with one additional ``<feature>_repaired`` column per
        feature. The original feature columns are left unchanged, so callers
        must select the ``_repaired`` columns explicitly to use the result.
        Rows whose ``protected_attr`` is missing keep NaN in those columns.
    """
    df = df.copy()

    for feature in features:
        repaired_col = f"{feature}_repaired"
        df[repaired_col] = np.nan

        # NaN never compares equal to itself, so a NaN "group" would select no
        # rows and the transformer would fail on an empty array; skip it.
        for group in df[protected_attr].dropna().unique():
            group_mask = df[protected_attr] == group
            original = df.loc[group_mask, feature]
            group_values = original.values.reshape(-1, 1)

            qt = QuantileTransformer(
                # Cap at the group size: asking for more quantiles than samples
                # only triggers a warning before being capped anyway.
                n_quantiles=min(1000, len(group_values)),
                output_distribution='normal',
                random_state=random_state,
            )
            transformed = qt.fit_transform(group_values).flatten()

            df.loc[group_mask, repaired_col] = (
                (1 - repair_level) * original + repair_level * transformed
            )

    return df


In [104]:
# Prediction columns produced by earlier models; they are removed from the
# working frame but remain available in the untouched `df_copied`.
predicted_cols = [
    'pass_bar_lr',
    'pass_bar_lr_weighted',
    'pass_bar_lr_best',
    'pass_bar_rf',
    'pass_bar_xgb',
    'pass_bar_xgb_r',
]
df_copied = pd.read_csv('complete_df_with_predictions.csv')
df = df_copied.drop(columns=predicted_cols)

In [105]:
# Integer-encode the `race1` column into `race1_encoded`; the fitted encoder
# is kept in `race_enc` so the codes can be mapped back to their labels.
race_enc = LabelEncoder()
df['race1_encoded'] = race_enc.fit_transform(df['race1'])

In [106]:
# Build and print the label -> integer-code mapping learned by the encoder.
race_mapping = dict(zip(race_enc.classes_, race_enc.transform(race_enc.classes_)))
print(race_mapping)

{'asian': 0, 'black': 1, 'hisp': 2, 'other': 3, 'white': 4}


In [107]:
# Columns excluded from the working frame.
nokeep = ['bar', 'race2', 'race1']
# Keep the remaining columns in their original order. A set difference would
# return them in arbitrary order, which can differ between kernel sessions
# because string hashing is randomized.
keep = [col for col in df.columns if col not in nokeep]
df = df[keep]

In [108]:
# Model input columns. Race appears twice: as indicator columns (`race1_*`)
# and as the integer code `race1_encoded`.
features = ['male', 'decile1', 'decile3', 'lsat', 'ugpa', 'zfygpa', 'zgpa', 'parttime', 'fam_inc', 'tier', 'race1_black', 'race1_hisp', 'race1_other', 'race1_white', 'race1_encoded']

# Label column predicted by the model.
target = 'pass_bar'

# 80/20 hold-out split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42)

In [109]:
# Re-attach the label to each split so features and target travel together
# in a single frame (`assign` returns a new frame; X_train/X_test are untouched).
df_train = X_train.assign(**{target: y_train})
df_test = X_test.assign(**{target: y_test})

In [110]:
# Apply the per-group repair to the training frame at full strength.
features_to_repair = ['race1_encoded', 'tier', 'fam_inc']
df_train_rw = remove_disparate_impact(df_train, features=features_to_repair, protected_attr='race1_encoded', repair_level=1.0)
# NOTE(review): remove_disparate_impact writes its output to new
# `<feature>_repaired` columns and leaves the original columns unchanged.
# `features` lists only the original column names, so the selection below
# contains no repaired values — confirm whether the `_repaired` columns were
# meant to be used for training.
X_train_rw = df_train_rw[features]
y_train_rw = df_train_rw[target]



In [111]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, fbeta_score

# Hyper-parameter grid for the logistic regression.
param_grid = {
    'C': [0.01, 0.1, 1, 10], # inverse regularization strength (smaller value = stronger regularization)
    'class_weight': [{0: w0, 1: 1} for w0 in [5, 10, 20, 50, 100]] # class 1 keeps weight 1; only mistakes on class 0 are penalized more
}

lr = LogisticRegression(max_iter=1000, solver='liblinear') 


# Score candidates by F-beta computed for class 0 (pos_label=0); beta > 1
# weights recall more heavily than precision.
fbeta_scorer = make_scorer(fbeta_score, beta=1.41, pos_label=0)
                               
# 5-fold cross-validated search over the grid, using all available cores.
grid_search = GridSearchCV(lr, param_grid, scoring=fbeta_scorer, cv=5, n_jobs=-1)

grid_search.fit(X_train_rw, y_train_rw)

# Estimator refit with the best parameter combination found by the search.
lr_best_model = grid_search.best_estimator_

print("Best parameters:", grid_search.best_params_)
print("Best Fbeta-score for class 0:", grid_search.best_score_)

Best parameters: {'C': 1, 'class_weight': {0: 5, 1: 1}}
Best Fbeta-score for class 0: 0.39400955637598073


In [112]:
from sklearn.metrics import confusion_matrix, classification_report


# Evaluate the selected model on the held-out split. X_test is used exactly
# as returned by train_test_split; no repair step is applied to it here.
y_test_pred = lr_best_model.predict(X_test)

# Confusion matrix followed by per-class precision/recall/F1.
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))

[[ 118  109]
 [ 291 3897]]
              precision    recall  f1-score   support

           0       0.29      0.52      0.37       227
           1       0.97      0.93      0.95      4188

    accuracy                           0.91      4415
   macro avg       0.63      0.73      0.66      4415
weighted avg       0.94      0.91      0.92      4415



### Repair applied to the whole dataset

In [123]:
from sklearn.preprocessing import QuantileTransformer

def remove_disparate_impact_global(df, features, protected_attr, repair_level=0.5, random_state=42):
    """Add ``<feature>_repaired`` columns from a transform fitted on all rows.

    A single ``QuantileTransformer`` with a normal output distribution is
    fitted per feature on the whole column. The transformed values are blended
    with the originals as
    ``(1 - repair_level) * original + repair_level * transformed``.

    Because the transformer is fitted on all rows together, every row receives
    the same mapping regardless of its group. ``protected_attr`` only decides
    which rows receive a value: rows where it is missing keep NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is copied; the caller's frame is not modified.
    features : iterable of str
        Names of the columns to repair.
    protected_attr : str
        Column holding the protected attribute.
    repair_level : float, default 0.5
        Blend weight of the transformed values (0 keeps the original values,
        1 uses only the transformed values).
    random_state : int or None, default 42
        Seed forwarded to ``QuantileTransformer`` so that its internal
        subsampling of large inputs is reproducible between runs.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with one additional ``<feature>_repaired`` column per
        feature; the original feature columns are left unchanged.
    """
    df = df.copy()

    for feature in features:
        repaired_col = f"{feature}_repaired"
        df[repaired_col] = np.nan

        # Global reference distribution. The result is kept in a local array
        # rather than a scratch column, so an existing column that happens to
        # be named "<feature>_quantile" is never overwritten and dropped.
        qt_global = QuantileTransformer(
            # Cap at the number of rows: asking for more quantiles than samples
            # only triggers a warning before being capped anyway.
            n_quantiles=max(1, min(1000, len(df))),
            output_distribution='normal',
            random_state=random_state,
        )
        transformed = qt_global.fit_transform(df[[feature]]).flatten()

        original = df[feature].to_numpy(dtype=float)
        repaired = (1 - repair_level) * original + repair_level * transformed

        # Equivalent to looping over every distinct group value: each row with
        # a non-missing protected attribute belongs to exactly one group.
        has_group = df[protected_attr].notna().to_numpy()
        df[repaired_col] = np.where(has_group, repaired, np.nan)

    return df


In [124]:
# Apply the globally fitted repair to the full dataset at full strength.
features_to_repair = ['race1_encoded', 'tier', 'fam_inc']
df_rw = remove_disparate_impact_global(df, features=features_to_repair, protected_attr='race1_encoded', repair_level=1.0)

In [125]:
# Display the repaired frame (the notebook renders a truncated view).
df_rw

Unnamed: 0.1,race1_black,ugpa,race1_encoded,bar1_yr,decile1,academic_career,race1_hisp,decile3,zfygpa,race1_other,...,Unnamed: 0,zgpa,parttime,race1_encoded_repaired,tier_repaired,fam_inc_repaired,pass_bar_lr_rw,race1_encoded_repaired_bin,tier_repaired_bin,fam_inc_repaired_bin
0,False,3.5,4,7.0,10.0,1.341087,False,10.0,1.33,False,...,0,1.88,0.0,5.199338,0.261387,5.199338,1,2,2,4
1,False,3.5,4,7.0,5.0,-0.385252,False,4.0,-0.11,False,...,1,-0.57,0.0,5.199338,-1.490335,0.522099,1,2,1,2
2,False,3.5,4,7.0,3.0,-0.355093,False,2.0,-0.64,False,...,2,-1.12,0.0,5.199338,-0.559859,-5.199338,1,2,2,0
3,False,3.5,4,7.0,7.0,0.217453,False,4.0,0.34,False,...,3,-0.49,0.0,5.199338,-0.559859,0.522099,1,2,2,2
4,False,3.5,4,7.0,9.0,1.144843,False,8.0,1.02,False,...,4,0.76,0.0,5.199338,0.931971,0.522099,1,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22067,True,1.8,1,2.0,3.0,-1.922560,False,1.0,-0.58,False,...,22402,-1.49,0.0,-1.475269,-5.199338,-1.490335,0,2,0,2
22068,True,1.8,1,7.0,3.0,-2.216261,False,1.0,-0.64,False,...,22403,-1.36,0.0,-1.475269,-5.199338,-0.529301,0,2,0,2
22069,True,1.8,1,7.0,7.0,-0.613914,False,8.0,0.41,False,...,22404,0.80,1.0,-1.475269,-0.559859,-0.529301,1,2,2,2
22070,False,1.5,4,7.0,10.0,0.134185,False,10.0,1.88,False,...,22405,1.42,1.0,5.199338,-0.559859,-0.529301,1,2,2,2


In [126]:
# Predict for every row and store the result next to the data.
# NOTE(review): `features` lists the original column names, so the
# `<feature>_repaired` columns present in df_rw are not passed to the model —
# confirm this is intended.
pass_bar_lr_rw = lr_best_model.predict(df_rw[features])
df_rw['pass_bar_lr_rw'] = pass_bar_lr_rw

In [127]:
# Display the frame again, now including the `pass_bar_lr_rw` predictions.
df_rw

Unnamed: 0.1,race1_black,ugpa,race1_encoded,bar1_yr,decile1,academic_career,race1_hisp,decile3,zfygpa,race1_other,...,Unnamed: 0,zgpa,parttime,race1_encoded_repaired,tier_repaired,fam_inc_repaired,pass_bar_lr_rw,race1_encoded_repaired_bin,tier_repaired_bin,fam_inc_repaired_bin
0,False,3.5,4,7.0,10.0,1.341087,False,10.0,1.33,False,...,0,1.88,0.0,5.199338,0.261387,5.199338,1,2,2,4
1,False,3.5,4,7.0,5.0,-0.385252,False,4.0,-0.11,False,...,1,-0.57,0.0,5.199338,-1.490335,0.522099,1,2,1,2
2,False,3.5,4,7.0,3.0,-0.355093,False,2.0,-0.64,False,...,2,-1.12,0.0,5.199338,-0.559859,-5.199338,1,2,2,0
3,False,3.5,4,7.0,7.0,0.217453,False,4.0,0.34,False,...,3,-0.49,0.0,5.199338,-0.559859,0.522099,1,2,2,2
4,False,3.5,4,7.0,9.0,1.144843,False,8.0,1.02,False,...,4,0.76,0.0,5.199338,0.931971,0.522099,1,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22067,True,1.8,1,2.0,3.0,-1.922560,False,1.0,-0.58,False,...,22402,-1.49,0.0,-1.475269,-5.199338,-1.490335,0,2,0,2
22068,True,1.8,1,7.0,3.0,-2.216261,False,1.0,-0.64,False,...,22403,-1.36,0.0,-1.475269,-5.199338,-0.529301,0,2,0,2
22069,True,1.8,1,7.0,7.0,-0.613914,False,8.0,0.41,False,...,22404,0.80,1.0,-1.475269,-0.559859,-0.529301,1,2,2,2
22070,False,1.5,4,7.0,10.0,0.134185,False,10.0,1.88,False,...,22405,1.42,1.0,5.199338,-0.559859,-0.529301,1,2,2,2


In [128]:
# Sanity-check the repaired race column: summary statistics, number of
# distinct values, and how many rows take each value.
print(df_rw['race1_encoded_repaired'].describe())
print(df_rw['race1_encoded_repaired'].nunique())
print(df_rw['race1_encoded_repaired'].value_counts())

count    22072.000000
mean         3.980420
std          2.840058
min         -5.199338
25%          5.199338
50%          5.199338
75%          5.199338
max          5.199338
Name: race1_encoded_repaired, dtype: float64
5
race1_encoded_repaired
 5.199338    18462
-1.475269     1308
-1.164444     1010
-5.199338      888
-1.018778      404
Name: count, dtype: int64


In [129]:
import pandas as pd
import numpy as np

def bin_repaired_features(df, feature_cols, bins=5):
    """Add an equal-width ``<feature>_bin`` column for each listed feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    feature_cols : iterable of str
        Columns to discretize.
    bins : int or sequence, default 5
        Forwarded to ``pandas.cut``: a number of equal-width bins or explicit
        bin edges.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with integer bin indices (``labels=False``)
        stored in ``<feature>_bin``.
    """
    for feature in feature_cols:
        binned_col = f"{feature}_bin"
        df[binned_col] = pd.cut(
            df[feature],
            bins=bins,  # honor the argument (it was previously ignored in favor of a literal 5)
            labels=False,
            include_lowest=True
        )
    return df

def compute_disparate_impact_per_bin(df, feature_bin_col, label_col, reference_bin=None):
    """Return the mean-label ratio of every bin relative to a reference bin.

    Rows with a missing bin are ignored. When ``reference_bin`` is None the
    most frequent bin is used as reference. Each bin's ratio is its mean of
    ``label_col`` divided by the reference bin's mean; the ratio is NaN when
    the reference mean is not greater than zero.

    Returns a DataFrame with columns ``'Group'`` and ``'Disparate Impact'``,
    one row per bin in ascending bin order.

    Raises ValueError when no row has a valid bin or no mode can be found.
    """
    binned = df.dropna(subset=[feature_bin_col])
    if binned.empty:
        raise ValueError(f"No valid values in {feature_bin_col} to compute Disparate Impact.")

    bin_values = binned[feature_bin_col]

    # Fall back to the most common bin when the caller names no reference.
    if reference_bin is None:
        modes = bin_values.mode(dropna=True)
        if modes.empty:
            raise ValueError(f"Could not determine mode for {feature_bin_col}")
        reference_bin = modes.iloc[0]

    reference_rate = binned.loc[bin_values == reference_bin, label_col].mean()
    usable_reference = reference_rate > 0

    # Mean label per bin, visited in ascending bin order.
    group_rates = {
        bin_id: binned.loc[bin_values == bin_id, label_col].mean()
        for bin_id in sorted(bin_values.dropna().unique())
    }

    records = [
        {
            'Group': bin_id,
            'Disparate Impact': (rate / reference_rate) if usable_reference else np.nan
        }
        for bin_id, rate in group_rates.items()
    ]
    return pd.DataFrame(records)


# Bin the three repaired columns into `<feature>_bin` columns.
# bin_repaired_features adds the columns to the frame it is given and returns
# that same frame, so after this cell `df` and `df_rw` are the same object and
# the name `df` no longer refers to the frame it held before this cell.
repaired_features = ['race1_encoded_repaired', 'tier_repaired', 'fam_inc_repaired']
df = bin_repaired_features(df_rw, repaired_features, bins=5)
df



Unnamed: 0.1,race1_black,ugpa,race1_encoded,bar1_yr,decile1,academic_career,race1_hisp,decile3,zfygpa,race1_other,...,Unnamed: 0,zgpa,parttime,race1_encoded_repaired,tier_repaired,fam_inc_repaired,pass_bar_lr_rw,race1_encoded_repaired_bin,tier_repaired_bin,fam_inc_repaired_bin
0,False,3.5,4,7.0,10.0,1.341087,False,10.0,1.33,False,...,0,1.88,0.0,5.199338,0.261387,5.199338,1,4,2,4
1,False,3.5,4,7.0,5.0,-0.385252,False,4.0,-0.11,False,...,1,-0.57,0.0,5.199338,-1.490335,0.522099,1,4,1,2
2,False,3.5,4,7.0,3.0,-0.355093,False,2.0,-0.64,False,...,2,-1.12,0.0,5.199338,-0.559859,-5.199338,1,4,2,0
3,False,3.5,4,7.0,7.0,0.217453,False,4.0,0.34,False,...,3,-0.49,0.0,5.199338,-0.559859,0.522099,1,4,2,2
4,False,3.5,4,7.0,9.0,1.144843,False,8.0,1.02,False,...,4,0.76,0.0,5.199338,0.931971,0.522099,1,4,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22067,True,1.8,1,2.0,3.0,-1.922560,False,1.0,-0.58,False,...,22402,-1.49,0.0,-1.475269,-5.199338,-1.490335,0,1,0,1
22068,True,1.8,1,7.0,3.0,-2.216261,False,1.0,-0.64,False,...,22403,-1.36,0.0,-1.475269,-5.199338,-0.529301,0,1,0,2
22069,True,1.8,1,7.0,7.0,-0.613914,False,8.0,0.41,False,...,22404,0.80,1.0,-1.475269,-0.559859,-0.529301,1,1,2,2
22070,False,1.5,4,7.0,10.0,0.134185,False,10.0,1.88,False,...,22405,1.42,1.0,5.199338,-0.559859,-0.529301,1,4,2,2


In [130]:
# Compute and show DI for each repaired feature, using the model predictions
# in `pass_bar_lr_rw` as the outcome. No reference bin is passed, so each
# feature's most frequent bin is the reference (its ratio is 1.0).
for feature in repaired_features:
    bin_col = f"{feature}_bin"
    print(f"\nDisparate Impact for {feature}:")
    di_df = compute_disparate_impact_per_bin(df, bin_col, label_col='pass_bar_lr_rw')
    print(di_df)


Disparate Impact for race1_encoded_repaired:
   Group  Disparate Impact
0      0          0.907045
1      1          0.613251
2      2          0.824270
3      4          1.000000

Disparate Impact for tier_repaired:
   Group  Disparate Impact
0      0          0.774496
1      1          0.919759
2      2          1.000000
3      4          1.066794

Disparate Impact for fam_inc_repaired:
   Group  Disparate Impact
0      0          0.773033
1      1          0.899763
2      2          1.000000
3      4          1.018946


In [131]:
# Most frequent repaired-race bin among rows whose race code is 4.
ref_bin = df_rw[df_rw['race1_encoded'] == 4]['race1_encoded_repaired_bin'].mode()[0]
ref_bin

4

## Experiment: group-wise quantile alignment (exploratory)

In [134]:
from sklearn.preprocessing import QuantileTransformer
import numpy as np
import pandas as pd

def remove_disparate_impact_groupwise(df, features, protected_attr, repair_level=0.5, output_distribution='normal'):
    """Add ``<feature>_repaired`` columns by aligning each group to the pooled data.

    Per feature, one quantile transformer is fitted on all rows and one on each
    group defined by ``protected_attr``. A group's values are pushed through
    its own transformer and then through the inverse of the pooled transformer,
    and the result is blended with the original values as
    ``(1 - repair_level) * original + repair_level * aligned``.

    The input frame is copied; the returned copy carries the extra columns and
    leaves the original feature columns unchanged.
    """
    result = df.copy()
    keep_weight = 1 - repair_level

    for feature in features:
        target_col = f"{feature}_repaired"
        result[target_col] = np.nan

        # Transformer describing the pooled (all-rows) distribution.
        pooled_qt = QuantileTransformer(output_distribution=output_distribution, random_state=42)
        pooled_qt.fit(result[[feature]])

        for group_value in result[protected_attr].unique():
            in_group = result[protected_attr] == group_value
            column_2d = result.loc[in_group, feature].values.reshape(-1, 1)

            # Transformer describing this group's own distribution.
            group_qt = QuantileTransformer(output_distribution=output_distribution, random_state=42)
            group_qt.fit(column_2d)

            # Group scores -> values on the pooled feature scale.
            aligned = pooled_qt.inverse_transform(group_qt.transform(column_2d)).flatten()

            result.loc[in_group, target_col] = (
                keep_weight * result.loc[in_group, feature] + repair_level * aligned
            )

    return result


In [135]:
# Apply the group-wise alignment at full strength.
# NOTE(review): `df` was last assigned from bin_repaired_features(df_rw, ...),
# so the input here is the previously repaired frame (it already carries
# `_repaired`, `_bin` and `pass_bar_lr_rw` columns); the `_repaired` columns
# are overwritten by this call — confirm that this input is intended.
features_to_repair = ['race1_encoded', 'tier', 'fam_inc']
df_rw = remove_disparate_impact_groupwise(df, features=features_to_repair, protected_attr='race1_encoded', repair_level=1.0)



In [136]:
# Predict again and store the result under a separate column name so it does
# not replace the earlier `pass_bar_lr_rw` column.
# NOTE(review): as before, `features` selects the original columns, not the
# `<feature>_repaired` ones — confirm this is intended.
pass_bar_lr_rw = lr_best_model.predict(df_rw[features])
df_rw['pass_bar_lr_rw_2'] = pass_bar_lr_rw

In [137]:
import pandas as pd
import numpy as np

def bin_repaired_features(df, feature_cols, bins=5):
    """Add an equal-width ``<feature>_bin`` column for each listed feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    feature_cols : iterable of str
        Columns to discretize.
    bins : int or sequence, default 5
        Forwarded to ``pandas.cut``: a number of equal-width bins or explicit
        bin edges.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with integer bin indices (``labels=False``)
        stored in ``<feature>_bin``.
    """
    for feature in feature_cols:
        binned_col = f"{feature}_bin"
        df[binned_col] = pd.cut(
            df[feature],
            bins=bins,  # honor the argument (it was previously ignored in favor of a literal 5)
            labels=False,
            include_lowest=True
        )
    return df

def compute_disparate_impact_per_bin(df, feature_bin_col, label_col, reference_bin=None):
    """Return the mean-label ratio of every bin relative to a reference bin.

    Rows with a missing bin are ignored. When ``reference_bin`` is None the
    most frequent bin is used as reference. Each bin's ratio is its mean of
    ``label_col`` divided by the reference bin's mean; the ratio is NaN when
    the reference mean is not greater than zero.

    Returns a DataFrame with columns ``'Group'`` and ``'Disparate Impact'``,
    one row per bin in ascending bin order.

    Raises ValueError when no row has a valid bin or no mode can be found.
    """
    binned = df.dropna(subset=[feature_bin_col])
    if binned.empty:
        raise ValueError(f"No valid values in {feature_bin_col} to compute Disparate Impact.")

    bin_values = binned[feature_bin_col]

    # Fall back to the most common bin when the caller names no reference.
    if reference_bin is None:
        modes = bin_values.mode(dropna=True)
        if modes.empty:
            raise ValueError(f"Could not determine mode for {feature_bin_col}")
        reference_bin = modes.iloc[0]

    reference_rate = binned.loc[bin_values == reference_bin, label_col].mean()
    usable_reference = reference_rate > 0

    # Mean label per bin, visited in ascending bin order.
    group_rates = {
        bin_id: binned.loc[bin_values == bin_id, label_col].mean()
        for bin_id in sorted(bin_values.dropna().unique())
    }

    records = [
        {
            'Group': bin_id,
            'Disparate Impact': (rate / reference_rate) if usable_reference else np.nan
        }
        for bin_id, rate in group_rates.items()
    ]
    return pd.DataFrame(records)


# Re-bin the repaired columns produced by the group-wise alignment.
# bin_repaired_features adds the columns to the frame it is given and returns
# that same frame, so `df` and `df_rw` are the same object after this cell.
repaired_features = ['race1_encoded_repaired', 'tier_repaired', 'fam_inc_repaired']
df = bin_repaired_features(df_rw, repaired_features, bins=5)
df



Unnamed: 0,race1_black,ugpa,race1_encoded,bar1_yr,decile1,academic_career,race1_hisp,decile3,zfygpa,race1_other,...,zgpa,parttime,race1_encoded_repaired,tier_repaired,fam_inc_repaired,pass_bar_lr_rw,race1_encoded_repaired_bin,tier_repaired_bin,fam_inc_repaired_bin,pass_bar_lr_rw_2
0,False,3.5,4,7.0,10.0,1.341087,False,10.0,1.33,False,...,1.88,0.0,0.0,4.0,5.0,1,2,2,4,1
1,False,3.5,4,7.0,5.0,-0.385252,False,4.0,-0.11,False,...,-0.57,0.0,0.0,2.0,4.0,1,2,0,3,1
2,False,3.5,4,7.0,3.0,-0.355093,False,2.0,-0.64,False,...,-1.12,0.0,0.0,3.0,1.0,1,2,1,0,1
3,False,3.5,4,7.0,7.0,0.217453,False,4.0,0.34,False,...,-0.49,0.0,0.0,3.0,4.0,1,2,1,3,1
4,False,3.5,4,7.0,9.0,1.144843,False,8.0,1.02,False,...,0.76,0.0,0.0,5.0,4.0,1,2,3,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22067,True,1.8,1,2.0,3.0,-1.922560,False,1.0,-0.58,False,...,-1.49,0.0,0.0,1.0,3.0,0,2,0,2,0
22068,True,1.8,1,7.0,3.0,-2.216261,False,1.0,-0.64,False,...,-1.36,0.0,0.0,1.0,4.0,0,2,0,3,0
22069,True,1.8,1,7.0,7.0,-0.613914,False,8.0,0.41,False,...,0.80,1.0,0.0,3.0,4.0,1,2,1,3,1
22070,False,1.5,4,7.0,10.0,0.134185,False,10.0,1.88,False,...,1.42,1.0,0.0,3.0,3.0,1,2,1,2,1


In [138]:
# Report DI per bin for the group-wise repair. The outcome column is
# 'pass_bar_lr_rw_2', the predictions stored for this experiment, rather than
# 'pass_bar_lr_rw', which holds the predictions from the earlier repair.
for feature in repaired_features:
    bin_col = f"{feature}_bin"
    print(f"\nDisparate Impact for {feature}:")
    di_df = compute_disparate_impact_per_bin(df, bin_col, label_col='pass_bar_lr_rw_2')
    print(di_df)


Disparate Impact for race1_encoded_repaired:
   Group  Disparate Impact
0      2               1.0

Disparate Impact for tier_repaired:
   Group  Disparate Impact
0      0          0.924893
1      1          1.000000
2      2          1.044009
3      3          1.071201
4      4          1.100303

Disparate Impact for fam_inc_repaired:
   Group  Disparate Impact
0      0          0.784925
1      1          1.028342
2      2          1.005048
3      3          1.000000
4      4          1.034621
