# Experiments with Home Credit

In [1]:
import time
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from lightgbm import LGBMClassifier
from sklearn.inspection import PartialDependenceDisplay
from lime import lime_tabular
import shap
from cfmining.algorithms import MAPOCAM, BruteForce, Greedy
from cfmining.criteria import PercentileCalculator, PercentileCriterion, PercentileChangesCriterion, NonDomCriterion
from cfmining.predictors import MonotoneClassifier
from cfmining.visualization import buildTable, PlotCounterfactuals
from cfmining.mip_builder import RecourseBuilder
from cfmining.action_set import ActionSet
import joblib


from aif360.algorithms.preprocessing import Reweighing
from aif360.datasets import BinaryLabelDataset
from fairgbm import FairGBMClassifier
from sklego.linear_model import EqualOpportunityClassifier, DemographicParityClassifier
from fairlearn.postprocessing import ThresholdOptimizer


import credit_pipeline.data_exploration as dex
from credit_pipeline.training import *


%load_ext autoreload
%autoreload 2

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'


## Loading and Cleaning

In [2]:
# Seed passed as random_state to both train_test_split calls in this notebook.
seed_number = 0

In [3]:
# Folder holding the Home Credit files, relative to the notebook's working directory.
path = "../data/HomeCredit/"
# Load the application table through the project's data-exploration helper.
df = dex.read_csv_encoded(path, 'application_train.csv')

In [4]:
# NOTE(review): 50 and False are passed positionally; presumably 50 is a missing-value
# percentage threshold — confirm against dex.check_missing.
columns_to_drop = dex.check_missing(df, 50,  False)
# Display the returned column names.
columns_to_drop

['COMMONAREA_MEDI',
 'COMMONAREA_AVG',
 'COMMONAREA_MODE',
 'NONLIVINGAPARTMENTS_MODE',
 'NONLIVINGAPARTMENTS_AVG',
 'NONLIVINGAPARTMENTS_MEDI',
 'FONDKAPREMONT_MODE',
 'LIVINGAPARTMENTS_MODE',
 'LIVINGAPARTMENTS_AVG',
 'LIVINGAPARTMENTS_MEDI',
 'FLOORSMIN_AVG',
 'FLOORSMIN_MODE',
 'FLOORSMIN_MEDI',
 'YEARS_BUILD_MEDI',
 'YEARS_BUILD_MODE',
 'YEARS_BUILD_AVG',
 'OWN_CAR_AGE',
 'LANDAREA_MEDI',
 'LANDAREA_MODE',
 'LANDAREA_AVG',
 'BASEMENTAREA_MEDI',
 'BASEMENTAREA_AVG',
 'BASEMENTAREA_MODE',
 'EXT_SOURCE_1',
 'NONLIVINGAREA_MODE',
 'NONLIVINGAREA_AVG',
 'NONLIVINGAREA_MEDI',
 'ELEVATORS_MEDI',
 'ELEVATORS_AVG',
 'ELEVATORS_MODE',
 'WALLSMATERIAL_MODE',
 'APARTMENTS_MEDI',
 'APARTMENTS_AVG',
 'APARTMENTS_MODE',
 'ENTRANCES_MEDI',
 'ENTRANCES_AVG',
 'ENTRANCES_MODE',
 'LIVINGAREA_AVG',
 'LIVINGAREA_MODE',
 'LIVINGAREA_MEDI',
 'HOUSETYPE_MODE']

In [5]:
# Remove the columns flagged by the missing-value check.
df = df.drop(columns=columns_to_drop)

In [6]:
# Names of the columns that remain in df at this point.
df_cols = df.columns.to_list()
# NOTE(review): presumably the object-dtype ('O') columns — confirm against dex.list_by_type.
obj_cols = dex.list_by_type(df, ['O'])

In [7]:
def days_to_years(dataframe, col_name):
    """
    Replace a day-count column with a whole-year column.

    Works on a copy of ``dataframe``; the input is never modified. When
    ``col_name`` is present, its values are coerced to numeric (unparseable
    values become NaN), rows where the column is NaN are dropped, and a new
    column named ``'YEARS' + col_name[4:]`` (e.g. ``DAYS_BIRTH`` ->
    ``YEARS_BIRTH``) is appended holding ``int(abs(days / 365))``. The
    original column is then removed. When ``col_name`` is absent, the copy is
    returned unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that may contain ``col_name``.
    col_name : str
        Name of the day-count column; the first four characters are replaced
        by ``'YEARS'`` to build the new column name.

    Returns
    -------
    pandas.DataFrame
        A new frame with the converted column (and possibly fewer rows).
    """
    df_name = dataframe.copy()

    # Check the frame that was passed in, not the notebook-level ``df``.
    if col_name not in df_name.columns:
        return df_name

    # Convert values from string to numeric; anything unparseable becomes NaN.
    df_name[col_name] = pd.to_numeric(df_name[col_name], errors='coerce')

    # Drop rows where the column is missing so int() below never sees NaN.
    df_name = df_name.dropna(subset=[col_name])

    # Days may be stored as negative offsets, hence abs(); int() truncates to whole years.
    years = df_name[col_name].map(lambda x: int(abs(x / 365)), na_action=None)

    # assign() appends the new column on a fresh frame; then drop the source column.
    years_col = 'YEARS' + col_name[4:]
    return df_name.assign(**{years_col: years}).drop(col_name, axis=1)


# Replace the day-count columns with whole-year columns (YEARS_EMPLOYED, YEARS_BIRTH);
# rows whose value cannot be parsed as a number are dropped by days_to_years.
df = days_to_years(df, "DAYS_EMPLOYED")
df = days_to_years(df, 'DAYS_BIRTH')

In [8]:
# Features are every column except the label (TARGET) and the SK_ID_CURR column.
X_acp = df.loc[:, ~df.columns.isin(["TARGET", "SK_ID_CURR"])]
y_acp = df["TARGET"]

In [9]:
# Hold out 20% as the test split, then 20% of the remainder as the validation split.
# Both splits are stratified on the label and use the same seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_acp, y_acp, test_size=0.2, random_state=seed_number, stratify=y_acp
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=seed_number, stratify=y_train
)

# Binary sensitive attribute per split: 1 where CODE_GENDER is "F", otherwise 0.
A_train, A_val, A_test = (
    (features["CODE_GENDER"] == "F").astype(int)
    for features in (X_train, X_val, X_test)
)

## Training Basic Models

In [10]:
# Passed as n_trials to every optimize_model call below.
n_trials = 5

In [11]:
# Estimator classes keyed by the short model names used throughout the notebook.
classifiers = dict(
    LogisticRegression=LogisticRegression,
    RandomForest=RandomForestClassifier,
    LightGBM=LGBMClassifier,
    MLP=MLPClassifier,
)

In [12]:
# Search-space description per estimator, keyed by estimator class name.
# Each hyper-parameter maps to a spec dict: numeric ranges use "low"/"high"
# (optionally "step" and "log"), categorical ones use "choices"; "type" names the kind.
param_spaces = {
    "LogisticRegression": {
        "C": {"low": 0.001, "high": 10, "log": True, "type": "float"},
        # low == high pins max_iter to a single value.
        "max_iter": {"low": 1000, "high": 1000, "step": 1, "type": "int"},
        "penalty": {"choices": ["l2"], "type": "categorical"},
        "class_weight": {"choices": [None, "balanced"], "type": "categorical"},
    },
    "RandomForestClassifier": {
        "n_estimators": {"low": 10, "high": 150, "step": 20, "type": "int"},
        "max_depth": {"low": 2, "high": 10, "type": "int"},
        "criterion": {"choices": ["gini", "entropy"], "type": "categorical"},
        "min_samples_leaf": {"low": 1, "high": 51, "step": 5, "type": "int"},
        "max_features": {"low": 0.1, "high": 1.0, "type": "float"},
        "class_weight": {"choices": [None, "balanced"], "type": "categorical"},
    },
    "LGBMClassifier": {
        "learning_rate": {"low": 0.01, "high": 1.0, "type": "float", "log": True},
        "num_leaves": {"low": 5, "high": 100, "step": 5, "type": "int"},
        "max_depth": {"low": 2, "high": 10, "type": "int"},
        "min_child_samples": {"low": 1, "high": 51, "step": 5, "type": "int"},
        "colsample_bytree": {"low": 0.1, "high": 1.0, "type": "float"},
        "reg_alpha": {"low": 0.0, "high": 1.0, "type": "float"},
        "reg_lambda": {"low": 0.0, "high": 1.0, "type": "float"},
        "n_estimators": {"low": 5, "high": 100, "step": 5, "type": "int"},
        "class_weight": {"choices": [None, "balanced"], "type": "categorical"},
        # Single-choice categoricals act as fixed settings.
        "verbose": {"choices": [-1], "type": "categorical"},
    },
    "MLPClassifier": {
        "hidden_layer_sizes": {
            "choices": [
                [128, 64, 32],
                [128, 64, 32, 16],
                [256, 128, 64, 32, 16],
            ],
            "type": "categorical",
        },
        "alpha": {"low": 0.0001, "high": 0.01, "type": "float", "log": True},
        "learning_rate": {
            "choices": ["constant", "invscaling", "adaptive"],
            "type": "categorical",
        },
        "learning_rate_init": {"low": 0.001, "high": 0.1, "type": "float", "log": True},
        "early_stopping": {"choices": [True], "type": "categorical"},
        "max_iter": {"choices": [50], "type": "categorical"},
    },
}

In [None]:
# Search the logistic-regression space using the train and validation splits,
# then persist the returned model for the evaluation section.
study_logistic, model_logistic = optimize_model(
    LogisticRegression,
    param_spaces["LogisticRegression"],
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=n_trials,
)
joblib.dump(model_logistic, "models/home_credit_logistic.joblib")

In [14]:
# Report the study's best value and the hyper-parameters that produced it.
print("Score for Logistic Regression: ", study_logistic.best_value)
print(study_logistic.best_params)

Score for Logistic Regression:  0.7416156239820686
{'C': 9.997157569866765, 'max_iter': 1000, 'penalty': 'l2', 'class_weight': None}


In [16]:
# Search the random-forest space using the train and validation splits,
# then persist the returned model for the evaluation section.
study_rf, model_rf = optimize_model(
    RandomForestClassifier,
    param_spaces["RandomForestClassifier"],
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=n_trials,
)
joblib.dump(model_rf, "models/home_credit_rf.joblib")

  0%|          | 0/5 [00:00<?, ?it/s]

['models/home_credit_rf.joblib']

In [17]:
# Report the study's best value and the hyper-parameters that produced it.
print("Score for Random Forest: ", study_rf.best_value)
print(study_rf.best_params)

Score for Random Forest:  0.735405719207568
{'n_estimators': 90, 'max_depth': 8, 'criterion': 'gini', 'min_samples_leaf': 21, 'max_features': 0.6813047017599905, 'class_weight': 'balanced'}


In [None]:
# Search the LightGBM space using the train and validation splits,
# then persist the returned model for the evaluation section.
study_lgbm, model_lgbm = optimize_model(
    LGBMClassifier,
    param_spaces["LGBMClassifier"],
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=n_trials,
)
joblib.dump(model_lgbm, "models/home_credit_lgbm.joblib")

In [16]:
# Report the study's best value and the hyper-parameters that produced it.
print("Score for LGBM: ", study_lgbm.best_value)
print(study_lgbm.best_params)

Score for LGBM:  0.7532923700482194
{'learning_rate': 0.198087735781815, 'num_leaves': 95, 'max_depth': 4, 'min_child_samples': 51, 'colsample_bytree': 0.5294089985999059, 'reg_alpha': 0.7976287636865885, 'reg_lambda': 0.3859446593891416, 'n_estimators': 100, 'class_weight': None, 'verbose': -1}


In [None]:
# Search the MLP space using the train and validation splits,
# then persist the returned model for the evaluation section.
study_mlp, model_mlp = optimize_model(
    MLPClassifier,
    param_spaces["MLPClassifier"],
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=n_trials,
)
joblib.dump(model_mlp, "models/home_credit_mlp.joblib")

In [19]:
# Report the study's best value and the hyper-parameters that produced it.
print("Score for MLP: ", study_mlp.best_value)
print(study_mlp.best_params)

Score for MLP:  0.7433528703800805
{'hidden_layer_sizes': [128, 64, 32], 'alpha': 0.0003527051808306031, 'learning_rate': 'invscaling', 'learning_rate_init': 0.010035211915818264, 'early_stopping': True, 'max_iter': 50}


## Model evaluation

In [13]:
# Reload the persisted models so evaluation can run without repeating the searches.
# NOTE(review): joblib.load unpickles the files — only load artifacts produced by this project.
models = {
    "LogisticRegression": joblib.load("models/home_credit_logistic.joblib"),
    "RandomForest": joblib.load("models/home_credit_rf.joblib"),
    "LightGBM": joblib.load("models/home_credit_lgbm.joblib"),
    "MLP": joblib.load("models/home_credit_mlp.joblib"),
}

# One decision threshold per model, computed by ks_threshold on the validation scores.
ks_threshold_dict = {
    name: ks_threshold(y_val, fitted.predict_proba(X_val)[:, 1])
    for name, fitted in models.items()
}

# Each entry pairs the fitted model with its threshold: [model, threshold].
models_dict = {
    name: [fitted, ks_threshold_dict[name]] for name, fitted in models.items()
}

In [14]:
# Metrics table for every [model, threshold] pair on the training split.
get_metrics(models_dict, X_train, y_train)

Unnamed: 0,AUC,Balanced Accuracy,Accuracy,Precision,Recall,F1,Brier Score
LogisticRegression,0.744142,0.680523,0.688765,0.159815,0.670695,0.258124,0.068788
RandomForest,0.7657,0.694185,0.681372,0.162505,0.709466,0.26444,0.192682
LightGBM,0.775591,0.703864,0.690985,0.168585,0.719222,0.273144,0.066782
MLP,0.751901,0.685362,0.658552,0.153795,0.717334,0.253286,0.068363


In [15]:
# Metrics table for every [model, threshold] pair on the validation split.
get_metrics(models_dict, X_val, y_val)

Unnamed: 0,AUC,Balanced Accuracy,Accuracy,Precision,Recall,F1,Brier Score
LogisticRegression,0.741616,0.680688,0.688224,0.159722,0.671702,0.258077,0.06885
RandomForest,0.735406,0.676955,0.676294,0.155257,0.677744,0.252639,0.195668
LightGBM,0.753292,0.688021,0.68735,0.162057,0.688822,0.262383,0.068048
MLP,0.743353,0.682228,0.658754,0.152817,0.710222,0.251516,0.068965


In [16]:
# Metrics table for every [model, threshold] pair on the held-out test split.
get_metrics(models_dict, X_test, y_test)

Unnamed: 0,AUC,Balanced Accuracy,Accuracy,Precision,Recall,F1,Brier Score
LogisticRegression,0.749414,0.685851,0.692649,0.162812,0.677744,0.262552,0.068457
RandomForest,0.738672,0.676962,0.677658,0.155604,0.676133,0.252986,0.194359
LightGBM,0.755847,0.690465,0.689479,0.163516,0.691641,0.2645,0.06796
MLP,0.74307,0.679606,0.659887,0.152213,0.703122,0.250251,0.068922


In [14]:
# Hard 0/1 test-set predictions per baseline model: score above the model's threshold -> 1.
models_dict_fairness = {
    name: (fitted.predict_proba(X_test)[:, 1] > threshold).astype(int)
    for name, (fitted, threshold) in models_dict.items()
}

In [15]:
# Fairness metrics of the baseline predictions against the test labels and the
# gender indicator A_test.
# NOTE(review): the trailing 1 is passed positionally; its meaning is defined by
# get_fairness_metrics — confirm.
get_fairness_metrics(models_dict_fairness, y_test, A_test, 1)

Unnamed: 0,DPD,EOD,AOD,APVD,GMA,balanced_accuracy
LogisticRegression,-0.171045,-0.160572,-0.160976,-0.007381,0.668542,0.685851
RandomForest,-0.147647,-0.133588,-0.136193,-0.011795,0.657269,0.676962
LightGBM,-0.166967,-0.14818,-0.152996,-0.007354,0.665907,0.690465
MLP,-0.204595,-0.174678,-0.185961,-0.004472,0.629205,0.679606


## Fairness

In [16]:
# Build the project's pipeline with an identity lambda as the third argument and slice
# off the last step, keeping only the preprocessing part.
# NOTE(review): presumably the third argument is the estimator slot — confirm against
# create_pipeline.
pipeline_preprocess = create_pipeline(
    X_train,
    y_train,
    lambda x : x,
)[:-1]

# Fit on the training split only, then apply the same fitted transform to every split.
pipeline_preprocess.fit(X_train, y_train)
preprocess_column_names = pipeline_preprocess.get_feature_names_out()
X_train_preprocess = pipeline_preprocess.transform(X_train)
X_val_preprocess = pipeline_preprocess.transform(X_val)
X_test_preprocess = pipeline_preprocess.transform(X_test)

### Pre-processing (Reweighing / AIF360)

In [17]:
# AIF360 works on a labelled dataset object, so rebuild a frame from the preprocessed
# training features and attach the label column.
df_rw = pd.DataFrame(X_train_preprocess, columns=preprocess_column_names).assign(
    TARGET=y_train.values
)
x_train_aif = BinaryLabelDataset(
    df=df_rw,
    label_names=["TARGET"],
    protected_attribute_names=["cat__CODE_GENDER_F"],
)

# Groups are defined on the one-hot gender column: value 1 is treated as unprivileged.
rw = Reweighing(
    unprivileged_groups=[{"cat__CODE_GENDER_F": 1}],
    privileged_groups=[{"cat__CODE_GENDER_F": 0}],
)
rw.fit(x_train_aif)

# Per-row weights of the transformed dataset, used as sample_weight when refitting.
rw_weights = rw.transform(x_train_aif).instance_weights

In [None]:
# Refit each baseline estimator with its tuned parameters on the reweighed training data.
# NOTE(review): MLP is left out of this loop, presumably because its fit() does not take
# sample_weight — confirm.
for n in ["LogisticRegression", "RandomForest", "LightGBM"]:
    # Parameters of the tuned estimator inside the saved pipeline.
    best_params = models[n]["classifier"].get_params()
    rw_model = classifiers[n](**best_params)
    rw_model.fit(X_train_preprocess, y_train, sample_weight=rw_weights)

    # The threshold is chosen on the validation split, as for the baseline models.
    rw_threshold = ks_threshold(y_val, rw_model.predict_proba(X_val_preprocess)[:, 1])

    # Store 0/1 integers so every entry of models_dict_fairness has the same dtype
    # as the baseline predictions.
    models_dict_fairness[n + "Reweighing"] = (
        rw_model.predict_proba(X_test_preprocess)[:, 1] > rw_threshold
    ).astype(int)

### In-processing

#### EqualOpportunityClassifier / DemographicParityClassifier

In [17]:
# Position of the one-hot gender column in the preprocessed feature matrix.
gender_col_idx = np.where(preprocess_column_names == "cat__CODE_GENDER_F")[0][0]

# scikit-lego documents sensitive_cols as a list of columns. A scalar index selects a
# 1-D slice instead of an (n, 1) column, so pass a one-element list of a plain int.
sensitive_cols = [int(gender_col_idx)]

eop_class = EqualOpportunityClassifier(
    covariance_threshold = 0.1,
    sensitive_cols = sensitive_cols,
    positive_target = 1
)
eop_class.fit(X_train_preprocess, y_train)
eoq_ks_threshold = ks_threshold(y_val, eop_class.predict_proba(X_val_preprocess)[:,1])
# Store 0/1 integers, matching the dtype of the baseline predictions.
models_dict_fairness["EqualOpportunity"] = (
    eop_class.predict_proba(X_test_preprocess)[:,1] > eoq_ks_threshold
).astype(int)

dop_class = DemographicParityClassifier(
    covariance_threshold = 0.1,
    sensitive_cols = sensitive_cols,
)
dop_class.fit(X_train_preprocess, y_train)
dop_ks_threshold = ks_threshold(y_val, dop_class.predict_proba(X_val_preprocess)[:,1])
models_dict_fairness["DemographicParity"] = (
    dop_class.predict_proba(X_test_preprocess)[:,1] > dop_ks_threshold
).astype(int)

: 

#### FairGBM

In [19]:
# Start from the tuned LightGBM parameters; "objective" is removed before they are
# forwarded to FairGBMClassifier.
fairgbm_params = models["LightGBM"]["classifier"].get_params()
del fairgbm_params["objective"]
fairgbm = FairGBMClassifier(
    constraint_type="FNR",
    **fairgbm_params
)
# The gender indicator of the training split defines the constraint groups.
fairgbm.fit(X_train_preprocess, y_train, constraint_group=A_train)
fairgbm_threshold = ks_threshold(y_val, fairgbm.predict_proba(X_val_preprocess)[:,1])
# Store 0/1 integers, matching the dtype of the baseline predictions.
models_dict_fairness["FairGBM"] = (
    fairgbm.predict_proba(X_test_preprocess)[:,1] > fairgbm_threshold
).astype(int)

### Post-processing (Threshold-Optimizer / FairLearn)

In [20]:
# Post-process every baseline model with group-specific thresholds.
for name, model in models_dict.items():
    thr_opt = ThresholdOptimizer(
        estimator=model[0],  # model is [fitted pipeline, KS threshold]; only the pipeline is used
        constraints="equalized_odds",
        objective="balanced_accuracy_score",
        prefit=True,  # the estimator is already trained; fit() only learns the thresholds
        predict_method="predict_proba",
    )
    thr_opt.fit(X_train, y_train, sensitive_features=A_train)
    # ThresholdOptimizer.predict is randomized; fixing random_state makes the stored
    # predictions (and the metrics derived from them) reproducible across runs.
    models_dict_fairness[name + "Thr.Opt."] = thr_opt.predict(
        X_test,
        sensitive_features=A_test,
        random_state=seed_number,
    )

### Fairness Evaluation

In [22]:
# Fairness metrics for every entry of models_dict_fairness on the test split.
df_metrics_results = get_fairness_metrics(models_dict_fairness, y_test, A_test, 1)

# reset_index() returns a new frame with the model names in an "index" column;
# that flat frame is what gets written to disk.
df_metrics_results_ = df_metrics_results.reset_index()
df_metrics_results_.to_csv("results/home_credit_fairness_metrics.csv", index=False)

In [6]:
# Read the metrics back from the CSV written by the evaluation cell.
df_metrics_results_ = pd.read_csv("results/home_credit_fairness_metrics.csv")

In [8]:
# Compact view: model name with its AOD and balanced_accuracy columns, rounded to 3 decimals.
df_metrics_results_[["index", "AOD", "balanced_accuracy"]].round(3)

Unnamed: 0,index,AOD,balanced_accuracy
0,LogisticRegression,-0.161,0.686
1,RandomForest,-0.136,0.677
2,LightGBM,-0.153,0.69
3,MLP,-0.186,0.68
4,LogisticRegressionReweighing,0.01,0.684
5,RandomForestReweighing,-0.028,0.672
6,LightGBMReweighing,-0.01,0.689
7,FairGBM,-0.133,0.691
8,LogisticRegressionThr.Opt.,0.012,0.683
9,RandomForestThr.Opt.,0.004,0.672


In [9]:
# Summary statistics of the numeric metric columns across all models, rounded to 2 decimals.
df_metrics_results_.describe().round(2)

Unnamed: 0,DPD,EOD,AOD,APVD,GMA,balanced_accuracy
count,12.0,12.0,12.0,12.0,12.0,12.0
mean,-0.07,-0.06,-0.06,-0.02,0.68,0.68
std,0.08,0.08,0.08,0.01,0.03,0.01
min,-0.2,-0.17,-0.19,-0.03,0.63,0.67
25%,-0.15,-0.14,-0.14,-0.03,0.66,0.68
50%,-0.03,-0.02,-0.02,-0.03,0.67,0.68
75%,-0.0,0.0,0.0,-0.01,0.69,0.69
max,0.0,0.01,0.01,-0.0,0.73,0.69


In [12]:
# Map each model name to an integer id (in order of first appearance); the ids drive
# the line colours of the parallel-coordinates trace.
color_mapping = {model: idx for idx, model in enumerate(df_metrics_results_["index"].unique())}

# Pick one Viridis palette entry per model, spread evenly over the palette.
num_colors = len(color_mapping)
colors = px.colors.sequential.Viridis
# max(..., 1) avoids a ZeroDivisionError when only a single model is present.
color_steps = max(num_colors - 1, 1)
manual_colors = [colors[i * (len(colors) - 1) // color_steps] for i in range(num_colors)]

# Build an explicit colour scale from the legend colours so that model id i is drawn in
# exactly manual_colors[i], instead of relying on the trace's default colour scale.
if num_colors > 1:
    line_colorscale = [[i / color_steps, color] for i, color in enumerate(manual_colors)]
else:
    # A colour scale needs two stops; repeat the single colour (or fall back to the
    # first palette entry when there are no models at all).
    only_color = manual_colors[0] if manual_colors else colors[0]
    line_colorscale = [[0.0, only_color], [1.0, only_color]]

# Parallel-coordinates plot: one line per model, one axis per metric. GMA and balanced
# accuracy are negated, as the axis labels state.
fig = go.Figure(data=
    go.Parcoords(
        line=dict(
            color=df_metrics_results_['index'].map(color_mapping),
            colorscale=line_colorscale,
            cmin=0,
            cmax=color_steps,
            showscale=False,
        ),
        dimensions=[
            dict(label='DPD', values=df_metrics_results_['DPD'], range=[-0.2, 0.02]),
            dict(label='EOD', values=df_metrics_results_['EOD'], range=[-0.2, 0.02]),
            dict(label='AOD', values=df_metrics_results_['AOD'], range=[-0.2, 0.02]),
            dict(label='APVD', values=df_metrics_results_['APVD'], range=[-0.2, 0.02]),
            dict(label='-1 * GMA', values= -df_metrics_results_['GMA']),
            dict(label='-1 * Balanced Acc.', values= - df_metrics_results_['balanced_accuracy'])
        ]
    )
)

# Parcoords has no legend of its own: add one empty scatter trace per model, drawn in
# the colour used for that model's line.
for model, idx in color_mapping.items():
    fig.add_trace(
        go.Scatter(x=[None], y=[None],
                   mode='markers',
                   marker=dict(size=10, color=manual_colors[idx]),
                   name=model,
                   showlegend=True)
    )

# White background and black text.
fig.update_layout(
    plot_bgcolor='white',
    paper_bgcolor='white',
    font_color='black'
)

# Hide the x tick labels of the axes created by the empty scatter traces.
fig.update_xaxes(showticklabels=False)

# Title, with extra top margin so it does not overlap the axis labels.
fig.update_layout(title='Fairness Metrics Home Credit', margin=dict(t=100))

# Show the plot
fig.show()

# Save the figure as PDF.
fig.write_image("results/home_credit_fairness_metrics.pdf")