In [1]:
import sklearn
# import shap

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Import base classifiers
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from baselines import AdaFairClassifier
from imbens.ensemble import SMOTEBoostClassifier, SMOTEBaggingClassifier, RUSBoostClassifier, UnderBaggingClassifier, SelfPacedEnsembleClassifier
from fairlearn.postprocessing import ThresholdOptimizer
from fairens import FairAugEnsemble, FairEnsemble

# Set GPU for matrix computations
import torch
# Select the compute device: CUDA device 0 when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.set_device(0)
    device_id = torch.cuda.current_device()
    # Report which GPU was picked so the run log records the hardware.
    print (f"Now using GPU #{device_id}:\n{torch.cuda.get_device_name(device_id)}")
else:
    device = 'cpu'

# Import utilities
from data import FairDataset    # This is a custom class that we will use to load the datasets
from eval import evaluate_multi_split, verbose_print
from trainer import Trainer
from utils import seed_generator, dict_info, describe_data

pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[FairAdapt]'


Now using GPU #0:
Tesla V100-SXM2-32GB


# Load dataset


In [2]:
# Single source of truth for randomness in this notebook section: the same
# seed drives both the dataset loader and the cross-validation split below.
SEED = 42
n_splits = 5
i_split = 0

# Keyword arguments forwarded to FairDataset.
dataset_kwargs = {
    'y_col': 'label',
    'train_size': 0.6,
    'val_size': 0.2,
    'test_size': 0.2,
    'concat_train_val': True,
    'normalize': True,
    'random_state': SEED,   # was a second hard-coded 42; tied to SEED so they cannot diverge
}

# dataname = 'adult'
# s_attr = 'gender'
dataname = 'compas'
s_attr = 'sex'
# x_with_s=True presumably keeps the sensitive column inside X -- confirm in FairDataset.
data = FairDataset(
    dataname=dataname,
    csv_path=f'./data/{dataname}.csv',
    s_col=s_attr,
    x_with_s=True,
    **dataset_kwargs
)

data.describe()

# Fetch split `i_split` out of `n_splits`: features, labels and sensitive
# attribute for train / val / test, plus the row indices of each part.
(
    (X_train, y_train, s_train),
    (X_val, y_val, s_val),
    (X_test, y_test, s_test),
    (idx_train, idx_val, idx_test)
) = data.get_subgroup_split(i_split=i_split, random_state=SEED, n_splits=n_splits)

# Basic problem dimensions derived from the training split.
classes = np.unique(y_train)
n_feat = X_train.shape[1]
n_class = len(classes)

Dataset    : compas (5875, 12) load from ./data/compas.csv
Sens/Res   : sex/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train data [#samples 4700 #features 11]:
+-----+-------+-------+------------+
|     |   y=0 |   y=1 |   pos_rate |
| s=0 |   593 |   336 |     0.3617 |
+-----+-------+-------+------------+
| s=1 |  1917 |  1854 |     0.4916 |
+-----+-------+-------+------------+
test data [#samples 1175 #features 11]:
+-----+-------+-------+------------+
|     |   y=0 |   y=1 |   pos_rate |
| s=0 |   148 |    84 |     0.3621 |
+-----+-------+-------+------------+
| s=1 |   480 |   463 |     0.491  |
+-----+-------+-------+------------+



In [3]:
from baselines.lfr import LFRClassifier
from eval import evaluate

# Sanity-check one baseline: LFRClassifier wrapping a logistic regression,
# fitted on the current train split (features, labels, sensitive attribute)
# and scored on the held-out test split.
# SEED is the constant defined together with the dataset configuration
# earlier in the notebook; it is intentionally not re-assigned here so the
# notebook has a single place where the seed is set.
base_clf = LogisticRegression()

clf = LFRClassifier(base_clf, random_state=SEED)
clf.fit(X_train, y_train, s_train)
evaluate(clf, X_test, y_test, s_test)

{'acc': 6.085106382978723e-01,
 'bacc': 0.5937423602137394,
 'ap': 0.5300762482890076,
 'roc': 0.5937423602137394,
 'f1': 0.5802267785026406,
 'dp': 0.013845211540571167,
 'eo': 0.090311986863711,
 'ge': 0.27172077312816667,
 'si': 0.06468085106382979,
 'acc_grp': {0: 0.591, 1: 0.613},
 'pos_rate_grp': {0: 0.263, 1: 0.277},
 'g_adv': 1,
 'acc_cls': {0: 0.813, 1: 0.374}}

In [4]:
# Show the training feature matrix as a DataFrame whose column headers are the dataset's feature names.
pd.DataFrame(X_train, columns=data.feature_names)

Unnamed: 0,sex,MarriageStatus,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_days_from_compas,c_charge_degree
0,1.0,0.166667,0.153846,1.0,0.0,0.0,0.0,0.052632,0.640381,0.500053,1.0
1,1.0,0.000000,0.123077,0.0,0.0,0.0,0.0,0.000000,0.640721,0.500053,0.0
2,1.0,0.000000,0.461538,1.0,0.1,0.0,0.0,0.605263,0.640381,0.500053,1.0
3,1.0,0.000000,0.061538,0.0,0.0,0.0,0.0,0.026316,0.640381,0.500053,1.0
4,0.0,0.000000,0.215385,0.0,0.0,0.0,0.0,0.078947,0.640381,0.500053,0.0
...,...,...,...,...,...,...,...,...,...,...,...
3520,0.0,0.000000,0.061538,0.0,0.0,0.0,0.0,0.000000,0.640721,0.500053,1.0
3521,1.0,0.000000,0.184615,0.0,0.0,0.0,0.0,0.052632,0.640381,0.500053,1.0
3522,1.0,0.000000,0.092308,0.0,0.0,0.0,0.0,0.000000,0.640721,0.500053,1.0
3523,1.0,0.000000,0.415385,0.0,0.0,0.0,0.0,0.342105,0.640381,0.500053,1.0


In [5]:
# from aif360.sklearn.preprocessing import LearnedFairRepresentations
# from eval import evaluate

# lfr = LearnedFairRepresentations(
#     prot_attr=X_train[:, 0],
#     # prot_attr=s_train,
#     random_state=SEED
# )

# print (lfr.prot_attr)

# df_X_train = pd.DataFrame(X_train)

# lfr.fit(df_X_train, y_train, priv_group=1)

# X_train_edit = lfr.transform(pd.DataFrame(X_train))
# X_test_edit = lfr.transform(pd.DataFrame(X_test))

# clf = LogisticRegression(random_state=SEED)
# clf.fit(X_train_edit, y_train)

# evaluate(clf, X_test, y_test, s_test)

In [6]:
# Load Datasets: build one FairDataset per (dataset, sensitive attribute) pair.

# Keyword arguments forwarded to every FairDataset; the seed reuses the
# notebook-level SEED constant instead of a second hard-coded literal.
dataset_kwargs = {
    'y_col': 'label',
    'train_size': 0.6,
    'val_size': 0.2,
    'test_size': 0.2,
    'concat_train_val': True,
    'normalize': True,
    'random_state': SEED,
}

# Dataset name -> sensitive attribute columns to evaluate for that dataset.
all_datasets = {
    'compas': ['sex', 'race'],
    'adult': ['gender', 'race'],
    'bank': ['age', 'marital=married'],
    'lsa': ['gender', 'race'],
    'lsa_unfair_gender_race': ['gender', 'race'],
    'meps': ['SEX', 'RACE'],
    'german': ['sex', 'foreign_worker', 'marital_status=single'],
}

# Create a dictionary of datasets: dataset_zoo
#   key:   dataset.fullname of the loaded FairDataset
#   value: FairDataset object
dataset_zoo = {}
# The loop variables are deliberately NOT named `dataname` / `s_attr`: those
# names hold the single-dataset selection made earlier in the notebook, and
# reusing them here would silently overwrite that selection with the last
# entry of `all_datasets`.
for zoo_dataname, zoo_s_attrs in all_datasets.items():
    for zoo_s_attr in zoo_s_attrs:
        dataset = FairDataset(
            dataname=zoo_dataname,
            csv_path=f'./data/{zoo_dataname}.csv',
            s_col=zoo_s_attr,
            **dataset_kwargs
        )
        dataset_zoo[dataset.fullname] = dataset

        # dataset.describe()
        dataset.brief()

# Print the information of the datasets and models
print(
    f"////// Dataset ZOO //////\n"
    f"{dict_info(dataset_zoo)}\n"
)

# Small subset of the zoo (the two COMPAS variants) for quicker experiments.
dataset_zoo_subset = {
    'compas_sex': dataset_zoo['compas_sex'],
    'compas_race': dataset_zoo['compas_race'],
}

Dataset    : compas (5875, 12) load from ./data/compas.csv
Sens/Res   : sex/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 929, 1: 3771} | grp_pos_ratio: {0: 0.3617, 1: 0.4916}
test       | size {0: 232, 1: 943} | grp_pos_ratio: {0: 0.3621, 1: 0.491}

Dataset    : compas (5875, 12) load from ./data/compas.csv
Sens/Res   : race/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 1878, 1: 2822} | grp_pos_ratio: {0: 0.3946, 1: 0.5135}
test       | size {0: 469, 1: 706} | grp_pos_ratio: {0: 0.3945, 1: 0.5127}

Dataset    : adult (45222, 99) load from ./data/adult.csv
Sens/Res   : gender/label
Split      : train/test = 0.8/0.2, random_state = 42, x_with_s = True
train      | size {0: 11756, 1: 24421} | grp_pos_ratio: {0: 0.1136, 1: 0.3125}
test       | size {0: 2939, 1: 6106} | grp_pos_ratio: {0: 0.1136, 1: 0.3125}

Dataset    : adult (45222, 99) load from ./data/adult.csv
Sens/Res   : race/labe

In [7]:
from baselines import ReweightClassifier, AdaFairClassifier, ReductionClassifier, ThresholdClassifier, MimicClassifier
import tqdm
from eval import evaluate


def run_baseline_exp(dataset_zoo, base_model_zoo, model_zoo, n_splits=5, n_runs=1, random_state=42, verbose=False):
    """
    Run baseline experiment with different base models and different datasets.

    Every (dataset, base model, method, run, split) combination is fitted on
    the train part of the split and scored on the test part with ``evaluate``.

    Parameters
    ----------
    dataset_zoo : dict
        Maps a dataset name to an object exposing
        ``get_subgroup_split(i_split=..., random_state=..., n_splits=...)``
        that returns ``(train, val, test, indices)`` where train/val/test are
        ``(X, y, s)`` triples.
    base_model_zoo : dict
        Maps a base-model name to a scikit-learn style estimator. The
        estimators are cloned before seeding, so the objects stored in the
        zoo are never modified. Estimators without a ``random_state``
        parameter are used unseeded instead of raising.
    model_zoo : dict
        Maps a method name to ``(model_class, model_kwargs)``. Each method is
        built as ``model_class(estimator=..., random_state=..., **model_kwargs)``.
    n_splits : int
        Number of splits requested from each dataset.
    n_runs : int
        Number of repetitions; run ``i`` uses the seed ``random_state + i``.
    random_state : int
        Base seed.
    verbose : bool
        When True, print the metrics of every split (floats shown multiplied
        by 100 and rounded to 2 decimals) and disable the progress bar.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated split: the metrics returned by ``evaluate``
        plus the columns ``method``, ``n_edit`` (always 0), ``dataset``,
        ``base_model`` and ``i_run``.
    """
    # Local import keeps this block self-contained; scikit-learn is already a
    # dependency of the notebook.
    from sklearn.base import clone

    print(
        f"////// Baseline Experiment //////\n"
        f"Base Model Zoo: {list(base_model_zoo.keys())}\n"
        f"Model Zoo: {list(model_zoo.keys())}\n"
        f"Dataset Zoo: {list(dataset_zoo.keys())}\n"
        f"n_splits: {n_splits}\n"
        f"n_runs: {n_runs}\n"
        f"random_state: {random_state}\n"
    )

    all_res = []

    for data_name, data in dataset_zoo.items():

        for base_model_name, base_model in base_model_zoo.items():

            for model_name, (model, model_kwargs) in model_zoo.items():

                for i_run in range(n_runs):

                    print (f"Data: {data_name} | Run: {i_run} | Base: {base_model_name} | Model: {model_name}")
                    rand_seed = random_state + i_run

                    # Work on a fresh copy so the caller's estimator keeps its
                    # own parameters, and only seed estimators that actually
                    # expose `random_state` (set_params raises otherwise).
                    seeded_base = clone(base_model)
                    if 'random_state' in seeded_base.get_params():
                        seeded_base.set_params(random_state=rand_seed)

                    for i_split in tqdm.tqdm(range(n_splits), disable=verbose):

                        # get the i-th split of a n-fold cross validation;
                        # the validation part and the row indices are unused here
                        (
                            (X_train, y_train, s_train),
                            _,
                            (X_test, y_test, s_test),
                            _,
                        ) = data.get_subgroup_split(
                            i_split=i_split,
                            random_state=rand_seed,
                            n_splits=n_splits
                        )

                        clf = model(
                            estimator=seeded_base,
                            random_state=rand_seed,
                            **model_kwargs,
                        )
                        # Best-effort dispatch: try the plain fit(X, y) call
                        # first and, if it fails for any reason, retry passing
                        # the sensitive attribute. A failure of the retry
                        # propagates to the caller.
                        try:
                            clf.fit(X_train, y_train)
                        except Exception:
                            clf.fit(X_train, y_train, sensitive_features=s_train)

                        res = evaluate(clf, X_test, y_test, s_test)

                        all_res.append({
                            **res,
                            'method': model_name,
                            'n_edit': 0,
                            'dataset': data_name,
                            'base_model': base_model_name,
                            'i_run': i_run,
                        })
                        if verbose:
                            # Show float metrics as percentages; leave other
                            # values (dicts, ints) untouched.
                            res_vis = {
                                k: (np.round(v*100, 2) if isinstance(v, float) else v)
                                for k, v in res.items()
                            }
                            print (f"Split: {i_split} | {res_vis}")

    df_res = pd.DataFrame(all_res)
    # Also guarantees the column exists when no row was collected.
    df_res['n_edit'] = 0

    return df_res

# Keyword arguments shared by the ensemble-based baselines; spread into the
# AdaFair entries of `model_zoo` below.
ensemble_kwargs = {
    'n_estimators': 10,
    # 'random_state': 42,
}

# Base estimators handed to every method as `estimator=`. Only logistic
# regression is enabled; the other candidates are kept commented out.
base_model_zoo = {
    'LR': LogisticRegression(),
    # 'KN': KNeighborsClassifier(n_neighbors=5),
    # 'DT': DecisionTreeClassifier(max_depth=10),
    # 'MLP': MLPClassifier(hidden_layer_sizes=(8), max_iter=50),
    # 'ADA': AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=None), n_estimators=5),
    # 'BAG': BaggingClassifier(estimator=DecisionTreeClassifier(max_depth=None), n_estimators=5),
}

# Method name -> (wrapper class, extra constructor kwargs). run_baseline_exp
# builds each one as model(estimator=..., random_state=..., **kwargs).
# NOTE: LFRClassifier comes from the `from baselines.lfr import LFRClassifier`
# line in an earlier cell, not from this cell's imports, so that cell must
# have been run first.
model_zoo = {
    'ERM': (MimicClassifier, {}),
    'ThrDP': (ThresholdClassifier, {'constraints': 'demographic_parity'}),
    'ThrEO': (ThresholdClassifier, {'constraints': 'equalized_odds'}),
    'RedDP': (ReductionClassifier, {'constraints': 'DemographicParity'}),
    'RedEO': (ReductionClassifier, {'constraints': 'EqualizedOdds'}),
    'RW': (ReweightClassifier, {}),
    'LFR': (LFRClassifier, {}),
    'AdaF1': (AdaFairClassifier, {'saIndex': 0, 'saValue': 0, 'CSB': 'CSB1', **ensemble_kwargs}),
    # 'AdaF2': (AdaFairClassifier, {'saIndex': 0, 'saValue': 0, 'CSB': 'CSB2', **ensemble_kwargs}),
}

# Experiment settings. `seed` and `n_split` are also used further down to
# build the name of the results file.
seed = 42
n_runs = 1
n_split = 5

# NOTE(review): this sweeps the full `dataset_zoo`; `dataset_zoo_subset`
# (defined in the dataset-loading cell) is not used here -- confirm which one
# is intended.
df_res = run_baseline_exp(dataset_zoo, base_model_zoo, model_zoo, n_splits=n_split, n_runs=n_runs, random_state=seed, verbose=False)

////// Baseline Experiment //////
Base Model Zoo: ['LR']
Model Zoo: ['ERM', 'ThrDP', 'ThrEO', 'RedDP', 'RedEO', 'RW', 'LFR', 'AdaF1']
Dataset Zoo: ['compas_sex', 'compas_race', 'adult_gender', 'adult_race', 'bank_age', 'bank_marital=married', 'lsa_gender', 'lsa_race', 'lsa_unfair_gender_race_gender', 'lsa_unfair_gender_race_race', 'meps_SEX', 'meps_RACE', 'german_sex', 'german_foreign_worker', 'german_marital_status=single']
n_splits: 5
n_runs: 1
random_state: 42

Data: compas_sex | Run: 0 | Base: LR | Model: ERM
Split: 0 | {'acc': 69.45, 'bacc': 69.05, 'ap': 60.62, 'roc': 69.05, 'f1': 69.1, 'dp': 36.15, 'eo': 43.64, 'ge': 16.44, 'si': 21.45, 'acc_grp': {0: 0.69, 1: 0.696}, 'pos_rate_grp': {0: 0.138, 1: 0.499}, 'g_adv': 1, 'acc_cls': {0: 0.75, 1: 0.631}}
Split: 1 | {'acc': 67.23, 'bacc': 66.77, 'ap': 58.55, 'roc': 66.77, 'f1': 66.8, 'dp': 37.24, 'eo': 39.76, 'ge': 17.91, 'si': 24.26, 'acc_grp': {0: 0.707, 1: 0.664}, 'pos_rate_grp': {0: 0.121, 1: 0.493}, 'g_adv': 1, 'acc_cls': {0: 0.737

KeyboardInterrupt: 

In [None]:
def plot_scatter_xy_tradeoff(
        df, x, y, group_key, style,
        ax=None, title=None, errorbar=False,
        **kwargs
):
    """
    Scatter the mean of metric `y` against the mean of metric `x`.

    `df` is aggregated per (`group_key`, `style`) pair into mean and standard
    deviation of both metrics. Each distinct `style` value is drawn as one
    labelled scatter series, optionally with +/- one standard deviation error
    bars on both axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `x`, `y`, `group_key` and `style`.
    x, y : str
        Metric column names; their upper-cased names become the axis labels.
    group_key, style : str
        Grouping columns; `style` also provides the legend entries.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 5x5 inch figure is created when omitted.
    title : str, optional
        Axes title. Applied only when not None.
    errorbar : bool
        Draw standard-deviation error bars when True.
    **kwargs
        Accepted for call-site compatibility; currently unused.

    Returns
    -------
    The Axes that was drawn on.

    Raises
    ------
    AssertionError
        If one of the required columns is missing from `df`.
    """
    assert x in df.columns and y in df.columns
    assert group_key in df.columns and style in df.columns
    df_plot = df.groupby([group_key, style]).agg({x: ['mean', 'std'], y: ['mean', 'std']}).reset_index()

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(5, 5))

    for style_val in df_plot[style].unique():
        df_subplot = df_plot[df_plot[style] == style_val]
        x_mean, x_std = df_subplot[x]['mean'], df_subplot[x]['std']
        y_mean, y_std = df_subplot[y]['mean'], df_subplot[y]['std']

        ax.scatter(x_mean, y_mean, label=style_val, marker="o", s=50)
        if errorbar:
            ax.errorbar(
                x=x_mean, y=y_mean,
                xerr=x_std, yerr=y_std,
                ecolor='k', fmt='none', alpha=0.5,
                # capthick=2, capsize=5,
            )

    # Axes decoration is loop-invariant, so it is applied once after drawing.
    ax.set(
        xlabel=x.upper(),
        ylabel=y.upper(),
    )
    if title is not None:
        # Previously the `title` argument was accepted but silently dropped.
        ax.set_title(title)
    if len(df_plot) > 0:
        # Only build a legend when at least one series was drawn.
        ax.legend()
    return ax

def plot_xy_group_scatter_tradeoff(
        df, xs, ys, group_key, style,
        subfig_size=(3, 3), **kwargs
):
    """
    Draw one figure per dataset with a grid of x-vs-y trade-off scatter plots.

    For every distinct value of the `dataset` column of `df`, a figure with
    ``len(ys)`` rows and ``len(xs)`` columns is created; the panel in row j,
    column i shows metric ``ys[j]`` against metric ``xs[i]`` as drawn by
    `plot_scatter_xy_tradeoff`. Each figure is shown immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a `dataset` column plus the columns required by
        `plot_scatter_xy_tradeoff`.
    xs, ys : sequence of str
        Metric columns for the horizontal / vertical axes.
    group_key, style : str
        Forwarded to `plot_scatter_xy_tradeoff`.
    subfig_size : tuple
        ``(height, width)`` of a single panel in inches.
    **kwargs
        Forwarded to `plot_scatter_xy_tradeoff` (e.g. ``errorbar=True``).
    """
    n_x, n_y = len(xs), len(ys)
    h_ax, w_ax = subfig_size

    # one plot for one dataset
    dataset_unique = df['dataset'].unique()

    for data_name in dataset_unique:
        df_data = df[df['dataset'] == data_name]
        # squeeze=False keeps `axs` two-dimensional even when there is a
        # single row or column, so `axs[j, i]` below is always valid.
        fig, axs = plt.subplots(
            n_y, n_x, figsize=(w_ax*n_x, h_ax*n_y), squeeze=False
        )
        for i, x in enumerate(xs):
            for j, y in enumerate(ys):
                plot_scatter_xy_tradeoff(
                    df_data, x, y, group_key, style, ax=axs[j, i], title=None, **kwargs
                )
        plt.suptitle(f"Dataset: {data_name}")
        plt.tight_layout()
        plt.show()


# Make sure the constant `n_edit` column exists, since it is used as the
# grouping key of the plots below.
df_res['n_edit'] = 0
# One figure per dataset: fairness metrics on the x axes, accuracy metrics on
# the y axes, one scatter series per method, with standard-deviation error bars.
plot_xy_group_scatter_tradeoff(
    df_res, xs=['si', 'ge', 'dp', 'eo'], ys=['acc', 'bacc'], 
    group_key='n_edit', style='method', errorbar=True,
    subfig_size=(4, 4)
)

In [None]:
# Inspect only the result rows whose method is 'AdaF1'.
df_res[df_res['method'] == 'AdaF1']

In [None]:
import hashlib
import os

# Persist the results table under ./res_cache; the file name encodes the
# base models, datasets, seed and number of splits of the experiment.
file_name = f'./res_cache/baseline_clf{list(base_model_zoo.keys())}_data{list(dataset_zoo.keys())}_seed{seed}_split{n_split}.csv'

# Most filesystems limit a single path component to 255 bytes. Spelling out
# every dataset key can exceed that when the full zoo is used, which makes
# to_csv fail. In that case the dataset list is replaced by its length plus a
# short, deterministic hash; shorter names are left exactly as before.
max_name_bytes = 255
if len(os.path.basename(file_name).encode('utf-8')) > max_name_bytes:
    data_keys = list(dataset_zoo.keys())
    data_tag = hashlib.sha1(repr(data_keys).encode('utf-8')).hexdigest()[:10]
    file_name = f'./res_cache/baseline_clf{list(base_model_zoo.keys())}_data[{len(data_keys)}sets-{data_tag}]_seed{seed}_split{n_split}.csv'

# to_csv does not create missing directories.
os.makedirs(os.path.dirname(file_name), exist_ok=True)
df_res.to_csv(file_name, index=False)

# df_res.to_csv(f'./res_cache/baseline_{dataname}_{s_attr}_seed{seed}_split{n_split}.csv', index=False))