In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import warnings
from sklearn.model_selection import KFold, GridSearchCV

# Ignore all warning messages
warnings.filterwarnings('ignore')

# Set seed for reproducibility
seed = 42

# Load the dataset
dataset = pd.read_excel('./data/D98 (modified) v02.xlsx')
dataset = dataset.dropna()



In [2]:
dataset = dataset.drop('diag_1', axis=1)
dataset = dataset.drop('diag_2', axis=1)
dataset = dataset.drop('diag_3', axis=1)


In [3]:
# Define your features and label, but keep 'race' for creating the mask
X = dataset.drop('target', axis=1)
y = dataset['target'].values

# Create masks for race before scaling
race_0_mask = dataset['Race'] == 0  
race_1_mask = dataset['Race'] == 1  

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_race_0 = X_scaled[race_0_mask]
X_scaled_race_1 = X_scaled[race_1_mask]
y_race_0 = y[race_0_mask]
y_race_1 = y[race_1_mask]

In [5]:
y_race_0

array([0., 1., 0., ..., 1., 0., 1.])

In [6]:


# Define the models
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0  # Sensitivity, recall, or true positive rate
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0  # Specificity or true negative rate
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0  # False positive rate
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0  # False negative rate
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation and calculate sensitivity and specificity
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments and storing results
def run_experiment(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")

            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)
            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results in the fold_results dictionary
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        # Append the dictionary to the results_list as a DataFrame
        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each race
print("Starting experiments for race = 0")
run_experiment(X_scaled_race_0, y_race_0, '0', results_list)

print("Starting experiments for race = 0")
run_experiment(X_scaled_race_1, y_race_1, '1', results_list)

# Concatenate all DataFrames in the results_list into one DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for race = 0
Processing fold 1 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 2 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 3 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 4 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evalu

   Training and evaluating model: ANN
Processing fold 13 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 14 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 15 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 16 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training a

In [7]:
results_df = pd.concat(results_list, ignore_index=True)


In [8]:
results_path = 'D98_results.xlsx'
results_df.to_excel(results_path, index=False)

In [12]:
# Define your features and label, but keep 'race' for creating the mask
X = dataset.drop('target', axis=1)
y = dataset['target'].values

# Create masks for race before scaling
race_0_mask = dataset['Race'] == 0  
race_1_mask = dataset['Race'] == 1  

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_race_0 = X_scaled[race_0_mask]
X_scaled_race_1 = X_scaled[race_1_mask]
y_race_0 = y[race_0_mask]
y_race_1 = y[race_1_mask]

# Hyperparameter grids
hyperparameters = {
    'SVM': {
        'C': [0.1, 1,],
        'kernel': ['linear', 'rbf'],
    },
    'RF': {
        'n_estimators': [50, 100],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2,]
    },
    'LR': {
        'C': [0.01, 0.1, 1],
        'solver': ['newton-cg', 'lbfgs', 'liblinear'],
        'penalty': ['l2']
    },
    'KNN': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree',]
    },
    'DT': {
        'max_depth': [10, 20,],
        'min_samples_split': [2, 5,],
        'criterion': ['gini', 'entropy']
    },
    'ANN': {
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'alpha': [0.0001, 0.001],
    }
}

# Define models
seed = 42  # Set your seed for reproducibility
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments with hyperparameter tuning
def run_experiment_with_tuning(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")
            if name in hyperparameters:
                # Apply hyperparameter tuning
                clf = GridSearchCV(model, hyperparameters[name], cv=5, scoring='accuracy', n_jobs=-1)
                clf.fit(X_train, y_train)
                best_model = clf.best_estimator_
                y_pred = best_model.predict(X_test)
            else:
                # Fit the model without tuning
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each gender group
print("Starting experiments for race = 0")
run_experiment_with_tuning(X_scaled_race_0, y_race_0, '0', results_list)

print("Starting experiments for race = 1")
run_experiment(X_scaled_race_1, y_race_1, '1', results_list)

# Concatenate all DataFrames in the results_list into one  DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for race = 0
Processing fold 1 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 2 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 3 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 4 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 5 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 6 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 7 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 8 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 9 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 10 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 11 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 12 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 13 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 14 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 15 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 16 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 17 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 18 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 19 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 20 for group 0
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Starting experiments for race = 1
Processing fold 1 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 2 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 3 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 4 for group 1
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evalu

In [13]:
results_df = pd.concat(results_list, ignore_index=True)
results_path = 'D98_results_tuned.xlsx'
results_df.to_excel(results_path, index=False)

In [2]:
df = pd.read_excel('D98_results.xlsx')

In [8]:
df.tail()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,DT_FP,DT_FN,ANN_TPR,ANN_TNR,ANN_FPR,ANN_FNR,ANN_TP,ANN_TN,ANN_FP,ANN_FN
35,16,1,0.498893,0.757379,0.242621,0.501107,901,1514,485,905,...,843,834,0.486157,0.768384,0.231616,0.513843,878,1536,463,928
36,17,1,0.482153,0.759314,0.240686,0.517847,851,1549,491,914,...,850,851,0.552408,0.678922,0.321078,0.447592,975,1385,655,790
37,18,1,0.488571,0.732847,0.267153,0.511429,855,1506,549,895,...,860,838,0.552,0.678345,0.321655,0.448,966,1394,661,784
38,19,1,0.483492,0.758176,0.241824,0.516508,864,1530,488,923,...,834,840,0.570789,0.687314,0.312686,0.429211,1020,1387,631,767
39,20,1,0.46509,0.759369,0.240631,0.53491,826,1540,488,950,...,793,866,0.501126,0.720907,0.279093,0.498874,890,1462,566,886


In [9]:
from scipy.stats import ttest_ind

def perform_t_tests(df, algorithm):
    # Define the column names based on the algorithm
    tpr_col = f"{algorithm}_TPR"
    fpr_col = f"{algorithm}_FPR"
    fn_col = f"{algorithm}_FN"
    fp_col = f"{algorithm}_FP"
    
    # Define the groups
    protected_group = df['Group'] == 0
    unprotected_group = ~protected_group

    # Extract the metrics
    protected_tpr = df.loc[protected_group, tpr_col].values
    unprotected_tpr = df.loc[unprotected_group, tpr_col].values

    protected_fpr = df.loc[protected_group, fpr_col].values
    unprotected_fpr = df.loc[unprotected_group, fpr_col].values

    protected_ratio_fn_fp = (df.loc[protected_group, fn_col] / df.loc[protected_group, fp_col]).values
    unprotected_ratio_fn_fp = (df.loc[unprotected_group, fn_col] / df.loc[unprotected_group, fp_col]).values

    # Perform t-tests

    # Definition 1: Equalised Odds (TPR and FPR)
    tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)

    # Definition 2: Equal Opportunity (TPR)
    equal_opportunity_ttest = ttest_ind(protected_tpr, unprotected_tpr)

    # Definition 3: Treatment Equality (Ratio of false negatives to false positives)
    treatment_equality_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Definition 4: Aggregate of all conditions
    aggregate_tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    aggregate_fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)
    aggregate_ratio_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Print results
    print(f"{algorithm} - Equalised Odds (TPR):", tpr_ttest)
    print(f"{algorithm} - Equalised Odds (FPR):", fpr_ttest)
    print(f"{algorithm} - Equal Opportunity (TPR):", equal_opportunity_ttest)
    print(f"{algorithm} - Aggregate - TPR:", aggregate_tpr_ttest)
    print(f"{algorithm} - Aggregate - FPR:", aggregate_fpr_ttest)


In [10]:
perform_t_tests(df, 'SVM')
perform_t_tests(df, 'LR')
perform_t_tests(df, 'DT')
perform_t_tests(df, 'RF')
perform_t_tests(df, 'ANN')

SVM - Equalised Odds (TPR): TtestResult(statistic=-10.428389780992552, pvalue=1.050453859073614e-12, df=38.0)
SVM - Equalised Odds (FPR): TtestResult(statistic=-14.895605723719244, pvalue=1.89195751257944e-17, df=38.0)
SVM - Equal Opportunity (TPR): TtestResult(statistic=-10.428389780992552, pvalue=1.050453859073614e-12, df=38.0)
SVM - Aggregate - TPR: TtestResult(statistic=-10.428389780992552, pvalue=1.050453859073614e-12, df=38.0)
SVM - Aggregate - FPR: TtestResult(statistic=-14.895605723719244, pvalue=1.89195751257944e-17, df=38.0)
LR - Equalised Odds (TPR): TtestResult(statistic=-12.243624744276481, pvalue=9.258500455658684e-15, df=38.0)
LR - Equalised Odds (FPR): TtestResult(statistic=-19.279922530323038, pvalue=3.2206871460095746e-21, df=38.0)
LR - Equal Opportunity (TPR): TtestResult(statistic=-12.243624744276481, pvalue=9.258500455658684e-15, df=38.0)
LR - Aggregate - TPR: TtestResult(statistic=-12.243624744276481, pvalue=9.258500455658684e-15, df=38.0)
LR - Aggregate - FPR: Tt