In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import warnings
from sklearn.model_selection import KFold, GridSearchCV

# Ignore all warning messages
warnings.filterwarnings('ignore')

# Set seed for reproducibility
seed = 42


In [9]:
df = pd.read_csv('./data/D87.csv')
df.head()


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [3]:
numeric_features=['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']
categorical_features=['HeartDisease', 'Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory',
       'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth','Asthma', 'KidneyDisease', 'SkinCancer']

In [4]:
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
enc.fit(df[categorical_features])
df[categorical_features] = enc.transform(df[categorical_features])

In [5]:
from sklearn.model_selection import train_test_split, KFold, GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

from sklearn.metrics import precision_score,recall_score
from sklearn.metrics import f1_score


In [6]:
y = df['HeartDisease'].values
df.drop('HeartDisease',axis=1,inplace=True)

In [7]:
# Define your features and label, but keep 'gender' for creating the mask
X = df
X.head()

Unnamed: 0,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,0.0,7.0,5.0,2.0,1.0,4.0,5.0,1.0,0.0,1.0
1,20.34,0.0,0.0,1.0,0.0,0.0,0.0,0.0,12.0,5.0,0.0,1.0,4.0,7.0,0.0,0.0,0.0
2,26.58,1.0,0.0,0.0,20.0,30.0,0.0,1.0,9.0,5.0,2.0,1.0,1.0,8.0,1.0,0.0,0.0
3,24.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,5.0,0.0,0.0,2.0,6.0,0.0,0.0,1.0
4,23.71,0.0,0.0,0.0,28.0,0.0,1.0,0.0,4.0,5.0,0.0,1.0,4.0,8.0,0.0,0.0,0.0


In [8]:
# Create masks for gender before scaling
gender_0_mask = X['Sex'] == 0  # Assuming '0' represents Female
gender_1_mask = X['Sex'] == 1  # Assuming '1' represents Male

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_gender_0 = X_scaled[gender_0_mask]
X_scaled_gender_1 = X_scaled[gender_1_mask]
y_gender_0 = y[gender_0_mask]
y_gender_1 = y[gender_1_mask]


In [9]:
# Define the models
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0  # Sensitivity, recall, or true positive rate
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0  # Specificity or true negative rate
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0  # False positive rate
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0  # False negative rate
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation and calculate sensitivity and specificity
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments and storing results
def run_experiment(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")

            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)
            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results in the fold_results dictionary
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        # Append the dictionary to the results_list as a DataFrame
        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each gender
print("Starting experiments for Gender = Female (0)")
run_experiment(X_scaled_gender_0, y_gender_0, 'Female', results_list)

print("Starting experiments for Gender = Male (1)")
run_experiment(X_scaled_gender_1, y_gender_1, 'Male', results_list)

# Concatenate all DataFrames in the results_list into one DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for Gender = Female (0)
Processing fold 1 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 2 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 3 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 4 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating 

   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 13 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 14 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 15 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 16 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
  

In [10]:
results_df = pd.concat(results_list, ignore_index=True)


In [11]:
results_path = 'D87_results.xlsx'
results_df.to_excel(results_path, index=False)

In [3]:
df = pd.read_excel('./result/D87_results.xlsx')

In [4]:
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,DT_FP,DT_FN,ANN_TPR,ANN_TNR,ANN_FPR,ANN_FNR,ANN_TP,ANN_TN,ANN_FP,ANN_FN
0,1,Female,0.054329,0.995001,0.004999,0.945671,32,7763,39,557,...,510,478,0.035654,0.99718,0.00282,0.964346,21,7780,22,568
1,2,Female,0.067797,0.995929,0.004071,0.932203,36,7828,32,495,...,503,411,0.045198,0.994911,0.005089,0.954802,24,7820,40,507
2,3,Female,0.066553,0.994234,0.005766,0.933447,39,7760,45,547,...,526,454,0.059727,0.995772,0.004228,0.940273,35,7772,33,551
3,4,Female,0.046154,0.996797,0.003203,0.953846,27,7781,25,558,...,505,452,0.047863,0.996285,0.003715,0.952137,28,7777,29,557
4,5,Female,0.075812,0.994768,0.005232,0.924188,42,7796,41,512,...,497,454,0.054152,0.9963,0.0037,0.945848,30,7808,29,524


In [7]:
from scipy.stats import ttest_ind

def perform_t_tests(df, algorithm):
    # Define the column names based on the algorithm
    tpr_col = f"{algorithm}_TPR"
    fpr_col = f"{algorithm}_FPR"
    fn_col = f"{algorithm}_FN"
    fp_col = f"{algorithm}_FP"
    
    # Define the groups
    protected_group = df['Group'] == 'Female'
    unprotected_group = ~protected_group

    # Extract the metrics
    protected_tpr = df.loc[protected_group, tpr_col].values
    unprotected_tpr = df.loc[unprotected_group, tpr_col].values

    protected_fpr = df.loc[protected_group, fpr_col].values
    unprotected_fpr = df.loc[unprotected_group, fpr_col].values

    protected_ratio_fn_fp = (df.loc[protected_group, fn_col] / df.loc[protected_group, fp_col]).values
    unprotected_ratio_fn_fp = (df.loc[unprotected_group, fn_col] / df.loc[unprotected_group, fp_col]).values

    # Perform t-tests

    # Definition 1: Equalised Odds (TPR and FPR)
    tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)

    # Definition 2: Equal Opportunity (TPR)
    equal_opportunity_ttest = ttest_ind(protected_tpr, unprotected_tpr)

    # Definition 3: Treatment Equality (Ratio of false negatives to false positives)
    treatment_equality_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Definition 4: Aggregate of all conditions
    aggregate_tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    aggregate_fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)
    aggregate_ratio_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Print results
    print(f"{algorithm} - Equalised Odds (TPR):", tpr_ttest)
    print(f"{algorithm} - Equalised Odds (FPR):", fpr_ttest)
    print(f"{algorithm} - Equal Opportunity (TPR):", equal_opportunity_ttest)
    print(f"{algorithm} - Aggregate - TPR:", aggregate_tpr_ttest)
    print(f"{algorithm} - Aggregate - FPR:", aggregate_fpr_ttest)


In [8]:
perform_t_tests(df, 'SVM')
perform_t_tests(df, 'LR')
perform_t_tests(df, 'DT')
perform_t_tests(df, 'RF')
perform_t_tests(df, 'ANN')

SVM - Equalised Odds (TPR): TtestResult(statistic=-15.96019786656686, pvalue=1.9536608407375117e-18, df=38.0)
SVM - Equalised Odds (FPR): TtestResult(statistic=-20.391416455500146, pvalue=4.59703320684668e-22, df=38.0)
SVM - Equal Opportunity (TPR): TtestResult(statistic=-15.96019786656686, pvalue=1.9536608407375117e-18, df=38.0)
SVM - Aggregate - TPR: TtestResult(statistic=-15.96019786656686, pvalue=1.9536608407375117e-18, df=38.0)
SVM - Aggregate - FPR: TtestResult(statistic=-20.391416455500146, pvalue=4.59703320684668e-22, df=38.0)
LR - Equalised Odds (TPR): TtestResult(statistic=-16.501503500458824, pvalue=6.428362564989013e-19, df=38.0)
LR - Equalised Odds (FPR): TtestResult(statistic=-20.059031003586284, pvalue=8.151140283274074e-22, df=38.0)
LR - Equal Opportunity (TPR): TtestResult(statistic=-16.501503500458824, pvalue=6.428362564989013e-19, df=38.0)
LR - Aggregate - TPR: TtestResult(statistic=-16.501503500458824, pvalue=6.428362564989013e-19, df=38.0)
LR - Aggregate - FPR: Tte