In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import warnings
from sklearn.model_selection import KFold, GridSearchCV


# Ignore all warning messages
warnings.filterwarnings('ignore')

# Set seed for reproducibility
seed = 42

# Load the dataset
dataset = pd.read_csv('./data/cardio_train.csv',sep=';', index_col='id')


In [2]:
dataset

Unnamed: 0_level_0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,17474,1,156,56.0,100,60,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
99998,22431,1,163,72.0,135,80,1,2,0,0,0,1


In [3]:
dataset = dataset.dropna()


In [5]:
# Define your features and label, but keep 'gender' for creating the mask
X = dataset.drop('cardio', axis=1)
y = dataset['cardio'].values

# Create masks for gender before scaling
gender_0_mask = dataset['gender'] == 1
gender_1_mask = dataset['gender'] == 2

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_gender_0 = X_scaled[gender_0_mask]
X_scaled_gender_1 = X_scaled[gender_1_mask]
y_gender_0 = y[gender_0_mask]
y_gender_1 = y[gender_1_mask]

# Define the models
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0  # Sensitivity, recall, or true positive rate
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0  # Specificity or true negative rate
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0  # False positive rate
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0  # False negative rate
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation and calculate sensitivity and specificity
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments and storing results
def run_experiment(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")

            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)
            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results in the fold_results dictionary
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        # Append the dictionary to the results_list as a DataFrame
        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each gender
print("Starting experiments for Gender = Female (0)")
run_experiment(X_scaled_gender_0, y_gender_0, 'Female', results_list)

print("Starting experiments for Gender = Male (1)")
run_experiment(X_scaled_gender_1, y_gender_1, 'Male', results_list)

# Concatenate all DataFrames in the results_list into one DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for Gender = Female (0)
Processing fold 1 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 2 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 3 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 4 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating 

   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 13 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 14 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 15 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 16 for group Male
  

In [7]:
results_df = pd.concat(results_list, ignore_index=True)


In [8]:
results_path = 'D97_results.xlsx'
results_df.to_excel(results_path, index=False)

In [11]:
# Define your features and label, but keep 'gender' for creating the mask
X = dataset.drop('cardio', axis=1)
y = dataset['cardio'].values

# Create masks for gender before scaling
gender_0_mask = dataset['gender'] == 1
gender_1_mask = dataset['gender'] == 2

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_gender_0 = X_scaled[gender_0_mask]
X_scaled_gender_1 = X_scaled[gender_1_mask]
y_gender_0 = y[gender_0_mask]
y_gender_1 = y[gender_1_mask]

# Hyperparameter grids
hyperparameters = {
    'SVM': {
        'C': [0.1, 1,],
        'kernel': ['linear', 'rbf'],
    },
    'RF': {
        'n_estimators': [50, 100],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2,]
    },
    'LR': {
        'C': [0.01, 0.1, 1],
        'solver': ['newton-cg', 'lbfgs', 'liblinear'],
        'penalty': ['l2']
    },
    'KNN': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree',]
    },
    'DT': {
        'max_depth': [10, 20,],
        'min_samples_split': [2, 5,],
        'criterion': ['gini', 'entropy']
    },
    'ANN': {
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'alpha': [0.0001, 0.001],
    }
}

# Define models
seed = 42  # Set your seed for reproducibility
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments with hyperparameter tuning
def run_experiment_with_tuning(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")
            if name in hyperparameters:
                # Apply hyperparameter tuning
                clf = GridSearchCV(model, hyperparameters[name], cv=5, scoring='accuracy', n_jobs=-1)
                clf.fit(X_train, y_train)
                best_model = clf.best_estimator_
                y_pred = best_model.predict(X_test)
            else:
                # Fit the model without tuning
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each gender group
print("Starting experiments for Gender = Female (0)")
run_experiment_with_tuning(X_scaled_gender_0, y_gender_0, 'Female', results_list)

print("Starting experiments for Gender = Male (1)")
run_experiment_with_tuning(X_scaled_gender_1, y_gender_1, 'Male', results_list)

# Concatenate all DataFrames in the results_list into one  DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for Gender = Female (0)
Processing fold 1 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 2 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 3 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 4 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 5 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 6 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 7 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 8 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 9 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 10 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 11 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 12 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 13 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 14 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 15 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 16 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 17 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 18 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 19 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 20 for group Female
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Starting experiments for Gender = Male (1)
Processing fold 1 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 2 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 3 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 4 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 5 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 6 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 7 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 8 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 9 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 10 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 11 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 12 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 13 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 14 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 15 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 16 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 17 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 18 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 19 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




Processing fold 20 for group Male
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN




    Fold   Group   SVM_TPR   SVM_TNR   SVM_FPR   SVM_FNR  SVM_TP  SVM_TN  \
0      1  Female  0.686632  0.744000  0.256000  0.313368     791     837   
1      2  Female  0.700086  0.748214  0.251786  0.299914     810     838   
2      3  Female  0.673894  0.782918  0.217082  0.326106     777     880   
3      4  Female  0.688210  0.763251  0.236749  0.311790     788     864   
4      5  Female  0.688730  0.777394  0.222606  0.311270     770     901   
5      6  Female  0.677036  0.762669  0.237331  0.322964     740     903   
6      7  Female  0.712489  0.739691  0.260309  0.287511     793     861   
7      8  Female  0.686131  0.762066  0.237934  0.313869     752     900   
8      9  Female  0.696237  0.757106  0.242894  0.303763     777     879   
9     10  Female  0.715168  0.750656  0.249344  0.284832     811     858   
10    11  Female  0.707623  0.761413  0.238587  0.292377     789     884   
11    12  Female  0.699482  0.750447  0.249553  0.300518     810     839   
12    13  Fe

In [12]:
results_df = pd.concat(results_list, ignore_index=True)
results_path = 'D97_results_tuned.xlsx'
results_df.to_excel(results_path, index=False)

In [3]:
df = pd.read_excel('./data/D97_results.xlsx')

In [4]:
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,DT_FP,DT_FN,ANN_TPR,ANN_TNR,ANN_FPR,ANN_FNR,ANN_TP,ANN_TN,ANN_FP,ANN_FN
0,1,Female,0.686632,0.744,0.256,0.313368,791,837,288,361,...,412,410,0.724826,0.719111,0.280889,0.275174,835,809,316,317
1,2,Female,0.700086,0.748214,0.251786,0.299914,810,838,282,347,...,393,438,0.701815,0.742857,0.257143,0.298185,812,832,288,345
2,3,Female,0.673894,0.782918,0.217082,0.326106,777,880,244,376,...,396,457,0.632264,0.821174,0.178826,0.367736,729,923,201,424
3,4,Female,0.68821,0.763251,0.236749,0.31179,788,864,268,357,...,414,426,0.70655,0.753534,0.246466,0.29345,809,853,279,336
4,5,Female,0.68873,0.777394,0.222606,0.31127,770,901,258,348,...,427,411,0.691413,0.788611,0.211389,0.308587,773,914,245,345


In [5]:
from scipy.stats import ttest_ind

def perform_t_tests(df, algorithm):
    # Define the column names based on the algorithm
    tpr_col = f"{algorithm}_TPR"
    fpr_col = f"{algorithm}_FPR"
    fn_col = f"{algorithm}_FN"
    fp_col = f"{algorithm}_FP"
    
    # Define the groups
    protected_group = df['Group'] == 'Female'
    unprotected_group = ~protected_group

    # Extract the metrics
    protected_tpr = df.loc[protected_group, tpr_col].values
    unprotected_tpr = df.loc[unprotected_group, tpr_col].values

    protected_fpr = df.loc[protected_group, fpr_col].values
    unprotected_fpr = df.loc[unprotected_group, fpr_col].values

    protected_ratio_fn_fp = (df.loc[protected_group, fn_col] / df.loc[protected_group, fp_col]).values
    unprotected_ratio_fn_fp = (df.loc[unprotected_group, fn_col] / df.loc[unprotected_group, fp_col]).values

    # Perform t-tests

    # Definition 1: Equalised Odds (TPR and FPR)
    tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)

    # Definition 2: Equal Opportunity (TPR)
    equal_opportunity_ttest = ttest_ind(protected_tpr, unprotected_tpr)

    # Definition 3: Treatment Equality (Ratio of false negatives to false positives)
    treatment_equality_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Definition 4: Aggregate of all conditions
    aggregate_tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    aggregate_fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)
    aggregate_ratio_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Print results
    print(f"{algorithm} - Equalised Odds (TPR):", tpr_ttest)
    print(f"{algorithm} - Equalised Odds (FPR):", fpr_ttest)
    print(f"{algorithm} - Equal Opportunity (TPR):", equal_opportunity_ttest)
    print(f"{algorithm} - Aggregate - TPR:", aggregate_tpr_ttest)
    print(f"{algorithm} - Aggregate - FPR:", aggregate_fpr_ttest)


In [6]:
perform_t_tests(df, 'SVM')
perform_t_tests(df, 'LR')
perform_t_tests(df, 'DT')
perform_t_tests(df, 'RF')
perform_t_tests(df, 'ANN')

SVM - Equalised Odds (TPR): TtestResult(statistic=3.01601434247243, pvalue=0.00454916854808813, df=38.0)
SVM - Equalised Odds (FPR): TtestResult(statistic=-0.2761009332543229, pvalue=0.7839666297171094, df=38.0)
SVM - Equal Opportunity (TPR): TtestResult(statistic=3.01601434247243, pvalue=0.00454916854808813, df=38.0)
SVM - Aggregate - TPR: TtestResult(statistic=3.01601434247243, pvalue=0.00454916854808813, df=38.0)
SVM - Aggregate - FPR: TtestResult(statistic=-0.2761009332543229, pvalue=0.7839666297171094, df=38.0)
LR - Equalised Odds (TPR): TtestResult(statistic=-0.8753062762766121, pvalue=0.386906537238078, df=38.0)
LR - Equalised Odds (FPR): TtestResult(statistic=1.8531016571197978, pvalue=0.07164503112081702, df=38.0)
LR - Equal Opportunity (TPR): TtestResult(statistic=-0.8753062762766121, pvalue=0.386906537238078, df=38.0)
LR - Aggregate - TPR: TtestResult(statistic=-0.8753062762766121, pvalue=0.386906537238078, df=38.0)
LR - Aggregate - FPR: TtestResult(statistic=1.8531016571197