In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import warnings
from sklearn.model_selection import KFold, GridSearchCV

# Ignore all warning messages
warnings.filterwarnings('ignore')

# Set seed for reproducibility
seed = 42


In [2]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
glioma_grading_clinical_and_mutation_features = fetch_ucirepo(id=759) 
  
# data (as pandas dataframes) 
X = glioma_grading_clinical_and_mutation_features.data.features 
y = glioma_grading_clinical_and_mutation_features.data.targets 
  


In [38]:
X['Race'] = np.where(X['Race'] == 'white', 0, 1)

In [39]:
X

Unnamed: 0,Gender,Age_at_diagnosis,Race,IDH1,TP53,ATRX,PTEN,EGFR,CIC,MUC16,...,FUBP1,RB1,NOTCH1,BCOR,CSMD3,SMARCA4,GRIN2A,IDH2,FAT4,PDGFRA
0,0,51.30,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,38.72,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,35.17,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,32.78,0,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
4,0,31.51,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,1,77.89,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
835,0,85.18,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
836,1,77.49,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
837,0,63.33,0,0,1,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0


In [40]:
# Create masks for race before scaling
race_0_mask = X['Race'] == 0  
race_1_mask = X['Race'] == 1  


# Apply masks after scaling
X_scaled_race_0 = X[race_0_mask].to_numpy()
X_scaled_race_1 = X[race_1_mask].to_numpy()
y_race_0 = y[race_0_mask].to_numpy()
y_race_1 = y[race_1_mask].to_numpy()



In [41]:
X_scaled_race_0

array([[ 0.  , 51.3 ,  0.  , ...,  0.  ,  0.  ,  0.  ],
       [ 0.  , 38.72,  0.  , ...,  0.  ,  0.  ,  0.  ],
       [ 0.  , 35.17,  0.  , ...,  0.  ,  0.  ,  0.  ],
       ...,
       [ 0.  , 85.18,  0.  , ...,  0.  ,  0.  ,  0.  ],
       [ 1.  , 77.49,  0.  , ...,  0.  ,  0.  ,  0.  ],
       [ 0.  , 63.33,  0.  , ...,  0.  ,  0.  ,  0.  ]])

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Load the dataset from your local file system (replace 'path_to_file' with the actual file path)
# dataset = pd.read_csv('path_to_file')

# Preprocess the dataset
# Convert categorical variables to numeric
le = LabelEncoder()
dataset['Race'] = dataset['Race'].map({'Female': 0, 'Male': 1})
dataset = dataset.dropna()

# Split the data into subgroups based on the protected feature (gender)
group_male = dataset[dataset['Race'] == 1]  # Male
group_female = dataset[dataset['Race'] == 0]  # Female

# Define the models
models = {
    'Decision Tree': DecisionTreeClassifier(),
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'ANN': MLPClassifier(max_iter=1000)  # Increased max_iter for convergence
}

# K-fold cross-validation settings
k = 20
kf = KFold(n_splits=k, shuffle=True, random_state=seed)

# Prepare training and testing data
X_train_male = group_male.drop(['diabetes'], axis=1)
y_train_male = group_male['diabetes']
X_test_all = dataset.drop(['diabetes'], axis=1)
y_test_all = dataset['diabetes']
X_test_female = group_female.drop(['diabetes'], axis=1)
y_test_female = group_female['diabetes']

# Perform k-fold cross-validation
results = []
for model_name, model in models.items():
    fold_idx = 1
    for train_index, test_index in kf.split(X_train_male, y_train_male):
        X_train, X_val = X_train_male.iloc[train_index], X_train_male.iloc[test_index]
        y_train, y_val = y_train_male.iloc[train_index], y_train_male.iloc[test_index]
        
        model.fit(X_train, y_train)
        
        predictions_all = model.predict(X_test_all)
        predictions_female = model.predict(X_test_female)
        
        report_all = classification_report(y_test_all, predictions_all, output_dict=True)
        report_female = classification_report(y_test_female, predictions_female, output_dict=True)
        
        results.append({
            'Model': model_name,
            'Fold': fold_idx,
            'Accuracy_All': report_all['accuracy'],
            'Precision_All': report_all['weighted avg']['precision'],
            'Recall_All': report_all['weighted avg']['recall'],
            'F1_Score_All': report_all['weighted avg']['f1-score'],
            'Accuracy_Female': report_female['accuracy'],
            'Precision_Female': report_female['weighted avg']['precision'],
            'Recall_Female': report_female['weighted avg']['recall'],
            'F1_Score_Female': report_female['weighted avg']['f1-score'],
        })
        fold_idx += 1

# Convert results to DataFrame for better visualization
results_df = pd.DataFrame(results)
print(results_df.groupby(['Model']).mean())  # Displaying average metrics across folds for simplicity


In [43]:
# Define the models
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0  # Sensitivity, recall, or true positive rate
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0  # Specificity or true negative rate
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0  # False positive rate
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0  # False negative rate
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation and calculate sensitivity and specificity
kf = KFold(n_splits=10, shuffle=True, random_state=seed)

# Define function for running experiments and storing results
def run_experiment(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")

            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)
            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results in the fold_results dictionary
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        # Append the dictionary to the results_list as a DataFrame
        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each race
print("Starting experiments for race = 0 (white)")
run_experiment(X_scaled_race_0, y_race_0, '0 (white)', results_list)

print("Starting experiments for race = 1 (other)")
run_experiment(X_scaled_race_1, y_race_1, '1', results_list)

# Concatenate all DataFrames in the results_list into one DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for race = 0 (white)
Processing fold 1 for group 0 (white)
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 2 for group 0 (white)
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 3 for group 0 (white)
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
Processing fold 4 for group 0 (white)
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and ev

In [44]:
results_df = pd.concat(results_list, ignore_index=True)


In [45]:
results_path = 'D71_results.xlsx'
results_df.to_excel(results_path, index=False)

In [6]:
df = pd.read_excel('./result/D71_results.xlsx')

In [7]:
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,DT_FP,DT_FN,ANN_TPR,ANN_TNR,ANN_FPR,ANN_FNR,ANN_TP,ANN_TN,ANN_FP,ANN_FN
0,1,0 (white),0.685714,0.952381,0.047619,0.314286,24,40,2,11,...,8,9,0.857143,0.833333,0.166667,0.142857,30,35,7,5
1,2,0 (white),0.730769,0.862745,0.137255,0.269231,19,44,7,7,...,8,5,0.807692,0.921569,0.078431,0.192308,21,47,4,5
2,3,0 (white),0.724138,0.6875,0.3125,0.275862,21,33,15,8,...,14,5,0.965517,0.833333,0.166667,0.034483,28,40,8,1
3,4,0 (white),0.740741,0.78,0.22,0.259259,20,39,11,7,...,9,7,0.888889,0.78,0.22,0.111111,24,39,11,3
4,5,0 (white),0.7,0.851064,0.148936,0.3,21,40,7,9,...,6,4,0.966667,0.893617,0.106383,0.033333,29,42,5,1


In [16]:
from scipy.stats import ttest_ind

def perform_t_tests(df, algorithm):
    # Define the column names based on the algorithm
    tpr_col = f"{algorithm}_TPR"
    fpr_col = f"{algorithm}_FPR"
    fn_col = f"{algorithm}_FN"
    fp_col = f"{algorithm}_FP"
    
    # Define the groups
    protected_group = df['Group'] == '0 (white)'
    unprotected_group = ~protected_group

    # Extract the metrics
    protected_tpr = df.loc[protected_group, tpr_col].values
    unprotected_tpr = df.loc[unprotected_group, tpr_col].values

    protected_fpr = df.loc[protected_group, fpr_col].values
    unprotected_fpr = df.loc[unprotected_group, fpr_col].values

    protected_ratio_fn_fp = (df.loc[protected_group, fn_col] / df.loc[protected_group, fp_col]).values
    unprotected_ratio_fn_fp = (df.loc[unprotected_group, fn_col] / df.loc[unprotected_group, fp_col]).values

    # Perform t-tests

    # Definition 1: Equalised Odds (TPR and FPR)
    tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)

    # Definition 2: Equal Opportunity (TPR)
    equal_opportunity_ttest = ttest_ind(protected_tpr, unprotected_tpr)

    # Definition 3: Treatment Equality (Ratio of false negatives to false positives)
    treatment_equality_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Definition 4: Aggregate of all conditions
    aggregate_tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    aggregate_fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)
    aggregate_ratio_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Print results
    print(f"{algorithm} - Equalised Odds (TPR):", tpr_ttest)
    print(f"{algorithm} - Equalised Odds (FPR):", fpr_ttest)
    print(f"{algorithm} - Equal Opportunity (TPR):", equal_opportunity_ttest)
    print(f"{algorithm} - Aggregate - TPR:", aggregate_tpr_ttest)
    print(f"{algorithm} - Aggregate - FPR:", aggregate_fpr_ttest)


In [17]:
perform_t_tests(df, 'SVM')
perform_t_tests(df, 'LR')
perform_t_tests(df, 'DT')
perform_t_tests(df, 'RF')
perform_t_tests(df, 'ANN')

SVM - Equalised Odds (TPR): TtestResult(statistic=-3.1995853449641456, pvalue=0.0049670154933958415, df=18.0)
SVM - Equalised Odds (FPR): TtestResult(statistic=-6.944461281636296, pvalue=1.7281481730698042e-06, df=18.0)
SVM - Equal Opportunity (TPR): TtestResult(statistic=-3.1995853449641456, pvalue=0.0049670154933958415, df=18.0)
SVM - Aggregate - TPR: TtestResult(statistic=-3.1995853449641456, pvalue=0.0049670154933958415, df=18.0)
SVM - Aggregate - FPR: TtestResult(statistic=-6.944461281636296, pvalue=1.7281481730698042e-06, df=18.0)
LR - Equalised Odds (TPR): TtestResult(statistic=1.2194268267704633, pvalue=0.2384209011519161, df=18.0)
LR - Equalised Odds (FPR): TtestResult(statistic=-3.6518590160813544, pvalue=0.0018240704312290203, df=18.0)
LR - Equal Opportunity (TPR): TtestResult(statistic=1.2194268267704633, pvalue=0.2384209011519161, df=18.0)
LR - Aggregate - TPR: TtestResult(statistic=1.2194268267704633, pvalue=0.2384209011519161, df=18.0)
LR - Aggregate - FPR: TtestResult(s