In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import warnings

# Ignore all warning messages
warnings.filterwarnings('ignore')

# Set seed for reproducibility
seed = 42

# Load the dataset
dataset = pd.read_csv('./data/diabetes_prediction_dataset.csv')


In [36]:
dataset.head()
print(dataset.isna().sum())


gender                 0
age                    0
hypertension           0
heart_disease          0
smoking_history        0
bmi                    0
HbA1c_level            0
blood_glucose_level    0
diabetes               0
dtype: int64


In [14]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Load the dataset from your local file system (replace 'path_to_file' with the actual file path)

# Preprocess the dataset
# Convert categorical variables to numeric
le = LabelEncoder()
dataset['smoking_history'] = le.fit_transform(dataset['smoking_history'])
dataset['gender'] = dataset['gender'].map({'Female': 0, 'Male': 1})
dataset =  dataset.dropna()
# Split the data into subgroups based on the protected feature (gender)
group_male = dataset[dataset['gender'] == 1]  # Male
group_female = dataset[dataset['gender'] == 0]  # Female

# Train a model on one subgroup (e.g., 'male')
X_train = group_male.drop(['diabetes'], axis=1)
y_train = group_male['diabetes']
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the entire dataset
X_test = dataset.drop(['diabetes'], axis=1)
y_test = dataset['diabetes']
predictions = model.predict(X_test)
print("Classification report for the entire dataset:")
print(classification_report(y_test, predictions))

# You might want to look at performance specifically on the 'female' group
X_test_female = group_female.drop(['diabetes'], axis=1)
y_test_female = group_female['diabetes']
predictions_female = model.predict(X_test_female)
print("Performance on 'female' group:")
print(classification_report(y_test_female, predictions_female))


Classification report for the entire dataset:
              precision    recall  f1-score   support

           0       0.99      0.98      0.98     91500
           1       0.79      0.87      0.83      8500

    accuracy                           0.97    100000
   macro avg       0.89      0.92      0.91    100000
weighted avg       0.97      0.97      0.97    100000

Performance on 'female' group:
              precision    recall  f1-score   support

           0       0.98      0.96      0.97     54091
           1       0.64      0.75      0.69      4461

    accuracy                           0.95     58552
   macro avg       0.81      0.86      0.83     58552
weighted avg       0.95      0.95      0.95     58552



In [37]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Load the dataset from your local file system (replace 'path_to_file' with the actual file path)
# dataset = pd.read_csv('path_to_file')

# Preprocess the dataset
# Convert categorical variables to numeric
le = LabelEncoder()
dataset['smoking_history'] = le.fit_transform(dataset['smoking_history'])
dataset['gender'] = dataset['gender'].map({'Female': 0, 'Male': 1})
dataset = dataset.dropna()

# Split the data into subgroups based on the protected feature (gender)
group_male = dataset[dataset['gender'] == 1]  # Male
group_female = dataset[dataset['gender'] == 0]  # Female

# Define the models
models = {
    'Decision Tree': DecisionTreeClassifier(),
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'ANN': MLPClassifier(max_iter=1000)  # Increased max_iter for convergence
}

# K-fold cross-validation settings
k = 20
kf = KFold(n_splits=k, shuffle=True, random_state=seed)

# Prepare training and testing data
X_train_male = group_male.drop(['diabetes'], axis=1)
y_train_male = group_male['diabetes']
X_test_all = dataset.drop(['diabetes'], axis=1)
y_test_all = dataset['diabetes']
X_test_female = group_female.drop(['diabetes'], axis=1)
y_test_female = group_female['diabetes']

# Perform k-fold cross-validation
results = []
for model_name, model in models.items():
    fold_idx = 1
    for train_index, test_index in kf.split(X_train_male, y_train_male):
        X_train, X_val = X_train_male.iloc[train_index], X_train_male.iloc[test_index]
        y_train, y_val = y_train_male.iloc[train_index], y_train_male.iloc[test_index]
        
        model.fit(X_train, y_train)
        
        predictions_all = model.predict(X_test_all)
        predictions_female = model.predict(X_test_female)
        
        report_all = classification_report(y_test_all, predictions_all, output_dict=True)
        report_female = classification_report(y_test_female, predictions_female, output_dict=True)
        
        results.append({
            'Model': model_name,
            'Fold': fold_idx,
            'Accuracy_All': report_all['accuracy'],
            'Precision_All': report_all['weighted avg']['precision'],
            'Recall_All': report_all['weighted avg']['recall'],
            'F1_Score_All': report_all['weighted avg']['f1-score'],
            'Accuracy_Female': report_female['accuracy'],
            'Precision_Female': report_female['weighted avg']['precision'],
            'Recall_Female': report_female['weighted avg']['recall'],
            'F1_Score_Female': report_female['weighted avg']['f1-score'],
        })
        fold_idx += 1

# Convert results to DataFrame for better visualization
results_df = pd.DataFrame(results)
print(results_df.groupby(['Model']).mean())  # Displaying average metrics across folds for simplicity


                     Fold  Accuracy_All  Precision_All  Recall_All  \
Model                                                                
ANN                  10.5      0.472499       0.893893    0.472499   
Decision Tree        10.5      0.968879       0.970291    0.968879   
KNN                  10.5      0.955017       0.951920    0.955017   
Logistic Regression  10.5      0.441569       0.889843    0.441569   
Random Forest        10.5      0.982950       0.982731    0.982950   
SVM                  10.5      0.946020       0.949008    0.946020   

                     F1_Score_All  Accuracy_Female  Precision_Female  \
Model                                                                  
ANN                      0.570299         0.129552          0.929944   
Decision Tree            0.969453         0.949306          0.953584   
KNN                      0.950950         0.954025          0.949867   
Logistic Regression      0.539333         0.078794          0.929631   
Random 

In [38]:
results_df.to_csv('D95_model_evaluation_results.csv', index=False)


In [2]:
dataset['gender'] = dataset['gender'].map({'Female': 0, 'Male': 1})
dataset = pd.get_dummies(dataset, columns=['smoking_history'])
dataset = dataset.dropna()
dataset.reset_index(drop=True, inplace=True)


In [4]:
y_gender_0

array([0, 0, 0, ..., 0, 0, 0])

In [3]:

# Define your features and label, but keep 'gender' for creating the mask
X = dataset.drop('diabetes', axis=1)
y = dataset['diabetes'].values

# Create masks for gender before scaling
gender_0_mask = dataset['gender'] == 0  # Assuming '0' represents Female
gender_1_mask = dataset['gender'] == 1  # Assuming '1' represents Male

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply masks after scaling
X_scaled_gender_0 = X_scaled[gender_0_mask]
X_scaled_gender_1 = X_scaled[gender_1_mask]
y_gender_0 = y[gender_0_mask]
y_gender_1 = y[gender_1_mask]

# Define the models
models = {
    'SVM': SVC(random_state=seed),
    'LR': LogisticRegression(random_state=seed),
    'KNN': KNeighborsClassifier(),
    'RF': RandomForestClassifier(random_state=seed),
    'DT': DecisionTreeClassifier(random_state=seed),
    'ANN': MLPClassifier(random_state=seed)
}

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    
    TPR = TP / (TP + FN) if (TP + FN) > 0 else 0  # Sensitivity, recall, or true positive rate
    TNR = TN / (TN + FP) if (TN + FP) > 0 else 0  # Specificity or true negative rate
    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0  # False positive rate
    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0  # False negative rate
    
    return TPR, TNR, FPR, FNR, TP, TN, FP, FN

# Initialize a list to store temporary DataFrame objects
results_list = []

# Perform k-fold cross-validation and calculate sensitivity and specificity
kf = KFold(n_splits=20, shuffle=True, random_state=seed)

# Define function for running experiments and storing results
def run_experiment(X_data, y_data, group_label, results_list):
    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        X_train, X_test = X_data[train_index], X_data[test_index]
        y_train, y_test = y_data[train_index], y_data[test_index]

        fold_results = {'Fold': fold + 1, 'Group': group_label}
        print(f"Processing fold {fold + 1} for group {group_label}")

        for name, model in models.items():
            print(f"   Training and evaluating model: {name}")

            # Fit the model
            model.fit(X_train, y_train)
            # Predict on the test set
            y_pred = model.predict(X_test)
            # Calculate metrics
            TPR, TNR, FPR, FNR, TP, TN, FP, FN = calculate_metrics(y_test, y_pred)
            
            # Store results in the fold_results dictionary
            fold_results.update({
                f'{name}_TPR': TPR, f'{name}_TNR': TNR,
                f'{name}_FPR': FPR, f'{name}_FNR': FNR,
                f'{name}_TP': TP, f'{name}_TN': TN,
                f'{name}_FP': FP, f'{name}_FN': FN
            })

        # Append the dictionary to the results_list as a DataFrame
        results_list.append(pd.DataFrame([fold_results]))

# Running experiments for each gender
print("Starting experiments for Gender = Female (0)")
run_experiment(X_scaled_gender_0, y_gender_0, 'Female', results_list)

print("Starting experiments for Gender = Male (1)")
run_experiment(X_scaled_gender_1, y_gender_1, 'Male', results_list)

# Concatenate all DataFrames in the results_list into one DataFrame
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)


Starting experiments for Gender = Female (0)
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and e

   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
   Training and evaluating model: SVM
   Training and evaluating model: LR
   Training and evaluating model: KNN
   Training and evaluating model: RF
   Training and evaluating model: DT
   Training and evaluating model: ANN
    Fold   Group   SVM_TPR   SVM_TNR   SVM_FPR   SVM_FNR  SVM_TP  SVM_TN  \
0      1  Female  0.582938  0.998896  0.001104  0.417062     1

In [None]:
results_df = pd.concat(results_list, ignore_index=True)
results_path = 'D95_results.xlsx'
results_df.to_excel(results_path, index=False)

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

df['gender'] = protected_feature
df['target'] = y

# Split the data into subgroups based on the protected feature
group_male = df[df['gender'] == 0]
group_female = df[df['gender'] == 1]

# Train a model on one subgroup (e.g., 'male')
X_train, y_train = group_male.drop(['target', 'gender'], axis=1), group_male['target']
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model on the entire dataset
X_test, y_test = df.drop(['target', 'gender'], axis=1), df['target']
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

# You might want to look at performance specifically on the 'female' group
X_test_female, y_test_female = group_female.drop(['target', 'gender'], axis=1), group_female['target']
predictions_female = model.predict(X_test_female)
print("Performance on 'female' group:")
print(classification_report(y_test_female, predictions_female))


In [2]:
df = pd.read_excel('./result/D95_results.xlsx')

In [3]:
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,DT_FP,DT_FN,ANN_TPR,ANN_TNR,ANN_FPR,ANN_FNR,ANN_TP,ANN_TN,ANN_FP,ANN_FN
0,1,Female,0.582938,0.998896,0.001104,0.417062,123,2714,3,88,...,96,51,0.739336,0.997056,0.002944,0.260664,156,2709,8,55
1,2,Female,0.572052,0.999259,0.000741,0.427948,131,2697,2,98,...,70,57,0.69869,0.998888,0.001112,0.30131,160,2696,3,69
2,3,Female,0.57619,0.99816,0.00184,0.42381,121,2713,5,89,...,69,54,0.666667,0.998896,0.001104,0.333333,140,2715,3,70
3,4,Female,0.558685,0.999632,0.000368,0.441315,119,2714,1,94,...,59,56,0.676056,0.999632,0.000368,0.323944,144,2714,1,69
4,5,Female,0.549107,0.99926,0.00074,0.450893,123,2702,2,101,...,70,59,0.683036,0.998521,0.001479,0.316964,153,2700,4,71


In [4]:
from scipy.stats import ttest_ind

def perform_t_tests(df, algorithm):
    # Define the column names based on the algorithm
    tpr_col = f"{algorithm}_TPR"
    fpr_col = f"{algorithm}_FPR"
    fn_col = f"{algorithm}_FN"
    fp_col = f"{algorithm}_FP"
    
    # Define the groups
    protected_group = df['Group'] == 'Female'
    unprotected_group = ~protected_group

    # Extract the metrics
    protected_tpr = df.loc[protected_group, tpr_col].values
    unprotected_tpr = df.loc[unprotected_group, tpr_col].values

    protected_fpr = df.loc[protected_group, fpr_col].values
    unprotected_fpr = df.loc[unprotected_group, fpr_col].values

    protected_ratio_fn_fp = (df.loc[protected_group, fn_col] / df.loc[protected_group, fp_col]).values
    unprotected_ratio_fn_fp = (df.loc[unprotected_group, fn_col] / df.loc[unprotected_group, fp_col]).values

    # Perform t-tests

    # Definition 1: Equalised Odds (TPR and FPR)
    tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)

    # Definition 2: Equal Opportunity (TPR)
    equal_opportunity_ttest = ttest_ind(protected_tpr, unprotected_tpr)

    # Definition 3: Treatment Equality (Ratio of false negatives to false positives)
    treatment_equality_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Definition 4: Aggregate of all conditions
    aggregate_tpr_ttest = ttest_ind(protected_tpr, unprotected_tpr)
    aggregate_fpr_ttest = ttest_ind(protected_fpr, unprotected_fpr)
    aggregate_ratio_ttest = ttest_ind(protected_ratio_fn_fp, unprotected_ratio_fn_fp)

    # Print results
    print(f"{algorithm} - Equalised Odds (TPR):", tpr_ttest)
    print(f"{algorithm} - Equalised Odds (FPR):", fpr_ttest)
    print(f"{algorithm} - Equal Opportunity (TPR):", equal_opportunity_ttest)
    print(f"{algorithm} - Aggregate - TPR:", aggregate_tpr_ttest)
    print(f"{algorithm} - Aggregate - FPR:", aggregate_fpr_ttest)


In [5]:
perform_t_tests(df, 'SVM')
perform_t_tests(df, 'LR')
perform_t_tests(df, 'DT')
perform_t_tests(df, 'RF')
perform_t_tests(df, 'ANN')

SVM - Equalised Odds (TPR): TtestResult(statistic=-0.03732355519889093, pvalue=0.9704224143314983, df=38.0)
SVM - Equalised Odds (FPR): TtestResult(statistic=-5.458773956237387, pvalue=3.1442561812078673e-06, df=38.0)
SVM - Equal Opportunity (TPR): TtestResult(statistic=-0.03732355519889093, pvalue=0.9704224143314983, df=38.0)
SVM - Aggregate - TPR: TtestResult(statistic=-0.03732355519889093, pvalue=0.9704224143314983, df=38.0)
SVM - Aggregate - FPR: TtestResult(statistic=-5.458773956237387, pvalue=3.1442561812078673e-06, df=38.0)
LR - Equalised Odds (TPR): TtestResult(statistic=-2.2105868887457527, pvalue=0.03315819725719151, df=38.0)
LR - Equalised Odds (FPR): TtestResult(statistic=-5.241538262077401, pvalue=6.2291897026653475e-06, df=38.0)
LR - Equal Opportunity (TPR): TtestResult(statistic=-2.2105868887457527, pvalue=0.03315819725719151, df=38.0)
LR - Aggregate - TPR: TtestResult(statistic=-2.2105868887457527, pvalue=0.03315819725719151, df=38.0)
LR - Aggregate - FPR: TtestResult(s