In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler

### Load the data

In [2]:
data = pd.read_csv("../data/alzheimers_disease_data.csv")
data = data.drop("DoctorInCharge", axis=1) # this attribute is confidential in the data, and thus not useful 
data.head()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,4751,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,4752,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,4753,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,4754,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,4755,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,...,6.045039,0,0,0.014691,0,0,1,1,0,0


### Split the data

In [36]:
X, y = data, data["Diagnosis"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=22, stratify=y)

X_test = X_test.drop("Diagnosis", axis=1)

X_train.head()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
1062,5813,74,0,0,2,16.327315,0,11.281357,1.151913,7.17136,...,0.921736,0,1,4.217614,1,0,0,0,0,1
1982,6733,71,0,1,1,15.138279,0,3.570242,0.331743,3.376891,...,5.492227,0,1,5.023993,0,0,0,0,1,1
1390,6141,88,1,0,2,35.751527,0,10.876879,7.052473,9.31438,...,4.266609,0,1,9.352727,0,1,0,1,0,1
243,4994,70,0,0,2,23.185997,0,8.451036,6.184948,9.927077,...,4.225134,0,0,5.454608,1,0,0,0,1,0
76,4827,68,1,0,1,24.487589,0,14.060047,8.112291,9.864426,...,5.815445,0,0,3.301422,1,0,0,0,1,0


### Undersampling by ethnicity

In [48]:
e = X_train["Ethnicity"]
print('Original dataset shape: %s' % Counter(e))

X_temp = X_train.drop("Ethnicity", axis=1)
y_temp = X_train["Ethnicity"]

rus = RandomUnderSampler(random_state=12)
X_resampled_temp, y_resampled_temp = rus.fit_resample(X_temp, y_temp)

X_undersampled_temp = pd.DataFrame(X_resampled_temp, columns=X_temp.columns)
X_undersampled_temp.insert(loc=3, column="Ethnicity", value=None)
X_undersampled_temp["Ethnicity"] = y_resampled_temp

X_rus = X_undersampled_temp.drop("Diagnosis", axis=1)
y_rus = X_undersampled_temp["Diagnosis"]

print('Undersampled dataset shape: %s' % X_rus["Ethnicity"].value_counts())

Original dataset shape: Counter({0: 932, 1: 359, 2: 163, 3: 157})
Undersampled dataset shape: Ethnicity
0    157
1    157
2    157
3    157
Name: count, dtype: int64


In [50]:
X_rus.head()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,...,MMSE,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness
764,5515,71,1,0,2,18.573515,0,16.467741,0.167873,8.938344,...,26.597184,1.937336,0,1,0.428357,1,1,0,0,0
1135,5886,85,0,0,1,37.752963,0,9.619914,3.718347,0.735304,...,14.423622,3.072536,0,1,8.629721,0,0,0,0,0
1161,5912,79,1,0,3,21.757782,0,11.361209,4.789175,5.793957,...,9.860889,6.873629,0,0,3.76742,0,0,0,0,0
525,5276,60,1,0,2,32.482594,0,18.829069,4.490351,6.666395,...,25.929372,2.742409,1,1,3.223265,0,0,0,0,0
891,5642,90,1,0,2,24.556434,0,9.708596,2.137043,3.741381,...,17.063199,0.862614,0,0,2.438237,0,0,0,0,1


### Train the model

In [51]:
classifier = HistGradientBoostingClassifier(random_state=12)
classifier.fit(X_rus, y_rus)
y_pred = classifier.predict(X_test)

In [53]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      0.96      0.94       348
           1       0.92      0.86      0.89       190

    accuracy                           0.92       538
   macro avg       0.92      0.91      0.92       538
weighted avg       0.92      0.92      0.92       538



### Compare performance based on ethnicity

In [55]:
# Join X_test, y_test, y_pred for analysis
results = X_test.copy()
results["TrueDiagnosis"] = y_test
results["PredictedDiagnosis"] = y_pred

In [56]:
# Ethnicity 0
eth0_results = results[results["Ethnicity"] == 0]
print(classification_report(eth0_results["TrueDiagnosis"], eth0_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       222
           1       0.94      0.88      0.91       124

    accuracy                           0.94       346
   macro avg       0.94      0.92      0.93       346
weighted avg       0.94      0.94      0.94       346



In [57]:
# Ethncity 1
eth1_results = results[results["Ethnicity"] == 1]
print(classification_report(eth1_results["TrueDiagnosis"], eth1_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.89      0.94      0.91        63
           1       0.86      0.78      0.82        32

    accuracy                           0.88        95
   macro avg       0.88      0.86      0.87        95
weighted avg       0.88      0.88      0.88        95



In [58]:
# Ethnicity 2  
eth2_results = results[results["Ethnicity"] == 2]
print(classification_report(eth2_results["TrueDiagnosis"], eth2_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.88      0.96      0.92        23
           1       0.94      0.85      0.89        20

    accuracy                           0.91        43
   macro avg       0.91      0.90      0.91        43
weighted avg       0.91      0.91      0.91        43



In [59]:
# Ethncity 3
eth3_results = results[results["Ethnicity"] == 3]
print(classification_report(eth3_results["TrueDiagnosis"], eth3_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95        40
           1       0.86      0.86      0.86        14

    accuracy                           0.93        54
   macro avg       0.90      0.90      0.90        54
weighted avg       0.93      0.93      0.93        54



In [25]:
%load_ext autoreload
%autoreload 2
from fairness_metrics import demographic_parity, equalized_odds, disparate_impact

# DI >= 0.8 is a pre-established threshold for fairness
# DP and EO need to be as close to 0 as possible

for i in [1,2,3]:
    dp = round(demographic_parity(results, 0, i), 4)
    eo = equalized_odds(results, 0, i)
    tpr_diff, fpr_diff = round(eo[0], 4), round(eo[1], 4)
    di = round(disparate_impact(results, 0, i), 4)
    print(dp, tpr_diff, fpr_diff, di)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
0.0353 0.0 -0.0511 1.113
0.045 0.125 0.001 0.8854
0.0264 0.1 0.0323 1.0821
