In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from collections import Counter
from imblearn.over_sampling import SMOTE

### Load the data

In [2]:
data = pd.read_csv("../data/alzheimers_disease_data.csv")
data = data.drop("DoctorInCharge", axis=1) # this attribute is confidential in the data, and thus not useful 
data.head()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,4751,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,4752,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,4753,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,4754,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,4755,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,...,6.045039,0,0,0.014691,0,0,1,1,0,0


### Oversampling by ethnicity

In [3]:
e = data["Ethnicity"]
print('Original dataset shape %s' % Counter(e))

ethnicity_counts = dict(data["Ethnicity"].value_counts())
num_ethnicities = len(ethnicity_counts)
max_count = max(ethnicity_counts.values())

strategy_over = {ethnicity: max_count for ethnicity in range(num_ethnicities)}
over = SMOTE(sampling_strategy=strategy_over)
print(strategy_over, sep='\n')

data_over, e_over=over.fit_resample(data, e)
print('Oversampled dataset shape %s' % Counter(e_over))

Original dataset shape Counter({0: 1278, 1: 454, 3: 211, 2: 206})
{0: 1278, 1: 1278, 2: 1278, 3: 1278}
Oversampled dataset shape Counter({0: 1278, 3: 1278, 1: 1278, 2: 1278})


### Split the data

In [4]:
X, y = data_over.drop("Diagnosis", axis=1), data_over["Diagnosis"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=22, stratify=y)
X_train.head()

Unnamed: 0,PatientID,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,...,MMSE,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Unnamed: 22,Unnamed: 23
4877,6469,63,0,3,2,36.104854,1,5.638289,8.998836,1.690883,...,12.949967,7.032440,0.0,0.0,6.840749,0,0.0,0,0,0.0,,
1871,6622,83,0,0,2,24.380963,0,19.093165,6.753044,2.273881,...,7.90904,3.140178,0.0,0.0,0.832633,1,0.0,0,0,,,
4597,6534,69,0,3,1,18.060797,0,19.199864,0.0,14.713007,2.888925,2.160641,...,18.235851,3.202963,0.0,0,7.89458,0,0,0.0,0.0,0.0
419,5170,73,1,0,1,36.926039,0,15.835243,5.265447,0.039668,...,19.835576,5.712410,1.0,1.0,1.275737,0,0.0,0,0,1.0,,
4269,6492,70,0,3,2,36.81085,1,12.313799,8.730317,9.518343,...,26.169394,9.155845,0.0,0.0,2.054067,0,0.0,0,0,1.0,,


### Train the model

In [5]:
classifier = HistGradientBoostingClassifier(random_state=12)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [6]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       989
           1       0.90      0.74      0.81       289

    accuracy                           0.92      1278
   macro avg       0.91      0.86      0.88      1278
weighted avg       0.92      0.92      0.92      1278



In [7]:
X["Ethnicity"].value_counts()

Ethnicity
0    1278
3    1278
1    1278
2    1278
3    1278
1    1278
2    1278
Name: count, dtype: int64

### Compare performance based on ethnicity

In [7]:
# Join X_test, y_test, y_pred for analysis
results = X_test.copy()
results["TrueDiagnosis"] = y_test
results["PredictedDiagnosis"] = y_pred

In [8]:
# Ethnicity 0
eth0_results = results[results["Ethnicity"] == 0]
print(classification_report(eth0_results["TrueDiagnosis"], eth0_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.96      0.98      0.97       211
           1       0.95      0.92      0.93       108

    accuracy                           0.96       319
   macro avg       0.96      0.95      0.95       319
weighted avg       0.96      0.96      0.96       319



In [9]:
# Ethncity 1
eth1_results = results[results["Ethnicity"] == 1]
print(classification_report(eth1_results["TrueDiagnosis"], eth1_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.90      0.98      0.94       246
           1       0.86      0.58      0.70        65

    accuracy                           0.89       311
   macro avg       0.88      0.78      0.82       311
weighted avg       0.89      0.89      0.89       311



In [10]:
# Ethnicity 2  
eth2_results = results[results["Ethnicity"] == 2]
print(classification_report(eth2_results["TrueDiagnosis"], eth2_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.92      0.98      0.95       244
           1       0.90      0.72      0.80        72

    accuracy                           0.92       316
   macro avg       0.91      0.85      0.87       316
weighted avg       0.92      0.92      0.91       316



In [11]:
# Ethncity 3
eth3_results = results[results["Ethnicity"] == 3]
print(classification_report(eth3_results["TrueDiagnosis"], eth3_results["PredictedDiagnosis"]))

              precision    recall  f1-score   support

           0       0.94      0.97      0.96       288
           1       0.76      0.59      0.67        44

    accuracy                           0.92       332
   macro avg       0.85      0.78      0.81       332
weighted avg       0.92      0.92      0.92       332



In [13]:
%load_ext autoreload
%autoreload 2
from fairness_metrics import demographic_parity, equalized_odds, disparate_impact

# DI >= 0.8 is a pre-established threshold for fairness
# DP and EO need to be as close to 0 as possible

for i in [1,2,3]:
    dp = round(demographic_parity(results, 0, i), 4)
    eo = equalized_odds(results, 0, i)
    tpr_diff, fpr_diff = round(eo[0], 4), round(eo[1], 4)
    di = round(disparate_impact(results, 0, i), 4)
    print(dp, tpr_diff, fpr_diff, di)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
0.1845 -0.3321 0.0007 2.3044
0.1425 -0.1944 0.0009 1.7762
0.2236 -0.3258 0.0041 3.1835
