In [1]:
import pandas as pd 

In [2]:
# Read the dataset and discard the two columns that are not used as features
# (presumably a row identifier and a constant/placeholder field — TODO confirm).
data = pd.read_csv('alzheimers_disease_data.csv').drop(
    columns=['PatientID', 'DoctorInCharge']
)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts 

# y = f(x): every column except the target is a feature; 'Diagnosis' is the label.
x = data.drop(columns=['Diagnosis'])
y = data['Diagnosis']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std used to transform the training rows
# (data leakage). The split itself is unchanged: same test_size, stratification
# and seed, so the same rows end up in each partition.
x_train_raw, x_test_raw, y_train, y_test = tts(
    x, y, test_size=0.3, stratify=y, random_state=65
)

# Learn the scaling statistics from the training rows only, then apply those
# same statistics to the test rows.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_raw)
x_test_scaled = scaler.transform(x_test_raw)

# Names consumed by the modelling cells below.
x_train, x_test = x_train_scaled, x_test_scaled

In [4]:
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.model_selection import GridSearchCV as GSCV

# Hyperparameter grid for the decision tree: 4 * 3 * 3 * 2 = 72 candidates,
# each evaluated with 5-fold cross-validation.
param_grid = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}

# Search over the same estimator configuration that is trained afterwards
# (class_weight='balanced'), so the selected hyperparameters are valid for it.
# random_state is pinned because the tree permutes features when choosing
# splits; without it the winning parameters can differ from run to run.
grid_search = GSCV(
    DTC(class_weight='balanced', random_state=65),
    param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(x_train, y_train)

# best_estimator_ is the winning candidate refit on the whole training split.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [5]:
# Build the final tree from the hyperparameters the grid search selected
# instead of hand-copying them from its printed output, so this cell cannot go
# stale when the data or the search changes. random_state makes the fitted
# tree (and therefore the reports below) reproducible across runs.
model = DTC(
    class_weight='balanced',
    random_state=65,
    **grid_search.best_params_,
)

model.fit(x_train, y_train)

# Predictions on both partitions, used to compare train vs. test performance.
pred_train = model.predict(x_train)
pred_test = model.predict(x_test)

In [6]:
from sklearn.metrics import classification_report as cr 

# Text summaries of per-class precision / recall / F1 for each partition.
report_train = cr(y_true=y_train, y_pred=pred_train)
report_test = cr(y_true=y_test, y_pred=pred_test)

In [7]:
# Display the classification report computed on the training split.
print(report_train)

              precision    recall  f1-score   support

           0       0.97      0.96      0.96       972
           1       0.92      0.95      0.94       532

    accuracy                           0.95      1504
   macro avg       0.95      0.95      0.95      1504
weighted avg       0.95      0.95      0.95      1504



In [8]:
# Display the classification report computed on the held-out test split.
print(report_test)

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       417
           1       0.89      0.93      0.91       228

    accuracy                           0.94       645
   macro avg       0.93      0.93      0.93       645
weighted avg       0.94      0.94      0.94       645

