## EARLY ALZHEIMER'S DETECTION USING PYCARET LIBRARY

In [1]:
import pandas as pd
from pycaret.classification import *

In [2]:
df = pd.read_csv('data/oasis_longitudinal.csv')
df.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [3]:
# use first visit data only because of the analysis we're doing
df = df.loc[df['Visit']==1] 

 # reset index after filtering first visit data
df = df.reset_index(drop=True)

# M/F column
df['M/F'] = df['M/F'].replace(['F','M'], [0,1]) 

# Target variable
df['Group'] = df['Group'].replace(['Converted'], ['Demented'])
df['Group'] = df['Group'].replace(['Demented', 'Nondemented'], [1,0]) 

# Drop unnecessary columns
df = df.drop(['MRI ID', 'Visit', 'Hand'], axis=1) 

In [4]:
# Dropped the 8 rows with missing values in the column, SES
df = df.dropna(axis=0, how='any')
pd.isnull(df).sum()

Subject ID    0
Group         0
MR Delay      0
M/F           0
Age           0
EDUC          0
SES           0
MMSE          0
CDR           0
eTIV          0
nWBV          0
ASF           0
dtype: int64

In [5]:
df = df.drop("Subject ID", axis=1)

In [6]:
df.head()

Unnamed: 0,Group,MR Delay,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,0,1,87,14,2.0,27.0,0.0,1987,0.696,0.883
2,0,0,0,88,18,3.0,28.0,0.0,1215,0.71,1.444
3,0,0,1,80,12,4.0,28.0,0.0,1689,0.712,1.039
5,0,0,0,93,14,2.0,30.0,0.0,1272,0.698,1.38
6,1,0,1,68,12,2.0,27.0,0.5,1457,0.806,1.205


In [7]:
df.dtypes

Group         int64
MR Delay      int64
M/F           int64
Age           int64
EDUC          int64
SES         float64
MMSE        float64
CDR         float64
eTIV          int64
nWBV        float64
ASF         float64
dtype: object

## Train and Evlauate model

In [8]:
experiment = setup(df, target="Group",train_size = 0.8,fold_shuffle=True)

Unnamed: 0,Description,Value
0,session_id,8475
1,Target,Group
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(142, 11)"
5,Missing Values,False
6,Numeric Features,7
7,Categorical Features,3
8,Ordinal Features,False
9,High Cardinality Features,False


AttributeError: 'Simple_Imputer' object has no attribute 'fill_value_categorical'

In [9]:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
catboost,CatBoost Classifier,0.9023,0.9072,0.8133,1.0,0.8943,0.8063,0.8244,0.713
ridge,Ridge Classifier,0.8848,0.0,0.7967,0.9833,0.8759,0.771,0.7906,0.008
rf,Random Forest Classifier,0.8841,0.9022,0.83,0.9467,0.8812,0.7685,0.7792,0.09
nb,Naive Bayes,0.8765,0.8931,0.8133,0.9467,0.8701,0.7537,0.767,0.016
lda,Linear Discriminant Analysis,0.8667,0.9239,0.78,0.9667,0.8592,0.7343,0.7539,0.01
gbc,Gradient Boosting Classifier,0.8576,0.9,0.8133,0.9148,0.8566,0.7151,0.7262,0.025
lightgbm,Light Gradient Boosting Machine,0.8477,0.915,0.83,0.8973,0.8569,0.6908,0.7006,0.188
lr,Logistic Regression,0.8402,0.8706,0.78,0.9181,0.8321,0.6827,0.703,0.588
ada,Ada Boost Classifier,0.8394,0.91,0.83,0.8729,0.8475,0.6754,0.6821,0.031
xgboost,Extreme Gradient Boosting,0.8394,0.8961,0.8633,0.8773,0.856,0.6701,0.6977,0.099
