# Neural Net Classifier

In [1]:
import pandas as pd 

data = pd.read_csv("../Data/Alzheimer's.csv")

#Converted the group to dummy variables
data.loc[data.Group=='Nondemented', 'Group'] = 0
data.loc[data.Group=='Demented', 'Group'] = 1
data.loc[data.Group=='Converted', 'Group'] = 2

#Converted the M/F to dummy variables
data.loc[data.Sex=='M', 'Sex'] = 0
data.loc[data.Sex=='F', 'Sex'] = 1

#ToDo
#Fill in SES and Mini Mental State missing sample values
#Use the mean of that category for the value
# Get rid of SES data 

data

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,Sex,Hand,Age,EDUC,SES,Mini Mental State,Clinical Dementia Rating,Estimated total Intracranial Volume,Normalize Whole Brain Volume,Atlas Scaling Factor
0,OAS2_0001,OAS2_0001_MR1,0,1,0,0,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,0,2,457,0,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,1,1,0,0,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,1,2,560,0,R,76,12,,28.0,0.5,1738,0.713,1.010
4,OAS2_0002,OAS2_0002_MR3,1,3,1895,0,R,80,12,,22.0,0.5,1698,0.701,1.034
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,OAS2_0185,OAS2_0185_MR2,1,2,842,0,R,82,16,1.0,28.0,0.5,1693,0.694,1.037
369,OAS2_0185,OAS2_0185_MR3,1,3,2297,0,R,86,16,1.0,26.0,0.5,1688,0.675,1.040
370,OAS2_0186,OAS2_0186_MR1,0,1,0,1,R,61,13,2.0,30.0,0.0,1319,0.801,1.331
371,OAS2_0186,OAS2_0186_MR2,0,2,763,1,R,63,13,2.0,30.0,0.0,1327,0.796,1.323


In [2]:
import numpy as np 
from sklearn.model_selection import train_test_split


In [3]:
#PCA might be a good technique to select predictors 

#note that PCA performs best when data is normalized (range b/w 0 and 1)

#It is possible to use categorical and continuous predictors 
#for a regression problem. My understanding is you need to make 
#dummy variables for the binary predictors. 

#Variables that we will need to deal with: 
# Hand, Visit, Subject ID, MRI ID

In [4]:
#Attempting PCA on data
#Hand is completely useless as it is identical for all samples
data_drop = data.drop(['Hand','Visit','Subject ID','MRI ID'], axis = 1) #axis = 1 means to drop column not row

#get rid of row 360 and 359 bc they are missing alot of data (both SES and MMS)
data_drop = data_drop.drop([360, 359])


#delete all data points that dont have SES in them (this is where they have NaN)
data_drop = data_drop.dropna()

In [5]:
#dementia status is what we want to predict - change this to single target 
group = data_drop[['Group']] 
group = group.astype('int')
group

Unnamed: 0,Group
0,0
1,0
5,0
6,0
7,0
...,...
368,1
369,1
370,0
371,0


In [6]:
data_drop = data_drop.drop(['Group'], axis = 1) #axis = 1 means to drop column not row

In [7]:
#get a list of columns in pandas object 
names_of_data = data_drop.columns.tolist()

#shuffle = false prevents data split being different everytime
X_train, X_test, y_train, y_test = train_test_split(data_drop, group, test_size=0.4, shuffle = False)

#split test into validate and test, again making sure the data is always the same for consistency
#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, shuffle = False)

#Normalizing the data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

#running the actual PCA
from sklearn.decomposition import PCA

pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

#relief f algorithm - sorting features 

In [8]:
data_drop.loc[355]

MR Delay                                 652
Sex                                        0
Age                                       81
EDUC                                      20
SES                                        1
Mini Mental State                         26
Clinical Dementia Rating                 0.5
Estimated total Intracranial Volume     1556
Normalize Whole Brain Volume           0.691
Atlas Scaling Factor                   1.128
Name: 355, dtype: object

In [9]:
data_drop.loc[354]

MR Delay                                   0
Sex                                        0
Age                                       79
EDUC                                      20
SES                                        1
Mini Mental State                         26
Clinical Dementia Rating                 0.5
Estimated total Intracranial Volume     1548
Normalize Whole Brain Volume           0.711
Atlas Scaling Factor                   1.134
Name: 354, dtype: object

In [10]:
explained_variance = pca.explained_variance_ratio_
print(len(explained_variance))
print(explained_variance)

10
[0.27582099 0.2259758  0.1513074  0.1293538  0.08646765 0.05392503
 0.02962122 0.02618717 0.02012601 0.00121493]


In [11]:
from sklearn.neural_network import MLPClassifier

In [12]:
neural_net = MLPClassifier(solver="sgd", activation = 'logistic', learning_rate_init = 0.005, momentum = 0.9, max_iter = 2000)

In [13]:
neural_net.fit(X_train, y_train.values.ravel())

MLPClassifier(activation='logistic', learning_rate_init=0.005, max_iter=2000,
              solver='sgd')

In [14]:
y_pred = neural_net.predict(X_test)
y_pred

array([1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

In [15]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
results = classification_report(y_test, y_pred)

accuracy_score(y_test, y_pred)

[[72  0  0]
 [ 0 52  0]
 [ 9  7  2]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        72
           1       0.88      1.00      0.94        52
           2       1.00      0.11      0.20        18

    accuracy                           0.89       142
   macro avg       0.92      0.70      0.69       142
weighted avg       0.90      0.89      0.85       142



0.8873239436619719