In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import keras
from keras.models import Sequential
from keras.layers import *
from keras.utils import np_utils
from tensorflow.keras.optimizers import SGD

from sklearn.model_selection import cross_val_score, KFold, train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import OneHotEncoder
from scikeras.wrappers import KerasClassifier

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, History

from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [5]:
features_dr = 'C:/Users/norma/Desktop/BITIRME/GSE_OrtakData_NormR.csv'
labels_dr = 'C:/Users/norma/Desktop/BITIRME/gse_less_feature.csv'

features_df = pd.read_csv(features_dr, sep = ";")
labels_df = pd.read_csv(labels_dr, sep = ",", dtype='unicode')

features = features_df.drop('Class',axis=1)
labels = labels_df.iloc[:,:3]
labels_without_mci = labels_df.iloc[:,:2]

# **PCA**

## n_components = 5

In [None]:
pca = PCA(n_components = 5).fit(features)
principalComponents = pca.fit_transform(features)
columns = ['Pc' + str(i) for i in range(1,6)]
principalDf = pd.DataFrame(data = principalComponents
             , columns = columns)

## n_components = 114

In [6]:
features_df = pd.read_csv(features_dr, sep = ";")
features2 = features_df.drop('Class',axis=1)

pca2 = PCA(n_components = 114).fit(features)
principalComponents2 = pca2.fit_transform(features)
columns2 = ['Pc' + str(i) for i in range(1,115)]
principalDf2 = pd.DataFrame(data = principalComponents2
             , columns = columns2)

In [None]:
## MODEL

In [13]:
# Optimizer settings
epochs=60
learning_rate = 0.1
decay_rate = learning_rate / epochs
momentum = 0.8

sgd = SGD(learning_rate=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)

In [21]:
# repeat some of the initial values here so we make sure they were not changed
input_dim = 114

# let's create a function that creates the model (required for KerasClassifier) 
# while accepting the hyperparameters we want to tune 
# we also pass some default values such as optimizer='rmsprop'
def build_model(optimizer=sgd, init_mode='glorot_uniform'):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, kernel_initializer=init_mode, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(64, kernel_initializer=init_mode, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(3, kernel_initializer=init_mode, activation='softmax'))

    # compile model
    model.compile(loss='categorical_crossentropy', 
                  optimizer=optimizer, 
                  metrics=['accuracy'])

    return model

## Train test split with and without MCI class

In [16]:
x_train_n114_w, x_test_n114_w, y_train_n114_w, y_test_n114_w = train_test_split(principalDf2, labels, test_size= 0.2, random_state=42)
y_train_n114_w = y_train_n114_w.astype(int)
y_test_n114_w = y_test_n114_w.astype(int)

In [25]:
ckpt = keras.callbacks.ModelCheckpoint(
        filepath='C:/Users/norma/Desktop/BITIRME/Models/MLP/best_model_114w.{epoch:02d}-{accuracy:.4f}.h5',
        monitor='accuracy', save_best_only=True,verbose=1)

callbacks = [EarlyStopping(monitor='accuracy', patience=30, mode='min', min_delta=0.0001),
         ckpt]

model_n114w = build_model()

In [26]:
history114w = model_n114w.fit(
          x = np.asarray(x_train_n114_w).astype('float32'),
          y = y_train_n114_w,
          steps_per_epoch=len(principalDf2)//32,
          epochs=50,
          callbacks=[callbacks]
          )
model_n114w.save("finishModel.h5")

Epoch 1/50
Epoch 1: accuracy improved from -inf to 0.37760, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114w.01-0.3776.h5
Epoch 2/50
 1/39 [..............................] - ETA: 0s - loss: 1.1697 - accuracy: 0.3077
Epoch 2: accuracy improved from 0.37760 to 0.40139, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114w.02-0.4014.h5
Epoch 3/50
 1/39 [..............................] - ETA: 0s - loss: 1.0898 - accuracy: 0.4231
Epoch 3: accuracy improved from 0.40139 to 0.41031, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114w.03-0.4103.h5
Epoch 4/50
 1/39 [..............................] - ETA: 0s - loss: 1.0670 - accuracy: 0.5000
Epoch 4: accuracy improved from 0.41031 to 0.42616, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114w.04-0.4262.h5
Epoch 5/50
Epoch 5: accuracy did not improve from 0.42616
Epoch 6/50
Epoch 6: accuracy improved from 0.42616 to 0.46184, saving model to C:/Users/norma/De

In [27]:
y_pred_n114w = model_n114w.predict(x_test_n114_w)
y_pred_n114w = np.argmax(y_pred_n114w, axis=1)

class_d = 'C:/Users/norma/Desktop/BITIRME/GSE_OrtakData_NormR.csv'
class_df = pd.read_csv(class_d, sep = ';')
#creating instance of one-hot-encoder
encoder = OneHotEncoder(handle_unknown='ignore')

#perform one-hot encoding on 'team' column 
encoder_df = pd.DataFrame(encoder.fit_transform(class_df[['Class']]).toarray())
encoder_df.columns = ['AD','CTL','MCI']

y_test_n114_ = encoder.inverse_transform(y_test_n114_w).ravel()
y_test_n114_w = []
for i in y_test_n114_:
    if i == 'MCI':
        y_test_n114_w.append(2)
    elif i == 'AD':
        y_test_n114_w.append(0)
    elif i == 'CTL':
        y_test_n114_w.append(1)

In [36]:
print(accuracy_score(y_test_n114_w,y_pred_n114w))

0.4782608695652174


In [30]:
# confusion matrix
matrix = confusion_matrix(y_test_n114_w,y_pred_n114w, labels=[2,1,0])
print('Confusion matrix : \n',matrix)

# outcome values order in sklearn
tp, fn, fp, tn = confusion_matrix(y_test_n114_w,y_pred_n114w,labels=[2,1,0]).reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# classification report for precision, recall f1-score and accuracy
matrix = classification_report(y_test_n114_w,y_pred_n114w,labels=[1,0])
print('Classification report : \n',matrix)

Confusion matrix : 
 [[ 1 36 26]
 [ 2 58 28]
 [ 1 39 62]]


ValueError: too many values to unpack (expected 4)

In [31]:
### WITHOUT MCI

In [32]:
# x_train_n5w, x_test_n5w, y_train_n5w, y_test_n5w = train_test_split(principalDf, labels, test_size= 0.2, random_state=42)
# x_train_n5, x_test_n5, y_train_n5, y_test_n5 = train_test_split(principalDf, labels_without_mci, test_size= 0.2, random_state=42)
x_train_n114, x_test_n114, y_train_n114, y_test_n114 = train_test_split(principalDf2, labels_without_mci, test_size= 0.2, random_state=42)

In [33]:
y_train_n114 = y_train_n114.astype(int)
y_test_n114 = y_test_n114.astype(int)

In [35]:
# repeat some of the initial values here so we make sure they were not changed
input_dim = 114

# let's create a function that creates the model (required for KerasClassifier) 
# while accepting the hyperparameters we want to tune 
# we also pass some default values such as optimizer='rmsprop'
def build_model_2(optimizer=sgd, init_mode='glorot_uniform'):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, kernel_initializer=init_mode, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(64, kernel_initializer=init_mode, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(2, kernel_initializer=init_mode, activation='softmax'))

    # compile model
    model.compile(loss='categorical_crossentropy', 
                  optimizer=optimizer, 
                  metrics=['accuracy'])

    return model

In [37]:
ckpt = keras.callbacks.ModelCheckpoint(
        filepath='C:/Users/norma/Desktop/BITIRME/Models/MLP/best_model_114.{epoch:02d}-{accuracy:.4f}.h5',
        monitor='accuracy', save_best_only=True,verbose=1)

callbacks = [EarlyStopping(monitor='accuracy', patience=30, mode='min', min_delta=0.0001),
         ckpt]

model_n114 = build_model_2()

In [38]:
history114 = model_n114.fit(
          x = np.asarray(x_train_n114).astype('float32'),
          y = y_train_n114,
          steps_per_epoch=len(principalDf2)//32,
          epochs=50,
          callbacks=[callbacks]
          )
model_n114.save("finishModel.h5")

Epoch 1/50
 1/39 [..............................] - ETA: 11s - loss: 0.5449 - accuracy: 0.3077
Epoch 1: accuracy improved from -inf to 0.49653, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114.01-0.4965.h5
Epoch 2/50
 1/39 [..............................] - ETA: 0s - loss: 0.4568 - accuracy: 0.3846
Epoch 2: accuracy improved from 0.49653 to 0.49851, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114.02-0.4985.h5
Epoch 3/50
 1/39 [..............................] - ETA: 0s - loss: 0.4595 - accuracy: 0.7692
Epoch 3: accuracy improved from 0.49851 to 0.51140, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114.03-0.5114.h5
Epoch 4/50
 1/39 [..............................] - ETA: 0s - loss: 1557.4188 - accuracy: 0.5000
Epoch 4: accuracy improved from 0.51140 to 0.55897, saving model to C:/Users/norma/Desktop/BITIRME/Models/MLP\best_model_114.04-0.5590.h5
Epoch 5/50
 1/39 [..............................] - ETA: 0s - loss: 0.5

Epoch 35/50
 1/39 [..............................] - ETA: 0s - loss: 0.5594 - accuracy: 0.7692
Epoch 35: accuracy did not improve from 0.62537
Epoch 36/50
 1/39 [..............................] - ETA: 0s - loss: 0.4266 - accuracy: 0.6923
Epoch 36: accuracy did not improve from 0.62537
Epoch 37/50
 1/39 [..............................] - ETA: 0s - loss: 0.4266 - accuracy: 0.6923
Epoch 37: accuracy did not improve from 0.62537
Epoch 38/50
 1/39 [..............................] - ETA: 0s - loss: 0.5623 - accuracy: 0.4615
Epoch 38: accuracy did not improve from 0.62537
Epoch 39/50
Epoch 39: accuracy did not improve from 0.62537
Epoch 40/50
 1/39 [..............................] - ETA: 0s - loss: 0.6134 - accuracy: 0.5385
Epoch 40: accuracy did not improve from 0.62537


In [39]:
y_pred_n114 = model_n114.predict(x_test_n114)
y_pred_n114 = np.argmax(y_pred_n114, axis=1)

class_d = 'C:/Users/norma/Desktop/BITIRME/GSE_OrtakData_NormR.csv'
class_df = pd.read_csv(class_d, sep = ';')
#creating instance of one-hot-encoder
encoder = OneHotEncoder(handle_unknown='ignore')

#perform one-hot encoding on 'team' column 
encoder_df = pd.DataFrame(encoder.fit_transform(class_df[['Class']]).toarray())
encoder_df = encoder_df.iloc[:,:2]
encoder_df.columns = ['AD','CTL']

y_test_n114 = encoder.inverse_transform(y_test_n114).ravel()
y_test_n114_ = []

for i in y_test_n114:
    if i == 'AD':
        y_test_n114_.append(0)
    elif i == 'CTL':
        y_test_n114_.append(1)

ValueError: Shape of the passed X data is not correct. Expected 3 columns, got 2.

In [None]:
# confusion matrix
matrix = confusion_matrix(y_test_n114_w,y_pred_n114w, labels=[2,1,0])
print('Confusion matrix : \n',matrix)

# outcome values order in sklearn
tp, fn, fp, tn = confusion_matrix(y_test_n114_w,y_pred_n114w,labels=[2,1,0]).reshape(-1)
print('Outcome values : \n', tp, fn, fp, tn)

# classification report for precision, recall f1-score and accuracy
matrix = classification_report(y_test_n114_w,y_pred_n114w,labels=[1,0])
print('Classification report : \n',matrix)