# Alzheimer's Disease Classification

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

import os
from distutils.dir_util import copy_tree, remove_tree

from PIL import Image
from random import randint

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import matthews_corrcoef as MCC
from sklearn.metrics import balanced_accuracy_score as BAS
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow_addons as tfa
from keras.utils.vis_utils import plot_model
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Conv2D, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.densenet import DenseNet169
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator as IDG
from tensorflow.keras.layers import BatchNormalization, MaxPool2D, GlobalAveragePooling2D


print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.4.1


### Data Pre-Processing

In [3]:
base_dir = "/kaggle/input/alzheimers-dataset-4-class-of-images/Alzheimer_s Dataset/"
root_dir = "./"
test_dir = base_dir + "test/"
train_dir = base_dir + "train/"
work_dir = root_dir + "dataset/"

if os.path.exists(work_dir):
    remove_tree(work_dir)
    

os.mkdir(work_dir)
copy_tree(train_dir, work_dir)
copy_tree
(test_dir, work_dir)
print("Working Directory Contents:", os.listdir(work_dir))

Working Directory Contents: ['NonDemented', 'MildDemented', 'VeryMildDemented', 'ModerateDemented']


In [4]:
WORK_DIR = './dataset/'

CLASSES = [ 'NonDemented',
            'VeryMildDemented',
            'MildDemented',
            'ModerateDemented']

IMG_SIZE = 176
IMAGE_SIZE = [176, 176]
DIM = (IMG_SIZE, IMG_SIZE)

In [5]:


ZOOM = [.99, 1.01]
BRIGHT_RANGE = [0.8, 1.2]

work_dr = IDG(rescale = 1./255, brightness_range=BRIGHT_RANGE, zoom_range=ZOOM)

train_data_gen = work_dr.flow_from_directory(directory=WORK_DIR, target_size=DIM, batch_size=6500)

Found 5121 images belonging to 4 classes.


In [6]:
train_data, train_labels = train_data_gen.next()

In [7]:

print(train_data.shape, train_labels.shape)

(5121, 176, 176, 3) (5121, 4)


In [8]:
sm = SMOTE(random_state=42)

train_data, train_labels = sm.fit_resample(train_data.reshape(-1, IMG_SIZE * IMG_SIZE * 3), train_labels)

train_data = train_data.reshape(-1, IMG_SIZE, IMG_SIZE, 3)

print(train_data.shape, train_labels.shape)

(10240, 176, 176, 3) (10240, 4)




In [9]:
train_data, test_data, train_labels, test_labels = train_test_split(train_data, train_labels, test_size = 0.2, random_state=42)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size = 0.2, random_state=42)

### Constructing a Convolutional Neural Network Architecture

In [10]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50V2, VGG16
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from tensorflow.keras.models import Model

# Input shape (you can modify this based on your data)
input_shape = (176, 176, 3)

# Load pre-trained ResNet50V2 without the top layers 
resnet50v2 = ResNet50V2(weights='imagenet', include_top=False, input_shape=input_shape)

# Load pre-trained VGG16 without the top layers
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers to prevent updating during training
for layer in resnet50v2.layers:
    layer.trainable = False

for layer in vgg16.layers:
    layer.trainable = False

# Define the input
input_tensor = Input(shape=input_shape)

# Pass the input through ResNet50V2 and VGG16
resnet_features = resnet50v2(input_tensor)
vgg16_features = vgg16(input_tensor)

# Flatten the output from both models
resnet_features = Flatten()(resnet_features)
vgg16_features = Flatten()(vgg16_features)

# Concatenate the outputs
combined_features = Concatenate()([resnet_features, vgg16_features])

# Add a fully connected layer for classification (adjust based on your number of classes)
output = Dense(256, activation='relu')(combined_features)
output = Dense(4, activation='softmax')(output)  # Assuming 10 classes
# output= Dense(1, activation='softmax')
# Create the final model
dual_model = Model(inputs=input_tensor, outputs=output)

# Compile the model
dual_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
dual_model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 176, 176, 3) 0                                            
__________________________________________________________________________________________________
resnet50v2 (Functional)         (None, 6, 6, 2048)   23564800    input_6[0][0]                    
__________________________________________________________________________________________________
vgg16 (Functional)              (None, 5, 5, 512)    14714688    input_6[0][0]                    
__________________________________________________________________________________________________
flatten_2 (Flatten)             (None, 73728) 

In [18]:
model = Sequential([
        Input(shape=(*IMAGE_SIZE
                     , 3)),
        Conv2D(16, 3, activation='relu', padding='same'),
        Conv2D(16, 3, activation='relu', padding='same'),
        MaxPool2D(),
        Conv2D(32, 3, activation='relu', padding='same'),
        Conv2D(32, 3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPool2D(),
        Conv2D(64, 3, activation='relu', padding='same'),
        Conv2D(64, 3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPool2D(),
        Conv2D(128, 3, activation='relu', padding='same'),
        Conv2D(128, 3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPool2D(),
        Dropout(0.2),
        Conv2D(256, 3, activation='relu', padding='same'),
        Conv2D(256, 3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPool2D(),
        Dropout(0.2),
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.7),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.6),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(4, activation='softmax')        
    ], name = "cnn_model")

In [11]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet169, VGG16
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from tensorflow.keras.models import Model

# Input shape (you can modify this based on your data)
input_shape = (176, 176, 3)

# Load pre-trained DenseNet169 without the top layers
densenet169 = DenseNet169(weights='imagenet', include_top=False, input_shape=input_shape)

# Load pre-trained VGG16 without the top layers
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers to prevent updating during training
for layer in densenet169.layers:
    layer.trainable = False

for layer in vgg16.layers:
    layer.trainable = False

# Define the input
input_tensor = Input(shape=input_shape)

# Pass the input through DenseNet169 and VGG16
densenet_features = densenet169(input_tensor)
vgg16_features = vgg16(input_tensor)

# Flatten the output from both models
densenet_features = Flatten()(densenet_features)
vgg16_features = Flatten()(vgg16_features)

# Concatenate the outputs
combined_features = Concatenate()([densenet_features, vgg16_features])

# Add a fully connected layer for classification (adjust based on your number of classes)
output = Dense(1024, activation='relu')(combined_features)
output = Dense(1024, activation='relu')(output)
output = Dense(512, activation='relu')(output)
output = Dense(256, activation='relu')(output)
output = Dense(128, activation='relu')(output)
output = Dense(64, activation='relu')(output)
output = Dense(4, activation='softmax')(output)  # Adjust the number of classes as needed

# Create the final model
dense_vgg_model = Model(inputs=input_tensor, outputs=output)

# Compile the model
dense_vgg_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
dense_vgg_model.summary()


Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            [(None, 176, 176, 3) 0                                            
__________________________________________________________________________________________________
densenet169 (Functional)        (None, 5, 5, 1664)   12642880    input_9[0][0]                    
__________________________________________________________________________________________________
vgg16 (Functional)              (None, 5, 5, 512)    14714688    input_9[0][0]                    
__________________________________________________________________________________________________
flatten_4 (Flatten)             (None, 41600)        0           densenet169[0][0]                
____________________________________________________________________________________________

In [12]:
class MyCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('val_acc') > 0.99:
            print("\nReached accuracy threshold! Terminating training.")
            self.model.stop_training = True
            
my_callback = MyCallback()
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

In [13]:
METRICS = [tf.keras.metrics.CategoricalAccuracy(name='acc'),
           tf.keras.metrics.AUC(name='auc'), 
           tfa.metrics.F1Score(num_classes=4)]

CALLBACKS = [my_callback]
    
# model.compile(optimizer='adam',
#               loss=tf.losses.CategoricalCrossentropy(),
#               metrics=METRICS)

# model.summary()

### Training & Testing the Model

In [14]:
EPOCHS = 15   

history = dense_vgg_model.fit(train_data, train_labels, validation_data=(val_data, val_labels), epochs=EPOCHS)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [1]:

fig, mat = plt.subplots(1, 3, figsize = (30, 5))
mat = mat.ravel()

for i, metric in enumerate(["acc", "auc", "loss"]):
    mat[i].plot(history.history[metric])
    mat[i].plot(history.history["val_" + metric])
    mat[i].set_title("Model {}".format(metric))
    mat[i].set_xlabel("Epochs")
    mat[i].set_ylabel(metric)
    mat[i].legend(["train", "val"])

NameError: name 'plt' is not defined

In [16]:

test_scores = dense_vgg_model.evaluate(test_data, test_labels)
print("Testing Accuracy: %.2f%%"%(test_scores[1] * 100))

Testing Accuracy: 89.16%


In [None]:
pred_labels = dual_model.predict(test_data)

In [None]:

def roundoff(arr):
    arr[np.argwhere(arr != arr.max())] = 0
    arr[np.argwhere(arr == arr.max())] = 1
    return arr

for labels in pred_labels:
    labels = roundoff(labels)

print(classification_report(test_labels, pred_labels, target_names=CLASSES))

In [None]:

pred_ls = np.argmax(pred_labels, axis=1)
test_ls = np.argmax(test_labels, axis=1)

conf_arr = confusion_matrix(test_ls, pred_ls)

plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

ax = sns.heatmap(conf_arr, cmap='Greens', annot=True, fmt='d', xticklabels=CLASSES, yticklabels=CLASSES)

plt.title('Alzheimer\'s Disease Diagnosis')
plt.xlabel('Prediction')
plt.ylabel('Truth')
plt.show(ax)

In [None]:

print("Balanced Accuracy Score: {} %".format(round(BAS(test_ls, pred_ls) * 100, 2)))
print("Matthew's Correlation Coefficient: {} %".format(round(MCC(test_ls, pred_ls) * 100, 2)))

In [18]:

model_dir = work_dir + "alzheimer_cnn_model"
dense_vgg_model.save(model_dir, save_format='h5')
os.listdir(work_dir)

['NonDemented',
 'alzheimer_cnn_model',
 'MildDemented',
 'VeryMildDemented',
 'ModerateDemented']

In [None]:
tf.keras.utils.plot_model(dual_model, to_file="cnn_model.png", show_shapes=True)

### Using the InceptionV3 model as a base model for the task

In [None]:
# ivm = Sequential([
#     InceptionResNetV2(input_shape=(176,176,3),include_top=False,weights='imagenet',pooling='same'),
#     MaxPool2D(),
#     Flatten(),
#     Dense(64, activation='relu'),
#     BatchNormalization(),
#     Dropout(0.3),
#     Dense(4,activation='softmax')
# ],name="ivm_model")

In [None]:
# ivm.compile( optimizer='adam',loss='categorical_crossentropy',metrics= METRICS)

# ivm.summary()

In [None]:

# EPOCHS = 30

# history_ivm = ivm.fit(train_data, train_labels, validation_data=(val_data, val_labels), callbacks=CALLBACKS, epochs=EPOCHS)

In [None]:


# fig, mat = plt.subplots(1, 3, figsize = (30, 5))
# mat = mat.ravel()

# for i, metric in enumerate(["acc", "auc", "loss"]):
#     mat[i].plot(history_ivm.history[metric])
#     mat[i].plot(history_ivm.history["val_" + metric])
#     mat[i].set_title("Model {}".format(metric))
#     mat[i].set_xlabel("Epochs")
#     mat[i].set_ylabel(metric)
#     mat[i].legend(["train", "val"])

In [None]:
# test_scores = ivm.evaluate(test_data, test_labels)

# print("Testing Accuracy: %.2f%%"%(test_scores[1] * 100))

In [None]:
# pred_labels = ivm.predict(test_data)

In [None]:

# def roundoff(arr):
#     arr[np.argwhere(arr != arr.max())] = 0
#     arr[np.argwhere(arr == arr.max())] = 1
#     return arr

# for labels in pred_labels:
#     labels = roundoff(labels)

# print(classification_report(test_labels, pred_labels, target_names=CLASSES))

In [None]:


# pred_ls = np.argmax(pred_labels, axis=1)
# test_ls = np.argmax(test_labels, axis=1)

# conf_arr = confusion_matrix(test_ls, pred_ls)

# plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

# ax = sns.heatmap(conf_arr, cmap='Greens', annot=True, fmt='d', xticklabels=CLASSES, yticklabels=CLASSES)

# plt.title('Alzheimer\'s Disease Diagnosis using InceptionResnetV2')
# plt.xlabel('Prediction')
# plt.ylabel('Truth')
# plt.show(ax)

In [None]:
# print("Balanced Accuracy Score: {} %".format(round(BAS(test_ls, pred_ls) * 100, 2)))
# print("Matthew's Correlation Coefficient: {} %".format(round(MCC(test_ls, pred_ls) * 100, 2)))

In [None]:
# model_dir2 = work_dir + "alzheimer_inceptionresnet"
# model.save(model_dir2, save_format='h5')
# os.listdir(work_dir)

In [None]:
# tf.keras.utils.plot_model(ivm, to_file="res_model.png", show_shapes=True)

#  **Using DenseNet 169**

In [None]:
dnm =  Sequential(
        [DenseNet169(input_shape=(176,176,3),include_top=False,weights='imagenet',pooling='same'),
        MaxPool2D(),
        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3), 
        Dense(4,activation='softmax')
        ],name="dense_model")



In [None]:
dnm.compile( optimizer='adam',loss='categorical_crossentropy',metrics= METRICS)

dnm.summary()

In [None]:

EPOCHS = 30

history_dnm = dnm.fit(train_data, train_labels, validation_data=(val_data, val_labels), callbacks=CALLBACKS, epochs=EPOCHS)

In [None]:

fig, ax = plt.subplots(1, 3, figsize = (30, 5))
ax = ax.ravel()

for i, metric in enumerate(["acc", "auc", "loss"]):
    ax[i].plot(history_dnm.history[metric])
    ax[i].plot(history_dnm.history["val_" + metric])
    ax[i].set_title("Model {}".format(metric))
    ax[i].set_xlabel("Epochs")
    ax[i].set_ylabel(metric)
    ax[i].legend(["train", "val"])

In [None]:
test_scores = dnm.evaluate(test_data, test_labels)

print("Testing Accuracy: %.2f%%"%(test_scores[1] * 100))

In [None]:
pred_labels = dnm.predict(test_data)

In [None]:

def roundoff(arr):
    
    arr[np.argwhere(arr != arr.max())] = 0
    arr[np.argwhere(arr == arr.max())] = 1
    return arr

for labels in pred_labels:
    labels = roundoff(labels)

print(classification_report(test_labels, pred_labels, target_names=CLASSES))

In [None]:


pred_ls = np.argmax(pred_labels, axis=1)
test_ls = np.argmax(test_labels, axis=1)

conf_arr = confusion_matrix(test_ls, pred_ls)

plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

ax = sns.heatmap(conf_arr, cmap='Greens', annot=True, fmt='d', xticklabels=CLASSES, yticklabels=CLASSES)

plt.title('Alzheimer\'s Disease Diagnosis using DenseNet 169')
plt.xlabel('Prediction')
plt.ylabel('Truth')
plt.show(ax)

In [None]:

print("Balanced Accuracy Score: {} %".format(round(BAS(test_ls, pred_ls) * 100, 2)))
print("Matthew's Correlation Coefficient: {} %".format(round(MCC(test_ls, pred_ls) * 100, 2)))

In [None]:
model_dir = work_dir + "alzheimer_densenet"
model.save(model_dir, save_format='h5')
os.listdir(work_dir)

In [None]:
tf.keras.utils.plot_model(dnm, to_file="dnm_model.png", show_shapes=True)

# Using ResNet50V2

In [None]:
rnm =  Sequential([
    ResNet50V2(input_shape=(176,176,3),include_top=False,weights='imagenet',pooling='same'),
    MaxPool2D(),
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3), 
    Dense(4,activation='softmax')],name="rnm_model")


In [None]:
rnm.compile( optimizer='adam',loss='categorical_crossentropy',metrics= METRICS)

rnm.summary()

In [None]:

EPOCHS = 30

history_rnm = rnm.fit(train_data, train_labels, validation_data=(val_data, val_labels), callbacks=CALLBACKS, epochs=EPOCHS)

In [None]:


fig, ax = plt.subplots(1, 3, figsize = (30, 5))
ax = ax.ravel()

for i, metric in enumerate(["acc", "auc", "loss"]):
    ax[i].plot(history_rnm.history[metric])
    ax[i].plot(history_rnm.history["val_" + metric])
    ax[i].set_title("Model {}".format(metric))
    ax[i].set_xlabel("Epochs")
    ax[i].set_ylabel(metric)
    ax[i].legend(["train", "val"])

In [None]:
test_scores = rnm.evaluate(test_data, test_labels)

print("Testing Accuracy: %.2f%%"%(test_scores[1] * 100))

In [None]:
pred_labels = rnm.predict(test_data)

In [None]:

def roundoff(arr):
    """To round off according to the argmax of each predicted label array. """
    arr[np.argwhere(arr != arr.max())] = 0
    arr[np.argwhere(arr == arr.max())] = 1
    return arr

for labels in pred_labels:
    labels = roundoff(labels)

print(classification_report(test_labels, pred_labels, target_names=CLASSES))

In [None]:


pred_ls = np.argmax(pred_labels, axis=1)
test_ls = np.argmax(test_labels, axis=1)

conf_arr = confusion_matrix(test_ls, pred_ls)

plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

ax = sns.heatmap(conf_arr, cmap='Greens', annot=True, fmt='d', xticklabels=CLASSES, yticklabels=CLASSES)

plt.title('Alzheimer\'s Disease Diagnosis using ResNet50 V2')
plt.xlabel('Prediction')
plt.ylabel('Truth')
plt.show(ax)

In [None]:

print("Balanced Accuracy Score: {} %".format(round(BAS(test_ls, pred_ls) * 100, 2)))
print("Matthew's Correlation Coefficient: {} %".format(round(MCC(test_ls, pred_ls) * 100, 2)))

In [None]:
model_dir = work_dir + "alzheimer_resnet50V2"
model.save(model_dir, save_format='h5')
os.listdir(work_dir)

In [None]:
tf.keras.utils.plot_model(rnm, to_file="rnm_model.png", show_shapes=True)