In [107]:
import os

import tensorflow as tf
from tensorflow.keras import layers, models
from keras.callbacks import Callback

from sklearn.model_selection import cross_validate, StratifiedKFold, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, classification_report

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

SEED = 42
EPOCHS = 100
BATCH_SIZE = 25
SCORING = {
    'accuracy': make_scorer(accuracy_score),
    'f1': make_scorer(f1_score, average='weighted', zero_division=0),
    'recall': make_scorer(recall_score, average='weighted', zero_division=0),
    'precision': make_scorer(precision_score, average='weighted', zero_division=0)
}
STRATIFIED_K_FOLD = StratifiedKFold(n_splits=5) 


### ###########################################
### Metric for CNN model
### ###########################################

class MetricsCallback(Callback):
    def __init__(self, validation_data):
        super(MetricsCallback, self).__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        x_val, y_val = self.validation_data
        y_pred = self.model.predict(x_val)
        
        accuracy = accuracy_score(y_val, y_pred.argmax(axis=1))
        f1 = f1_score(y_val, y_pred.argmax(axis=1), average='weighted', zero_division=0)
        recall = recall_score(y_val, y_pred.argmax(axis=1), average='weighted', zero_division=0)
        precision = precision_score(y_val, y_pred.argmax(axis=1), average='weighted', zero_division=0)

        logs['val_accuracy'] = accuracy
        logs['val_f1'] = f1
        logs['val_recall'] = recall
        logs['val_precision'] = precision
        print(" - val_accuracy: {:.4f} - val_f1: {:.4f} - val_recall: {:.4f} - val_precision: {:.4f}".format(accuracy, f1, recall, precision))


### ###########################################
### Model creation functions
### ###########################################

def create_cnn(height=256, width=256, channels=1, num_classes=4):
    cnn_model = models.Sequential()

    cnn_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(height, width, channels)))
    cnn_model.add(layers.MaxPooling2D((2, 2)))
    cnn_model.add(layers.Dropout(0.25))

    cnn_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    cnn_model.add(layers.MaxPooling2D((2, 2)))
    cnn_model.add(layers.Dropout(0.25))

    cnn_model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    cnn_model.add(layers.MaxPooling2D((2, 2)))
    cnn_model.add(layers.Dropout(0.25))

    # Flatten the output of the convolutional layers
    cnn_model.add(layers.Flatten())

    # Fully connected layer for classification
    cnn_model.add(layers.Dense(512, activation='relu'))
    cnn_model.add(layers.Dropout(0.5))
    cnn_model.add(layers.Dense(num_classes, activation='softmax'))

    # Compile the model
    cnn_model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    
    return cnn_model


def create_feature_extraction_cnn(input_shape):
    model = models.Sequential()
    
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Flatten())

    return model


### ###########################################
### Data loading and processing functions
### ###########################################
    
def load_data(data_directory):
    images_list = []
    labels_list = []

    # iterate over the list of subfolders (i.e. categories)
    categories = os.listdir(data_directory)
    for category in categories:
        category_path = os.path.join(data_directory, category)
        
        for image_name in os.listdir(category_path):
            image_path = os.path.join(category_path, image_name)
            
            # load and preprocess the image
            img = Image.open(image_path).convert('L')  # convert to greyscale
            img = img.resize((256, 256))               # resize to 256x256
            img = np.array(img)                        # convert to numpy array
            
            # append the image and its label to the lists
            images_list.append(img)
            labels_list.append(category)

    # stack the images into a single numpy array to get the dimensions (num_images, 256, 256)
    images_array = np.stack(images_list)

    # conver the labels list to a numpy array
    labels_array = np.array(labels_list)

    return images_array, labels_array

def preprocess_images(images_array):
    # normalise pixel values to be between [0, 1]
    images_array = images_array.astype('float32') / 255.0
    # reshape the images array to include a single channel (greyscale)
    images_array = np.expand_dims(images_array, axis=-1)

    return images_array


### ###########################################
### Summary and detail utility functions
### ###########################################

def print_cross_validation_scores(cv_scores):
    for score in SCORING:
        scores_per_fold = [round(x, 2) for x in cv_scores[f'test_{score}']]
        mean_score = round(cv_scores[f'test_{score}'].mean(), 2)
        print(f"{score.capitalize()} scores for each fold: {scores_per_fold}")
        print(f"Mean {score}: {mean_score}\n")


def print_summary_of_array(array, name):
    print(f"{name} length: {len(array)}")
    print(f"Shape of {name}:", array.shape)


def plot_history(history):
    # plot training & validation accuracy values
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

    # plot training & validation loss values
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [67]:
# Load data
X_train, y_train = load_data("data/train")
print("Training data before split:")
print_summary_of_array(X_train, "X_train")
print_summary_of_array(y_train, "y_train")

X_test, y_test = load_data("data/test")
print("\nTest data:")
print_summary_of_array(X_test, "X_test")
print_summary_of_array(y_test, "y_test")

Training data before split:
X_train length: 5120
Shape of X_train: (5120, 256, 256)
y_train length: 5120
Shape of y_train: (5120,)

Test data:
X_test length: 40
Shape of X_test: (40, 256, 256)
y_test length: 40
Shape of y_test: (40,)


In [68]:
# Preprocess images
X_train = preprocess_images(X_train)
X_test = preprocess_images(X_test)

In [69]:
input_shape = (256, 256, 1)
feature_extraction_model = create_feature_extraction_cnn(input_shape)

train_features = feature_extraction_model.predict(X_train)
print_summary_of_array(train_features, "train_features")

test_features = feature_extraction_model.predict(X_test)
print_summary_of_array(test_features, "test_features")

num_features = train_features.shape[1]

train_features length: 5120
Shape of train_features: (5120, 115200)
test_features length: 40
Shape of test_features: (40, 115200)


In [70]:
# define a dictionary to map string labels to numerical labels
label_mapping = {label: index for index, label in enumerate(np.unique(y_train))}

# encode labels
y_train_encoded = np.array([label_mapping[label] for label in y_train])
y_test_encoded = np.array([label_mapping[label] for label in y_test])

In [102]:
reverse_label_mapping = {index: label for label, index in label_mapping.items()}

## KNN Model

### Cross-Validation Evaluation

In [71]:
n_neighbors = 3
knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)

# flatten the extracted features for KNN input
X_train_knn = train_features.reshape((train_features.shape[0], -1))
X_test_knn = test_features.reshape((test_features.shape[0], -1))

cv_scores_knn = cross_validate(knn_model, X_train_knn, y_train_encoded, cv=STRATIFIED_K_FOLD, scoring=SCORING)

print_cross_validation_scores(cv_scores_knn)

Accuracy scores for each fold: [1.0, 0.99, 0.99, 1.0, 0.99]
Mean accuracy: 0.99

F1 scores for each fold: [1.0, 0.99, 0.99, 1.0, 0.99]
Mean f1: 0.99

Recall scores for each fold: [1.0, 0.99, 0.99, 1.0, 0.99]
Mean recall: 0.99

Precision scores for each fold: [1.0, 0.99, 0.99, 1.0, 0.99]
Mean precision: 0.99



### Evaluation on Unseen Data

In [72]:
# Train the KNN model
knn_model.fit(X_train_knn, y_train_encoded)

In [105]:
# Make predictions on the unseen test data
predictions_knn = knn_model.predict(test_features)

# Evaluate the model's performance
accuracy_knn = accuracy_score(y_test_encoded, predictions_knn)
precision_knn = precision_score(y_test_encoded, predictions_knn, average='weighted', zero_division=0)
recall_knn = recall_score(y_test_encoded, predictions_knn, average='weighted', zero_division=0)
f1_knn = f1_score(y_test_encoded, predictions_knn, average='weighted', zero_division=0)

print(f"Accuracy: {round(accuracy_knn, 2)}")
print(f"Precision: {round(precision_knn, 2)}")
print(f"Recall: {round(recall_knn, 2)}")
print(f"F1 Score: {round(f1_knn, 2)}")

report_knn = classification_report(y_test_encoded, predictions_knn, target_names=reverse_label_mapping.values())
print(report_knn)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
                  precision    recall  f1-score   support

    MildDemented       1.00      1.00      1.00        10
ModerateDemented       1.00      1.00      1.00        10
     NonDemented       1.00      1.00      1.00        10
VeryMildDemented       1.00      1.00      1.00        10

        accuracy                           1.00        40
       macro avg       1.00      1.00      1.00        40
    weighted avg       1.00      1.00      1.00        40



## Random Forest Model

### Cross-Validation Evaluation

In [74]:
n_estimators = 100
rf_model = RandomForestClassifier(n_estimators=n_estimators, random_state=SEED)

# Flatten the extracted features for Random Forest input
X_train_rf = train_features.reshape((train_features.shape[0], -1))
X_test_rf = test_features.reshape((test_features.shape[0], -1))

cv_scores_rf = cross_validate(rf_model, X_train_rf, y_train_encoded, cv=STRATIFIED_K_FOLD, scoring=SCORING)

print_cross_validation_scores(cv_scores_rf)

Accuracy scores for each fold: [0.89, 0.88, 0.89, 0.89, 0.89]
Mean accuracy: 0.89

F1 scores for each fold: [0.88, 0.88, 0.88, 0.89, 0.88]
Mean f1: 0.88

Recall scores for each fold: [0.89, 0.88, 0.89, 0.89, 0.89]
Mean recall: 0.89

Precision scores for each fold: [0.89, 0.88, 0.89, 0.89, 0.89]
Mean precision: 0.89



### Evaluation on Unseen Data

In [75]:
# Train the Random Forest model
rf_model.fit(X_train_rf, y_train_encoded)

In [104]:
# Make predictions on the unseen test data
predictions_rf = rf_model.predict(test_features)

# Evaluate the model's performance
accuracy_rf = accuracy_score(y_test_encoded, predictions_rf)
precision_rf = precision_score(y_test_encoded, predictions_rf, average='weighted', zero_division=0)
recall_rf = recall_score(y_test_encoded, predictions_rf, average='weighted', zero_division=0)
f1_rf = f1_score(y_test_encoded, predictions_rf, average='weighted', zero_division=0)

print(f"Accuracy: {round(accuracy_rf, 2)}")
print(f"Precision: {round(precision_rf, 2)}")
print(f"Recall: {round(recall_rf, 2)}")
print(f"F1 Score: {round(f1_rf, 2)}")

report_rf = classification_report(y_test_encoded, predictions_rf, target_names=reverse_label_mapping.values())
print(report_rf)

Accuracy: 0.72
Precision: 0.8
Recall: 0.72
F1 Score: 0.66
                  precision    recall  f1-score   support

    MildDemented       0.83      1.00      0.91        10
ModerateDemented       1.00      0.10      0.18        10
     NonDemented       0.83      1.00      0.91        10
VeryMildDemented       0.53      0.80      0.64        10

        accuracy                           0.73        40
       macro avg       0.80      0.73      0.66        40
    weighted avg       0.80      0.72      0.66        40



## CNN Model

In this section, we will train and evaluate the performance of a CNN to predict on the test dataset. This will not make use of the feature extraction.

### Cross-Validation Evaluation

In [None]:
cnn_epochs = 10
cnn_model = create_cnn()

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True)

accuracy_scores = []
f1_scores = []
recall_scores = []
precision_scores = []

# Iterate over the folds
for train_index, val_index in kf.split(X_train):
    # Split data into training and validation sets
    fold_X_train, fold_X_val = X_train[train_index], X_train[val_index]
    fold_y_train, fold_y_val = y_train_encoded[train_index], y_train_encoded[val_index]

    metrics_callback = MetricsCallback(validation_data=(fold_X_val, fold_y_val))

    # Train the model on the training set for this fold
    history = cnn_model.fit(fold_X_train, fold_y_train, epochs=cnn_epochs, batch_size=BATCH_SIZE, validation_data=(fold_X_val, fold_y_val), verbose=0, callbacks=[metrics_callback])

    # After training, you can access the accuracy, F1, recall, and precision scores from the history object
    val_accuracy = history.history['val_accuracy']
    val_f1 = history.history['val_f1']
    val_recall = history.history['val_recall']
    val_precision = history.history['val_precision']

    # Store evaluation metrics for this fold
    accuracy_scores.append(val_accuracy)
    f1_scores.append(val_f1)
    recall_scores.append(val_recall)
    precision_scores.append(val_precision)



In [96]:
# Calculate average scores across all folds
avg_accuracy_cnn = np.mean(accuracy_scores)
avg_f1_cnn = np.mean(f1_scores)
avg_recall_cnn = np.mean(recall_scores)
avg_precision_cnn = np.mean(precision_scores)

print("Average Accuracy:", round(avg_accuracy_cnn, 2))
print("Average F1 Score:", round(avg_f1_cnn, 2))
print("Average Recall:", round(avg_recall_cnn, 2))
print("Average Precision:", round(avg_precision_cnn, 2))

Average Accuracy: 0.96
Average F1 Score: 0.95
Average Recall: 0.96
Average Precision: 0.95


### Evaluation on Unseen Data

In [111]:
test_loss_cnn, test_accuracy_cnn = cnn_model.evaluate(X_test, y_test_encoded, verbose=0)

# Predict classes for test data
predictions_cnn = cnn_model.predict(X_test)
y_pred_classes_cnn = np.argmax(predictions_cnn, axis=1)

# Decode labels
y_pred_classes_decoded = np.array([reverse_label_mapping[index] for index in y_pred_classes_cnn])
y_test_decoded = np.array([reverse_label_mapping[index] for index in y_test_encoded])

report_cnn = classification_report(y_test_decoded, y_pred_classes_decoded)

print("CNN test Loss:", round(test_loss_cnn, 2))
print("CNN test Accuracy:", round(test_accuracy_cnn))
print("CNN Metrics:")
print(report_cnn)

CNN test Loss: 0.0
CNN test Accuracy: 1
CNN Metrics:
                  precision    recall  f1-score   support

    MildDemented       1.00      1.00      1.00        10
ModerateDemented       1.00      1.00      1.00        10
     NonDemented       1.00      1.00      1.00        10
VeryMildDemented       1.00      1.00      1.00        10

        accuracy                           1.00        40
       macro avg       1.00      1.00      1.00        40
    weighted avg       1.00      1.00      1.00        40



## Overall Evaluation

In [113]:
def classification_report_df(y_true, y_pred, labels, target_names):
    report = classification_report(y_true, y_pred, labels=labels, target_names=target_names, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    return report_df

labels = np.unique(y_test_encoded)
report_rf = classification_report_df(y_test_encoded, predictions_rf, labels=labels, target_names=reverse_label_mapping.values())
report_knn = classification_report_df(y_test_encoded, predictions_knn, labels=labels, target_names=reverse_label_mapping.values())
report_cnn = classification_report_df(y_test_encoded, y_pred_classes_cnn, labels=labels, target_names=reverse_label_mapping.values())

# add the name of the model as a column
report_rf['Model'] = 'Random Forest'
report_knn['Model'] = 'KNN'
report_cnn['Model'] = 'CNN'

comparison_df = pd.concat([report_rf, report_knn, report_cnn])
comparison_df.drop(columns=['support'], inplace=True)

print(comparison_df)

                  precision  recall  f1-score          Model
MildDemented       0.833333   1.000  0.909091  Random Forest
ModerateDemented   1.000000   0.100  0.181818  Random Forest
NonDemented        0.833333   1.000  0.909091  Random Forest
VeryMildDemented   0.533333   0.800  0.640000  Random Forest
accuracy           0.725000   0.725  0.725000  Random Forest
macro avg          0.800000   0.725  0.660000  Random Forest
weighted avg       0.800000   0.725  0.660000  Random Forest
MildDemented       1.000000   1.000  1.000000            KNN
ModerateDemented   1.000000   1.000  1.000000            KNN
NonDemented        1.000000   1.000  1.000000            KNN
VeryMildDemented   1.000000   1.000  1.000000            KNN
accuracy           1.000000   1.000  1.000000            KNN
macro avg          1.000000   1.000  1.000000            KNN
weighted avg       1.000000   1.000  1.000000            KNN
MildDemented       1.000000   1.000  1.000000            CNN
ModerateDemented   1.000