In [None]:
"""
Filename: mlp_model.ipynb
Description: MRI brain images classification analysis with MLP model

Author: Ng, Wee Ding
Date Created: 2024-11-30
Last Modified: 2024-12-06
Version: 1.0

License: MIT
"""

In [None]:
import os
import numpy as np
from datetime import datetime 
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
from skimage.color import rgb2gray
from sklearn.decomposition import PCA, KernelPCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize, StandardScaler

matplotlib.style.use('ggplot')

np.__version__

In [None]:
training = "test_data/Training" # MODIFY TO CHANGE TO data folder
testing = "test_data/Testing" # MODIFY TO CHANGE TO data folder

IMG_SIZE = 128


def _list_class_folders(root):
    """Return the names of the sub-directories of ``root`` (one per class).

    Plain files sitting next to the class folders are ignored. The listing
    order of ``os.listdir`` is preserved (not sorted) so the 1-based labels
    derived from it stay the same as in earlier runs of this notebook.
    """
    return [
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]


def _load_split(root, class_folders, label_of, img_size=IMG_SIZE):
    """Load every image below ``root`` as a flattened, normalised grayscale vector.

    Args:
        root: Directory that contains one sub-folder per class.
        class_folders: Names of the class sub-folders to read, in order.
        label_of: Mapping from class folder name to its integer label.
        img_size: Images are resized to ``img_size`` x ``img_size`` pixels.

    Returns:
        Tuple ``(X, y)``: ``X`` has one row of ``img_size * img_size`` floats
        in [0, 1] per image, ``y`` is an ``(n_images, 1)`` column of labels.

    Raises:
        ValueError: If a class folder has no entry in ``label_of`` or if no
            image was found at all.
    """
    features = []
    labels = []
    for folder in class_folders:
        path = os.path.join(root, folder)
        if folder not in label_of:
            raise ValueError(
                f"Class folder {path!r} has no counterpart in the training data"
            )
        label = label_of[folder]
        print(f"Label: {label} = {path}")

        for img in os.listdir(path):
            # The context manager closes the underlying file handle once the
            # pixel data has been copied into a numpy array.
            with Image.open(os.path.join(path, img)) as image:
                # Ensure the image is in grayscale mode ('L')
                gray = image if image.mode == "L" else image.convert("L")
                pixels = np.array(gray.resize((img_size, img_size)))
            features.append((pixels / 255.0).flatten())  # normalize to [0, 1]
            labels.append(label)

    if not features:
        raise ValueError(f"No images found under {root!r}")
    return np.vstack(features), np.array(labels).reshape(-1, 1)


training_dir = _list_class_folders(training)
testing_dir = _list_class_folders(testing)

# One label map, built from the training folders, is shared by both splits so
# that a class always receives the same integer even when the two directory
# listings come back in different orders.
class_label_map = {folder: index for index, folder in enumerate(training_dir, start=1)}

X_train, y_train = _load_split(training, training_dir, class_label_map)
X_test, y_test = _load_split(testing, testing_dir, class_label_map)




In [None]:


# Two-component linear PCA used for the 2-D scatter plots.
# random_state pins the randomized SVD solver that scikit-learn may select
# for inputs of this size, so the projection is reproducible across runs.
pca = PCA(n_components=2, random_state=42)
kernel_pca = KernelPCA(
    n_components=None, kernel="rbf", gamma=10, fit_inverse_transform=True, alpha=0.1
)

# Fit each projector exactly once on the training split; the test split is
# only transformed, never used for fitting.
pca.fit(X_train)
kernel_pca.fit(X_train)

X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

X_train_kernel_pca = kernel_pca.transform(X_train)
X_test_kernel_pca = kernel_pca.transform(X_test)

In [None]:
# Single-panel figure: test split projected onto the first two principal
# components, coloured by class label.
fig, pca_proj_ax = plt.subplots(ncols=1, figsize=(5, 3))

# Spacing between subplots (horizontal and vertical)
fig.subplots_adjust(wspace=0.4, hspace=0.3)

pca_proj_ax.scatter(
    x=X_test_pca[:, 0],
    y=X_test_pca[:, 1],
    c=y_test,
    cmap="plasma",
)
pca_proj_ax.set_xlabel("Principal component #0")
pca_proj_ax.set_ylabel("Principal component #1")
pca_proj_ax.set_title("Projection of testing data\n using PCA")

In [None]:
# Side-by-side PCA projections: training split on the left, test split on the
# right, both coloured by class label.
fig, (pca_proj_ax, pca_proj_ax1) = plt.subplots(ncols=2, figsize=(10, 3))

# Spacing between subplots (horizontal and vertical)
fig.subplots_adjust(wspace=0.4, hspace=0.3)

# Both panels share the same scatter settings and axis labels.
for _ax, _points, _labels in (
    (pca_proj_ax, X_train_pca, y_train),
    (pca_proj_ax1, X_test_pca, y_test),
):
    _ax.scatter(_points[:, 0], _points[:, 1], c=_labels, cmap="plasma")
    _ax.set_ylabel("Principal component #1")
    _ax.set_xlabel("Principal component #0")

pca_proj_ax.set_title("Projection of training data\n using PCA")
pca_proj_ax1.set_title("Projection of testing data\n using PCA")

In [None]:
## Create an MLPClassifier model (2 hidden layers, with 64 and 32 neurons respectively)
#mlp = MLPClassifier(hidden_layer_sizes=(64, 32),
#                    max_iter=1000, random_state=42, solver="adam", learning_rate_init=0.01, verbose=10)


In [None]:
def plot_results(true_labels, predicted_labels, class_names):
    """
    Print accuracy and a classification report, then draw the confusion matrix.

    Note: a later cell defines another ``plot_results`` that additionally takes
    ``predicted_probs``; once that cell has run, it replaces this definition.

    Args:
        true_labels: Ground truth labels.
        predicted_labels: Model-predicted labels.
        class_names: Display names, used as ``target_names`` in the report and
            as tick labels on both axes of the confusion matrix.
    """
    # Overall accuracy
    overall_accuracy = accuracy_score(true_labels, predicted_labels)
    print(f'Accuracy Validation: {overall_accuracy:.4f}')

    # Confusion matrix and per-class text report
    confusion = confusion_matrix(true_labels, predicted_labels)
    print("\nClassification Report:")
    report_text = classification_report(
        true_labels, predicted_labels, target_names=class_names
    )
    print(report_text)

    # Heatmap of the confusion matrix, labelled with the class names
    _, heatmap_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        confusion,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        ax=heatmap_ax,
    )
    heatmap_ax.set_xlabel('Predicted')
    heatmap_ax.set_ylabel('True')
    heatmap_ax.set_title('Confusion Matrix - MLP')
    plt.show()
    print(f'Validation Accuracy: {overall_accuracy:.4f}')
    

In [None]:
# Train the model on the training data
#mlp.fit(X_train, y_train)

# Make predictions on the test data
#y_pred = mlp.predict(X_test)


In [None]:
def plot_results(true_labels, predicted_labels, predicted_probs, class_names, classes=None):
    """
    Print accuracy and a classification report, draw the confusion matrix and
    plot one-vs-rest ROC curves with their AUC for every class.

    Args:
        true_labels: Ground truth labels, as integers.
        predicted_labels: Model-predicted labels, as integers.
        predicted_probs: Array of predicted probabilities with one column per
            class, in the same order as ``class_names``.
        class_names: List of class names; entry ``i`` names column ``i`` of
            ``predicted_probs``.
        classes: Integer label value of each class, in the same order as
            ``class_names``. Defaults to ``1 .. len(class_names)``.

    Raises:
        ValueError: If ``classes`` or the columns of ``predicted_probs`` do not
            match ``class_names`` in number.
    """
    if classes is None:
        # Labels are 1-based by default: the i-th class name has label i + 1.
        classes = list(range(1, len(class_names) + 1))
    else:
        classes = list(classes)
    if len(classes) != len(class_names):
        raise ValueError(
            f"Got {len(classes)} class labels for {len(class_names)} class names"
        )
    if predicted_probs.shape[1] != len(class_names):
        raise ValueError(
            f"predicted_probs has {predicted_probs.shape[1]} columns "
            f"but {len(class_names)} class names were given"
        )

    # Calculate and print accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f'Accuracy Validation: {accuracy:.4f}')

    # Compute confusion matrix and print classification report
    cm = confusion_matrix(true_labels, predicted_labels)
    print("\nClassification Report:")
    print(classification_report(true_labels, predicted_labels, target_names=class_names))

    # Plot confusion matrix with class names
    _, cm_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names, ax=cm_ax)
    cm_ax.set_xlabel('Predicted')
    cm_ax.set_ylabel('True')
    cm_ax.set_title('Confusion Matrix - MLP')
    plt.show()
    print(f'Validation Accuracy: {accuracy:.4f}')

    # One-vs-rest indicator matrix: column i is 1 where the true label is classes[i]
    true_labels_binarized = label_binarize(true_labels, classes=classes)
    if len(classes) == 2 and true_labels_binarized.shape[1] == 1:
        # For two classes label_binarize returns a single column (indicator of
        # the second class); expand it so every class has its own column.
        true_labels_binarized = np.hstack(
            [1 - true_labels_binarized, true_labels_binarized]
        )

    # Plot the ROC curve and collect the ROC-AUC for each class
    roc_auc_scores = []
    _, roc_ax = plt.subplots(figsize=(8, 6))
    for i, class_name in enumerate(class_names):
        fpr, tpr, _ = roc_curve(true_labels_binarized[:, i], predicted_probs[:, i])
        class_auc = roc_auc_score(true_labels_binarized[:, i], predicted_probs[:, i])
        roc_auc_scores.append(class_auc)
        roc_ax.plot(fpr, tpr, label=f'{class_name} (AUC = {class_auc:.2f})')

    roc_ax.plot([0, 1], [0, 1], 'k--', label='Random Guess')
    roc_ax.set_title('ROC Curves - MLP', fontsize=14)
    roc_ax.set_xlabel('False Positive Rate (FPR)', fontsize=12)
    roc_ax.set_ylabel('True Positive Rate (TPR)', fontsize=12)
    roc_ax.legend(loc='lower right', fontsize=10)
    roc_ax.grid(True)
    plt.show()
    print(f'Validation Accuracy: {accuracy:.4f}')
    print("ROC-AUC Scores for each class:")
    output = ", ".join([f"{class_name}: {roc_auc_scores[i]:.4f}" for i, class_name in enumerate(class_names)])
    print(output)
    for i, class_name in enumerate(class_names):
        print(f"{class_name}: {roc_auc_scores[i]:.4f}")
    
def plot_loss_curve(model):
    """
    Draw the training loss stored on a fitted model, one point per iteration.

    Args:
        model: Fitted estimator exposing a ``loss_curve_`` sequence
            (e.g. a trained MLPClassifier).
    """
    _, loss_ax = plt.subplots(figsize=(8, 6))
    loss_ax.plot(model.loss_curve_, label='Loss Curve', linewidth=2)
    loss_ax.set_title('Loss Over Iterations - MLP', fontsize=14)
    loss_ax.set_xlabel('Iteration', fontsize=12)
    loss_ax.set_ylabel('Loss', fontsize=12)
    loss_ax.grid(True)
    loss_ax.legend()
    plt.show()


# Standardise every pixel feature; the scaler is fitted on the training split
# only and then applied unchanged to the test split.
# NOTE: X_train / X_test are overwritten with their scaled versions here, so
# re-running this cell without re-running the loading cell scales the data again.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create an MLPClassifier model (2 hidden layers, with 64 and 32 neurons respectively)
mlp = MLPClassifier(hidden_layer_sizes=(64, 32),
                    max_iter=1000, random_state=42, solver="adam", learning_rate_init=0.01, verbose=10)

# Train the model on the training data.
# y_train is stored as a column vector; ravel() hands scikit-learn the 1-D
# label array it expects and avoids the DataConversionWarning.
mlp.fit(X_train, np.ravel(y_train))

plot_loss_curve(mlp)

# Make predictions on the test data
y_pred = mlp.predict(X_test)
y_probs = mlp.predict_proba(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Validation Accuracy: {accuracy:.2f}")

# Label i + 1 was assigned to the i-th entry of training_dir when the data was
# loaded, so taking the names from training_dir keeps every name attached to
# the right label regardless of the order the folders were listed in.
class_names = list(training_dir)
plot_results(y_test, y_pred, y_probs, class_names)



In [None]:
# Per-class precision / recall / F1 summary of the test-set predictions,
# keyed by the raw integer labels (no class names are passed here).
class_report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", class_report)