NOTES: Implement SVC and Random Forest
Compare their F1 scores
Use dropout and regularization
Use ResNet50 and VGG16
Make bar graphs to compare results across all implementations

# Importing Dataset

In [28]:
!pip install pillow



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [29]:
import os

# TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's native runtime is loaded, so
# it must be set *before* `import tensorflow`; setting it afterwards is a no-op.
# Level '2' hides INFO and WARNING messages (e.g. the CUDA warnings).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

In [30]:
# Fetch the Alzheimer MRI dataset from Kaggle through kagglehub.
import kagglehub

# dataset_download pulls the latest version and hands back its local directory
dataset_handle = "uraninjo/augmented-alzheimer-mri-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

# Google Colab alternative: mount Drive so the data does not have to be
# uploaded every session (keep it in a Drive folder called 'Alzheimer_Dataset').
# from google.colab import drive
# drive.mount('/content/drive')

Path to dataset files: /home/gitpod/.cache/kagglehub/datasets/uraninjo/augmented-alzheimer-mri-dataset/versions/1


In [31]:
# zip_path = '/content/drive/My Drive/Alzheimer_Dataset/archive.zip'  # change it to whatever the zip is named in your Google Drive

In [32]:
# import zipfile
# import os

# with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#     zip_ref.extractall('/content/Alzheimer_Dataset')

In [33]:
# # Check the directory
# print("Extracted files:", os.listdir('/content/Alzheimer_Dataset'))

# Splitting Data into Training and Testing Sets and Training

In [34]:
# Root of the downloaded dataset (the directory returned by the kagglehub call)
base_path = path

# Both image folders live directly under the dataset root: the original images
# are used for training, the augmented images for testing.
# NOTE(review): the augmented folder is presumably derived from the original
# images, in which case this split leaks information - confirm before trusting
# the reported scores.
train_dir, test_dir = (
    os.path.join(base_path, folder_name)
    for folder_name in ('OriginalDataset', 'AugmentedAlzheimerDataset')
)

# Echo both locations as a quick sanity check
print("Train Directory:", train_dir)
print("Test Directory:", test_dir)

# Colab equivalents when the archive is extracted manually:
# test_dir = '/content/Alzheimer_Dataset/AugmentedAlzheimerDataset'
# train_dir = '/content/Alzheimer_Dataset/OriginalDataset'

Train Directory: /home/gitpod/.cache/kagglehub/datasets/uraninjo/augmented-alzheimer-mri-dataset/versions/1/OriginalDataset
Test Directory: /home/gitpod/.cache/kagglehub/datasets/uraninjo/augmented-alzheimer-mri-dataset/versions/1/AugmentedAlzheimerDataset


In [35]:
# Load both splits as tf.data pipelines with one-hot (categorical) labels.
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train_dir,
    image_size=(128, 128),          # resize all images to 128x128 pixels
    batch_size=32,                  # load images in batches of 32 for efficient processing
    label_mode='categorical',       # assign categorical labels to each image (one-hot encoded)
    shuffle=True,                   # shuffle the dataset to promote model generalization
)

test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    test_dir,
    image_size=(128, 128),
    batch_size=32,
    label_mode='categorical',
    shuffle=False,                  # do not shuffle to maintain consistent evaluation order
)

# The previous version called Dataset.shard(num_shards=4, index=0) here.
# shard() is meant for splitting data between several workers; on a single
# worker it simply keeps every 4th batch and silently discards the other 75%
# of the data. To reduce data-loading overhead, prefetch instead so that
# input loading overlaps with computation while every batch is kept.
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

Found 6400 files belonging to 4 classes.
Found 33984 files belonging to 4 classes.


# Data Preprocessing

In [36]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: scale pixel values by 1/255 and apply random
# augmentation (rotation, zoom, shear, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   zoom_range=0.2,
                                   shear_range=0.2,
                                   horizontal_flip=True)

# Test generator: same scaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Load training data (shuffled, which is the flow_from_directory default)
train_data = train_datagen.flow_from_directory(train_dir,
                                               target_size=(224, 224),
                                               batch_size=32,
                                               class_mode='categorical')

# Load test data.
# shuffle=False is required: flow_from_directory shuffles by default, and the
# evaluation cells compare model.predict(test_data) against labels collected
# in a separate pass over test_data. With shuffling on, every pass yields a
# different order, so predictions and labels would not refer to the same
# images and the resulting F1 scores / confusion matrices would be meaningless.
test_data = test_datagen.flow_from_directory(test_dir,
                                             target_size=(224, 224),
                                             batch_size=32,
                                             class_mode='categorical',
                                             shuffle=False)

Found 6400 images belonging to 4 classes.
Found 33984 images belonging to 4 classes.


# Simple CNN

In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Baseline CNN: two conv/pool stages feeding a small dense classifier.
model = Sequential([
    # Stage 1: 32 filters of 3x3 on the 224x224 RGB input, then 2x2 pooling
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters of 3x3, then 2x2 pooling
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten, one hidden layer with dropout, 4-way softmax
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax'),  # 4 classes
])

# Adam optimizer with categorical cross-entropy; track accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview
model.summary()

In [38]:
# Train the baseline CNN; the test generator is scored after every epoch.
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=10,  # adjust to taste
)

ImportError: Could not import PIL.Image. The use of `load_img` requires PIL.

# Another Simple CNN (with Batch Normalization)

In [None]:
from tensorflow.keras.layers import BatchNormalization


def _conv_block(filters, **conv_kwargs):
    """Return one conv -> max-pool -> batch-norm -> dropout(0.25) stage as a layer list."""
    return [
        Conv2D(filters, (3, 3), activation='relu', **conv_kwargs),
        MaxPooling2D((2, 2)),
        BatchNormalization(),
        Dropout(0.25),
    ]


# CNN with three conv stages (32, 64, 128 filters) and a dense softmax head
# sized from the number of classes the training generator found.
cnn_model = Sequential(
    _conv_block(32, input_shape=(224, 224, 3))
    + _conv_block(64)
    + _conv_block(128)
    + [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(train_data.num_classes, activation='softmax'),
    ]
)

# Adam optimizer with categorical cross-entropy; track accuracy
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs, validating on the test generator after each one
cnn_history = cnn_model.fit(
    train_data,
    epochs=10,
    validation_data=test_data,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# SVC with VGG16 Feature extraction

In [None]:
from tensorflow.keras.applications import VGG16
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, f1_score

# VGG16 with ImageNet weights and without its top (classifier) layers,
# used below as a feature extractor for 224x224 RGB inputs.
vgg16_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

def extract_features(generator, model):
    """Run every batch of ``generator`` through ``model`` and collect the results.

    Args:
        generator: iterable yielding ``(inputs, one_hot_labels)`` batches and
            exposing ``batch_size`` and ``samples`` attributes.
        model: object with a ``predict(inputs)`` method returning one array
            per batch.

    Returns:
        A ``(features, labels)`` tuple: the per-batch predictions stacked along
        the first axis, and the integer class index (argmax of the one-hot
        labels) for each sample.
    """
    feature_batches, label_batches = [], []
    for batch_inputs, batch_labels in generator:
        feature_batches.append(model.predict(batch_inputs))
        label_batches.append(batch_labels)
        # The generator may loop forever, so stop once the number of batches
        # seen covers the declared sample count.
        batches_seen = len(feature_batches)
        if batches_seen * generator.batch_size >= generator.samples:
            break
    stacked_features = np.vstack(feature_batches)
    class_indices = np.vstack(label_batches).argmax(axis=1)
    return stacked_features, class_indices

# Push both generators through the VGG16 base to get SVC inputs and labels
train_features, train_labels = extract_features(train_data, vgg16_base)
test_features, test_labels = extract_features(test_data, vgg16_base)

# Flatten each sample's features to one vector; SVC takes 2-D input
train_features = train_features.reshape(len(train_features), -1)
test_features = test_features.reshape(len(test_features), -1)

# Fit a linear-kernel SVC with balanced class weights, then predict the test set
svc_model = SVC(kernel='linear', class_weight='balanced')
svc_predictions = svc_model.fit(train_features, train_labels).predict(test_features)

# Weighted F1 plus the per-class report
svc_f1 = f1_score(test_labels, svc_predictions, average='weighted')
print("SVC F1 Score:", svc_f1)
print("SVC Classification Report:")
print(classification_report(test_labels, svc_predictions))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
SVC F1 Score: 0.74377455180435
SVC Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.73      0.72      8960
           1       1.00      0.80      0.89      6464
           2       0.75      0.77      0.76      9600
           3       0.62      0.68      0.65      8960

    accuracy                           0.74     33984
   macro avg       0.77      0.74      0.75     33984
weighted avg       0.75      0.74      0.74     33984



# VGG16 with Transfer Learning and Dropout/Regularization

In [None]:
from tensorflow.keras.layers import Dense, Flatten, Dropout

# Freeze the pretrained VGG16 layers so only the new head is trained
vgg16_base.trainable = False

# Frozen VGG16 base + L2-regularized dense layer + dropout + softmax output
vgg16_model = Sequential()
vgg16_model.add(vgg16_base)
vgg16_model.add(Flatten())
vgg16_model.add(Dense(256, activation='relu', kernel_regularizer='l2'))
vgg16_model.add(Dropout(0.5))
vgg16_model.add(Dense(train_data.num_classes, activation='softmax'))

# Compile, then fit for 10 epochs with the test generator as validation data
vgg16_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
vgg16_history = vgg16_model.fit(
    train_data,
    epochs=10,
    validation_data=test_data,
)

# Predicted class = argmax over the softmax outputs.
# NOTE: test_labels was collected in an earlier pass over test_data, so the
# scores below are only meaningful if test_data yields samples in that same
# order during predict().
vgg16_probabilities = vgg16_model.predict(test_data)
vgg16_predictions = np.argmax(vgg16_probabilities, axis=1)

vgg16_f1 = f1_score(test_labels, vgg16_predictions, average='weighted')
print("VGG16 F1 Score:", vgg16_f1)
print("VGG16 Classification Report:")
print(classification_report(test_labels, vgg16_predictions))


Epoch 1/10
  2/200 [..............................] - ETA: 2:12 - loss: 8.5591 - accuracy: 0.2656

# ResNet with Transfer Learning and Regularization

In [None]:
from tensorflow.keras.applications import ResNet50

# ResNet50 with ImageNet weights and no top layers, frozen so that only the
# new head is trained
resnet50_base = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
resnet50_base.trainable = False

# New head: L2-regularized dense layer + dropout + softmax output
resnet50_head = [
    Flatten(),
    Dense(256, activation='relu', kernel_regularizer='l2'),
    Dropout(0.5),
    Dense(train_data.num_classes, activation='softmax'),
]
resnet50_model = Sequential([resnet50_base, *resnet50_head])

# Compile, then fit for 10 epochs with the test generator as validation data
resnet50_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
resnet50_history = resnet50_model.fit(
    train_data,
    epochs=10,
    validation_data=test_data,
)

# Predicted class = argmax over the softmax outputs.
# NOTE: test_labels was collected in an earlier pass over test_data, so the
# scores below are only meaningful if test_data yields samples in that same
# order during predict().
resnet50_probabilities = resnet50_model.predict(test_data)
resnet50_predictions = np.argmax(resnet50_probabilities, axis=1)

resnet50_f1 = f1_score(test_labels, resnet50_predictions, average='weighted')
print("ResNet50 F1 Score:", resnet50_f1)
print("ResNet50 Classification Report:")
print(classification_report(test_labels, resnet50_predictions))


# Results

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Model names and their weighted F1 scores, in plotting order
models = ['SVC', 'VGG16', 'ResNet50']
f1_scores = [svc_f1, vgg16_f1, resnet50_f1]

# One bar per model
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(models, f1_scores, color=['blue', 'green', 'red'])
ax.set_ylabel('F1 Score')
ax.set_title('F1 Score Comparison')
plt.show()

def calculate_fp_fn(cm):
    """Return, per class, the number of off-diagonal entries in its row and column.

    With scikit-learn's confusion-matrix layout (rows = true class, columns =
    predicted class) the column part is the class's false positives and the
    row part its false negatives, so each element is FP + FN for that class.

    Args:
        cm: square 2-D NumPy array (confusion matrix).

    Returns:
        1-D array with one FP + FN count per class.
    """
    correct = np.diag(cm)
    false_positives = cm.sum(axis=0) - correct
    false_negatives = cm.sum(axis=1) - correct
    return false_positives + false_negatives

# Confusion matrix of each model's predictions against the test labels
svc_cm = confusion_matrix(test_labels, svc_predictions)
vgg16_cm = confusion_matrix(test_labels, vgg16_predictions)
resnet50_cm = confusion_matrix(test_labels, resnet50_predictions)

# Total of the per-class FP + FN counts for each model
svc_fp_fn = calculate_fp_fn(svc_cm).sum()
vgg16_fp_fn = calculate_fp_fn(vgg16_cm).sum()
resnet50_fp_fn = calculate_fp_fn(resnet50_cm).sum()

fp_fn_counts = [svc_fp_fn, vgg16_fp_fn, resnet50_fp_fn]

# One bar per model showing its error total
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(models, fp_fn_counts, color=['blue', 'green', 'red'])
ax.set_ylabel('False Positives + False Negatives')
ax.set_title('False Positives and False Negatives Comparison')
plt.show()
