In [47]:
# Import libraries
import numpy as np
import os
import random
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization, GlobalAveragePooling2D, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt3
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [48]:
# Dataset link: https://www.kaggle.com/datasets/ninadaithal/imagesoasis

# Root folder of the dataset and the names of its four class folders
dataset_path = 'OASIS Data'
classes = [
    'Non Demented',
    'Mild Dementia',
    'Moderate Dementia',
    'Very mild Dementia',
]

# Parallel accumulators: one file path and one integer label per selected image
image_paths, labels = [], []

# Number of files to select randomly from each category.
# 1464 was chosen because there are only 488 images for moderate dementia.
num_files = 1464

# Function to load images
def load_images(paths, img_size=(224, 224)):
    """Load image files as normalized grayscale arrays.

    Args:
        paths: Iterable of image file paths.
        img_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument. Defaults to ``(224, 224)``.

    Returns:
        ``np.ndarray`` built from one resized image per path, in the same
        order as ``paths``, with pixel values divided by 255.0.

    Raises:
        ValueError: If ``cv2.imread`` cannot read one of the paths.
    """
    images = []
    for path in paths:
        # Read image (grayscale for simplicity)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if img is None:
            raise ValueError(f"Could not read image file: {path!r}")
        img = cv2.resize(img, img_size)
        images.append(img / 255.0)  # Normalize to [0, 1]
    return np.array(images)

# Collect image paths and binary labels:
#   0 -> 'Non Demented' (up to num_files images)
#   1 -> the three dementia folders combined (up to num_files // 3 images each)
SAMPLE_SEED = 42  # fixed seed so the same files are drawn after a kernel restart
sampler = random.Random(SAMPLE_SEED)  # private RNG; the global `random` state is left untouched


def _sample_image_paths(category_dir, max_files):
    """Return up to `max_files` randomly chosen regular-file paths from `category_dir`."""
    # Keep only regular files *before* sampling so that sub-directories or other
    # non-file entries cannot use up sample slots. Sort because os.listdir()
    # returns entries in arbitrary order, which would defeat the fixed seed.
    candidates = sorted(
        entry_path
        for entry_path in (os.path.join(category_dir, name) for name in os.listdir(category_dir))
        if os.path.isfile(entry_path)
    )
    return sampler.sample(candidates, min(max_files, len(candidates)))


non_demented_path = os.path.join(dataset_path, 'Non Demented')
non_demented_files = _sample_image_paths(non_demented_path, num_files)
image_paths.extend(non_demented_files)
labels.extend([0] * len(non_demented_files))  # Label for Non Demented

dementia_classes = ['Mild Dementia', 'Moderate Dementia', 'Very mild Dementia']
per_class_quota = num_files // len(dementia_classes)
for category in dementia_classes:
    category_path = os.path.join(dataset_path, category)
    selected_files = _sample_image_paths(category_path, per_class_quota)
    image_paths.extend(selected_files)
    labels.extend([1] * len(selected_files))  # Label for Dementia (combined)

# Convert labels to numpy array
labels = np.array(labels)

# Load the images
X = load_images(image_paths)

# Extract subject ID: the token between 'OAS1_' and the next '_' in each file name
subjects = [
    os.path.basename(image_path).split('OAS1_')[1].split('_')[0]
    for image_path in image_paths
]

# Perform train/test split by subject to prevent data leakage.
# The unique IDs are sorted because the iteration order of a set of strings
# changes between interpreter runs (string hashing is randomized per process);
# without a stable input order, random_state=42 would not give a repeatable split.
unique_subjects = sorted(set(subjects))
train_subjects, val_subjects = train_test_split(unique_subjects, test_size=0.3, random_state=42)

# Split images based on subject IDs (sets give constant-time membership tests)
train_subject_set = set(train_subjects)
val_subject_set = set(val_subjects)
train_indices = [i for i, subject in enumerate(subjects) if subject in train_subject_set]
val_indices = [i for i, subject in enumerate(subjects) if subject in val_subject_set]
X_train = X[train_indices]
y_train = labels[train_indices]
X_val = X[val_indices]
y_val = labels[val_indices]

# Reshape to include channel dimension (grayscale)
X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]

# Batch generators for training and validation.
# X_train / X_val come from load_images(), which already divides pixel values
# by 255. Passing rescale=1./255 here as well would scale the data a second
# time and squash every input into [0, 1/255], so no rescale is applied.
# No augmentation is configured for either generator.
train_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()

# Wrap the in-memory arrays as batch iterators
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32)


# Basic model: two convolution + max-pooling stages followed by a dense head
# with a single sigmoid output for the binary label.
model = Sequential()

# Convolutional stages on the 224x224 single-channel input
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dense head with dropout before the output unit
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 20 epochs, evaluating on the validation generator after each epoch
model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
)
