In [3]:
# ! pip install tensorflow
# ! pip install kagglehub
# ! pip install psutil
# ! pip install matplotlib

# Libraries

In [4]:
import os
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # to surpress the CUDA warnings

In [5]:
import matplotlib.pyplot as plt
import numpy as np

In [6]:
# check how many cp we are working with
print(f"Number of CPU cores available: {os.cpu_count()}")

Number of CPU cores available: 16


# Importing Dataset

In [7]:
# importing the data
import kagglehub

# download latest version
path = kagglehub.dataset_download("uraninjo/augmented-alzheimer-mri-dataset")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/gitpod/.cache/kagglehub/datasets/uraninjo/augmented-alzheimer-mri-dataset/versions/1


# Splitting the Dataset into Training and Testing Set

In [11]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Directories for Original and Augmented datasets
original_dir = os.path.join(path, 'OriginalDataset')
augmented_dir = os.path.join(path, 'AugmentedAlzheimerDataset')

def load_and_split_data(directory, img_size=(224, 224), batch_size=32):
    # Load all images without batching
    dataset = image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='int',
        image_size=img_size,
        batch_size=None  # Load all images as individual items
    )

    # Calculate dataset size and split
    dataset_size = sum(1 for _ in dataset)
    train_size = int(0.7 * dataset_size)

    train_dataset = dataset.take(train_size)
    val_dataset = dataset.skip(train_size)

    # Apply batching and prefetching
    train_dataset = train_dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    val_dataset = val_dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

    return train_dataset, val_dataset

# Load and split Original dataset
original_train, original_val = load_and_split_data(original_dir)

# Load and split Augmented dataset
augmented_train, augmented_val = load_and_split_data(augmented_dir)

# Combine the train and validation datasets
train_dataset = original_train.concatenate(augmented_train)
val_dataset = original_val.concatenate(augmented_val)

# Print dataset sizes in terms of individual samples
print(f"Original Train Size: {sum(1 for _ in original_train.unbatch())} images")
print(f"Augmented Train Size: {sum(1 for _ in augmented_train.unbatch())} images")
print(f"Total Train Size: {sum(1 for _ in train_dataset.unbatch())} images")

print(f"Original Validation Size: {sum(1 for _ in original_val.unbatch())} images")
print(f"Augmented Validation Size: {sum(1 for _ in augmented_val.unbatch())} images")
print(f"Total Validation Size: {sum(1 for _ in val_dataset.unbatch())} images")

# Save train and validation datasets for use in the training part
train_dataset_path = "train_dataset.tfrecord"
val_dataset_path = "val_dataset.tfrecord"

Found 6400 files belonging to 4 classes.
Found 33984 files belonging to 4 classes.
Original Train Size: 4480 images
Augmented Train Size: 23788 images
Total Train Size: 28268 images
Original Validation Size: 1920 images
Augmented Validation Size: 10196 images
Total Validation Size: 12116 images


2024-11-14 20:34:28.205198: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


# Training Dataset

In [None]:
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ConvNeXtTiny

# Define ConvNeXt model
def create_convnext_model(input_shape=(224, 224, 3), num_classes=4):
    """Define and return a ConvNeXt model."""
    base_model = ConvNeXtTiny(
        include_top=False,
        input_shape=input_shape,
        weights='imagenet'  # Use pre-trained weights
    )
    base_model.trainable = False  # Freeze the base model layers

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

# Use a distributed strategy for multi-GPU training
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Create and compile the model inside the strategy scope
    model = create_convnext_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

# Train the model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10
)

# Evaluate the model
results = model.evaluate(val_dataset)
print(f"Validation Accuracy: {results[1]*100:.2f}%")


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


2024-11-14 20:44:29.281514: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


Epoch 1/10


I0000 00:00:1731617076.039569    4628 service.cc:148] XLA service 0x7ff02c008240 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731617076.039741    4628 service.cc:156]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1731617076.062171    4628 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 38/884[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:42:27[0m 12s/step - accuracy: 0.4769 - loss: 1.1759