In [1]:
# Import necessary libraries
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from sklearn.metrics import classification_report, confusion_matrix

2025-02-28 16:15:25.541539: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Root folder of the training images, given relative to the notebook's working
# directory. The exploration cells below treat every sub-directory of this folder
# as one class and count the image files inside it.
dataset_dir = "data/Training/"

In [None]:
# Data Exploration

# Each sub-folder of dataset_dir represents one tumor type. os.listdir() returns
# entries in arbitrary order, so the names are sorted to keep the printed counts
# and the bar order identical from run to run.
classes = sorted(
    d for d in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, d))
)
print("Classes found:", classes)

# Image counts per class, keyed by class folder name
class_counts = defaultdict(int)

# Count the image files (matched by extension, case-insensitively) in every class folder
for cls in classes:
    class_path = os.path.join(dataset_dir, cls)
    image_files = [f for f in os.listdir(class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    class_counts[cls] = len(image_files)
    print(f"Class '{cls}' has {len(image_files)} images.")

# Visualize class distribution. The dict views are materialised as lists so that
# matplotlib receives plain sequences for the categories and the bar heights.
plt.figure(figsize=(8, 5))
plt.bar(list(class_counts.keys()), list(class_counts.values()), color='skyblue')
plt.title("Class Distribution in Brain Tumor MRI Dataset")
plt.xlabel("Tumor Class")
plt.ylabel("Number of Images")
plt.show()

In [None]:
# Function to load and preprocess an image
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Read an image file and return it as a resized RGB array divided by 255.

    The file is decoded with OpenCV, converted from BGR to RGB channel order,
    resized with ``cv2.resize(..., target_size)`` and divided by 255.0.

    Args:
        image_path: Path of the image file to read.
        target_size: Size tuple handed unchanged to ``cv2.resize``.

    Returns:
        The processed image array, or ``None`` when any step fails. Failures
        are reported with ``print`` rather than raised to the caller.
    """
    try:
        bgr_pixels = cv2.imread(image_path)
        # A None result from imread is treated as a load failure
        if bgr_pixels is None:
            raise ValueError("Image not loaded properly.")
        rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
        return cv2.resize(rgb_pixels, target_size) / 255.0
    except Exception as err:
        # Best-effort loader: report the problem and let the caller skip this file
        print(f"Error processing image {image_path}: {err}")
        return None

# Load and display one sample image from each class
sample_images = {}
for cls in classes:
    class_path = os.path.join(dataset_dir, cls)
    # os.listdir() order is arbitrary; sorting makes "the first image" the same
    # file on every run, so the displayed samples are reproducible.
    image_files = sorted(
        f for f in os.listdir(class_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if image_files:
        sample_path = os.path.join(class_path, image_files[0])
        img = load_and_preprocess_image(sample_path)
        # The loader returns None on failure; such classes are simply left out
        if img is not None:
            sample_images[cls] = img

# Only draw the figure when at least one sample was loaded, so an empty or
# unreadable dataset does not produce a blank figure.
if sample_images:
    plt.figure(figsize=(15, 5))
    for idx, (cls, img) in enumerate(sample_images.items()):
        plt.subplot(1, len(sample_images), idx+1)
        plt.imshow(img)
        plt.title(cls)
        plt.axis('off')
    plt.suptitle("Sample Preprocessed Images from Each Tumor Class")
    plt.show()
else:
    print("No sample images could be loaded; nothing to display.")

In [None]:
# Data Augmentation using Keras ImageDataGenerator

# Random transformations used for the demonstration below: rotation, zoom,
# horizontal/vertical shifts and horizontal flips, with gaps filled from the
# nearest pixels.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Demonstrate the augmentation on the sample image of the first available class
if sample_images:
    sample_class = next(iter(sample_images))
    img = sample_images[sample_class]
    # The generator works on batches, so add a leading batch axis
    img_expanded = img[np.newaxis, ...]
    aug_iter = datagen.flow(img_expanded, batch_size=1)

    # Draw five augmented variants; each batch holds exactly one image
    aug_images = []
    for _ in range(5):
        aug_images.append(next(aug_iter)[0])

    plt.figure(figsize=(15, 5))
    for position, aug_img in enumerate(aug_images, start=1):
        plt.subplot(1, 5, position)
        plt.imshow(aug_img)
        plt.axis('off')
    plt.suptitle(f"Data Augmentation Examples for Class '{sample_class}'")
    plt.show()

In [None]:
# Modeling

# Basic CNN Model
def create_basic_cnn(input_shape=(224,224,3), num_classes=4):
    """Build the baseline CNN: two conv/max-pool stages and a dense classifier.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    net = Sequential()

    # Feature extractor: 32 then 64 filters, each followed by 2x2 max pooling
    net.add(Conv2D(32, (3,3), activation='relu', input_shape=input_shape))
    net.add(MaxPooling2D(pool_size=(2,2)))
    net.add(Conv2D(64, (3,3), activation='relu'))
    net.add(MaxPooling2D(pool_size=(2,2)))

    # Classifier head
    net.add(Flatten())
    net.add(Dense(128, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))
    return net

# Instantiate the baseline network with its default input shape and class count,
# compile it for one-hot multi-class training, and print the layer summary.
basic_cnn = create_basic_cnn()
basic_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
basic_cnn.summary()

In [None]:
# Enhanced CNN Model with Regularization
def create_enhanced_cnn(input_shape=(224,224,3), num_classes=4):
    """Build the regularised CNN: three conv stages plus a dense classifier.

    Every conv stage is Conv2D -> BatchNormalization -> 2x2 MaxPooling2D ->
    Dropout(0.25), with 32, 64 and 128 filters respectively. The head is
    Flatten -> Dense(256) -> BatchNormalization -> Dropout(0.5) -> softmax.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    net = Sequential()

    for stage_index, n_filters in enumerate((32, 64, 128)):
        # Only the very first layer declares the input shape
        first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
        net.add(Conv2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
        net.add(BatchNormalization())
        net.add(MaxPooling2D(pool_size=(2,2)))
        net.add(Dropout(0.25))

    # Classifier head with heavier dropout before the output layer
    net.add(Flatten())
    net.add(Dense(256, activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))
    return net

# Instantiate the regularised network with its defaults, compile it for one-hot
# multi-class training, and print the layer summary.
enhanced_cnn = create_enhanced_cnn()
enhanced_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
enhanced_cnn.summary()

In [None]:
# Transfer Learning Model
def create_transfer_learning_model(input_shape=(224,224,3), num_classes=4):
    """Build a classifier on top of a frozen, ImageNet-pretrained VGG16 base.

    The VGG16 network is loaded without its top classifier layers and every
    one of its layers is marked non-trainable. A new head is attached:
    GlobalAveragePooling2D -> Dense(128, relu) -> Dense(num_classes, softmax).

    Args:
        input_shape: Input shape handed to ``VGG16``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The uncompiled functional ``Model`` from the VGG16 input to the new head.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the pretrained layers so only the new head has trainable weights
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False

    pooled_features = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(128, activation='relu')(pooled_features)
    class_probabilities = Dense(num_classes, activation='softmax')(hidden)

    return Model(inputs=backbone.input, outputs=class_probabilities)

# Instantiate the VGG16-based network with its defaults, compile it for one-hot
# multi-class training, and print the layer summary.
transfer_model = create_transfer_learning_model()
transfer_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
transfer_model.summary()

In [None]:
# Training and evaluation

# Define image dimensions and batch size
img_height, img_width = 224, 224
batch_size = 32

# Directories holding the training images and the hold-out test images
# (adjust these paths as needed)
train_dir = "data/Training/"
test_dir = "data/Testing/"

# Fraction of the images in train_dir reserved for validation
validation_fraction = 0.2

# Training generator: rescaling plus random augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction,
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation generator: rescaling ONLY. Taking the validation subset from
# train_datagen would apply the random rotations/shifts/flips to the validation
# images as well, making validation metrics noisy and not comparable with the
# (un-augmented) test metrics. The same validation_split is used on the same
# directory so that this generator's 'validation' subset is the complement of
# train_datagen's 'training' subset.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction
)

# Generate training data
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Generate validation data (unshuffled so predictions line up with .classes)
validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Create a separate ImageDataGenerator for the test set (only rescaling, no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Generate test data from the hold-out Testing directory
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Set training parameters
epochs = 10  # Adjust epochs as needed

# Function to train and evaluate a given model on training and validation sets
def train_and_evaluate(model, model_name):
    """Train ``model`` and report its performance on the validation set.

    Uses the module-level ``train_generator``, ``validation_generator``,
    ``batch_size`` and ``epochs``. After training, prints the validation loss
    and accuracy followed by a per-class classification report.

    Args:
        model: A compiled Keras model.
        model_name: Label used in the printed output.

    Returns:
        A ``(history, val_loss, val_acc)`` tuple, where ``history`` is the
        object returned by ``model.fit``.
    """
    print(f"\nTraining {model_name}...")

    # len(generator) counts every batch, including a final partial one. Using
    # samples // batch_size here would silently drop the last partial batch from
    # the metrics and, because the generator is unshuffled, from the tail of
    # validation_generator.classes in the classification report.
    val_steps = len(validation_generator)

    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // batch_size,
        validation_data=validation_generator,
        validation_steps=val_steps,
        epochs=epochs
    )

    # Evaluate the model on the complete validation set
    val_loss, val_acc = model.evaluate(validation_generator, steps=val_steps)
    print(f"{model_name} Validation Loss: {val_loss:.4f}")
    print(f"{model_name} Validation Accuracy: {val_acc:.4f}")

    # Predict every validation sample, restarting the generator from its first batch
    validation_generator.reset()
    preds = model.predict(validation_generator, steps=val_steps, verbose=1)
    predicted_class_indices = np.argmax(preds, axis=1)
    true_class_indices = validation_generator.classes

    # Pass the label indices explicitly (ordered by index, matching the names) so
    # the report still works when a class is absent from both labels and predictions.
    classes_by_index = sorted(validation_generator.class_indices.items(), key=lambda item: item[1])
    target_names = [name for name, _ in classes_by_index]
    label_indices = [index for _, index in classes_by_index]

    print(f"\nClassification Report for {model_name} (Validation):")
    print(classification_report(
        true_class_indices,
        predicted_class_indices,
        labels=label_indices,
        target_names=target_names
    ))

    return history, val_loss, val_acc

# Function to evaluate a given model on the hold-out test set
def evaluate_on_test(model, model_name):
    """Evaluate ``model`` on the hold-out test set and print a per-class report.

    Uses the module-level ``test_generator``.

    Args:
        model: A trained Keras model.
        model_name: Label used in the printed output.

    Returns:
        A ``(test_loss, test_acc)`` tuple.
    """
    print(f"\nEvaluating {model_name} on hold-out Test dataset...")

    # len(generator) counts every batch, including a final partial one. Using
    # samples // batch_size here would silently drop the last partial batch from
    # the metrics and, because the generator is unshuffled, from the tail of
    # test_generator.classes in the classification report.
    test_steps = len(test_generator)

    test_loss, test_acc = model.evaluate(test_generator, steps=test_steps)
    print(f"{model_name} Test Loss: {test_loss:.4f}")
    print(f"{model_name} Test Accuracy: {test_acc:.4f}")

    # Predict every test sample, restarting the generator from its first batch
    test_generator.reset()
    preds = model.predict(test_generator, steps=test_steps, verbose=1)
    predicted_class_indices = np.argmax(preds, axis=1)
    true_class_indices = test_generator.classes

    # Pass the label indices explicitly (ordered by index, matching the names) so
    # the report still works when a class is absent from both labels and predictions.
    classes_by_index = sorted(test_generator.class_indices.items(), key=lambda item: item[1])
    target_names = [name for name, _ in classes_by_index]
    label_indices = [index for _, index in classes_by_index]

    print(f"\nClassification Report for {model_name} (Test):")
    print(classification_report(
        true_class_indices,
        predicted_class_indices,
        labels=label_indices,
        target_names=target_names
    ))

    return test_loss, test_acc

# Run the full cycle for each architecture in turn: train with validation, then
# score on the hold-out test set. Results are kept in per-model variables so the
# comparison cell can read them.

# --- Basic CNN ---
print("### Basic CNN Model Evaluation ###")
(history_basic,
 loss_basic,
 acc_basic) = train_and_evaluate(basic_cnn, "Basic CNN Model")
(test_loss_basic,
 test_acc_basic) = evaluate_on_test(basic_cnn, "Basic CNN Model")

# --- Enhanced CNN ---
print("\n### Enhanced CNN Model Evaluation ###")
(history_enhanced,
 loss_enhanced,
 acc_enhanced) = train_and_evaluate(enhanced_cnn, "Enhanced CNN Model")
(test_loss_enhanced,
 test_acc_enhanced) = evaluate_on_test(enhanced_cnn, "Enhanced CNN Model")

# --- Transfer learning (VGG16) ---
print("\n### Transfer Learning Model Evaluation ###")
(history_transfer,
 loss_transfer,
 acc_transfer) = train_and_evaluate(transfer_model, "Transfer Learning Model")
(test_loss_transfer,
 test_acc_transfer) = evaluate_on_test(transfer_model, "Transfer Learning Model")

In [None]:
# Model comparison

# Display names and the matching hold-out test metrics, in the same order
models = ['Basic CNN', 'Enhanced CNN', 'Transfer Learning']
test_accuracies = [test_acc_basic, test_acc_enhanced, test_acc_transfer]
test_losses = [test_loss_basic, test_loss_enhanced, test_loss_transfer]

# One entry per panel: (title, y-axis label, bar heights, upper y-limit).
# Accuracy is shown on a fixed 0-1 axis; the loss axis gets 20% headroom above
# the largest loss.
panels = (
    ("Test Accuracy Comparison", "Accuracy", test_accuracies, 1),
    ("Test Loss Comparison", "Loss", test_losses, max(test_losses)*1.2),
)

# Side-by-side bar charts, one bar per model
plt.figure(figsize=(12, 5))
for panel_position, (panel_title, y_label, bar_heights, y_upper) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_position)
    plt.bar(models, bar_heights, color=['skyblue', 'lightgreen', 'salmon'])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.ylim(0, y_upper)

plt.tight_layout()
plt.show()