# Project Overview

Brief description of the brain tumor image classification project.

In this section, I converted all images to RGB format and resized them to (224, 224).
I also normalized each channel with mean 0.5 and standard deviation 0.5, which maps pixel values to the range [-1, 1]; this is a common preprocessing step before feeding the images into a convolutional neural network.


<span style="color:red"><b><<<<<<< local</b></span>

In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from pathlib import Path

def _to_rgb(img):
    """Return `img` converted to RGB mode."""
    return img.convert("RGB")


# Step 1: Preprocessing pipeline applied to every loaded image
transform = transforms.Compose([
    transforms.Lambda(_to_rgb),                # force 3 channels
    transforms.Resize((224, 224)),             # fixed 224x224 input size
    transforms.ToTensor(),                     # image -> tensor
    transforms.Normalize(                      # (x - 0.5) / 0.5 -> [-1, 1]
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5],
    ),
])

# Step 2: Dataset folders, resolved relative to the current working directory
base_dir = Path.cwd()
archive_dir = base_dir / "archive"
train_dir = archive_dir / "training"
test_dir = archive_dir / "testing"

# Step 3: One ImageFolder per split, both sharing the same transform
train_dataset = datasets.ImageFolder(root=str(train_dir), transform=transform)
test_dataset = datasets.ImageFolder(root=str(test_dir), transform=transform)

# Step 4: Batch the data; only the training split is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Step 5: Show which class folders were picked up
print("Detected classes:", train_dataset.classes)



# Exploratory Data Analysis

I verified that no class rebalancing is necessary, since the number of images per class is already reasonably balanced.
Additionally, I displayed several examples of the images after preprocessing to visualize how they look before being passed to the model.

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

# Read the class index of every sample from ImageFolder's `targets` list.
# Iterating the dataset itself would open, resize and normalize every image
# only to throw the pixels away.
labels = list(train_dataset.targets)
counter = Counter(labels)

# Index the bars by class id so that bar i always lines up with
# train_dataset.classes[i], independent of the Counter's insertion order.
class_ids = list(range(len(train_dataset.classes)))
class_counts = [counter[class_id] for class_id in class_ids]

plt.bar(class_ids, class_counts, tick_label=train_dataset.classes)
plt.title("Class distribution (training set)")
plt.xlabel("Class")
plt.ylabel("Number of images")
plt.show()



In [None]:
# Reverse normalization: from [-1,1] to [0,1]
# Normalize computes (x - mean) / std, so mean=-1 and std=2 give (x + 1) / 2,
# which undoes the forward (x - 0.5) / 0.5 step.
inv_normalize = transforms.Normalize(
    mean=[-1.0, -1.0, -1.0],
    std=[2.0, 2.0, 2.0]
)

# Collect one image per class.
# Find the first sample index of each class from the label list, then load
# only those samples; scanning the dataset itself would decode every image
# of the earlier classes before reaching the later ones.
first_index_per_label = {}
for sample_idx, label in enumerate(train_dataset.targets):
    if label not in first_index_per_label:
        first_index_per_label[label] = sample_idx
        if len(first_index_per_label) == len(train_dataset.classes):
            break

class_images = {
    train_dataset.classes[label]: train_dataset[sample_idx][0]
    for label, sample_idx in first_index_per_label.items()
}

# Display: row 1 -> normalized, row 2 -> de-normalized
n_classes = len(class_images)
plt.figure(figsize=(n_classes * 2.5, 5))

for idx, (class_name, img) in enumerate(class_images.items()):
    # Row 1: the tensor exactly as the model receives it. Its values span
    # [-1, 1], so this row is only meant to show the effect of normalization.
    plt.subplot(2, n_classes, idx + 1)
    plt.imshow(img.permute(1, 2, 0).numpy())  # CHW -> HWC for matplotlib
    plt.title(f"{class_name}\n(normalized)")
    plt.axis('off')

    # Row 2: De-normalized image (to visualize properly)
    img_inv = inv_normalize(img)
    img_inv = torch.clamp(img_inv, 0, 1)  # ensure values are in [0,1]
    plt.subplot(2, n_classes, n_classes + idx + 1)
    plt.imshow(img_inv.permute(1, 2, 0).numpy())
    plt.title(f"{class_name}\n(original)")
    plt.axis('off')

plt.tight_layout()
plt.show()

# Data Loading

Code to load image dataset for brain tumor classification.

<span style="color:red"><b>=======</b></span>

In [None]:
import kagglehub

# Fetch the latest version of the Kaggle dataset; the call returns the
# location of the downloaded files, which becomes the dataset root.
dataset_handle = "masoudnickparvar/brain-tumor-mri-dataset"
base_dir = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", base_dir)

<span style="color:red"><b>>>>>>>> remote</b></span>

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Class sub-folder names, identical under both splits
categories = ["glioma", "meningioma", "notumor", "pituitary"]

# Root folder of each split
train_dir = os.path.join(base_dir, "Training")
test_dir = os.path.join(base_dir, "Testing")

# Per-category training folders (unpacked in the order of `categories`)
(
    train_glioma_dir,
    train_meningioma_dir,
    train_notumor_dir,
    train_pituitary_dir,
) = (os.path.join(train_dir, cat) for cat in categories)

# Per-category testing folders (same order)
(
    test_glioma_dir,
    test_meningioma_dir,
    test_notumor_dir,
    test_pituitary_dir,
) = (os.path.join(test_dir, cat) for cat in categories)

# Directory listings, read one category at a time (training first, then testing)
train_glioma_files, test_glioma_files = map(
    os.listdir, (train_glioma_dir, test_glioma_dir)
)
train_meningioma_files, test_meningioma_files = map(
    os.listdir, (train_meningioma_dir, test_meningioma_dir)
)
train_notumor_files, test_notumor_files = map(
    os.listdir, (train_notumor_dir, test_notumor_dir)
)
train_pituitary_files, test_pituitary_files = map(
    os.listdir, (train_pituitary_dir, test_pituitary_dir)
)

# Report how many entries each folder contains
listings_by_category = zip(
    categories,
    (train_glioma_files, train_meningioma_files,
     train_notumor_files, train_pituitary_files),
    (test_glioma_files, test_meningioma_files,
     test_notumor_files, test_pituitary_files),
)
for cat_name, train_listing, test_listing in listings_by_category:
    print(f"Training {cat_name} images:", len(train_listing))
    print(f"Testing {cat_name} images:", len(test_listing))


# Data preprocessing

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from skimage import exposure

# Output size of every preprocessed image, given as (width, height)
target_size = (128, 128)

# split -> category -> folder holding that category's images
train_test_data_dirs = {
    split: {cat: os.path.join(base_dir, folder, cat) for cat in categories}
    for split, folder in (("train", "Training"), ("test", "Testing"))
}

# split -> category -> processed images; filled in once preprocessing runs
processed_data = {split: {} for split in ("train", "test")}

def preprocess_images(image_dir, target_size=target_size):
    """Load every image file in a directory as a normalized grayscale array.

    Each file is opened with PIL, converted to single-channel grayscale
    (mode ``'L'``), resized to ``target_size`` and divided by 255 so that
    pixel values lie in [0, 1].

    Args:
        image_dir: Directory whose files are the images to load.
        target_size: ``(width, height)`` handed to ``Image.resize``.

    Returns:
        A NumPy array of shape ``(n_images, height, width)`` with images in
        sorted filename order. A directory without any files yields an empty
        array of shape ``(0, height, width)``.
    """
    width, height = target_size
    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # position of each image in the result stable from run to run.
    for filename in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, filename)
        if not os.path.isfile(img_path):
            continue  # sub-directories cannot be opened as images
        with Image.open(img_path) as raw_img:
            gray_img = raw_img.convert('L')  # grayscale
            gray_img = gray_img.resize(target_size)
            img_arr = np.array(gray_img) / 255.0  # normalize to [0,1]
            # NOTE: histogram equalization (exposure.equalize_hist) is
            # currently disabled; the plain normalized array is stored.
            images.append(img_arr)
    if not images:
        # Keep the trailing (height, width) axes even when nothing was loaded,
        # so callers can rely on a 3-D result.
        return np.empty((0, height, width))
    return np.array(images)

# Run the preprocessing for each split and, within it, each category
for split in ("train", "test"):
    category_dirs = train_test_data_dirs[split]
    for cat in categories:
        print(f"Processing {split} {cat} images...")
        processed_batch = preprocess_images(category_dirs[cat])
        processed_data[split][cat] = processed_batch
        print(f"{split.capitalize()} {cat} processed shape: {processed_batch.shape}")

# Sanity check: show the first preprocessed training glioma image
first_glioma_image = processed_data["train"]["glioma"][0]
plt.imshow(first_glioma_image, cmap='gray')
plt.title('First Preprocessed Training Glioma Image')
plt.axis('off')
plt.show()


# Exploratory Data Analysis

This section is dedicated to Exploratory Data Analysis (EDA) of the brain tumor classification dataset. It aims to provide insights into the dataset's structure, class distribution, and image properties.


In [None]:
<<<<<<< LOCAL CELL DELETED >>>>>>>


# Model Building

Define the architecture of the classification model (e.g., CNN).

# Model Training

Train the model using the training dataset.

# Model Evaluation

Evaluate the model performance on the validation/test dataset.

# Predictions

Make predictions on new or unseen images.

# Conclusions and Next Steps

Summarize findings and suggest future improvements.