# Project Overview

This notebook builds an image classifier for brain tumor MRI scans. Each image belongs to one of four classes: glioma, meningioma, pituitary tumor, or no tumor.

In [None]:
import kagglehub

# Fetch the latest version of the dataset; the returned path is the root
# from which the rest of the notebook builds its folder paths.
base_dir = kagglehub.dataset_download(
    "masoudnickparvar/brain-tumor-mri-dataset"
)

print("Path to dataset files:", base_dir)

# Data Loading

Locate the training and testing folders for each tumor category and count the files in each one.

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# The four class labels; each one has its own subfolder in both splits
categories = ["glioma", "meningioma", "notumor", "pituitary"]

# Training split: one folder per category (unpacked in `categories` order)
train_dir = os.path.join(base_dir, "Training")
(
    train_glioma_dir,
    train_meningioma_dir,
    train_notumor_dir,
    train_pituitary_dir,
) = (os.path.join(train_dir, cat) for cat in categories)

# Testing split: same layout as the training split
test_dir = os.path.join(base_dir, "Testing")
(
    test_glioma_dir,
    test_meningioma_dir,
    test_notumor_dir,
    test_pituitary_dir,
) = (os.path.join(test_dir, cat) for cat in categories)

# List the contents of every category folder, training then testing
train_glioma_files, test_glioma_files = map(
    os.listdir, (train_glioma_dir, test_glioma_dir)
)
train_meningioma_files, test_meningioma_files = map(
    os.listdir, (train_meningioma_dir, test_meningioma_dir)
)
train_notumor_files, test_notumor_files = map(
    os.listdir, (train_notumor_dir, test_notumor_dir)
)
train_pituitary_files, test_pituitary_files = map(
    os.listdir, (train_pituitary_dir, test_pituitary_dir)
)

# Report how many entries each folder contains
for _caption, _files in (
    ("Training glioma images:", train_glioma_files),
    ("Testing glioma images:", test_glioma_files),
    ("Training meningioma images:", train_meningioma_files),
    ("Testing meningioma images:", test_meningioma_files),
    ("Training notumor images:", train_notumor_files),
    ("Testing notumor images:", test_notumor_files),
    ("Training pituitary images:", train_pituitary_files),
    ("Testing pituitary images:", test_pituitary_files),
):
    print(_caption, len(_files))


# Data Preprocessing

In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from skimage import exposure

# Size every image is resized to, given as (width, height)
target_size = (128, 128)

# Folder of each category, keyed first by split and then by category name
train_test_data_dirs = {
    split: {cat: os.path.join(base_dir, folder, cat) for cat in categories}
    for split, folder in (("train", "Training"), ("test", "Testing"))
}

# Filled in later with the preprocessed arrays, using the same
# split -> category layout as train_test_data_dirs
processed_data = {split: {} for split in ("train", "test")}

def preprocess_images(image_dir, target_size=target_size):
    """Load every image in a directory as a normalized grayscale array.

    Each file is opened with PIL, converted to grayscale (mode ``'L'``),
    resized to ``target_size`` and scaled by 1/255 so values lie in [0, 1].

    Args:
        image_dir: Directory to read. Every entry is opened as an image, so
            it is expected to contain image files only.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        A float array of shape ``(n_images, height, width)``, ordered by
        sorted filename. An empty directory yields an array of shape
        ``(0, height, width)`` so callers can rely on a consistent rank.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the index
    # of each image reproducible across runs and machines.
    image_files = sorted(os.listdir(image_dir))
    images = []
    for filename in image_files:
        img_path = os.path.join(image_dir, filename)
        with Image.open(img_path) as img:
            gray = img.convert('L')  # grayscale
            resized = gray.resize(target_size)
            img_arr = np.array(resized) / 255.0  # normalize to [0,1]
            # NOTE: histogram equalization (exposure.equalize_hist) is
            # currently disabled; img_arr is stored as-is.
            images.append(img_arr)
    if not images:
        # np.array([]) would have shape (0,); keep the (n, height, width) rank.
        width, height = target_size
        return np.empty((0, height, width))
    return np.array(images)

# Run the preprocessing step on every (split, category) folder and store the
# resulting array under processed_data[split][cat]
for split in ("train", "test"):
    for cat in categories:
        print(f"Processing {split} {cat} images...")
        split_path = train_test_data_dirs[split][cat]
        processed_data[split].update({cat: preprocess_images(split_path)})
        print(f"{split.capitalize()} {cat} processed shape: {processed_data[split][cat].shape}")

# Sanity check: show the first preprocessed training glioma image
plt.imshow(processed_data["train"]["glioma"][0], cmap="gray")
plt.title('First Preprocessed Training Glioma Image')
plt.axis('off')
plt.show()


# Exploratory Data Analysis

This section is dedicated to Exploratory Data Analysis (EDA) of the brain tumor classification dataset. It aims to provide insights into the dataset's structure, class distribution, and image properties.


# Model Building

Define the architecture of the classification model (e.g., CNN).

# Model Training

Train the model using the training dataset.

# Model Evaluation

Evaluate the model performance on the validation/test dataset.

# Predictions

Make predictions on new or unseen images.

# Conclusions and Next Steps

Summarize findings and suggest future improvements.