Step 1: Mount Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Step 2: Install Libraries

In [2]:
# Make sure you have a GPU enabled in Colab: Runtime -> Change runtime type -> GPU
!pip install kagglehub transformers datasets evaluate albumentations torch accelerate scikit-learn -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h

Step 3: Download the Dataset from Kaggle

In [4]:
# Cell 3
import os
import kagglehub
from google.colab import userdata

# --- 1. Authenticate with Kaggle ---
os.environ["KAGGLE_USERNAME"] = userdata.get('KAGGLE_USERNAME')
os.environ["KAGGLE_KEY"] = userdata.get('KAGGLE_KEY')

# --- 2. Download the Brain Tumor MRI CLASSIFICATION Dataset ---
print("Downloading the Brain Tumor MRI (Classification) dataset...")
# THIS IS THE MODIFIED LINE
dataset_path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")

print(f"\nDataset downloaded and available at: {dataset_path}")

# --- 3. Verify the contents ---
print("\nContents of the 'Training' directory:")
!ls -l {dataset_path}/Training

Downloading the Brain Tumor MRI (Classification) dataset...

Dataset downloaded and available at: /kaggle/input/brain-tumor-mri-dataset

Contents of the 'Training' directory:
total 0
drwxr-sr-x 2 1000 1000 0 Apr  1 05:32 glioma
drwxr-sr-x 2 1000 1000 0 Apr  1 05:32 meningioma
drwxr-sr-x 2 1000 1000 0 Apr  1 05:32 notumor
drwxr-sr-x 2 1000 1000 0 Apr  1 05:32 pituitary


Step 4: Prepare the Data for Classification
- This step organizes the dataset for the classification task. It scans the Training and Testing folders to create a complete list of all images. It uses the name of the subfolder (e.g., 'glioma', 'notumor') as the label for each image. Finally, it creates PyTorch Datasets that will feed the images and their corresponding class labels to the model for training.


In [5]:

import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import SegformerImageProcessor
import os
import glob

# --- 1. Define the main data directories ---
train_dir = os.path.join(dataset_path, "Training")
test_dir = os.path.join(dataset_path, "Testing")

# --- 2. Find all image paths and get labels from folder names ---
image_paths = list(glob.glob(os.path.join(train_dir, "*", "*.jpg")))
image_paths += list(glob.glob(os.path.join(test_dir, "*", "*.jpg")))
labels = [os.path.basename(os.path.dirname(p)) for p in image_paths]

print(f"Found {len(image_paths)} total images in the dataset.")

# --- 3. Create a Train/Validation Split ---
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.20, random_state=42, stratify=labels
)
print(f"Split into {len(train_paths)} training examples and {len(val_paths)} validation examples.")

# --- 4. Define the Image Processor ---
# This resizes and normalizes images for the SegFormer model
model_checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"
image_processor = SegformerImageProcessor.from_pretrained(model_checkpoint)

# Get unique labels and create mappings
unique_labels = sorted(list(set(labels)))
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}
print(f"Class Mappings: {label2id}")

# --- 5. Define the Custom PyTorch Dataset for CLASSIFICATION ---
class BrainTumorClassificationDataset(Dataset):
    def __init__(self, image_paths, labels, image_processor):
        self.image_paths = image_paths
        self.labels = labels
        self.image_processor = image_processor

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        label_id = label2id[self.labels[idx]]

        # Process the image and squeeze to remove the batch dimension
        pixel_values = self.image_processor(image, return_tensors="pt").pixel_values.squeeze()

        return {"pixel_values": pixel_values, "labels": torch.tensor(label_id)}

# --- 6. Create Dataset instances ---
train_dataset = BrainTumorClassificationDataset(train_paths, train_labels, image_processor)
val_dataset = BrainTumorClassificationDataset(val_paths, val_labels, image_processor)

print("\nSuccessfully created classification datasets.")

Found 7023 total images in the dataset.
Split into 5618 training examples and 1405 validation examples.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

Class Mappings: {'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}

Successfully created classification datasets.


  image_processor = cls(**image_processor_dict)


Step 5: Load SegFormer for IMAGE CLASSIFICATION

In [6]:
from transformers import SegformerForImageClassification

# --- 1. Define Labels and Model Checkpoint ---
# The labels were defined in the previous cell
num_labels = len(unique_labels)
model_checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"

# --- 2. Load the Pre-trained Model for IMAGE CLASSIFICATION ---
model = SegformerForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True, # This adds a new classification head
)

print("SegFormer model loaded and configured for IMAGE CLASSIFICATION.")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/15.0M [00:00<?, ?B/s]

Some weights of SegformerForImageClassification were not initialized from the model checkpoint at nvidia/segformer-b0-finetuned-ade-512-512 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SegFormer model loaded and configured for IMAGE CLASSIFICATION.


Step 6: Train the Classification Model

In [8]:

from transformers import TrainingArguments, Trainer
import evaluate
import numpy as np
import os

# ==============================================================================
# USER INPUT FOR SAVE LOCATION (This part is correct and remains)
# ==============================================================================
print("--- Step 1: Configure Save Location in Google Drive ---")

default_folder_name = "segformer-brain-tumor-classification"
user_folder_name = input(f"Enter the folder name to save the model in [default: {default_folder_name}]: ")

if not user_folder_name:
    user_folder_name = default_folder_name
    print(f"No name entered. Using default: '{default_folder_name}'")

base_drive_path = "/content/drive/MyDrive"
DRIVE_SAVE_PATH = os.path.join(base_drive_path, user_folder_name)

if not os.path.exists(DRIVE_SAVE_PATH):
    print(f"Directory does not exist. Creating it now at '{DRIVE_SAVE_PATH}'...")
    os.makedirs(DRIVE_SAVE_PATH)
    print("    Directory created successfully.")
else:
    print(f"⚠️  WARNING: Directory already exists at '{DRIVE_SAVE_PATH}'.")

# ==============================================================================
# TRAINING CODE (With fixes for the older library version)
# ==============================================================================
print("\n--- Step 2: Preparing for Training ---")
print(f"✅ Model will be saved to: {DRIVE_SAVE_PATH}")

os.environ["WANDB_DISABLED"] = "true"

accuracy_metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

# --- Define Training Arguments (CORRECTED for older library version) ---
training_args = TrainingArguments(
    output_dir=DRIVE_SAVE_PATH,
    learning_rate=3e-5,
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_total_limit=3,
    logging_steps=50,

    # THE FIX: We remove the 'evaluation_strategy' and 'save_strategy' arguments
    # as they are not recognized by this library version. The trainer will
    # default to evaluating and saving at the end of each epoch.

    # THE SECOND FIX: Set this to False to prevent a conflict. The model from
    # the final epoch will be saved.
    load_best_model_at_end=False,

    # 'metric_for_best_model' is removed as it's not needed now.

    push_to_hub=False,
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    tokenizer=image_processor,
)

# Start Training!
print("\n--- Step 3: Starting Model Training ---")
trainer.train()

# Save the Final Model
trainer.save_model(DRIVE_SAVE_PATH)
print(f"\n✅ Training complete! The final model has been saved to your Google Drive at: {DRIVE_SAVE_PATH}")

--- Step 1: Configure Save Location in Google Drive ---
Enter the folder name to save the model in [default: segformer-brain-tumor-classification]: Segformer-Brain-Tumor Model

--- Step 2: Preparing for Training ---
✅ Model will be saved to: /content/drive/MyDrive/Segformer-Brain-Tumor Model


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(



--- Step 3: Starting Model Training ---


Step,Training Loss
50,0.8678
100,0.49
150,0.3953
200,0.3448
250,0.3133
300,0.2922
350,0.2694
400,0.2087
450,0.31
500,0.2285



✅ Training complete! The final model has been saved to your Google Drive at: /content/drive/MyDrive/Segformer-Brain-Tumor Model
