In [6]:
import nibabel as nib
import numpy as np
import cv2
import os
from tqdm import tqdm

# Path to the extracted BraTS 2021 training data
data_root = 'BraTS2021_Training_Data'

# Output folders for the exported 2D slices and their binary masks
output_img_dir = 'output/images'
output_mask_dir = 'output/masks'

# Make sure output directories exist
os.makedirs(output_img_dir, exist_ok=True)
os.makedirs(output_mask_dir, exist_ok=True)


def _find_volume(patient_path, suffix):
    """Return the path of the first file (alphabetically) in `patient_path`
    whose name ends with `suffix`, or None when there is no such file."""
    matches = sorted(f for f in os.listdir(patient_path) if f.endswith(suffix))
    return os.path.join(patient_path, matches[0]) if matches else None


def _write_png(path, array):
    """Write `array` to `path` with OpenCV.

    Raises:
        OSError: if `cv2.imwrite` reports failure. It returns False rather
            than raising, so an unchecked call would fail silently.
    """
    if not cv2.imwrite(path, array):
        raise OSError(f"could not write {path}")


# Every sub-directory of data_root is treated as one patient.
# Sorted so the processing order is the same on every run.
patient_dirs = sorted(
    d for d in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, d))
)

print(f"Found {len(patient_dirs)} patient folders...")

# Loop through each patient. Any error is reported and the loop moves on to
# the next patient, so a single bad folder does not abort the whole export.
for patient in tqdm(patient_dirs):
    try:
        patient_path = os.path.join(data_root, patient)

        # Locate the T1CE scan and the segmentation mask by file-name suffix
        t1ce_path = _find_volume(patient_path, "_t1ce.nii.gz")
        seg_path = _find_volume(patient_path, "_seg.nii.gz")

        if t1ce_path is None or seg_path is None:
            print(f"Skipping {patient}: Missing T1CE or Seg file.")
            continue

        # Load 3D image and mask volumes
        img_3d = nib.load(t1ce_path).get_fdata()
        seg_3d = nib.load(seg_path).get_fdata()

        # Slices are paired by index, so both volumes must have the same shape;
        # otherwise images and masks would be exported misaligned.
        if img_3d.shape != seg_3d.shape:
            raise ValueError(
                f"image shape {img_3d.shape} does not match mask shape {seg_3d.shape}"
            )

        for i in range(img_3d.shape[2]):  # Loop through slices along the third axis
            # Binary tumor map: any positive label counts as tumor
            tumor = seg_3d[:, :, i] > 0

            # Skip slices without tumors
            if not tumor.any():
                continue

            # Min-max normalize this slice to 0-255 and convert to uint8
            img_slice = cv2.normalize(img_3d[:, :, i], None, 0, 255, cv2.NORM_MINMAX)
            img_slice = img_slice.astype(np.uint8)

            # Mask image: 255 where tumor, 0 elsewhere
            mask_slice = tumor.astype(np.uint8) * 255

            # Image and mask share the same file name in their respective folders
            slice_name = f"{patient}_slice_{i}.png"
            _write_png(os.path.join(output_img_dir, slice_name), img_slice)
            _write_png(os.path.join(output_mask_dir, slice_name), mask_slice)

    except Exception as e:
        print(f"❌ Error processing {patient}: {e}")


Found 1252 patient folders...


100%|██████████| 1252/1252 [21:32<00:00,  1.03s/it] 

Skipping output: Missing T1CE or Seg file.





In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

image_dir = 'output/images'
mask_dir = 'output/masks'
label_dir = 'output/labels'

os.makedirs(label_dir, exist_ok=True)

# Only PNG slices are candidates; sorting keeps the processing order reproducible.
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

# Names of images whose mask could not be read (no label file is written for them)
skipped_files = []

for img_file in tqdm(image_files):
    # The mask has the same file name as the image, in mask_dir.
    # The image itself is not needed: the boxes come from the mask alone.
    mask_path = os.path.join(mask_dir, img_file)
    # Swap only the extension, not every '.png' occurrence in the name
    label_path = os.path.join(label_dir, os.path.splitext(img_file)[0] + '.txt')

    # cv2.imread returns None (it does not raise) when the file is missing or unreadable
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        skipped_files.append(img_file)
        continue

    height, width = mask.shape

    # Find the outer contour of each connected object in the mask
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # The label file is created even when no contour is found (it is then empty)
    with open(label_path, 'w') as f:
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)

            # Write only if bbox is valid (non-zero area)
            if w <= 0 or h <= 0:
                continue

            # YOLO format: class x_center y_center width height (all normalized)
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            norm_w = w / width
            norm_h = h / height

            f.write(f"0 {x_center:.6f} {y_center:.6f} {norm_w:.6f} {norm_h:.6f}\n")

if skipped_files:
    print(f"Skipped {len(skipped_files)} image(s) with a missing or unreadable mask, "
          f"e.g. {skipped_files[0]}")


100%|██████████| 81437/81437 [1:13:42<00:00, 18.42it/s]  


In [3]:
import os
import shutil
import random

# Define source and target directories
image_dir = 'output/images'
label_dir = 'output/labels'
base_dir = 'yolo_brain_dataset'

# Output folders
train_img = os.path.join(base_dir, 'images/train')
val_img = os.path.join(base_dir, 'images/val')
train_lbl = os.path.join(base_dir, 'labels/train')
val_lbl = os.path.join(base_dir, 'labels/val')

# Split settings: fixed seed so re-running the cell reproduces the same split
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

# Create target directories
for target_dir in (train_img, val_img, train_lbl, val_lbl):
    os.makedirs(target_dir, exist_ok=True)
    # Files are only ever added here, never removed: leftovers from a run with
    # a different split would stay and could end up in both train and val.
    if os.listdir(target_dir):
        print(f"Warning: {target_dir} is not empty; delete {base_dir} first "
              f"if it was produced with a different split.")


def _copy_split(files, img_dst, lbl_dst):
    """Copy each image in `files` and its label file into the given folders.

    Images whose label file does not exist are skipped, so the two folders
    always contain matching image/label pairs.

    Returns:
        (copied, skipped) counts.
    """
    copied = 0
    skipped = 0
    for file in files:
        label_file = os.path.splitext(file)[0] + '.txt'
        label_src = os.path.join(label_dir, label_file)
        if not os.path.isfile(label_src):
            skipped += 1
            continue
        shutil.copy(os.path.join(image_dir, file), img_dst)
        shutil.copy(label_src, lbl_dst)
        copied += 1
    return copied, skipped


# Collect all images (sorted: os.listdir order is arbitrary)
all_images = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

# Group slices by patient. File names look like "<patient>_slice_<i>.png",
# so everything before the last "_slice_" is the patient identifier.
slices_by_patient = {}
for file in all_images:
    patient_id = file.rsplit('_slice_', 1)[0]
    slices_by_patient.setdefault(patient_id, []).append(file)

# Shuffle and split PATIENTS, not individual slices: neighbouring slices of one
# patient are near-duplicates, and putting them on both sides of the split
# would leak training data into validation.
patients = sorted(slices_by_patient)
random.Random(SPLIT_SEED).shuffle(patients)

# 80-20 split (by patient)
split = int(TRAIN_FRACTION * len(patients))
train_files = [f for p in patients[:split] for f in slices_by_patient[p]]
val_files = [f for p in patients[split:] for f in slices_by_patient[p]]

# Copy train and val files (the originals stay in place)
train_copied, train_skipped = _copy_split(train_files, train_img, train_lbl)
val_copied, val_skipped = _copy_split(val_files, val_img, val_lbl)

if train_skipped or val_skipped:
    print(f"Warning: skipped {train_skipped + val_skipped} image(s) without a label file.")

print(f"Train: {split} patients, {train_copied} slices | "
      f"Val: {len(patients) - split} patients, {val_copied} slices")
print("✅ Dataset split into training and validation sets.")


✅ Dataset split into training and validation sets.


In [4]:
# Write the dataset description file next to the images/ and labels/ folders.
yaml_path = os.path.join(base_dir, 'data.yaml')

# Absolute dataset root, so the relative train/val entries below resolve the
# same way regardless of the directory training is launched from.
# Single quotes are doubled because the value is a single-quoted YAML scalar.
dataset_root = os.path.abspath(base_dir).replace("'", "''")

with open(yaml_path, 'w', encoding='utf-8') as f:
    f.write(f"""path: '{dataset_root}'
train: images/train
val: images/val

nc: 1
names: ['tumor']
""")

print("✅ data.yaml created.")


✅ data.yaml created.
