<h1>Data Preparation</h1>

In [4]:
# import utilities libraries
import os 
import cv2 
import numpy as np 
from pathlib import Path
from tqdm import tqdm
import shutil
import glob
import random

# import yolo model
from ultralytics import YOLO
import torch

In [2]:
# Sanity check: build the custom architecture from its YAML definition
model = YOLO('yolo11n-bifpn.yaml')

# Print the layer/parameter summary to verify the custom layers are present.
# As the last expression of the cell, the returned tuple is also displayed.
model.info()

YOLO11n-bifpn summary: 217 layers, 1,467,211 parameters, 1,467,195 gradients, 9.4 GFLOPs


(217, 1467211, 1467195, 9.4240512)

In [3]:
# --- CONFIGURATION ---
# Output folder for the NEW multi-class YOLO dataset (relative to the working directory)
DEST_ROOT = Path("yolo_dataset_multiclass")

# Class mapping keyed by the short code found in the filenames:
# gl = glioma (0), me = meningioma (1), pi = pituitary (2)
# NOTE(review): nothing in the visible notebook reads CLASS_MAP; get_class_id()
# hard-codes the same mapping, so the two must be kept in sync by hand.
CLASS_MAP = {
    'gl': 0, 
    'me': 1, 
    'pi': 2
}

def find_source_root():
    """Locate the dataset root, i.e. the first candidate folder containing 'train'.

    Candidates are checked in priority order, all relative to the current
    working directory:
      1. ./brisc2025/segmentation_task
      2. ./segmentation_task
      3. . (the working directory itself)

    Returns:
        Path: the first candidate that has a 'train' entry.

    Raises:
        FileNotFoundError: if none of the candidates contains 'train'.
    """
    base = Path.cwd()
    search_order = (
        base / "brisc2025" / "segmentation_task",
        base / "segmentation_task",
        base,
    )
    # next() with a default stops at the first hit and keeps the priority order
    match = next((folder for folder in search_order if (folder / "train").exists()), None)
    if match is None:
        raise FileNotFoundError("Could not find 'segmentation_task' folder!")
    return match

def get_class_id(filename):
    """Map a filename to its tumor class ID using case-insensitive substring markers.

    Args:
        filename: file name (string) to inspect.

    Returns:
        0 for glioma ('_gl_' or 'glioma'), 1 for meningioma ('_me_' or
        'meningioma'), 2 for pituitary ('_pi_' or 'pituitary'), or None when
        no marker is present. If several markers occur, the lowest class ID wins.
    """
    lowered = filename.lower()

    # Ordered (class_id, markers) pairs; order defines precedence
    marker_table = (
        (0, ('_gl_', 'glioma')),
        (1, ('_me_', 'meningioma')),
        (2, ('_pi_', 'pituitary')),
    )
    for class_id, markers in marker_table:
        if any(marker in lowered for marker in markers):
            return class_id

    # Unknown class
    return None

def convert_mask_to_yolo_box(mask_path, class_id):
    """Turn a segmentation mask image into YOLO bounding-box label lines.

    The mask is read as grayscale, binarised at 127, and every external
    contour becomes one box. Boxes narrower or shorter than 3 pixels are
    discarded.

    Args:
        mask_path: path to the mask image (anything accepted by str()).
        class_id: class index written at the start of every label line.

    Returns:
        None if the mask cannot be read; otherwise a list (possibly empty) of
        strings "<class_id> <x_center> <y_center> <width> <height>" with the
        four box values normalised by the mask size and printed to 6 decimals.
    """
    gray = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None

    img_h, img_w = gray.shape[:2]

    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    blobs, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # One (x, y, w, h) pixel rectangle per contour, in contour order
    rects = (cv2.boundingRect(blob) for blob in blobs)

    return [
        f"{class_id} {(bx + bw / 2) / img_w:.6f} {(by + bh / 2) / img_h:.6f} {bw / img_w:.6f} {bh / img_h:.6f}"
        for bx, by, bw, bh in rects
        if bw >= 3 and bh >= 3  # drop tiny boxes (< 3 px on either side)
    ]

def _write_multiclass_yaml(config_path):
    """Write the Ultralytics dataset config that points at DEST_ROOT."""
    # Built line by line so the file has no stray leading indentation.
    # NOTE(review): 'val' and 'test' both point at images/test, so validation
    # during training sees the test split; consider a separate validation split.
    yaml_lines = [
        f"path: {DEST_ROOT.resolve().as_posix()}",
        "train: images/train",
        "val: images/test",
        "test: images/test",
        "",
        "# Classes",
        "nc: 3",
        "names: ['glioma', 'meningioma', 'pituitary']",
        "",
    ]
    with open(config_path, "w") as f:
        f.write("\n".join(yaml_lines))


def prepare_multiclass_dataset():
    """Build the multi-class YOLO detection dataset and its YAML config.

    For each of the 'train' and 'test' splits under the source root, every
    image whose class can be derived from its filename and whose mask yields
    at least one bounding box is copied to DEST_ROOT/images/<split>, and a
    matching label file is written to DEST_ROOT/labels/<split>. Finally
    'data_multiclass.yaml' is written to the current working directory.

    If DEST_ROOT already exists the dataset is left untouched; only a missing
    'data_multiclass.yaml' is re-created. If the source data cannot be found
    the error is printed and nothing is created. Returns None in all cases.
    """
    config_path = "data_multiclass.yaml"

    if DEST_ROOT.exists():
        print(f"Dataset folder '{DEST_ROOT}' already exists. Delete it if you want to re-create it.")
        # Without the config the training step cannot run, so restore it if absent.
        if not Path(config_path).exists():
            _write_multiclass_yaml(config_path)
            print(f"Config was missing and has been re-created: {config_path}")
        return

    try:
        source_root = find_source_root()
    except FileNotFoundError as e:
        # Only the "source not found" case is expected here; other errors propagate.
        print(e)
        return
    print(f"Found source data at: {source_root}")

    valid_exts = {'.jpg', '.jpeg', '.png', '.bmp'}

    for split in ["train", "test"]:
        src_path = source_root / split
        # Accept both plural and singular folder names
        img_src = src_path / "images" if (src_path / "images").exists() else src_path / "image"
        mask_src = src_path / "masks" if (src_path / "masks").exists() else src_path / "mask"

        img_dst = DEST_ROOT / "images" / split
        lbl_dst = DEST_ROOT / "labels" / split

        img_dst.mkdir(parents=True, exist_ok=True)
        lbl_dst.mkdir(parents=True, exist_ok=True)

        images = [p for p in img_src.glob("*.*") if p.suffix.lower() in valid_exts]

        skipped_count = 0    # class could not be derived from the filename
        unlabeled_count = 0  # no mask file, unreadable mask, or no box found
        for img_path in tqdm(images, desc=f"Processing {split}"):
            # 1. Determine Class
            class_id = get_class_id(img_path.name)
            if class_id is None:
                skipped_count += 1
                continue  # Skip images where we can't determine the tumor type

            # 2. Find Mask: same name first, then .png / .jpg variants
            possible_masks = [
                mask_src / img_path.name,
                mask_src / img_path.with_suffix('.png').name,
                mask_src / img_path.with_suffix('.jpg').name
            ]
            mask_p = next((m for m in possible_masks if m.exists()), None)

            # Only the first existing mask is considered
            yolo_labels = convert_mask_to_yolo_box(mask_p, class_id) if mask_p is not None else None
            if not yolo_labels:
                unlabeled_count += 1
                continue

            # Copy Image ONLY if we have a valid label
            shutil.copy(img_path, img_dst / img_path.name)
            with open(lbl_dst / img_path.with_suffix('.txt').name, 'w') as f:
                f.write('\n'.join(yolo_labels))

        if skipped_count > 0:
            print(f"  Warning: Skipped {skipped_count} images in {split} (unknown class name in file).")
        if unlabeled_count > 0:
            print(f"  Note: {unlabeled_count} images in {split} had no usable mask/box and were not copied.")

    # Create YAML config for Multiclass
    _write_multiclass_yaml(config_path)
    print("\n✅ Multiclass Dataset Prepared!")
    print(f"Config saved to: {config_path}")

# Run the dataset preparation when this cell/script is executed directly
if __name__ == "__main__":
    prepare_multiclass_dataset()

Found source data at: d:\Data Adji\Binus\Semester 5\DeepLearning\Project\brisc2025\segmentation_task


Processing train: 100%|██████████| 3933/3933 [00:15<00:00, 261.32it/s]
Processing test: 100%|██████████| 860/860 [00:03<00:00, 277.61it/s]


✅ Multiclass Dataset Prepared!
Config saved to: data_multiclass.yaml





In [4]:
def train_custom_model():
    """Train the custom YOLO11n-BiFPN detector on the multi-class dataset.

    Builds the model from 'yolo11n-bifpn.yaml', tries to transfer matching
    weights from 'yolo11n.pt', then trains on 'data_multiclass.yaml' with the
    settings below (project 'BrainTumor_BiFPN', run name 'run1').

    Uses GPU 0 when CUDA is available and falls back to the CPU otherwise.
    Returns None.
    """
    # Pick the device up front so a machine without CUDA does not crash here
    if torch.cuda.is_available():
        device = 0
        print(f"Using Device: {torch.cuda.get_device_name(0)}")
    else:
        device = 'cpu'
        print("Using Device: CPU (CUDA not available)")

    # 1. Initialize your Custom Architecture
    # This reads the custom YAML (with the correct channel sizes)
    model = YOLO('yolo11n-bifpn.yaml')

    # 2. Transfer Learning
    # We load standard YOLO11n weights.
    # The backbone will match. The new BiFPN layers will start with random weights.
    # A partial transfer does not raise; an exception here means the weights
    # could not be loaded at all, so report the real cause.
    print("Loading pre-trained weights...")
    try:
        model.load('yolo11n.pt')
    except Exception as e:
        print(f"Warning: could not load 'yolo11n.pt' ({e}). Training will start from random weights.")

    # 3. Start Training
    # VRAM NOTE: batch=4 because the target GPU has 4GB of VRAM. It is very tight for BiFPN+CBAM.
    model.train(
        data='data_multiclass.yaml',
        epochs=50,
        imgsz=640,
        batch=4,
        device=device,
        workers=2,
        project='BrainTumor_BiFPN',
        name='run1',
        optimizer='AdamW',
        lr0=0.001,
        cos_lr=True,
        amp=True,
        exist_ok=True  # re-use (overwrite) the same run folder on re-runs
    )

    print("Training Complete!")

if __name__ == '__main__':
    # The __main__ guard keeps training from being re-triggered if worker
    # processes re-import this code (NOTE(review): relevant on Windows — confirm).
    # freeze_support() is presumably only needed when the script is frozen into
    # a Windows executable; it is kept here as a harmless safeguard.
    torch.multiprocessing.freeze_support()
    train_custom_model()

Using Device: NVIDIA GeForce RTX 3050 Laptop GPU
Loading pre-trained weights...
Transferred 196/586 items from pretrained weights
New https://pypi.org/project/ultralytics/8.3.235 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.233  Python-3.11.14 torch-2.5.1 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=data_multiclass.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_rati

In [15]:
# 1. Load your best model
model = YOLO('BrainTumor_BiFPN/run1/weights/best.pt')

# 2. Collect the test images (same extensions the dataset preparation accepts)
image_exts = {'.jpg', '.jpeg', '.png', '.bmp'}
test_images = [
    p for p in glob.glob('yolo_dataset_multiclass/images/test/*')
    if Path(p).suffix.lower() in image_exts
]

if not test_images:
    # No exit() here: in a Jupyter kernel it shuts the kernel down instead of
    # just ending the cell.
    print("No images found in test folder!")
else:
    # Pick up to 3 distinct random images to test
    sample_paths = random.sample(test_images, k=min(3, len(test_images)))

    for i, img_path in enumerate(sample_paths):
        # 3. Predict
        # conf=0.4 means "Only show me if you are 40% sure";
        # max_det=1 keeps a single detection per image.
        results = model.predict(img_path, iou=0.4, conf=0.4, max_det=1, save=True)

        # 4. Show Result (one OpenCV window per image)
        for result in results:
            res_plotted = result.plot()
            cv2.imshow(f"Result {i+1}", res_plotted)

    print("Press any key to close the image windows...")
    cv2.waitKey(0)
    cv2.destroyAllWindows()


image 1/1 d:\Data Adji\Binus\Semester 5\DeepLearning\Project\yolo_dataset_multiclass\images\test\brisc2025_test_00377_me_ax_t1.jpg: 640x640 1 meningioma, 112.7ms
Speed: 7.0ms preprocess, 112.7ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mD:\Data Adji\Binus\Semester 5\DeepLearning\Project\runs\detect\predict6[0m

image 1/1 d:\Data Adji\Binus\Semester 5\DeepLearning\Project\yolo_dataset_multiclass\images\test\brisc2025_test_00114_gl_co_t1.jpg: 640x640 1 glioma, 51.0ms
Speed: 6.0ms preprocess, 51.0ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mD:\Data Adji\Binus\Semester 5\DeepLearning\Project\runs\detect\predict6[0m

image 1/1 d:\Data Adji\Binus\Semester 5\DeepLearning\Project\yolo_dataset_multiclass\images\test\brisc2025_test_00238_gl_sa_t1.jpg: 640x640 1 glioma, 50.7ms
Speed: 5.5ms preprocess, 50.7ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mD:\Data Adji\B

In [None]:
# Load your trained weights
model = YOLO('BrainTumor_BiFPN/run1/weights/best.pt')

# Export to ONNX (opset=12 is most compatible for web)
# 'dynamic=False' keeps a fixed input shape, which is simpler for basic inference
onnx_path = model.export(format='onnx', opset=12, dynamic=False)

# Report the path returned by the exporter instead of a hard-coded file name
print(f"Exported ONNX model to: {onnx_path}")