In [1]:
import sys
sys.path.append('..')

import os
import numpy as np
from sklearn.model_selection import train_test_split
from src.dataset import BrainMRIDataset

# Root directory of the MRI segmentation data, given relative to this notebook.
# It is the path handed to BrainMRIDataset whenever a dataset is constructed.
DATA_PATH = '../data/mri-segmentation/kaggle_3m/'

In [2]:
# Collect every patient ID present in the data. A patient ID is the name of the
# directory that directly contains an image file.
dataset = BrainMRIDataset(data_path=DATA_PATH)
# Sort the unique IDs before splitting: the iteration order of a set of strings
# changes from one interpreter process to the next (string hash randomization),
# so feeding an unsorted list to train_test_split would produce a different
# split after every kernel restart even though random_state is fixed.
patient_ids = sorted({os.path.basename(os.path.dirname(path)) for path in dataset.image_paths})

# Split at patient level (not slice level) so that all slices belonging to one
# patient end up in exactly one of train / val / test.
# Step 1: hold out 15% of the patients for testing.
train_patients, test_patients = train_test_split(patient_ids, test_size=0.15, random_state=42)
# Step 2: carve validation out of the remaining 85%.
# 0.176 ~= 0.15 / 0.85, i.e. roughly 15% of all patients.
train_patients, val_patients = train_test_split(train_patients, test_size=0.176, random_state=42)

# Build one dataset per subset (no transforms attached at this point)
train_dataset = BrainMRIDataset(DATA_PATH, patient_list=train_patients)
val_dataset = BrainMRIDataset(DATA_PATH, patient_list=val_patients)
test_dataset = BrainMRIDataset(DATA_PATH, patient_list=test_patients)

# Report how many slices each subset contains
print(f"Train dataset: {len(train_dataset)} slices")
print(f"Val dataset: {len(val_dataset)} slices")
print(f"Test dataset: {len(test_dataset)} slices")

Found 3929 image-mask pairs
Found 2770 image-mask pairs
Found 565 image-mask pairs
Found 594 image-mask pairs
Train dataset: 2770 slices
Val dataset: 565 slices
Test dataset: 594 slices


In [3]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def _normalize_and_tensorize():
    """Return fresh instances of the two steps that close every pipeline.

    Per-channel normalisation with the mean/std values below (the standard
    ImageNet channel statistics), followed by conversion to a tensor.
    """
    return [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]


# Random augmentations used for training only; each one fires with probability 0.5
_random_augmentations = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Affine(scale=(0.9, 1.1), translate_percent=(-0.1, 0.1), rotate=(-15, 15), p=0.5),
]

# Training pipeline: random augmentations, then normalise + tensor conversion
train_transform = A.Compose(_random_augmentations + _normalize_and_tensorize())

# Validation/Test pipeline: normalise + tensor conversion only, no randomness
val_transform = A.Compose(_normalize_and_tensorize())

print("Transforms defined!")

Transforms defined!


In [4]:
# Build the three subsets again, this time with a transform pipeline attached:
# the augmenting pipeline for training, the plain one for validation and test.
train_dataset = BrainMRIDataset(
    DATA_PATH,
    patient_list=train_patients,
    transform=train_transform,
)
val_dataset = BrainMRIDataset(
    DATA_PATH,
    patient_list=val_patients,
    transform=val_transform,
)
test_dataset = BrainMRIDataset(
    DATA_PATH,
    patient_list=test_patients,
    transform=val_transform,
)

print("Datasets created with transforms!")
# One-line summary of the subset sizes
n_train, n_val, n_test = len(train_dataset), len(val_dataset), len(test_dataset)
print(f"Train: {n_train} | Val: {n_val} | Test: {n_test}")

Found 2770 image-mask pairs
Found 565 image-mask pairs
Found 594 image-mask pairs
Datasets created with transforms!
Train: 2770 | Val: 565 | Test: 594


In [5]:
import torch

# Fetch the first training sample once (the training pipeline is random, so a
# second lookup could return a differently augmented image).
image, mask = train_dataset[0]

# Image is expected as (3, 256, 256), i.e. channels-first PyTorch layout
for label, value in (
    ("Image type", type(image)),
    ("Image shape", image.shape),
    ("Mask shape", mask.shape),
    ("Image dtype", image.dtype),
):
    print(f"{label}: {value}")

# After normalisation the values should no longer sit in the raw pixel range
print(f"Image range: [{image.min():.2f}, {image.max():.2f}]")

Image type: <class 'torch.Tensor'>
Image shape: torch.Size([3, 256, 256])
Mask shape: torch.Size([256, 256])
Image dtype: torch.float32
Image range: [-2.12, 1.56]


In [6]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16

# Settings shared by all loaders; num_workers=0 keeps loading in the main process
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0)

# Only the training loader reshuffles; val/test keep a fixed order
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Number of batches per subset (the last batch may be smaller than BATCH_SIZE)
for split_name, loader in (
    ("Train", train_loader),
    ("Val", val_loader),
    ("Test", test_loader),
):
    print(f"{split_name} batches: {len(loader)}")

Train batches: 174
Val batches: 36
Test batches: 38


In [7]:
# Draw a single batch from the training loader to verify collation
first_batch = next(iter(train_loader))
images, masks = first_batch

# Expected shapes: images (16, 3, 256, 256), masks (16, 256, 256)
for label, tensor in (("Batch images", images), ("Batch masks", masks)):
    print(f"{label} shape: {tensor.shape}")

# Element types of the collated tensors
for label, tensor in (("Images", images), ("Masks", masks)):
    print(f"{label} dtype: {tensor.dtype}")

Batch images shape: torch.Size([16, 3, 256, 256])
Batch masks shape: torch.Size([16, 256, 256])
Images dtype: torch.float32
Masks dtype: torch.float32
