In [1]:
# Robust conversion: use Excel 'split' column exactly, clear output dirs first,
# save .nii only, avoid duplicates, and verify counts at the end.

import os, json, shutil
import nibabel as nib
import pandas as pd

# nibabel canonical orientation compatibility.
# `as_closest_canonical` is exported by the top-level `nibabel` package (it is
# implemented in `nibabel.funcs`); it is not part of `nibabel.processing`.
# Importing it from the wrong module fails, and a broad `except` would then
# silently install the no-op fallback so that nothing is ever reoriented.
try:
    from nibabel import as_closest_canonical
except ImportError:
    try:
        from nibabel.funcs import as_closest_canonical
    except ImportError:
        print("Warning: nibabel.as_closest_canonical is unavailable; images will NOT be reoriented.")

        def as_closest_canonical(img):
            """Fallback used only when nibabel offers no reorientation helper: return `img` unchanged."""
            return img

# === USER CONFIG ===
# Input locations (Kaggle datasets) and the nnU-Net task folder that gets built.
DATA_ROOT = '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed'
PARTICIPANTS_XLSX = '/kaggle/input/participants/participants.xlsx'
OUT_ROOT = '/kaggle/working/nnUNet_raw_data/Task001_BonnFCD'
# Filename suffixes that identify a T1 volume.
T1_PATTERNS = ['_T1w.nii', '_T1w.nii.gz', '_T1.nii', '_T1.nii.gz', 'T1w.nii', 'T1w.nii.gz']
# Substrings that identify a lesion mask; adapt if masks have different names.
MASK_PATTERNS = ['roi', 'lesion']

# === OUTPUT FOLDERS (will be cleared) ===
imagesTr_dir, imagesTs_dir, labelsTr_dir = (
    os.path.join(OUT_ROOT, folder) for folder in ('imagesTr', 'imagesTs', 'labelsTr')
)

# Start from an empty task folder so files from a previous run cannot linger.
if os.path.exists(OUT_ROOT):
    shutil.rmtree(OUT_ROOT)
for _out_dir in (imagesTr_dir, imagesTs_dir, labelsTr_dir):
    os.makedirs(_out_dir, exist_ok=True)

# === Helpers ===
def is_t1(filename):
    """Return True when `filename` ends with one of the suffixes in T1_PATTERNS (case-sensitive)."""
    # str.endswith accepts a tuple of candidate suffixes and checks them all.
    return filename.endswith(tuple(T1_PATTERNS))

def is_mask(filename, patterns=None):
    """Return True when `filename` looks like a lesion-mask NIfTI file.

    A file qualifies when its name, compared case-insensitively, contains at
    least one of the mask patterns and ends with `.nii` or `.nii.gz`.
    Compressed masks are accepted so that mask discovery agrees with the T1
    patterns, which also list `.nii.gz` suffixes.

    Parameters:
        filename: bare file name (not a full path).
        patterns: iterable of substrings marking a mask; defaults to the
            module-level MASK_PATTERNS.

    Returns:
        bool
    """
    if patterns is None:
        patterns = MASK_PATTERNS
    name = filename.lower()
    if not name.endswith(('.nii', '.nii.gz')):
        return False
    # Lower-case the patterns too, so a configured 'ROI' still matches.
    return any(p.lower() in name for p in patterns)

def normalize_id(s):
    """Normalize a participant id to a plain integer-like string.

    Examples: 'sub-00001' -> '1', '0001' -> '1', 1 -> '1', '1.0' -> '1'.

    Only the first run of digits is used. Concatenating every digit in the
    string would turn an id that was stringified from a float ('1.0') into
    '10' and silently match the wrong participant.

    Parameters:
        s: id as str / int / float, or None.

    Returns:
        None when `s` is None; otherwise the first digit run with leading
        zeros removed, or '0' when that run is all zeros or there are no
        digits at all.
    """
    if s is None:
        return None
    import re  # local import keeps this helper self-contained

    text = str(s).strip()
    # Drop the BIDS-style 'sub-' prefix if present.
    if text.lower().startswith('sub-'):
        text = text[4:]
    first_run = re.search(r'[0-9]+', text)
    if first_run is None:
        return '0'
    # Strip zero padding but keep a single '0' for an all-zero id.
    return first_run.group().lstrip('0') or '0'

def scan_subject(sub_path):
    """Locate the T1 image and optional mask inside `<sub_path>/anat`.

    Returns None when there is no `anat` directory or no T1 file. Otherwise
    returns a dict with keys 'case' (folder name), 'case_norm' (normalized id),
    't1' (path) and 'mask' (path or None). When several files match, the last
    one in sorted order is used.
    """
    anat_dir = os.path.join(sub_path, 'anat')
    if not os.path.isdir(anat_dir):
        return None

    names = sorted(os.listdir(anat_dir))
    t1_names = [name for name in names if is_t1(name)]
    if not t1_names:
        return None
    mask_names = [name for name in names if is_mask(name)]

    subject = os.path.basename(sub_path)  # e.g., 'sub-00001'
    return {
        'case': subject,
        'case_norm': normalize_id(subject),
        't1': os.path.join(anat_dir, t1_names[-1]),
        'mask': os.path.join(anat_dir, mask_names[-1]) if mask_names else None,
    }

# === Read Excel and build split map ===
df = pd.read_excel(PARTICIPANTS_XLSX)
if 'participant_id' not in df.columns or 'split' not in df.columns:
    raise ValueError("Excel must contain 'participant_id' and 'split' columns.")

# A row without a participant id cannot be matched to a subject folder. If it
# were kept it would be stringified to 'nan' and registered under the key '0'.
df = df.dropna(subset=['participant_id']).copy()


def _participant_text(value):
    """Render an Excel participant id as text, writing integral floats (1.0) as '1'."""
    # pandas reads a numeric column containing blanks as float, so ids can
    # arrive as 1.0; stringifying that directly would give '1.0'.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


df['participant_id'] = df['participant_id'].map(_participant_text)
df['split'] = df['split'].astype(str).str.strip().str.lower()

# normalized participant id -> split label
split_map = {}
conflicting_ids = {}
for pid, split in zip(df['participant_id'], df['split']):
    key = normalize_id(pid)
    earlier = split_map.get(key)
    if earlier is not None and earlier != split:
        conflicting_ids.setdefault(key, {earlier}).add(split)
    split_map[key] = split

# The same participant listed under two different splits would otherwise be
# resolved silently by "last row wins", which can leak subjects across splits.
if conflicting_ids:
    details = ", ".join(f"{k}: {sorted(v)}" for k, v in sorted(conflicting_ids.items()))
    raise ValueError(f"Excel assigns conflicting splits to the same participant id -> {details}")

# === Scan dataset to find subjects ===
subs = [os.path.join(DATA_ROOT, name) for name in sorted(os.listdir(DATA_ROOT)) if name.startswith('sub-')]
# scan_subject returns None for folders without an anat dir or a T1 image.
cases_all = [found for found in map(scan_subject, subs) if found is not None]

print(f"Found {len(cases_all)} subjects (with T1). Example entries:\n", cases_all[:6])

# === Filter to only those with masks ===
cases_with_masks = [found for found in cases_all if found['mask'] is not None]
print(f"Filtered to {len(cases_with_masks)} subjects that have lesion masks.")

# === Assign to train/test using Excel split_map ===
train_cases, test_cases, unmatched = [], [], []
for c in cases_with_masks:
    norm_id = c['case_norm']
    if norm_id not in split_map:
        unmatched.append((c, "not present in Excel split"))
        continue
    s = split_map[norm_id]
    if s == 'train':
        train_cases.append(c)
    elif s == 'test':
        test_cases.append(c)
    else:
        # any label other than train/test is reported as unmatched
        unmatched.append((c, f"invalid split value: {s}"))

print(f"Assigned from Excel -> Train: {len(train_cases)} | Test: {len(test_cases)} | Unmatched: {len(unmatched)}")

# Report unmatched cases (if any); only the first ten are listed
if unmatched:
    print("Examples of unmatched (will be skipped):")
    for c, reason in unmatched[:10]:
        print(f"  {c['case']}  normalized={c['case_norm']}  reason={reason}")

# === Sanity: ensure no overlap (shouldn't be) ===
train_ids = {c['case'] for c in train_cases}
test_ids = {c['case'] for c in test_cases}
overlap = train_ids & test_ids
if overlap:
    raise RuntimeError(f"Overlap between train and test assignments: {overlap}")

# === Utility: save nib image as uncompressed .nii only ===
def save_as_nifti_uncompressed(imgobj, out_path_nii):
    """Save `imgobj` to `out_path_nii`, first deleting any existing file at
    that path or at the same path with a '.gz' suffix appended."""
    for stale in (out_path_nii + '.gz', out_path_nii):
        if os.path.exists(stale):
            os.remove(stale)
    nib.save(imgobj, out_path_nii)

# === Write files: imagesTr, imagesTs, labelsTr (labels only for train) ===
def write_case(case_dict, in_train):
    """Write one subject into the nnU-Net task folder.

    The T1 image is saved as `<case>_0000.nii` in imagesTr (train) or imagesTs
    (test). For train cases the mask is also saved as `<case>.nii` in labelsTr.
    Both volumes are passed through `as_closest_canonical` before saving.

    Parameters:
        case_dict: dict with keys 'case', 't1' and 'mask' (as built by scan_subject).
        in_train: True for a training case, False for a test case.
    """
    case = case_dict['case']
    t1_src = case_dict['t1']
    mask_src = case_dict['mask']

    target_dir = imagesTr_dir if in_train else imagesTs_dir
    image = as_closest_canonical(nib.load(t1_src))
    save_as_nifti_uncompressed(image, os.path.join(target_dir, f"{case}_0000.nii"))

    # Test cases get no label file.
    if not in_train:
        return

    if mask_src is None:
        # Shouldn't happen because callers filter on masks, but guard anyway
        print(f"Warning: trying to write label for {case} but no mask file found.")
        return

    label = as_closest_canonical(nib.load(mask_src))
    save_as_nifti_uncompressed(label, os.path.join(labelsTr_dir, f"{case}.nii"))

# write train files (image + label)
for c in train_cases:
    write_case(c, in_train=True)
# write test images (no labels)
for c in test_cases:
    write_case(c, in_train=False)

# === Final cleanup safety: remove any .nii.gz leftovers (should be none) ===
for root, dirs, files in os.walk(OUT_ROOT):
    for fn in files:
        if fn.endswith('.nii.gz'):
            leftover = os.path.join(root, fn)
            try:
                os.remove(leftover)
            except OSError as err:
                # Still best-effort, but report the failure: a leftover
                # compressed file would be invisible if silently ignored.
                print(f"Warning: could not remove leftover {leftover}: {err}")

# === Create dataset.json ===
# Relative paths of every training image/label pair and every test image.
training_entries = [
    {"image": f"imagesTr/{c['case']}_0000.nii", "label": f"labelsTr/{c['case']}.nii"}
    for c in train_cases
]
test_entries = [f"imagesTs/{c['case']}_0000.nii" for c in test_cases]

dataset_json = {
    "name": "BonnFCD_T1_only",
    "description": "Bonn FCD-II converted for nnU-Net (T1 only, Excel-defined split).",
    "tensorImageSize": "3D",
    "reference": "Bonn FCD dataset",
    "licence": "see data provider",
    "release": "1.0",
    "modality": {"0": "T1"},
    "labels": {"0": "background", "1": "lesion"},
    "numTraining": len(training_entries),
    "numTest": len(test_entries),
    "training": training_entries,
    "test": test_entries,
}

with open(os.path.join(OUT_ROOT, 'dataset.json'), 'w') as json_file:
    json_file.write(json.dumps(dataset_json, indent=2))

# === Verification summary ===
print("\n=== Verification summary ===")
excel_splits = list(split_map.values())
print("Excel train count (from split_map):", excel_splits.count('train'))
print("Excel test count (from split_map):", excel_splits.count('test'))
print("Found subjects with masks:", len(cases_with_masks))
print("Assigned train_cases:", len(train_cases))
print("Assigned test_cases:", len(test_cases))
# Number of files actually present in each output folder.
for caption, folder in (
    ("Files in imagesTr:", imagesTr_dir),
    ("Files in imagesTs:", imagesTs_dir),
    ("Files in labelsTr:", labelsTr_dir),
):
    print(caption, len(os.listdir(folder)))
print("dataset.json saved at:", os.path.join(OUT_ROOT, 'dataset.json'))

# show first 10 train/test filenames for quick spot-check
for caption, folder in (
    ("\nFirst 10 imagesTr:", imagesTr_dir),
    ("First 10 imagesTs:", imagesTs_dir),
    ("First 10 labelsTr:", labelsTr_dir),
):
    print(caption, sorted(os.listdir(folder))[:10])

Found 170 subjects (with T1). Example entries:
 [{'case': 'sub-00001', 'case_norm': '1', 't1': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00001/anat/sub-00001_acq-iso08_T1w.nii', 'mask': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00001/anat/sub-00001_acq-T2sel_FLAIR_roi.nii'}, {'case': 'sub-00002', 'case_norm': '2', 't1': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00002/anat/sub-00002_acq-iso08_T1w.nii', 'mask': None}, {'case': 'sub-00003', 'case_norm': '3', 't1': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00003/anat/sub-00003_acq-iso08_T1w.nii', 'mask': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00003/anat/sub-00003_acq-T2sel_FLAIR_roi.nii'}, {'case': 'sub-00004', 'case_norm': '4', 't1': '/kaggle/input/organized-bonn-fcd-ii-epilepsy-mri-dataset/bonn_fcd_fixed/sub-00004/anat/sub-00004_acq-iso08_T1w.nii', 'mask': '/kagg

In [2]:
!pip install nnunetv2

Collecting nnunetv2
  Downloading nnunetv2-2.6.2.tar.gz (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting acvl-utils<0.3,>=0.2.3 (from nnunetv2)
  Downloading acvl_utils-0.2.5.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dynamic-network-architectures<0.5,>=0.4.1 (from nnunetv2)
  Downloading dynamic_network_architectures-0.4.2.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting batchgenerators>=0.25.1 (from nnunetv2)
  Downloading batchgenerators-0.25.1.tar.gz (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Col

In [3]:
import os

# Folder locations that nnU-Net reads from the environment.
NNUNET_ENV = {
    "nnUNet_raw": "/kaggle/working/nnUNet_raw_data",
    "nnUNet_preprocessed": "/kaggle/working/nnUNet_preprocessed",
    "nnUNet_results": "/kaggle/working/nnUNet_results",
}
os.environ.update(NNUNET_ENV)

# confirm they're set correctly
for key in NNUNET_ENV:
    print(f"{key}: {os.environ.get(key)}")

nnUNet_raw: /kaggle/working/nnUNet_raw_data
nnUNet_preprocessed: /kaggle/working/nnUNet_preprocessed
nnUNet_results: /kaggle/working/nnUNet_results


In [4]:
# Rename the task folder to the `DatasetXXX_Name` form used by nnU-Net v2.
# Done in Python rather than a bare `mv` so that it does not depend on IPython
# automagic and can be re-run: with `mv`, an existing Dataset001_BonnFCD would
# cause the new Task001_BonnFCD folder to be moved *inside* the stale dataset.
import os
import shutil

_task_dir = "/kaggle/working/nnUNet_raw_data/Task001_BonnFCD"
_dataset_dir = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD"

if os.path.isdir(_task_dir):
    if os.path.isdir(_dataset_dir):
        shutil.rmtree(_dataset_dir)  # drop the dataset left by a previous run
    shutil.move(_task_dir, _dataset_dir)
else:
    print(f"Nothing to rename: {_task_dir} does not exist.")

In [5]:
import os

# Spot-check the renamed dataset folder: recursive listing, first 20 lines only.
!ls -R /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD | head -n 20

/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD:
dataset.json
imagesTr
imagesTs
labelsTr

/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/imagesTr:
sub-00001_0000.nii
sub-00003_0000.nii
sub-00010_0000.nii
sub-00014_0000.nii
sub-00015_0000.nii
sub-00016_0000.nii
sub-00018_0000.nii
sub-00024_0000.nii
sub-00027_0000.nii
sub-00033_0000.nii
sub-00038_0000.nii
sub-00040_0000.nii
sub-00044_0000.nii


In [6]:
import json
import os

dataset_path = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/dataset.json"

dataset_v2 = {
    "dataset_name": "BonnFCD",
    "description": "Bonn FCD II Epilepsy MRI dataset (T1w)",
    "reference": "Bonn University Hospital",
    "licence": "CC BY 4.0",
    "release": "1.0",
    "channel_names": {
        "0": "T1w"
    },
    "labels": {
        "background": 0,
        "lesion": 1
    },
    "numTraining": len(os.listdir("/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/imagesTr")),
    "file_ending": ".nii",
    "overwrite_image_reader_writer": "SimpleITKIO"
}

with open(dataset_path, "w") as f:
    json.dump(dataset_v2, f, indent=4)

print("✅ dataset.json rewritten for nnUNetv2.")
!cat /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/dataset.json

✅ dataset.json rewritten for nnUNetv2.
{
    "dataset_name": "BonnFCD",
    "description": "Bonn FCD II Epilepsy MRI dataset (T1w)",
    "reference": "Bonn University Hospital",
    "licence": "CC BY 4.0",
    "release": "1.0",
    "channel_names": {
        "0": "T1w"
    },
    "labels": {
        "background": 0,
        "lesion": 1
    },
    "numTraining": 57,
    "file_ending": ".nii",
    "overwrite_image_reader_writer": "SimpleITKIO"
}

In [7]:
import nibabel as nib
import numpy as np
import os
from nilearn.image import resample_to_img

# Dataset folders: training images, their labels, and where resampled labels go
img_dir = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/imagesTr"
seg_dir = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr"
output_seg_dir = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr_fixed"

os.makedirs(output_seg_dir, exist_ok=True)

# Resample each training label onto the grid of its image
for fname in os.listdir(img_dir):
    if not fname.endswith(("_0000.nii", "_0000.nii.gz")):
        continue

    # Dropping the channel suffix also covers '.nii.gz' names, because
    # '_0000.nii' is a prefix of '_0000.nii.gz'.
    seg_name = fname.replace("_0000.nii", ".nii")
    seg_path = os.path.join(seg_dir, seg_name)
    if not os.path.exists(seg_path):
        print(f"⚠️ Missing label for {fname}")
        continue

    image = nib.load(os.path.join(img_dir, fname))
    label = nib.load(seg_path)

    # Nearest-neighbour interpolation keeps the label values discrete
    label_on_image_grid = resample_to_img(label, image, interpolation='nearest')

    nib.save(label_on_image_grid, os.path.join(output_seg_dir, seg_name))
    print(f"✅ Fixed {seg_name}")

✅ Fixed sub-00133.nii
✅ Fixed sub-00027.nii
✅ Fixed sub-00116.nii
✅ Fixed sub-00098.nii
✅ Fixed sub-00059.nii
✅ Fixed sub-00072.nii
✅ Fixed sub-00068.nii
✅ Fixed sub-00018.nii
✅ Fixed sub-00010.nii
✅ Fixed sub-00123.nii
✅ Fixed sub-00122.nii
✅ Fixed sub-00140.nii
✅ Fixed sub-00015.nii
✅ Fixed sub-00091.nii
✅ Fixed sub-00146.nii
✅ Fixed sub-00073.nii
✅ Fixed sub-00076.nii
✅ Fixed sub-00014.nii
✅ Fixed sub-00097.nii
✅ Fixed sub-00053.nii
✅ Fixed sub-00047.nii
✅ Fixed sub-00109.nii
✅ Fixed sub-00087.nii
✅ Fixed sub-00120.nii
✅ Fixed sub-00044.nii
✅ Fixed sub-00077.nii
✅ Fixed sub-00080.nii
✅ Fixed sub-00115.nii
✅ Fixed sub-00071.nii
✅ Fixed sub-00038.nii
✅ Fixed sub-00101.nii
✅ Fixed sub-00130.nii
✅ Fixed sub-00040.nii
✅ Fixed sub-00141.nii
✅ Fixed sub-00138.nii
✅ Fixed sub-00081.nii
✅ Fixed sub-00090.nii
✅ Fixed sub-00139.nii
✅ Fixed sub-00105.nii
✅ Fixed sub-00063.nii
✅ Fixed sub-00058.nii
✅ Fixed sub-00131.nii
✅ Fixed sub-00024.nii
✅ Fixed sub-00112.nii
✅ Fixed sub-00089.nii
✅ Fixed su

In [8]:
!mv /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr_fixed \
   /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr

In [9]:
# Delete any existing preprocessed output for this dataset (no error if absent).
!rm -rf /kaggle/working/nnUNet_preprocessed/Dataset001_BonnFCD

In [10]:
import os
import nibabel as nib
import numpy as np
from nibabel.processing import resample_from_to
from tqdm import tqdm

root = "/kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD"
imagesTr = os.path.join(root, "imagesTr")
labelsTr = os.path.join(root, "labelsTr")
fixed_labels = os.path.join(root, "labelsTr_fixed")
os.makedirs(fixed_labels, exist_ok=True)

# Walk the label folder; anything that is not a '.nii' entry is ignored.
for label_name in tqdm(os.listdir(labelsTr)):
    if not label_name.endswith(".nii"):
        continue

    case_id = label_name.replace(".nii", "")
    image_file = os.path.join(imagesTr, f"{case_id}_0000.nii")
    if not os.path.exists(image_file):
        print(f"⚠️ Skipping {case_id}: no matching T1 image found.")
        continue

    # Load the image and its mask
    image = nib.load(image_file)
    mask = nib.load(os.path.join(labelsTr, label_name))

    # order=0 resampling onto the image grid keeps mask labels discrete
    mask_on_image_grid = resample_from_to(mask, image, order=0)

    # Write the corrected mask under the same file name
    nib.save(mask_on_image_grid, os.path.join(fixed_labels, label_name))

print("✅ All masks resampled to match their corresponding T1 images.")

100%|██████████| 58/58 [02:12<00:00,  2.29s/it]

✅ All masks resampled to match their corresponding T1 images.





In [11]:
# Delete the current labelsTr folder and everything inside it.
!rm -rf /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr

In [12]:
# Move labelsTr_fixed to the labelsTr path. This is a plain rename only when
# labelsTr does not already exist as a directory.
!mv /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr_fixed \
   /kaggle/working/nnUNet_raw_data/Dataset001_BonnFCD/labelsTr

In [13]:
# Delete any existing preprocessed output for this dataset (no error if absent).
!rm -rf /kaggle/working/nnUNet_preprocessed/Dataset001_BonnFCD

In [14]:
# Run nnU-Net v2 planning and preprocessing for dataset id 001, with the
# dataset integrity check enabled.
!nnUNetv2_plan_and_preprocess -d 001 --verify_dataset_integrity

Fingerprint extraction...
Dataset001_BonnFCD
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> reader/writer

####################
verify_dataset_integrity Done. 
If you didn't see any error messages then your dataset is most likely OK!
####################

Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> reader/writer
100%|███████████████████████████████████████████| 57/57 [00:33<00:00,  1.71it/s]
Experiment planning...

############################
INFO: You are using the old nnU-Net default planner. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Attempting to find 3d_lowres config. 
Current spacing: [1.03 1.03 1.03]. 
Current patch size: (160, 160, 96). 
Current median shape: [247.57281553 247.57281553 155.33980583]
Dropping 3d_lowres config because the image size differenc