
```
/kaggle/input/dataset002-bonnfcd-flair/
  ├─ nnUNet_preprocessed/
  │   └─ Dataset002_BonnFCD/
  └─ nnUNet_raw_data/
      └─ Dataset002_BonnFCD/
          ├─ imagesTr
          ├─ imagesTs
          └─ labelsTr
```

We **copy** RAW (and, if present, PREPROCESSED) into `/kaggle/working`, because `/kaggle/input` is read-only and nnU-Net needs a writable location to train and write results.
If you want to **force re-planning**, set `FORCE_REPLAN=True` in the config cell.


# 0) Install & configure

In [None]:
!pip install nnunetv2

Collecting nnunetv2
  Downloading nnunetv2-2.6.2.tar.gz (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting acvl-utils<0.3,>=0.2.3 (from nnunetv2)
  Downloading acvl_utils-0.2.5.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dynamic-network-architectures<0.5,>=0.4.1 (from nnunetv2)
  Downloading dynamic_network_architectures-0.4.2.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting batchgenerators>=0.25.1 (from nnunetv2)
  Downloading batchgenerators-0.25.1.tar.gz (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
C

In [None]:
import os, shutil
from pathlib import Path

# ---- Adjust these if your dataset folder name changes ----
DATASET_ID = 2
DATASET_NAME = "BonnFCD"
DATASET_DIR = f"Dataset{DATASET_ID:03d}_{DATASET_NAME}"

# Read-only Kaggle input: raw and (optionally) preprocessed data live side by side.
INPUT_ROOT = Path("/kaggle/input/dataset002-bonnfcd-flair")
RAW_SRC = INPUT_ROOT / "nnUNet_raw_data"
PP_SRC = INPUT_ROOT / "nnUNet_preprocessed"

# Set True to ignore provided preprocessed and re-run planning
FORCE_REPLAN = False

# Where nnU-Net expects to READ/WRITE: exported as environment variables so that
# the nnUNetv2_* commands started from later cells see them as well.
_NNUNET_ENV = {
    "nnUNet_raw": "/kaggle/working/nnUNet_raw_data",
    "nnUNet_preprocessed": "/kaggle/working/nnUNet_preprocessed",
    "nnUNet_results": "/kaggle/working/nnUNet_results",
}
os.environ.update(_NNUNET_ENV)

# Path objects for the same three locations, in the mapping's order.
RAW_DST, PP_DST, RES_DST = (Path(os.environ[_key]) for _key in _NNUNET_ENV)

# Echo the resolved configuration so the run log documents what was used.
for _label, _value in (
    ("INPUT_ROOT:", INPUT_ROOT),
    ("RAW_SRC   :", RAW_SRC / DATASET_DIR),
    ("PP_SRC    :", PP_SRC / DATASET_DIR),
    ("RAW_DST   :", RAW_DST),
    ("PP_DST    :", PP_DST),
    ("RES_DST   :", RES_DST),
    ("DATASET_DIR:", DATASET_DIR),
):
    print(_label, _value)

INPUT_ROOT: /kaggle/input/dataset002-bonnfcd-flair
RAW_SRC   : /kaggle/input/dataset002-bonnfcd-flair/nnUNet_raw_data/Dataset002_BonnFCD
PP_SRC    : /kaggle/input/dataset002-bonnfcd-flair/nnUNet_preprocessed/Dataset002_BonnFCD
RAW_DST   : /kaggle/working/nnUNet_raw_data
PP_DST    : /kaggle/working/nnUNet_preprocessed
RES_DST   : /kaggle/working/nnUNet_results
DATASET_DIR: Dataset002_BonnFCD


# 1) Copy RAW (and optionally PREPROCESSED) to working


In [None]:
from pathlib import Path
import shutil, os

# Create the three nnU-Net working folders (safe to re-run).
for working_dir in (RAW_DST, PP_DST, RES_DST):
    working_dir.mkdir(parents=True, exist_ok=True)

# Copy RAW if not already copied.
# Source paths come from the config cell (RAW_SRC / PP_SRC), so changing INPUT_ROOT
# there is the only edit needed when the Kaggle dataset folder is renamed.
src = RAW_SRC / DATASET_DIR
dst = RAW_DST / DATASET_DIR
if dst.exists():
    print("RAW already present at:", dst)
else:
    # Fail with a readable message rather than a traceback from inside copytree.
    if not src.is_dir():
        raise FileNotFoundError(f"RAW dataset not found at {src}")
    print("Copying RAW dataset to working directory...")
    shutil.copytree(src, dst)

# Copy PREPROCESSED if available & not forcing replan.
pp_src_ds = PP_SRC / DATASET_DIR
pp_dst_ds = PP_DST / DATASET_DIR
if FORCE_REPLAN:
    print("FORCE_REPLAN=True → will run plan_and_preprocess.")
elif not pp_src_ds.exists():
    print("No preprocessed input found; will run plan_and_preprocess.")
elif pp_dst_ds.exists():
    print("PREPROCESSED already present at:", pp_dst_ds)
else:
    print("Copying PREPROCESSED to working directory...")
    shutil.copytree(pp_src_ds, pp_dst_ds)

Copying RAW dataset to working directory...
Copying PREPROCESSED to working directory...


In [None]:
from pathlib import Path
# Sanity-check the preprocessed dataset folder in the working directory.
PREP_ROOT = os.environ["nnUNet_preprocessed"]            # should be .../nnUNet_preprocessed
# Build the path from DATASET_DIR (config cell) instead of repeating the literal folder
# name, so this check always looks at the dataset that the copy cell handled.
PP = Path(PREP_ROOT) / DATASET_DIR

print("PREP DS:", PP)
print("exists:", PP.exists())
print("has 3d_fullres plans folder:", (PP/"nnUNetPlans_3d_fullres").exists())
print("has fingerprint:", (PP/"dataset_fingerprint.json").exists())

# rough count of processed files (rglob("*") yields sub-directories as well as files)
count_any = sum(1 for _ in PP.rglob("*")) if PP.exists() else 0
print("preprocessed files (rough count):", count_any)


PREP DS: /kaggle/working/nnUNet_preprocessed/Dataset002_BonnFCD
exists: True
has 3d_fullres plans folder: True
has fingerprint: True
preprocessed files (rough count): 405


# 2) Preflight checks (structure + dataset.json + geometry)

In [None]:
import json, nibabel as nib

# Preflight: verify folder layout, dataset.json, label/image pairing and (for a small
# sample) that image and segmentation shapes agree, before spending time on training.
DS_ROOT = Path(os.environ["nnUNet_raw"]) / DATASET_DIR
assert DS_ROOT.exists(), f"Dataset not found at {DS_ROOT}"

for folder in ["imagesTr","labelsTr"]:
    p = DS_ROOT/folder
    assert p.exists() and p.is_dir(), f"Missing folder: {p}"
imagesTs_exists = (DS_ROOT/"imagesTs").exists() and (DS_ROOT/"imagesTs").is_dir()
print("imagesTs:", "OK" if imagesTs_exists else "Not found (you can still train)")

dj_path = DS_ROOT/"dataset.json"
assert dj_path.exists(), "dataset.json is missing!"
with open(dj_path) as f:
    dj = json.load(f)

print("channel_names:", dj.get("channel_names"))
print("labels:", dj.get("labels"))
print("file_ending:", dj.get("file_ending"))
assert dj.get("file_ending") in [".nii", ".nii.gz"], "file_ending must be .nii or .nii.gz"


def _case_stem(label_path):
    """Return the case identifier of a label file: its name without .nii / .nii.gz."""
    return label_path.name.replace(".nii.gz","").replace(".nii","")


def _find_image(stem):
    """Return the `<stem>_0000` image path in imagesTr (.nii tried first, then .nii.gz), or None."""
    for ending in (".nii", ".nii.gz"):
        candidate = DS_ROOT/"imagesTr"/f"{stem}_0000{ending}"
        if candidate.exists():
            return candidate
    return None


labels = sorted((DS_ROOT/"labelsTr").glob("*.nii*"))
# An empty labelsTr would otherwise sail through every loop below and report "OK".
assert labels, f"No label files found in {DS_ROOT/'labelsTr'}"

# Pairing check: EVERY label (not just a sample) must have a `_0000` image.
missing = [lbl.name for lbl in labels if _find_image(_case_stem(lbl)) is None]
assert not missing, f"Missing images for labels: {missing[:10]}"

# Geometry spot-check: compare array shapes for the first 5 cases only (loading is slow).
checked = 0
for lbl in labels[:5]:
    stem = _case_stem(lbl)
    img = nib.load(str(_find_image(stem)))
    seg = nib.load(str(lbl))
    assert img.shape == seg.shape, f"Shape mismatch for {stem}: {img.shape} vs {seg.shape}"
    checked += 1

print(f"✅ Preflight OK. Checked {checked} sample pairs.")
print("imagesTr count:", len(list((DS_ROOT/'imagesTr').glob('*.nii*'))))
print("labelsTr count:", len(list((DS_ROOT/'labelsTr').glob('*.nii*'))))
if imagesTs_exists:
    print("imagesTs count:", len(list((DS_ROOT/'imagesTs').glob('*.nii*'))))

imagesTs: OK
channel_names: {'0': 'FLAIR'}
labels: {'background': 0, 'lesion': 1}
file_ending: .nii
✅ Preflight OK. Checked 5 sample pairs.
imagesTr count: 57
labelsTr count: 57
imagesTs count: 28


# 3) Plan & preprocess (only if needed)

In [None]:
from pathlib import Path
need_replan = FORCE_REPLAN or not (Path(os.environ["nnUNet_preprocessed"]) / DATASET_DIR).exists()

if need_replan:
    print("Running plan_and_preprocess...")
    !nnUNetv2_plan_and_preprocess -d "$(printf '%03d' $DATASET_ID)" --verify_dataset_integrity
else:
    print("Skipping plan_and_preprocess (preprocessed data already present).")

Skipping plan_and_preprocess (preprocessed data already present).


In [None]:
# Make sure every child process sees these (works in Kaggle/Colab)
%env NNUNET_USE_COMPILE=0
%env TORCHDYNAMO_DISABLE=1
%env TORCH_COMPILE_DISABLE=1   # extra belt-and-suspenders


env: NNUNET_USE_COMPILE=0
env: TORCHDYNAMO_DISABLE=1
env: TORCH_COMPILE_DISABLE=1   # extra belt-and-suspenders


# 4) Train (choose one)

In [None]:
# Option A: Train with fold "all"
# NOTE(review): per the nnU-Net v2 docs, fold `all` trains ONE model on all training
# cases (no held-out validation split); it does not loop over the 5 cross-validation
# folds. Confirm this is the intended setup before relying on the reported metrics.
!nnUNetv2_train "$(printf '%03d' $DATASET_ID)" 3d_fullres all

# Option B: Train single fold
# FOLD is only consumed by the commented-out command below; uncomment that line
# (and comment out Option A) to train one cross-validation fold instead.
FOLD=0
# !nnUNetv2_train "$(printf '%03d' $DATASET_ID)" 3d_fullres $FOLD


############################
INFO: You are using the old nnU-Net default plans. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

2025-10-17 12:04:44.944687: Using torch.compile...
2025-10-17 12:04:44.960479: do_dummy_2d_data_aug: False
using pin_memory on device 0
using pin_memory on device 0

This is the configuration used by this training:
Configuration name: 3d_fullres
 {'data_identifier': 'nnUNetPlans_3d_fullres', 'pr