# Build 3 region model
https://github.com/Phyrise/nnUNet_translation/blob/master/notebooks/nnUNetv2_translation_tutorial_dualmod.ipynb

In [30]:
import os, glob, shutil, json
from pathlib import Path
import SimpleITK as sitk
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## config

In [31]:
TASK = 'Task1'
REGION = 'HN'
MODEL = 'res'
DATASET_ID = 312

config = {
    "region": REGION, 
    "dataset_id": DATASET_ID,
    "dataset_data_name": f"synthrad2025_task1_MR_{REGION}_{MODEL}",
    "dataset_target_name": f"synthrad2025_task1_CT_{REGION}_{MODEL}",
    "data_root": f"/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_{TASK}_Train/{TASK}/{REGION}", # include centreD
    "preprocessing_CT": "CT", 
    "preprocessing_MRI": "MR",
    "preprocessing_mask": "no mask",
    "fold": 0,
    "dataset_plan": "nnUNetPlannerResEnc(M/L/XL)",
    "configuration": "3d_fullres",
    "trainer": 'nnUNetTrainerMRCT'
}

In [32]:
# save config to a JSON file
config_path = Path(f"config_{config['dataset_id']}.json")
with open(config_path, 'w') as f:
    json.dump(config, f, indent=4)

In [33]:

data_root = config["data_root"]
os.environ["nnUNet_raw"] = "/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/raw"
os.environ["nnUNet_preprocessed"] = "/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed"
os.environ["nnUNet_results"] = "/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/results"

# example with 2 input modalities
list_data_mri = sorted(glob.glob(os.path.join(data_root, '**','mr.mha'), recursive=True))
# list_data_mask = sorted(glob.glob(os.path.join(data_root, '**','mask.mha'), recursive=True))
list_data_ct = sorted(glob.glob(os.path.join(data_root, '**','ct.mha'), recursive=True))


print("input1 ---", len(list_data_mri), list_data_mri)
# print("input2 ---", len(list_data_mask), list_data_mask)
print("target ---", len(list_data_ct), list_data_ct)

input1 --- 221 ['/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA001/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA004/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA006/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA008/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA010/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA012/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA013/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA014/mr.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1H

## Define dataset ID and make paths

In [34]:
dataset_id = config["dataset_id"]
dataset_data_name = config["dataset_data_name"]
dataset_target_name = config["dataset_target_name"]

# we will copy the datas
# do not use exist_ok=True, we want an error if the dataset exist already
dataset_data_path = os.path.join(os.environ['nnUNet_raw'], f'Dataset{dataset_id:03d}_{dataset_data_name}') 
os.makedirs(dataset_data_path, exist_ok = True)
os.makedirs(os.path.join(dataset_data_path, 'imagesTr'), exist_ok=True)
os.makedirs(os.path.join(dataset_data_path, 'labelsTr'), exist_ok = True)

dataset_target_path = os.path.join(os.environ['nnUNet_raw'], f'Dataset{dataset_id+1:03d}_{dataset_target_name}') 
os.makedirs(dataset_target_path, exist_ok = True)
os.makedirs(os.path.join(dataset_target_path, 'imagesTr'), exist_ok = True)
os.makedirs(os.path.join(dataset_target_path, 'labelsTr'), exist_ok = True)

## Copy files and create dummy masks

In [35]:


def process_file(data_path, dataset_path, modality_suffix="_0000"):
    curr_img = sitk.ReadImage(data_path)
    filename = data_path.split(os.sep)[-2]
    if not filename.endswith(f'{modality_suffix}.mha'):
        filename = filename + f'{modality_suffix}.mha'
    sitk.WriteImage(curr_img, os.path.join(dataset_path, f'imagesTr/{filename}'))

    data = sitk.GetArrayFromImage(curr_img)
    data = np.ones_like(data)

    filename = filename.replace(modality_suffix, '')  # Remove modality suffix for masks
    label_path = os.path.join(dataset_path, f'labelsTr/{filename}')
    if not os.path.exists(label_path):
        label_img = sitk.GetImageFromArray(data)
        label_img.SetDirection(curr_img.GetDirection())
        label_img.SetOrigin(curr_img.GetOrigin())
        label_img.SetSpacing(curr_img.GetSpacing())
        sitk.WriteImage(label_img, label_path)

# Use the affine from the last MRI as a placeholder, but for sitk we use spacing/origin/direction from the image itself

with ThreadPoolExecutor() as executor:
    list(tqdm(executor.map(lambda data_path: process_file(data_path, dataset_data_path, "_0000"), list_data_mri), total=len(list_data_mri)))

# with ThreadPoolExecutor() as executor:
#     list(tqdm(executor.map(lambda data_path: process_file(data_path, dataset_data_path, "_0001"), list_data_mask), total=len(list_data_mask)))

with ThreadPoolExecutor() as executor:
    list(tqdm(executor.map(lambda target_path: process_file(target_path, dataset_target_path), list_data_ct), total=len(list_data_ct)))


  0%|          | 0/221 [00:00<?, ?it/s]

 14%|█▍        | 32/221 [00:08<00:47,  3.96it/s]


KeyboardInterrupt: 

## Create dataset.json

In [36]:
# /!\ you will need to edit this with regards to the number of modalities used;
data_dataset_json = {
    "labels": {
        "label_001": "1", 
        "background": 0
    },
    "channel_names": {
        "0": config["preprocessing_MRI"],
        # "1": config["preprocessing_mask"],
        
    },
    "numTraining": len(list_data_mri),
    "file_ending": ".mha"
}
dump_data_datasets_path = os.path.join(dataset_data_path, 'dataset.json')
with open(dump_data_datasets_path, 'w') as f:
    json.dump(data_dataset_json, f)

target_dataset_json = {
    "labels": {
        "label_001": "1",
        "background": 0
    },
    "channel_names": {
        "0": config["preprocessing_CT"],
    },
    "numTraining": len(list_data_ct),
    "file_ending": ".mha"
}
dump_target_datasets_path = os.path.join(dataset_target_path, 'dataset.json')
with open(dump_target_datasets_path, 'w') as f:
    json.dump(target_dataset_json, f)

## Apply preprocessing and unpacking

In [37]:
if 'MPLBACKEND' in os.environ: 
    del os.environ['MPLBACKEND'] # avoid conflicts with matplotlib backend  

dataset_plan = config['dataset_plan']
configuration = config['configuration']
fold = config['fold']
    
os.system(f'nnUNetv2_plan_and_preprocess -d {dataset_id} -pl nnUNetPlannerResEncM')
os.system(f'nnUNetv2_unpack {dataset_id} {configuration} {fold} -p nnUNetResEncUNetMPlans')

os.system(f'nnUNetv2_plan_and_preprocess -d {dataset_id + 1} -pl nnUNetPlannerResEncM')
os.system(f'nnUNetv2_unpack {dataset_id + 1} {configuration} {fold} -p nnUNetResEncUNetMPlans')

Fingerprint extraction...
Dataset312_synthrad2025_task1_MR_HN_res
Experiment planning...
Using ZScoreNormalization for image normalization
Using ZScoreNormalization for image normalization
Attempting to find 3d_lowres config. 
Current spacing: [3.   1.03 1.03]. 
Current patch size: (56, 192, 160). 
Current median shape: [ 89.         287.37864078 270.87378641]
Using ZScoreNormalization for image normalization
Attempting to find 3d_lowres config. 
Current spacing: [3.     1.0609 1.0609]. 
Current patch size: (56, 192, 160). 
Current median shape: [ 89.         279.0083891  262.98425865]
Dropping 3d_lowres config because the image size difference to 3d_fullres is too small. 3d_fullres: [ 89. 296. 279.], 3d_lowres: [89, 279, 263]
Using ZScoreNormalization for image normalization
2D U-Net configuration:
{'data_identifier': 'nnUNetPlans_2d', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 32, 'patch_size': (320, 320), 'median_image_size_in_voxels': array([296., 279.]), 'spacing': 

100%|████████████████████████████████████████| 221/221 [01:12<00:00,  3.04it/s]


Configuration: 3d_fullres...


100%|████████████████████████████████████████| 221/221 [02:07<00:00,  1.73it/s]


Configuration: 3d_lowres...
INFO: Configuration 3d_lowres not found in plans file nnUNetResEncUNetMPlans.json of dataset Dataset312_synthrad2025_task1_MR_HN_res. Skipping.
Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

Fingerprint extraction...
Dataset313_synthrad2025_task1_CT_HN_res
Experiment planning...
Using CTNormalization for image normalization
Using CTNormalization for image normalization
Attempting to find 3d_lowres config. 
Current spacing: [3.   1.03 1.03]. 
Current patch size: (56, 192, 160). 
Current median shape: [ 89.         287.37864078 270.87378641]
Using CTNormalization for image normalization
Att

100%|████████████████████████████████████████| 221/221 [01:10<00:00,  3.12it/s]


Configuration: 3d_fullres...


100%|████████████████████████████████████████| 221/221 [02:11<00:00,  1.67it/s]


Configuration: 3d_lowres...
INFO: Configuration 3d_lowres not found in plans file nnUNetResEncUNetMPlans.json of dataset Dataset313_synthrad2025_task1_CT_HN_res. Skipping.
Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################



0

## Copy mask raw data to the preprocessed folder


In [38]:
import shutil
list_data_mask = sorted(glob.glob(os.path.join(data_root, '**','mask.mha'), recursive=True))
print("input2 ---", len(list_data_mask), list_data_mask)


def process_mask_file(data_path, dataset_mask_path):

    filename = data_path.split(os.sep)[-2]
    if not filename.endswith(f'.mha'):
        filename = filename + f'.mha'
    shutil.copy(data_path, os.path.join(dataset_mask_path, filename))


# Use the affine from the last MRI as a placeholder, but for sitk we use spacing/origin/direction from the image itself
dataset_mask_path = os.path.join(os.environ['nnUNet_preprocessed'], f'Dataset{dataset_id:03d}_{dataset_data_name}', 'masks')
os.makedirs(dataset_mask_path, exist_ok=True) 
with ThreadPoolExecutor() as executor:
    list(tqdm(executor.map(lambda data_path: process_mask_file(data_path, dataset_mask_path), list_data_mask), total=len(list_data_mask)))


input2 --- 221 ['/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA001/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA004/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA006/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA008/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA010/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA012/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA013/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_Train/Task1/HN/1HNA014/mask.mha', '/datasets/work/hb-synthrad2023/source/synthrad2025_data_v2/synthRAD2025_Task1_T

100%|██████████| 221/221 [00:00<00:00, 806.00it/s]


## Define 2nd modality raw data as gt_seg of 1st modality

In [39]:
nnunet_datas_preprocessed_dir = os.path.join(os.environ['nnUNet_preprocessed'], f'Dataset{dataset_id+1:03d}_{dataset_target_name}') 
nnunet_targets_preprocessed_dir = os.path.join(os.environ['nnUNet_preprocessed'], f'Dataset{dataset_id:03d}_{dataset_data_name}') 

list_targets = glob.glob(os.path.join(f"{dataset_target_path}/imagesTr", '*'))
list_targets.sort()
list_gt_segmentations_datas = glob.glob(os.path.join(f"{nnunet_targets_preprocessed_dir}/gt_segmentations", '*'))
list_gt_segmentations_datas.sort()

print(nnunet_targets_preprocessed_dir)

for (preprocessed_path, gt_path) in zip(list_targets, list_gt_segmentations_datas):
    # here, gt_path is the path to the gt_segmentation in nnUNet_preprocessed.
    print(preprocessed_path, "->", gt_path) # ensure correct file pairing; 
    shutil.copy(src = preprocessed_path, dst = gt_path) # we use shutil.copy to ensure safety, but switching to shutil.move would be more efficient

/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res
/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/raw/Dataset313_synthrad2025_task1_CT_HN_res/imagesTr/1HNA001_0000.mha -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/gt_segmentations/1HNA001.mha
/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/raw/Dataset313_synthrad2025_task1_CT_HN_res/imagesTr/1HNA004_0000.mha -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/gt_segmentations/1HNA004.mha
/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/raw/Dataset313_synthrad2025_task1_CT_HN_res/imagesTr/1HNA006_0000.mha -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_stru

## Define 2nd modality preprocessed files as ground truth of 1st modality


In [40]:
list_preprocessed_datas_seg_path = sorted(glob.glob(os.path.join(nnunet_targets_preprocessed_dir, 'nnUNetPlans_3d_fullres/*_seg.npy')))

list_preprocessed_targets_path = sorted(glob.glob(os.path.join(nnunet_datas_preprocessed_dir, 'nnUNetPlans_3d_fullres/*.npy')))
list_preprocessed_targets_path = [name for name in list_preprocessed_targets_path if '_seg' not in name]

for (datas_path, targets_path) in zip(list_preprocessed_datas_seg_path, list_preprocessed_targets_path):
    print(targets_path, "->", datas_path)
    shutil.copy(src = targets_path, dst = datas_path) 

/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset313_synthrad2025_task1_CT_HN_res/nnUNetPlans_3d_fullres/1HNA001.npy -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/nnUNetPlans_3d_fullres/1HNA001_seg.npy
/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset313_synthrad2025_task1_CT_HN_res/nnUNetPlans_3d_fullres/1HNA004.npy -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/nnUNetPlans_3d_fullres/1HNA004_seg.npy
/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset313_synthrad2025_task1_CT_HN_res/nnUNetPlans_3d_fullres/1HNA006.npy -> /datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/nnUN

You should be able to start training with :
```
export nnUNet_raw="/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/raw"
export nnUNet_preprocessed="/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed"
export nnUNet_results="/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/results"
```
nnUNetv2_train 200 3d_fullres 0 -tr nnUNetTrainerMRCT


In [41]:
# move split file
baseline_split = f'{os.environ["nnUNet_preprocessed"]}/Dataset302_synthrad2025_task1_MR_HN/splits_final.json'
dest_split = os.path.join(os.environ["nnUNet_preprocessed"], f'Dataset{dataset_id:03d}_{dataset_data_name}', "splits_final.json")
shutil.copy(baseline_split, dest_split)

'/datasets/work/hb-synthrad2023/work/synthrad2025/bw_workplace/data/nnunet_struct/preprocessed/Dataset312_synthrad2025_task1_MR_HN_res/splits_final.json'

In [44]:
trainer = config['trainer']
train_command = f'nnUNetv2_train {dataset_id} {configuration} {fold} -tr {trainer} -p nnUNetResEncUNetMPlans'
print(train_command)
# os.system(train_commend)


nnUNetv2_train 312 3d_fullres 0 -tr nnUNetTrainerMRCT -p nnUNetResEncUNetMPlans
