This notebook helps you create the datasets required by the nnunet_translation model.

In [2]:
import glob
import json
import os
import shlex
import shutil
import subprocess

import nibabel as nib
import numpy as np

# 
# --- Configuration: edit the placeholders below before running the notebook ---

# Folder that is searched for the source NIfTI volumes.
data_dir = '<path_to_data>'
# NOTE(review): target_dir is defined but never read in this notebook; the
# target volumes below are also globbed from data_dir -- confirm this is intended.
target_dir = '<path_to_target>'

# Folders used by nnU-Net, passed through environment variables.
os.environ['nnUNet_results'] = '<>'
os.environ['nnUNet_raw'] = '<>'
os.environ['nnUNet_preprocessed'] = '<>'

# Example with 2 input modalities. The lists are sorted so that the i-th entry
# of each list is paired with the i-th entry of the others further down.
list_datas1 = sorted(glob.glob(os.path.join(data_dir, '*voided.nii.gz')))
list_datas2 = sorted(glob.glob(os.path.join(data_dir, '*mask.nii.gz')))
# A single glob call: glob.glob expects a pattern string, not a list of paths.
list_targets = sorted(glob.glob(os.path.join(data_dir, '*target.nii.gz')))

In [4]:
dataset_id = 50 # /!\ both dataset_id (inputs) and dataset_id + 1 (targets) are used
dataset_data_name = 'image_inpainting_brats_2024_voided_with_masks'
dataset_target_name = 'image_inpainting_brats_2024_targets'

# Raw nnU-Net folders of the input dataset and of the target dataset.
dataset_data_path = os.path.join(os.environ['nnUNet_raw'], f'Dataset{dataset_id:03d}_{dataset_data_name}')
dataset_target_path = os.path.join(os.environ['nnUNet_raw'], f'Dataset{dataset_id+1:03d}_{dataset_target_name}')

# We want an error if a dataset exists already: copying into an existing folder
# would silently mix new files with stale ones from a previous run.
# Both paths are checked before anything is created so a failure leaves no
# half-created dataset behind.
for dataset_path in (dataset_data_path, dataset_target_path):
    if os.path.exists(dataset_path):
        raise FileExistsError(
            f'{dataset_path} already exists; remove it or choose another dataset_id'
        )

for dataset_path in (dataset_data_path, dataset_target_path):
    for subfolder in ('imagesTr', 'labelsTr'):
        # makedirs also creates the dataset folder itself (and nnUNet_raw if missing).
        os.makedirs(os.path.join(dataset_path, subfolder))

In [5]:
# Copy every input / target volume into the raw nnU-Net folders under the
# FILE_<case>_<channel>.nii.gz naming used here, and write a placeholder label
# volume next to each case.

def _write_ones_label(reference_nifti, out_path):
    """Write an all-ones label volume on the same grid as ``reference_nifti``.

    Parameters
    ----------
    reference_nifti : nibabel image
        Image whose shape and affine the label volume must share.
    out_path : str
        Destination path of the label file.
    """
    # /!\ include here the computation of the segmentation mask you're using.
    # In this case it does not matter, ones are put everywhere.
    # Shape and affine come from the header, so the voxel data is never loaded,
    # and each label uses the affine of its own image rather than a shared one.
    label = np.ones(reference_nifti.shape, dtype=np.uint8)
    nib.Nifti1Image(label, reference_nifti.affine).to_filename(out_path)


# Cases are paired by their position in the sorted lists, so the lists must be
# non-empty and of equal length; zip() would otherwise silently drop cases.
if not list_datas1 or not (len(list_datas1) == len(list_datas2) == len(list_targets)):
    raise ValueError(
        'Expected the same non-zero number of files per list, got '
        f'{len(list_datas1)} voided, {len(list_datas2)} mask and {len(list_targets)} target files'
    )

for i, (data1_path, data2_path) in enumerate(zip(list_datas1, list_datas2)):
    # channel 0000: first input modality
    nib.load(data1_path).to_filename(os.path.join(dataset_data_path, f'imagesTr/FILE_{i:03d}_0000.nii.gz'))

    # channel 0001: second input modality
    data2_nifti = nib.load(data2_path)
    data2_nifti.to_filename(os.path.join(dataset_data_path, f'imagesTr/FILE_{i:03d}_0001.nii.gz'))

    _write_ones_label(data2_nifti, os.path.join(dataset_data_path, f'labelsTr/FILE_{i:03d}.nii.gz'))

for i, target_path in enumerate(list_targets):
    target_nifti = nib.load(target_path)
    target_nifti.to_filename(os.path.join(dataset_target_path, f'imagesTr/FILE_{i:03d}_0000.nii.gz'))

    _write_ones_label(target_nifti, os.path.join(dataset_target_path, f'labelsTr/FILE_{i:03d}.nii.gz'))

In [6]:
# Dump the dataset.json of both datasets.
# /!\ edit the channel names below according to the number of modalities used.

def _build_dataset_json(channel_names, num_training):
    """Return the dataset.json content for one of the two datasets.

    Parameters
    ----------
    channel_names : dict
        Maps the channel index (as a string) to the channel name.
    num_training : int
        Number of training cases in the dataset.
    """
    return {
        # Label values are integers, consistent with the background entry.
        "labels": {
            "background": 0,
            "label_001": 1,
        },
        "channel_names": dict(channel_names),
        "numTraining": num_training,
        "file_ending": ".nii.gz",
    }


data_dataset_json = _build_dataset_json({"0": "T1w", "1": "MASK"}, len(list_datas1))
dump_data_datasets_path = os.path.join(dataset_data_path, 'dataset.json')
with open(dump_data_datasets_path, 'w') as f:
    json.dump(data_dataset_json, f, indent=4)

# The target dataset has a single channel. Per the original author's note, the
# exact channel name does not matter as long as it is not "CT".
target_dataset_json = _build_dataset_json({"0": "T1w"}, len(list_targets))
dump_target_datasets_path = os.path.join(dataset_target_path, 'dataset.json')
with open(dump_target_datasets_path, 'w') as f:
    json.dump(target_dataset_json, f, indent=4)


# TODO: the commands below are shell commands; either use a bash kernel or write them to a script.
# TODO: make a script that only does the dataset unpacking (instead of launching the training). It is not that complicated: copy-paste the nnUNetv2_train entrypoint and add a break after the unpacking.


In [None]:
# Generate nnUNet_unpacking.sh from Python so that the dataset ids and the
# nnU-Net folders configured above are actually written into the script
# (a %%writefile cell stores its body verbatim, and `!cmd` is IPython syntax
# that bash does not understand).
nnunet_env_exports = '\n'.join(
    # shlex.quote keeps paths containing spaces or shell metacharacters intact.
    f'export {name}={shlex.quote(os.environ[name])}'
    for name in ('nnUNet_results', 'nnUNet_raw', 'nnUNet_preprocessed')
)

# NOTE(review): nnUNetv2_unpacking is presumably the unpacking-only entrypoint
# mentioned in the TODO above -- confirm it is installed in the environment.
unpacking_script = f"""#!/bin/bash
# Generated by the dataset-creation notebook.
# Stop at the first failing command so later steps never run on partial output.
set -euo pipefail

{nnunet_env_exports}

nnUNetv2_plan_and_preprocess -d {dataset_id} -c 3d_fullres
nnUNetv2_plan_and_preprocess -d {dataset_id + 1} -c 3d_fullres

nnUNetv2_unpacking {dataset_id} 3d_fullres 0
nnUNetv2_unpacking {dataset_id + 1} 3d_fullres 0
"""

with open('nnUNet_unpacking.sh', 'w') as f:
    f.write(unpacking_script)

In [None]:
# Run the generated script. check=True raises CalledProcessError when the
# script exits with a non-zero status, so the following cells cannot run on
# missing or partial preprocessing output.
subprocess.run(['bash', 'nnUNet_unpacking.sh'], check=True)

In [7]:
# Replace the ground-truth segmentations of the preprocessed *input* dataset by
# the raw *target* images.
#
# /!\ The two names below are swapped with respect to the datasets they point to:
#   - nnunet_datas_preprocessed_dir   -> preprocessed folder of the target dataset (dataset_id + 1)
#   - nnunet_targets_preprocessed_dir -> preprocessed folder of the input dataset  (dataset_id)
# They are kept as they are because the next cell relies on them.
nnunet_datas_preprocessed_dir = os.path.join(os.environ['nnUNet_preprocessed'], f'Dataset{dataset_id+1:03d}_{dataset_target_name}')
nnunet_targets_preprocessed_dir = os.path.join(os.environ['nnUNet_preprocessed'], f'Dataset{dataset_id:03d}_{dataset_data_name}')

# Raw target images, and the gt_segmentations files they will overwrite.
list_targets = sorted(glob.glob(os.path.join(f"{dataset_target_path}/imagesTr", '*')))
list_gt_segmentations_datas = sorted(glob.glob(os.path.join(f"{nnunet_targets_preprocessed_dir}/gt_segmentations", '*')))

print(nnunet_targets_preprocessed_dir)

# zip() would silently ignore missing files, so fail loudly instead.
if not list_targets or len(list_targets) != len(list_gt_segmentations_datas):
    raise RuntimeError(
        f'Found {len(list_targets)} target images but {len(list_gt_segmentations_datas)} '
        'gt_segmentations files; has the preprocessing been run for both datasets?'
    )

image_suffix, gt_suffix = '_0000.nii.gz', '.nii.gz'
for (raw_target_path, gt_path) in zip(list_targets, list_gt_segmentations_datas):
    # here, gt_path is the path to the gt_segmentation in nnUNet_preprocessed.
    target_name = os.path.basename(raw_target_path)
    gt_name = os.path.basename(gt_path)
    # Both files must belong to the same case (FILE_xxx), otherwise a wrong
    # volume would be written over the ground truth.
    same_case = (
        target_name.endswith(image_suffix)
        and gt_name.endswith(gt_suffix)
        and target_name[:-len(image_suffix)] == gt_name[:-len(gt_suffix)]
    )
    if not same_case:
        raise RuntimeError(f'File pairing mismatch: {raw_target_path} vs {gt_path}')

    print(raw_target_path, gt_path)
    # copy (not move) so the raw target dataset stays intact, at the cost of disk space
    shutil.copy(src=raw_target_path, dst=gt_path)
    

/data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks
/data/elebot/nnUNet_raw_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/imagesTr/FILE_000_0000.nii.gz /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/gt_segmentations/FILE_000.nii.gz
/data/elebot/nnUNet_raw_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/imagesTr/FILE_001_0000.nii.gz /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/gt_segmentations/FILE_001.nii.gz
/data/elebot/nnUNet_raw_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/imagesTr/FILE_002_0000.nii.gz /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/gt_segmentations/FILE_002.nii.gz
/data/elebot/nnUNet_raw_translation_challenge_202

In [8]:
# Replace the unpacked *_seg.npy files of the input dataset by the preprocessed
# images of the target dataset.
# This might be a bit tricky, because the name of the plans folder
# ('nnUNetPlans_3d_fullres') depends on the plans / configuration you used.
#
# Reminder: nnunet_targets_preprocessed_dir is the preprocessed folder of the
# *input* dataset and nnunet_datas_preprocessed_dir the one of the *target*
# dataset (the names are swapped where they are defined).

# the files to overwrite: the _seg.npy of the input dataset
list_preprocessed_datas_seg_path = sorted(
    glob.glob(os.path.join(nnunet_targets_preprocessed_dir, 'nnUNetPlans_3d_fullres/*_seg.npy'))
)

# the files to copy: every .npy of the target dataset that is not a _seg file.
# The filter looks at the file name only, so a '_seg' somewhere in the folder
# path cannot empty the list.
list_preprocessed_targets_path = sorted(
    name
    for name in glob.glob(os.path.join(nnunet_datas_preprocessed_dir, 'nnUNetPlans_3d_fullres/*.npy'))
    if not os.path.basename(name).endswith('_seg.npy')
)

# An empty or unbalanced list would make the loop below a silent no-op.
if not list_preprocessed_targets_path or len(list_preprocessed_targets_path) != len(list_preprocessed_datas_seg_path):
    raise RuntimeError(
        f'Found {len(list_preprocessed_targets_path)} preprocessed target images but '
        f'{len(list_preprocessed_datas_seg_path)} _seg.npy files; '
        'has the unpacking been run for both datasets?'
    )

for (datas_path, targets_path) in zip(list_preprocessed_datas_seg_path, list_preprocessed_targets_path):
    # datas_path is the _seg.npy file that gets overwritten, targets_path is the
    # preprocessed target image that replaces it; both must be the same case.
    seg_case = os.path.basename(datas_path)[:-len('_seg.npy')]
    target_case = os.path.basename(targets_path)[:-len('.npy')]
    if seg_case != target_case:
        raise RuntimeError(f'File pairing mismatch: {targets_path} vs {datas_path}')

    print(targets_path, datas_path)
    # copy (not move) so the target dataset stays usable, at the cost of disk space
    shutil.copy(src=targets_path, dst=datas_path)

/data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/nnUNetPlans_3d_fullres/FILE_000.npy /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/nnUNetPlans_3d_fullres/FILE_000_seg.npy
/data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/nnUNetPlans_3d_fullres/FILE_001.npy /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/nnUNetPlans_3d_fullres/FILE_001_seg.npy
/data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_targets/nnUNetPlans_3d_fullres/FILE_002.npy /data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset050_image_inpainting_brats_2024_voided_with_masks/nnUNetPlans_3d_fullres/FILE_002_seg.npy
/data/elebot/nnUNet_preprocessed_translation_challenge_2024/Dataset051_image_inpainting_brats_2024_t

In [22]:
# Debug inspection: display the list of preprocessed target .npy paths.
# NOTE(review): leftover debugging cell executed out of order; the saved output
# is an empty list. Consider removing it.
list_preprocessed_targets_path

[]

In [25]:
# Debug inspection: display the preprocessed folder held by this variable.
# NOTE(review): the saved output shows a Dataset300 folder, which does not match
# the dataset id / names configured in this notebook -- presumably from another
# session; consider removing this cell.
nnunet_targets_preprocessed_dir

'/data/elebot/datasets/nnUNetDatasets/nnUNet_preprocessed/Dataset300_image_translation_ofsep_with_lesion_mask'

In [23]:
# Debug: re-run the glob over every .npy file of the plans folder.
# NOTE(review): this rebinds list_preprocessed_targets_path without sorting and
# without removing the *_seg.npy entries, unlike the cell that uses the list for
# copying -- leftover debugging cell, consider removing it.
list_preprocessed_targets_path = glob.glob(os.path.join(nnunet_datas_preprocessed_dir, 'nnUNetPlans_3d_fullres/*.npy'))
