In [3]:
#requires nnunetv2 (pip install nnunetv2) plus the other packages used in this notebook (SimpleITK, pandas, torch, numpy, tqdm)
#for install details see: https://github.com/MIC-DKFZ/nnUNet
import os
import sys

import nnunetv2
import numpy as np
import pandas as pd
import SimpleITK as sitk
import torch
from nnunetv2.dataset_conversion.generate_dataset_json import generate_dataset_json
from tqdm import tqdm

sys.path.append('path_to_dir_w_scripts') #not required if in the same dir
from nnunet_utils.utils import np2sitk, set_env_nnunet, write_envlines_nnunet, assign_trainjobs_to_gpus
from nnunet_utils.preprocess import write_as_nnunet, nnunet_directory_structure, preprocess_data
from nnunet_utils.run import train_single_model, nnunet_train_shell

#root folder in which your nnunet data is stored
#NOTE: this is a machine-specific absolute path --> change it to your own nnunet folder
root = '/home/hvv/Documents/nnunet'
datano = '512' #this is an arbitrary number you can choose --> should not be the same as other studies
project_name = 'nameyourproject'
#task and datasetID are built from datano and project_name
#so renaming the project in one place keeps all names consistent
task = 'Task{}_{}'.format(datano, project_name) #this is also something you choose
datasetID = 'Dataset{}_{}'.format(datano, project_name)
#where your train (or test) data is stored
p_dir = os.path.join(root,'nnUNet_raw',datasetID)

In [None]:
#iterate over your dataset
#create a training images and label folder

#write your own script here loading an IMG and GT every iteration
#something like:

p_data = 'path_to_source_img_lbl' #subfolders with IDs and scan and gt files
#sorted so the cases are processed in the same order on every run
for ID in sorted(os.listdir(p_data)):
    pid = os.path.join(p_data,ID)

    p_img = os.path.join(pid,'scan.nii.gz')
    p_gt = os.path.join(pid,'gt.nii.gz')

    #skip stray files and case folders that miss the scan or the gt
    #instead of crashing halfway through the dataset
    if not (os.path.isfile(p_img) and os.path.isfile(p_gt)):
        print('Skipping {}: scan.nii.gz and/or gt.nii.gz not found'.format(ID))
        continue

    IMG = sitk.ReadImage(p_img)
    GT = sitk.ReadImage(p_gt)
    #this example should be applied to all your training images-labels
    #you can also use this to preprocess your test set
    #this is however not required
    write_as_nnunet(IMG, GT, p_dir, ID)
#IMG: sitk.Image with the CT/MR scan
#GT: sitk Image with corresponding ground truth segmentations
#p_dir: where the imagesTr and labelsTr should be stored
#ID: ID number (including dataset name) for identification of IMG-GT pairs

#sanity check to see if all images have labels
root_images = os.path.join(p_dir,'imagesTr')
root_gt = os.path.join(p_dir, 'labelsTr')
#NOTE(review): img_lbl_paircount is not imported anywhere in this notebook
#--> import it from the nnunet_utils module that defines it before running this line
img_lbl_paircount(root_images, root_gt)

In [2]:
#build the nnunet (version 2) directory structure inside the root folder
nnunet_directory_structure(
    root,
    version=2,
)
#make sure your imagesTr and labelsTr data folders end up inside the nnUNet_raw folder

In [None]:
#nnUnet requires its own preprocessing of the scans before training can start
#this may take a while, if it fails run the cell again
preprocess_kwargs = dict(
    datano=datano,
    datasetID=datasetID, #or task name in old version
    dataset_name=project_name,
    #list with one entry per input channel --> important: should include MR or CT
    modalities=['BL_MR_FLAIR'],
)
preprocess_data(root, **preprocess_kwargs)

In [None]:
#there are two options to instantiate training models
#1) one-by-one:
#train models consecutively for each fold --> run this manually 5 times
train_single_model(gpu=0, #each pc with a single gpu has number 0, selecting another gpu on a server is possible
                   datasetID=datasetID, #defined above
                   resolution='3d_fullres', #can select nnUnet config: ['2d','3d_fullres','3d_lowres', '3d_cascade_fullres']
                   fold=0, #start with the first fold (number 0)
                  )


#2) parallel across gpus:
#2a) Create mapping: which GPU does what
#Assign jobs to gpus: this is an equal distribution script
#it can be wise to first check gpu availability
#and then make your own dictionary with distribution dictionary
#returns a dictionary with per entry:
# gpu_number:[job1, job2]
#where each job:
#(resolution, fold_number)

#settings for the job distribution --> adjust these to your own server
#(they have to be defined here, otherwise the call below raises a NameError)
num_gpus = 1 #total number of GPUs available OR a list of available GPU numbers
num_folds = 5 #number of folds to train (default=5)
resolutions = ['3d_fullres'] #list of resolutions, any from ['2d','3d_fullres','3d_lowres', '3d_cascade_fullres']

gpu_dct = assign_trainjobs_to_gpus(num_gpus,
                                   num_folds,
                                   resolutions
                                  )


#2b) Create shell script
#create a train_job.sh shell script to run multiple folds at the same time
#the shell script manages parallel computation across gpus
nnunet_train_shell(datasetID=datasetID, #defined above
                    root=root,#defined above
                    conda_env='/path/to/miniconda3/envs/nnunetv2', #path to your environment
                    gpu_res_fold_dct=gpu_dct, #is dictionary mapping resolutions, folds and gpus (see above)
                    version=2)

#2c) Run shell script
#Last thing: run the shell script on the server
#ssh to server, cd to nnunet folder then: bash train_job.sh
#to make sure the server stays running when you close your pc
#use tmux: https://tmuxcheatsheet.com/ and https://hamvocke.com/blog/a-quick-and-easy-guide-to-tmux/

In [13]:
#Inference: to predict segmentations using a trained model
#After your model is trained these scripts can be used on new cases
#there are three ways
#1) Run in the python script line by line
from nnunet_utils.infv2 import init_predictor, nnunetv2_get_props, nnunetv2_predict

model_path = os.path.join(root,'nnUNet_trained_models', datasetID,'nnUNetTrainer__nnUNetPlans__3d_fullres')
predictor = init_predictor(model_path)

p_data = 'your_test_set_folder'

for ID in tqdm(os.listdir(p_data)):
    #case folder inside the test set folder that is being listed
    pid = os.path.join(p_data,ID)
    #input file
    file = os.path.join(pid,'scan.nii.gz')

    #output nifti segmentation and also probability output as npy
    p_vseg_out = os.path.join(pid,'vesselseg.nii.gz')
    p_npy_vseg = os.path.join(pid,'vesselseg')

    #sanity check to not run the same stuff twice
    #(np.save appends .npy to the path, hence the suffix in the check)
    if os.path.exists(p_vseg_out) and os.path.exists(p_npy_vseg+'.npy'):
        continue
    #running this for loop can take long
    #so a try-except to prevent stopping somewhere in the middle
    try:
        mra = sitk.ReadImage(file)
        props = nnunetv2_get_props(mra)
        #add a leading axis of size 1 in front of the image array
        mra_inp = np.expand_dims(sitk.GetArrayFromImage(mra),0)
        seg = nnunetv2_predict(mra_inp,props,predictor, return_probabilities=True)

        #seg[0] is written as the nifti segmentation, seg[1] is saved as the npy output
        sitk.WriteImage(np2sitk(seg[0],mra),p_vseg_out)

        np.save(p_npy_vseg,seg[1])
    except Exception as e:
        #only catch regular errors so a manual interrupt still stops the loop
        #and report the failing case instead of silently skipping it
        tqdm.write('Inference failed for {}: {!r}'.format(ID, e))
        continue



In [None]:
#2) Create a folder with all input images similar to the imagesTr (but now imagesTs)
#and run it in a batch at once
from nnunet_utils.run import nnunet_inference_on_dir

#first put all scans in the test folder:
#nnUNet_raw/DatasetID/imagesTs
test_img_dir = os.path.join(p_dir,'imagesTs')
#make sure the destination exists before anything is copied into it
os.makedirs(test_img_dir, exist_ok=True)
#p_data: folder with one subfolder per case (has to be defined before this cell is run)
for ID in os.listdir(p_data):
    pid = os.path.join(p_data,ID)
    p_img = os.path.join(pid,'scan_test.nii.gz')
    #this function copies using shutil (=really fast)
    #NOTE(review): copy_inference_image is not imported anywhere in this notebook
    #--> import it from the nnunet_utils module that defines it before running this loop
    copy_inference_image(p_img, test_img_dir)
    #as an alternative you can read with sitk and write
    #which is slower but still ok in speed

#with the data in the right order
#it is now possible to run the inference commands
model_path = os.path.join(root,'nnUNet_trained_models', datasetID,'nnUNetTrainer__nnUNetPlans__3d_fullres')
seg_test_pred_dir = os.path.join(p_dir,'predictions')
os.makedirs(seg_test_pred_dir, exist_ok=True)

nnunet_inference_on_dir(model_path=model_path, #path to the trained folds
                        dir_input_images=test_img_dir, #where the imagesTs (inference images) are stored
                        dir_output_seg=seg_test_pred_dir, #where you want to store the predictions
                        #nnUnet config, must correspond with model_path: ['2d','3d_fullres','3d_lowres', '3d_cascade_fullres']
                        #NOTE(review): was 'fullres', which is not one of the listed configs --> confirm against nnunet_inference_on_dir
                        resolution='3d_fullres',
                        save_probs=True #if you want predicted probabilities
                        )

In [None]:
#3) For large datasets it can be useful to run inference on multiple GPUs
#Similar to training: create a gpu_dct defining the distribution
#of images across GPUs and run it using a shell script
from nnunet_utils.run import nnunetv2_inference_shell

#input parameters
path_images_in = 'directory/with/inference/images'
path_segs_out = 'directory/to/output/segmentation/folder'
p_model = 'path/to/nnunet/model/nnUNet_trained_models/Datasetxxx_name/nnUNetTrainer__nnUNetPlans__3d_fullres'
conda_env = 'miniconda3/envs/nnunetv2' #contains pip installed nnunetv2
#NOTE: this overwrites the root defined in the first cell
#--> re-run the first cell before going back to any of the cells above
root = 'root/to/nnunet/dir/to/set/path/variables'

#NOTE(review): gpu_distributed_inference is not imported anywhere in this notebook
#--> import it from the nnunet_utils module that defines it before running this cell
gpu_dct = gpu_distributed_inference(images=path_images_in, #input images for inference
                                      num_gpus=1, #int or list defining available gpus
                                      #NOTE(review): was 'fullres_3d'; changed to the config name used for training and in p_model --> confirm
                                      resolutions=['3d_fullres'],
                                      separate_folders=False, #if True GPU batches of images are copied to separate folders
                                      seg_dir=None, #pass path_segs_out to skip certain images (presumably those already segmented -- TODO confirm)
                                    )

job_file = nnunetv2_inference_shell(root=root,
                                    conda_env=conda_env,
                                    gpu_dct=gpu_dct,  # created above with gpu_distributed_inference
                                    path_model=p_model,
                                    dir_output_seg=path_segs_out,
                                    return_probabilities=True,
                                    path_nnunet_utils='path/to/nnunet_utils',
                                    version=2)
print(job_file)
#run the job_file using bash