In [1]:
import sys
import os
sys.path.append('/home/edwardsb/repositories/be-SATGOpenFL/openfl/federated/task')
from fl_setup import main as setup_fl
from fedsim_setup_using_fl_setup import main as setup_fedsim
from nnunet_v1 import train_nnunet

import pickle as pkl
import shutil

import hashlib

# nnU-Net run settings; train_on_task forwards all three to train_nnunet.
network, network_trainer, fold = '3d_fullres', 'nnUNetTrainerV2', '0'

# GPU index used by this session, exported through CUDA_VISIBLE_DEVICES.
cuda_device = '5'
os.environ.update(CUDA_VISIBLE_DEVICES=cuda_device)

def train_on_task(task, continue_training=True, current_epoch=0, without_data_unpacking=False, use_compressed_data=False):
    """Print a banner, then call ``train_nnunet`` with ``epochs=1`` for ``task``.

    The notebook-level ``network``, ``network_trainer`` and ``fold`` settings
    are forwarded together with the arguments below.

    Args:
        task: task name handed to ``train_nnunet``, e.g. ``'Task569_MultPathTest'``.
        continue_training: forwarded unchanged to ``train_nnunet``.
        current_epoch: forwarded unchanged to ``train_nnunet``.
        without_data_unpacking: accepted but not forwarded anywhere, so it
            currently has no effect.
        use_compressed_data: forwarded unchanged to ``train_nnunet``.
    """
    banner = f"###########\nStarting training for task: {task}\n"
    print(banner)

    # Collect the keyword arguments first so the call itself stays short.
    training_args = dict(
        epochs=1,
        current_epoch=current_epoch,
        network=network,
        task=task,
        network_trainer=network_trainer,
        fold=fold,
        continue_training=continue_training,
        use_compressed_data=use_compressed_data,
    )
    train_nnunet(**training_args)
    



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet



In [2]:
def read_pkl(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only point this at files
    you trust.
    """
    with open(path, mode='rb') as pickle_stream:
        loaded = pkl.load(pickle_stream)
    return loaded

In [None]:
# First, checking that the splits file we create does not get overwritten by the training run on 568

In [3]:
def pardir(task_num):
    """Return the nnUNet_preprocessed folder path for ``Task<task_num>_MultPathTest``."""
    preprocessed_root = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed'
    task_folder = 'Task{}_MultPathTest'.format(task_num)
    return preprocessed_root + '/' + task_folder

os.listdir(pardir(568))

['splits_final.pkl',
 'POSTOPP_BACKUP_splits_final.pkl',
 'gt_segmentations',
 'dataset.json',
 'nnUNetPlansv2.1_plans_3D.pkl',
 'dataset_properties.pkl',
 'nnUNetData_plans_v2.1_stage0',
 'nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl',
 'nnUNetPlansv2.1_plans_2D.pkl']

In [4]:
# The backup file was written at the same time, with the same content, as
# splits_final.pkl. I was worried that splits_final.pkl could have been
# overwritten later on, so both paths are set up here for a comparison.
my_codes_split_path, after_train_split_path = (
    os.path.join(pardir(568), split_name)
    for split_name in ('POSTOPP_BACKUP_splits_final.pkl', 'splits_final.pkl')
)

In [5]:
# Load the backup splits first, then the splits file as it stands now.
my_codes_split, after_train_split = (
    read_pkl(split_path)
    for split_path in (my_codes_split_path, after_train_split_path)
)


In [6]:
my_codes_split[0]

OrderedDict([('train',
              array(['BraTS2021_00565_2008.03.26', 'BraTS2021_01277_2008.03.26',
                     'BraTS2021_00512_2008.03.26', 'BraTS2021_00512_2008.12.11'],
                    dtype='<U26')),
             ('val', array(['BraTS2021_00468_2008.03.26'], dtype='<U26'))])

In [7]:
after_train_split[0]

OrderedDict([('train',
              array(['BraTS2021_00565_2008.03.26', 'BraTS2021_01277_2008.03.26',
                     'BraTS2021_00512_2008.03.26', 'BraTS2021_00512_2008.12.11'],
                    dtype='<U26')),
             ('val', array(['BraTS2021_00468_2008.03.26'], dtype='<U26'))])

In [None]:
# YAY, they are the same !!!!

In [None]:
# Now seeing that training with the plan created during data preprocessing of 523 does not work when using 522's 'initial' model
# (i.e. we found data to use for 523 that causes a shape mismatch), and that it does work once our plan is overwritten with the initial model's plan.

In [8]:
# NOTE: Immediately below are the paths the training script will pull from
initial_model_path_568 = '/raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task568_MultPathTest/nnUNetTrainerV2__nnUNetPlans_pretrained_POSTOPP/fold_0/model_initial_checkpoint.model'
# The model info file has the checkpoint's path with '.pkl' appended
initial_model_info_path_568 = initial_model_path_568 + '.pkl'

# POSTOPP plans: the 568 one, and the 569 one that was derived using the 568 P_plan
POSTOPP_plan_path_568, POSTOPP_plan_path_569 = (
    os.path.join(pardir(task_num), 'nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl')
    for task_num in (568, 569)
)

# The rest of the 569 stuff ---
# The original plan produced during independent preprocessing
Orig_plan_path_569 = os.path.join(pardir(569), 'nnUNetPlansv2.1_plans_3D.pkl')
# A placeholder path for holding the POSTOPP plan while we overwrite it
Backup_P_plan_path_569 = os.path.join(pardir(569), 'BACKUP_nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl')




In [9]:
# First let's see that the model architectures do look the same in each of the POSTOPP plans (568 versus 569) - and that it looks different in the 569 plan coming from its own preprocessing

# Read the three plan files being compared: both POSTOPP plans, then 569's original plan.
P_plan_568, P_plan_569, O_plan_569 = (
    read_pkl(plan_path)
    for plan_path in (POSTOPP_plan_path_568, POSTOPP_plan_path_569, Orig_plan_path_569)
)



In [10]:
P_plan_568['plans_per_stage'][0]

{'batch_size': 2,
 'num_pool_per_axis': [5, 5, 4],
 'patch_size': array([128, 160, 112]),
 'median_patient_size_in_voxels': array([137, 171, 130]),
 'current_spacing': array([1., 1., 1.]),
 'original_spacing': array([1., 1., 1.]),
 'do_dummy_2D_data_aug': False,
 'pool_op_kernel_sizes': [[2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 1]],
 'conv_kernel_sizes': [[3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3]]}

In [11]:
P_plan_569['plans_per_stage'][0]

{'batch_size': 2,
 'num_pool_per_axis': [5, 5, 4],
 'patch_size': array([128, 160, 112]),
 'median_patient_size_in_voxels': array([137, 171, 130]),
 'current_spacing': array([1., 1., 1.]),
 'original_spacing': array([1., 1., 1.]),
 'do_dummy_2D_data_aug': False,
 'pool_op_kernel_sizes': [[2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 1]],
 'conv_kernel_sizes': [[3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3]]}

In [None]:
# Yay, they are the same

# Now to see that the Original plan at 569 has a different model architecture

In [12]:
O_plan_569['plans_per_stage'][0]

{'batch_size': 2,
 'num_pool_per_axis': [5, 5, 5],
 'patch_size': array([128, 128, 128]),
 'median_patient_size_in_voxels': array([138, 161, 135]),
 'current_spacing': array([1., 1., 1.]),
 'original_spacing': array([1., 1., 1.]),
 'do_dummy_2D_data_aug': False,
 'pool_op_kernel_sizes': [[2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2]],
 'conv_kernel_sizes': [[3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3]]}

In [None]:
# Yes, note for one that the num_pool_per_axis are different

In [13]:
# Now let's see training fail if we drop in the original plan.
# First make a backup of the POSTOPP plan, but only if no backup exists yet:
# re-running this cell after the overwrite below would otherwise copy the
# already-overwritten file onto the backup and lose the POSTOPP plan.
if not os.path.exists(Backup_P_plan_path_569):
    shutil.copyfile(src=POSTOPP_plan_path_569, dst=Backup_P_plan_path_569)
# Overwrite the POSTOPP plan with the plan from 569's own preprocessing.
# This stays the last expression so the destination path is the cell output.
shutil.copyfile(src=Orig_plan_path_569,dst=POSTOPP_plan_path_569)

'/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task569_MultPathTest/nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl'

In [14]:
# Now try to train.
# This run is expected to fail: with 569's original plan dropped in over the POSTOPP plan,
# loading the checkpoint raises a RuntimeError (size mismatch for tu.0.weight).
train_on_task(task='Task569_MultPathTest')

###########
Starting training for task: Task569_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 5], 'patch_size': array([128, 128, 128]), 'median_patient_size_in_voxels': array([138, 161, 135]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]], 'co

done
2024-06-02 15:25:21.885410: loading checkpoint /raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task569_MultPathTest/nnUNetTrainerV2__nnUNetPlans_pretrained_POSTOPP/fold_0/model_final_checkpoint.model train= True


RuntimeError: Error(s) in loading state_dict for Generic_UNet:
	size mismatch for tu.0.weight: copying a param with shape torch.Size([320, 320, 2, 2, 1]) from checkpoint, the shape in current model is torch.Size([320, 320, 2, 2, 2]).

In [15]:
# Now restore the plan and try training again
shutil.copyfile(src=Backup_P_plan_path_569,dst=POSTOPP_plan_path_569)

'/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task569_MultPathTest/nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl'

In [16]:
# Now try to train
train_on_task(task='Task569_MultPathTest')

###########
Starting training for task: Task569_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([137, 171, 130]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

In [1]:
# Refining test below to make it cleaner

import sys
import os
import shutil
sys.path.append('/home/edwardsb/repositories/be-SATGOpenFL/openfl/federated/task')
# older script
from fedsim_setup import main as setup_fedsim
from fl_setup import main as setup_fl
from nnunet_v1_with_old_plans_name_hardcoded import train_nnunet
from nnunet.paths import default_plans_identifier

# nnU-Net run settings; train_on_task forwards all three to train_nnunet.
network, network_trainer, fold = '3d_fullres', 'nnUNetTrainerV2', '0'

# GPU index used by this session, exported through CUDA_VISIBLE_DEVICES.
cuda_device = '5'
os.environ.update(CUDA_VISIBLE_DEVICES=cuda_device)

def train_on_task(task, continue_training=True, current_epoch=0, without_data_unpacking=False, use_compressed_data=False):
    """Print a banner, then call ``train_nnunet`` with ``epochs=1`` for ``task``.

    The notebook-level ``network``, ``network_trainer`` and ``fold`` settings
    are forwarded together with the arguments below.

    Args:
        task: task name handed to ``train_nnunet``, e.g. ``'Task569_MultPathTest'``.
        continue_training: forwarded unchanged to ``train_nnunet``.
        current_epoch: forwarded unchanged to ``train_nnunet``.
        without_data_unpacking: accepted but not forwarded anywhere, so it
            currently has no effect.
        use_compressed_data: forwarded unchanged to ``train_nnunet``.
    """
    banner = f"###########\nStarting training for task: {task}\n"
    print(banner)

    # Collect the keyword arguments first so the call itself stays short.
    training_args = dict(
        epochs=1,
        current_epoch=current_epoch,
        network=network,
        task=task,
        network_trainer=network_trainer,
        fold=fold,
        continue_training=continue_training,
        use_compressed_data=use_compressed_data,
    )
    train_nnunet(**training_args)



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet



In [2]:
def rm_tree(path):
    """Recursively delete the directory tree at ``path``; do nothing if ``path`` does not exist."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)

In [3]:
# NOTE: The training runs included in the test below take about 6 minutes on spr01 (Titan XP)

# First do what we did before

# Some paths below will be under my folders, but someone else might want to replace this part
test_pardir = '/raid/edwardsb/projects/RANO'

# Prefixes shared by the data folders and by the model folders of both tasks
_data_base = os.path.join(test_pardir, 'BraTS22_pretending_tobe_postopp', 'nnUNet_raw_data_base')
_model_base = os.path.join(test_pardir, 'NNUnetModels', 'nnUNet', '3d_fullres')
_fold_subdir = os.path.join('nnUNetTrainerV2__nnUNetPlansv2.1', 'fold_0')

# --- Task 568 ---
datadir_568 = os.path.join(_data_base, 'nnUNet_preprocessed', 'Task568_MultPathTest')
# next two for deleting only
alt_1_datadir_568 = os.path.join(_data_base, 'nnUNet_raw_data', 'Task568_MultPathTest')
alt_2_datadir_568 = os.path.join(_data_base, 'nnUNet_cropped_data', 'Task568_MultPathTest')
model_pre_dir_568 = os.path.join(_model_base, 'Task568_MultPathTest')
modeldir_568 = os.path.join(model_pre_dir_568, _fold_subdir)

# --- Task 569 ---
datadir_569 = os.path.join(_data_base, 'nnUNet_preprocessed', 'Task569_MultPathTest')
# next two for deleting only
alt_1_datadir_569 = os.path.join(_data_base, 'nnUNet_raw_data', 'Task569_MultPathTest')
alt_2_datadir_569 = os.path.join(_data_base, 'nnUNet_cropped_data', 'Task569_MultPathTest')
model_pre_dir_569 = os.path.join(_model_base, 'Task569_MultPathTest')
# NOTE: spelled model_dir_569 here, unlike modeldir_568 above
model_dir_569 = os.path.join(model_pre_dir_569, _fold_subdir)

# At this point deleted some folders via linux command line

# Remove any existing data and model folders for both tasks (568 first, then 569).
for _stale_dir in (
    datadir_568, alt_1_datadir_568, alt_2_datadir_568, model_pre_dir_568,
    datadir_569, alt_1_datadir_569, alt_2_datadir_569, model_pre_dir_569,
):
    rm_tree(_stale_dir)


# Settings that are identical for both setup_fedsim calls below
_common_setup_args = dict(
    task_name='MultPathTest',
    percent_train=0.8,
    split_logic='by_subject_time_pair',
    network='3d_fullres',
    network_trainer='nnUNetTrainerV2',
    fold=0,
    timestamp_selection='all',
    num_institutions=1,
    cuda_device='5',
    verbose=False,
)

# Set up 568 without supplying an initial model
setup_fedsim(
    postopp_pardirs=['/raid/edwardsb/projects/RANO/test_data_links_random_times_0'],
    first_three_digit_task_num=568,
    init_model_path=None,
    init_model_info_path=None,
    **_common_setup_args
)

# The sources for the init model paths are located in the 568 results folder
init_model_path_568, init_model_info_path_568 = (
    os.path.join(modeldir_568, checkpoint_name)
    for checkpoint_name in ('model_initial_checkpoint.model', 'model_initial_checkpoint.model.pkl')
)

# The plan paths are under the data folders
plan_path_568 = os.path.join(datadir_568, 'nnUNetPlansv2.1_plans_3D.pkl')
plan_path_569 = os.path.join(datadir_569, 'nnUNetPlansv2.1_plans_3D.pkl')

# Now set up 569 as we did in the first fed test, with the old script that passes only the initial model and
# initial model info but only shares the plan across the first collaborator in the call with the others
# (though there are no others here)
setup_fedsim(
    postopp_pardirs=['/raid/edwardsb/projects/RANO/test_data_links_random_times_1'],
    first_three_digit_task_num=569,
    init_model_path=init_model_path_568,
    init_model_info_path=init_model_info_path_568,
    **_common_setup_args
)

# Now try to train and see that we get a shape mismatch. (Note: I need to replace the new default value for p below, since before we used the default plans identifier but now the code uses a POSTOPP-specific string.)
train_on_task(task='Task569_MultPathTest')


######### CREATING SYMLINKS TO POSTOPP DATA FOR COLLABORATOR 0 #########


######### GENERATING DATA JSON FILE FOR COLLABORATOR 0 #########


######### OS CALL TO PREPROCESS DATA FOR COLLABORATOR 0 #########



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

Verifying training set
checking case BraTS2021_00468_2008.03.26
checking case BraTS2021_00512_2008.03.26
checking case BraTS2021_00512_2008.12.11
checking case BraTS2021_00565_2008.03.26
checking case BraTS2021_01277_2008.03.26
Verifying label values
Expected label values are [0, 1, 2, 3, 4]
Labels OK
Dataset OK
BraTS2021_00468_2008.03.26
BraTS2021_00512_2008.03.26
BraTS2021_00512_2008.12.11
BraTS2021_00565_2008.03.26
BraTS2

  global_dc_per_class = [i for i in [2 * i / (2 * i + j + k) for i, j, k in


2024-06-03 11:46:24.502413: lr: 0.009991
2024-06-03 11:46:24.503276: This epoch took 456.986573 s

2024-06-03 11:46:24.578341: saving checkpoint to /raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task568_MultPathTest/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model...
2024-06-03 11:46:25.086878: done, saving took 0.58 seconds

######### CREATING SYMLINKS TO POSTOPP DATA FOR COLLABORATOR 0 #########


######### GENERATING DATA JSON FILE FOR COLLABORATOR 0 #########


######### OS CALL TO PREPROCESS DATA FOR COLLABORATOR 0 #########



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

Verifying training set
checking case BraTS2021_00122_2008.03.26
chec

RuntimeError: Error(s) in loading state_dict for Generic_UNet:
	size mismatch for tu.0.weight: copying a param with shape torch.Size([320, 320, 2, 2, 1]) from checkpoint, the shape in current model is torch.Size([320, 320, 2, 2, 2]).

In [4]:
# Now drop in the plans file that should have been shared and see that training works now

shutil.copyfile(src=plan_path_568,dst=plan_path_569)

'/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task569_MultPathTest/nnUNetPlansv2.1_plans_3D.pkl'

In [5]:
# Now try to train again and this time see it works
train_on_task(task='Task569_MultPathTest')

###########
Starting training for task: Task569_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([137, 171, 130]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co