In [1]:
"""
This notebook is used to test the recent setup script, "fl_setup.py", which is a copy of fedsim_setup.py modified to apply to the non-simulation case and to incorporate a fix for a model shape mismatch.

I will run it with the following command:
python fedsim_setup.py --postopp_pardirs /raid/edwardsb/projects/RANO/test_data_links_random_times_0,/raid/edwardsb/projects/RANO/test_data_links_random_times_1 --first_three_digit_task_num 568 --task_name MultPathTest --num_institutions 2

This will also perform training on col_0 to populate model and model info files.

Then here we see if we can run training from col_1
"""

'\nThis notebook is used to test the recent setup script, "fedsim_setup.py"\n\nI will run it with the following command:\npython fedsim_setup.py --postopp_pardir /raid/edwardsb/projects/RANO/test_data_links --first_three_digit_task_num 555 --init_model_path /raid/edwardsb/projects/RANO/NNUnetInitialModelFilesTest/model_initial_checkpoint.model --init_model_info_path /raid/edwardsb/projects/RANO/NNUnetInitialModelFilesTest/model_initial_checkpoint.model.pkl --task_name FedSimTest --num_institutions 2\n\n\nthen see if we can run training from col_0, then pick up and continue the training from col 1 using the col_0 latest model output from training\n'

In [1]:

import torch
import os
import sys
import shutil
sys.path.append('/home/edwardsb/repositories/be-SATGOpenFL/openfl/federated/tasks')
from nnunet_v1 import train_nnunet

import pickle as pkl




Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet



In [2]:
# Select which GPU this kernel may use (device index given as a string), then report
# whether torch can see a CUDA device.
# NOTE(review): this runs after `import torch` in the earlier cell. Presumably that is fine
# because the variable only has to be set before CUDA is first initialised — confirm.
os.environ['CUDA_VISIBLE_DEVICES']='8'
print(f"CUDA is available? {torch.cuda.is_available()}")

CUDA is available? True


In [None]:
# Let's confirm that the plans files look good for each collaborator

In [3]:
# Paths to the pickled nnU-Net plans files that the following cells load and compare.
# NOTE: the variable names say Task268/Task269, but every path below points into the
# Task568_MultPathTest / Task569_MultPathTest directories.
Task268_plan_fpath = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task568_MultPathTest/nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl'
Task269_plan_fpath = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task569_MultPathTest/nnUNetPlans_pretrained_POSTOPP_plans_3D.pkl'

# The nnUNetPlansv2.1 plans file that sits in the same Task568 directory.
# NOTE(review): judging by the name, presumably the plan whose contents were used to
# overwrite the pretrained plan — confirm against the setup script.
Task268_plan_used_to_overwrite_fpath = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task568_MultPathTest/nnUNetPlansv2.1_plans_3D.pkl'

In [9]:
# Load the pickled plans object found at Task268_plan_fpath.
# SECURITY: pickle.load can execute arbitrary code; only load plans files from a trusted source.
with open(Task268_plan_fpath, 'rb') as _file:
    Task268_plan = pkl.load(_file)

In [10]:
# Load the pickled plans object found at Task269_plan_fpath.
# SECURITY: pickle.load can execute arbitrary code; only load plans files from a trusted source.
with open(Task269_plan_fpath, 'rb') as _file:
    Task269_plan = pkl.load(_file)

In [11]:
# Load the pickled plans object found at Task268_plan_used_to_overwrite_fpath so it can be
# compared with the two plans loaded from the other paths.
# SECURITY: pickle.load can execute arbitrary code; only load plans files from a trusted source.
with open(Task268_plan_used_to_overwrite_fpath, 'rb') as _file:
    Task268_plan_used_to_overwrite = pkl.load(_file)

In [12]:
Task268_plan.keys()

dict_keys(['num_stages', 'num_modalities', 'modalities', 'normalization_schemes', 'dataset_properties', 'list_of_npz_files', 'original_spacings', 'original_sizes', 'preprocessed_data_folder', 'num_classes', 'all_classes', 'base_num_features', 'use_mask_for_norm', 'keep_only_largest_region', 'min_region_size_per_class', 'min_size_per_class', 'transpose_forward', 'transpose_backward', 'data_identifier', 'plans_per_stage', 'preprocessor_name', 'conv_per_stage'])

In [14]:
Task268_plan['dataset_properties'].keys()

dict_keys(['all_sizes', 'all_spacings', 'all_classes', 'modalities', 'intensityproperties', 'size_reductions'])

In [15]:
Task268_plan['dataset_properties']['all_sizes']

[(144, 169, 133),
 (144, 169, 133),
 (136, 171, 130),
 (136, 171, 130),
 (137, 173, 128),
 (141, 176, 141)]

In [16]:
Task269_plan['dataset_properties']['all_sizes']

[(144, 169, 133),
 (144, 169, 133),
 (136, 171, 130),
 (136, 171, 130),
 (137, 173, 128),
 (141, 176, 141)]

In [17]:
Task268_plan_used_to_overwrite['dataset_properties']['all_sizes']

[(144, 169, 133),
 (144, 169, 133),
 (136, 171, 130),
 (136, 171, 130),
 (137, 173, 128),
 (141, 176, 141)]

In [18]:
Task268_plan['dataset_properties']['size_reductions']

OrderedDict([('BraTS2021_00468_2008.03.26', 0.3625322580645161),
             ('BraTS2021_00468_2008.12.11', 0.3625322580645161),
             ('BraTS2021_00512_2008.03.26', 0.3386290322580645),
             ('BraTS2021_00512_2008.12.11', 0.3386290322580645),
             ('BraTS2021_00565_2008.03.26', 0.3397992831541219),
             ('BraTS2021_01277_2008.03.26', 0.39191935483870965)])

In [13]:
Task269_plan['dataset_properties']['size_reductions']

OrderedDict([('BraTS2021_00468_2008.03.26', 0.3625322580645161),
             ('BraTS2021_00468_2008.12.11', 0.3625322580645161),
             ('BraTS2021_00512_2008.03.26', 0.3386290322580645),
             ('BraTS2021_00512_2008.12.11', 0.3386290322580645),
             ('BraTS2021_00565_2008.03.26', 0.3397992831541219),
             ('BraTS2021_01277_2008.03.26', 0.39191935483870965)])

In [14]:
Task268_plan_used_to_overwrite['dataset_properties']['size_reductions']

OrderedDict([('BraTS2021_00468_2008.03.26', 0.3625322580645161),
             ('BraTS2021_00468_2008.12.11', 0.3625322580645161),
             ('BraTS2021_00512_2008.03.26', 0.3386290322580645),
             ('BraTS2021_00512_2008.12.11', 0.3386290322580645),
             ('BraTS2021_00565_2008.03.26', 0.3397992831541219),
             ('BraTS2021_01277_2008.03.26', 0.39191935483870965)])

In [5]:
Task268_plan['plans_per_stage'][0]

{'batch_size': 2,
 'num_pool_per_axis': [5, 5, 4],
 'patch_size': array([128, 160, 112]),
 'median_patient_size_in_voxels': array([139, 171, 132]),
 'current_spacing': array([1., 1., 1.]),
 'original_spacing': array([1., 1., 1.]),
 'do_dummy_2D_data_aug': False,
 'pool_op_kernel_sizes': [[2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 1]],
 'conv_kernel_sizes': [[3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3]]}

In [16]:
Task269_plan['plans_per_stage'][0]

{'batch_size': 2,
 'num_pool_per_axis': [5, 5, 4],
 'patch_size': array([128, 160, 112]),
 'median_patient_size_in_voxels': array([139, 171, 132]),
 'current_spacing': array([1., 1., 1.]),
 'original_spacing': array([1., 1., 1.]),
 'do_dummy_2D_data_aug': False,
 'pool_op_kernel_sizes': [[2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 2],
  [2, 2, 1]],
 'conv_kernel_sizes': [[3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3],
  [3, 3, 3]]}

In [21]:
Task269_plan['data_identifier']

'nnUNetData_plans_v2.1'

In [22]:
Task268_plan['data_identifier']

'nnUNetData_plans_v2.1'

In [4]:
# Maps a three-digit task number to its task name string.
# In the training cells, 568 is used for col_0 and 569 for the second collaborator.
tasks_dict = {568: 'Task568_MultPathTest', 569: 'Task569_MultPathTest'}

In [3]:
network = '3d_fullres'
network_trainer = 'nnUNetTrainerV2'
fold = '0'

def train_on_task(task, continue_training=True, current_epoch=0, without_data_unpacking=False, use_compressed_data=False):
    """Print a start banner for `task`, then call `train_nnunet` with `epochs=1`.

    The network, trainer name and fold come from the module-level `network`,
    `network_trainer` and `fold` variables rather than from arguments.

    Args:
        task: task name; shown in the banner and passed through as `task`.
        continue_training: passed through to `train_nnunet` unchanged.
        current_epoch: passed through to `train_nnunet` unchanged.
        without_data_unpacking: accepted but not used; it is never passed to
            `train_nnunet`.
        use_compressed_data: passed through to `train_nnunet` unchanged.

    Returns:
        None. Whatever `train_nnunet` returns is discarded.
    """
    banner = f"###########\nStarting training for task: {task}\n"
    print(banner)

    # Gather every keyword in one place so the single call below stays readable.
    training_kwargs = dict(
        epochs=1,
        current_epoch=current_epoch,
        network=network,
        task=task,
        network_trainer=network_trainer,
        fold=fold,
        continue_training=continue_training,
        use_compressed_data=use_compressed_data,
    )
    train_nnunet(**training_kwargs)

In [5]:
"""
plan_555_path = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task555_FedSimTest/nnUNetPlansv2.1_plans_3D.pkl'
plan_556_path = '/raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task556_FedSimTest/nnUNetPlansv2.1_plans_3D.pkl'

model_555_paths = ['/raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task555_FedSimTest/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model', 
                   '/raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task555_FedSimTest/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model.pkl']
model_556_paths = ['/raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task556_FedSimTest/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model', 
                   '/raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task556_FedSimTest/nnUNetTrainerV2__nnUNetPlansv2.1/fold_0/model_final_checkpoint.model.pkl']
"""

"""HERE make backup copies of plan files above, and make sure at this point the model files are empty"""

'HERE make backup copies of plan files above, and make sure at this point the model files are empty'

In [6]:
# Train the second collaborator for a whole epoch using the distributed model 

train_on_task(tasks_dict[569], current_epoch=1)

###########
Starting training for task: Task569_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([139, 171, 132]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

  global_dc_per_class = [i for i in [2 * i / (2 * i + j + k) for i, j, k in


2024-05-30 17:22:42.255024: lr: 0.009982
2024-05-30 17:22:42.255784: This epoch took 467.362480 s

2024-05-30 17:22:42.327071: saving checkpoint to /raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task569_MultPathTest/nnUNetTrainerV2__nnUNetPlans_pretrained_POSTOPP/fold_0/model_final_checkpoint.model...
2024-05-30 17:22:42.866731: done, saving took 0.61 seconds


In [5]:
# Train the second collaborator after performing test in notes (NNUnetIntoGANDLF_DevNotes_2)

train_on_task('Task523_MultPathTest', current_epoch=1)

###########
Starting training for task: Task523_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([139, 171, 132]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

OutOfMemoryError: CUDA out of memory. Tried to allocate 280.00 MiB. GPU 0 has a total capacity of 11.90 GiB of which 231.69 MiB is free. Process 2116822 has 6.77 GiB memory in use. Including non-PyTorch memory, this process has 4.89 GiB memory in use. Of the allocated memory 4.59 GiB is allocated by PyTorch, and 118.43 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [4]:
train_on_task('Task522_MultPathTest', current_epoch=1)

###########
Starting training for task: Task522_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([141, 173, 133]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

  global_dc_per_class = [i for i in [2 * i / (2 * i + j + k) for i, j, k in


2024-05-31 10:37:02.795627: lr: 0.009982
2024-05-31 10:37:02.796533: This epoch took 467.149750 s

2024-05-31 10:37:02.873856: saving checkpoint to /raid/edwardsb/projects/RANO/NNUnetModels/nnUNet/3d_fullres/Task522_MultPathTest/nnUNetTrainerV2__nnUNetPlans_pretrained_POSTOPP/fold_0/model_final_checkpoint.model...
2024-05-31 10:37:03.437887: done, saving took 0.64 seconds


In [5]:
train_on_task('Task524_MultPathTest', current_epoch=1)

###########
Starting training for task: Task524_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([141, 173, 133]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

In [11]:
train_on_task('Task523_MultPathTest', current_epoch=1)

###########
Starting training for task: Task523_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([141, 173, 133]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

In [7]:
# training again on col_0 

train_on_task(tasks_dict[568], current_epoch=1)

###########
Starting training for task: Task568_MultPathTest

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 4], 'patch_size': array([128, 160, 112]), 'median_patient_size_in_voxels': array([138, 170, 135]), 'current_spacing': array([1., 1., 1.]), 'original_spacing': array([1., 1., 1.]), 'do_dummy_2D_data_aug': False, 'pool_op_kernel_sizes': [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 1]], 'co

In [8]:
# Left over from previous testing


"""
# Now transfer model checkpoint file over from 555 to 556

for path_555, path_556 in zip(model_555_paths, model_556_paths):
    shutil.copyfile(src=path_555, dst=path_556)
    
"""

In [9]:
# Left over from previous testing

"""
# Now train with 556 stuff
train_on_task(tasks_dict[556], current_epoch=2)
"""

###########
Starting training for task: Task556_FedSimTest

Brandon DEBUG - task: Task556_FedSimTest, preprocessing_output_dir: /raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed

Brandon in get_default_configuration


Brandon - DEBUG - dataset dir: /raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task556_FedSimTest and plans file: /raid/edwardsb/projects/RANO/BraTS22_pretending_tobe_postopp/nnUNet_raw_data_base/nnUNet_preprocessed/Task556_FedSimTest/nnUNetPlansv2.1_plans_3D.pkl

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  4
modalities:  {0: '_0000', 1: '_0001', 2: '_0002', 3: '_0003'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_la

In [5]:
# Scratch check: build an OrderedDict from a plain dict literal and display it.
# The displayed result lists key 'A' before key 'B'.
from collections import OrderedDict
OrderedDict({'A': [4, 4], 'B': [9, 10]})

OrderedDict([('A', [4, 4]), ('B', [9, 10])])

In [3]:
import numpy as np

# Scratch check: np.random.shuffle reorders the list in place. No seed is set in this cell,
# so the displayed order is not expected to repeat between runs.
ex = list(range(1, 10))

np.random.shuffle(ex)
ex

[7, 9, 1, 3, 4, 6, 5, 2, 8]

In [7]:
# Scratch check: a one-element array built from a 5-character string is displayed with dtype '<U5'.
np.array(['HELLO'])

array(['HELLO'], dtype='<U5')