# Create Splits

In [None]:
import os
import shutil
import json
import nibabel as nib
import numpy as np

source_train = 'original_data/UHN-MedImg3D-ML-quiz/train'
source_val = 'original_data/UHN-MedImg3D-ML-quiz/validation'
source_test = 'original_data/UHN-MedImg3D-ML-quiz/test'


# Initialize class mapping dictionary
class_mapping = {}

# Function to assign case IDs and record class labels (no files are moved)
def create_splits(source, class_mapping, case_id=0, labeled=True):
    """Assign sequential case IDs to the images found under ``source``.

    Only files whose name ends in ``0000.nii.gz`` count as cases. Nothing is
    copied, moved, or deleted; the function only numbers the cases and
    records their class labels.

    Args:
        source: Directory to scan. With ``labeled=True`` the images are looked
            up inside the immediate sub-directories of ``source`` and the last
            character of each sub-directory name is parsed as the class ID
            (e.g. ``subtype2`` -> 2). With ``labeled=False`` the images are
            looked up directly inside ``source``.
        class_mapping: Dict updated in place with
            ``"case_XXX_0000.nii.gz" -> class_id`` for every labelled case.
        case_id: First case number to hand out.
        labeled: Selects the per-class sub-directory layout (default) or the
            flat, unlabelled layout.

    Returns:
        The next unused case number, so calls can be chained across datasets.
    """
    if not labeled:
        # Flat layout: there are no labels to record, only cases to count.
        for file in os.listdir(source):
            if file.endswith('0000.nii.gz'):
                case_id += 1
        return case_id

    for subtype in os.listdir(source):
        subtype_path = os.path.join(source, subtype)
        # Skip plain files and directories whose name does not end in a class
        # digit (e.g. ``.ipynb_checkpoints``); int() would fail on those.
        if not os.path.isdir(subtype_path) or not subtype[-1].isdecimal():
            continue
        class_id = int(subtype[-1])
        for file in os.listdir(subtype_path):
            if file.endswith('0000.nii.gz'):
                class_mapping[f"case_{case_id:03d}_0000.nii.gz"] = class_id
                case_id += 1

    return case_id

case_id = create_splits(source_train, class_mapping, 1)
print("\n\n##################\n\n")
case_id = create_splits(source_val, class_mapping, case_id)


In [4]:
import os
import json
import nibabel as nib
import numpy as np

# Define the source directories
source_train = 'original_data/UHN-MedImg3D-ML-quiz/train'
source_val = 'original_data/UHN-MedImg3D-ML-quiz/validation'
source_test = 'original_data/UHN-MedImg3D-ML-quiz/test'

# Define the target directories
target_base = 'original_data'
task_name = 'Task006_PancreasUHN'
target_task_dir = os.path.join(target_base, task_name)
images_tr_dir = os.path.join(target_task_dir, 'imagesTr')
images_ts_dir = os.path.join(target_task_dir, 'imagesTs')
labels_tr_dir = os.path.join(target_task_dir, 'labelsTr')

# Create the target directories if they don't exist
os.makedirs(images_tr_dir, exist_ok=True)
os.makedirs(images_ts_dir, exist_ok=True)
os.makedirs(labels_tr_dir, exist_ok=True)

# Initialize class mapping and file mapping dictionaries
class_mapping = {}
file_mapping = {}

# Function to map source images to renamed case paths and record class labels
def map_files(source, image_dest, label_dest, class_mapping, case_id=0):
    """Map every image under ``source`` to a ``case_XXX`` destination path.

    Nothing is copied or moved. For each file whose name ends in
    ``0000.nii.gz`` the function stores ``source path -> destination path`` in
    the module-level ``file_mapping`` dict and, for labelled data, stores
    ``"case_XXX" -> class_id`` in ``class_mapping``.

    Args:
        source: Directory to scan.
        image_dest: Directory used to build the destination image paths.
        label_dest: Selects the layout. If falsy, ``source`` is treated as a
            flat, unlabelled directory. Otherwise images are looked up in the
            immediate sub-directories of ``source`` and the last character of
            each sub-directory name is parsed as the class ID.
        class_mapping: Dict updated in place for labelled cases.
        case_id: First case number to hand out.

    Returns:
        The next unused case number, so calls can be chained across datasets.
    """
    # NOTE: os.listdir returns entries in arbitrary order, so the assignment
    # of case numbers to individual files depends on the filesystem listing.
    if not label_dest:
        for file in os.listdir(source):
            if file.endswith('0000.nii.gz'):
                src_image_file = os.path.join(source, file)
                file_mapping[src_image_file] = os.path.join(image_dest, f"case_{case_id:03d}")
                case_id += 1
        return case_id

    for subtype in os.listdir(source):
        subtype_path = os.path.join(source, subtype)
        # Skip plain files and directories whose name does not end in a class
        # digit (e.g. ``.ipynb_checkpoints``); int() would fail on those.
        if not os.path.isdir(subtype_path) or not subtype[-1].isdecimal():
            continue
        class_id = int(subtype[-1])
        for file in os.listdir(subtype_path):
            if file.endswith('0000.nii.gz'):
                case_str_image = f"case_{case_id:03d}"
                src_image_file = os.path.join(subtype_path, file)
                file_mapping[src_image_file] = os.path.join(image_dest, case_str_image)
                class_mapping[case_str_image] = class_id
                case_id += 1

    return case_id

# Map training, validation, and test files to case IDs and record class labels
case_id = map_files(source_train, images_tr_dir, labels_tr_dir, class_mapping, 1)
case_id = map_files(source_val, images_tr_dir, labels_tr_dir, class_mapping, case_id)
case_id = map_files(source_test, images_ts_dir, None, class_mapping, case_id)

In [6]:
with open("nnUNet_raw_data_base/nnUNet_raw_data/Task006_PancreasUHN/file_mapping.json", "w") as outfile: 
    json.dump(file_mapping, outfile)

### Import File Mapping

In [1]:
import json

# Define the path to the JSON file
json_file_path = 'nnUNet_raw_data_base/nnUNet_raw_data/Task006_PancreasUHN/file_mapping.json'

# Open the JSON file and load it into a dictionary
with open(json_file_path, 'r') as f:
    file_mapping = json.load(f)

In [2]:
import pandas as pd

# Display DataFrames in full: no row or column truncation, wide output
# lines, and untruncated cell contents.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', None)

df = pd.DataFrame.from_dict(file_mapping, orient='index', columns=['Renamed Path'])

# Rename the index to 'Original Path'
df.index.name = 'Original Path'

# Reset the index to make 'Original Path' a column
df.reset_index(inplace=True)

In [3]:
import csv

# NOTE: class_mapping must already be loaded when this cell runs; otherwise
# it fails with NameError.
# newline='' lets the csv module control line endings itself, as its
# documentation requires; without it, blank rows can appear on platforms
# that translate '\n' on write.
with open('subtype_results.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # One row per class_mapping entry: key, value.
    writer.writerows(class_mapping.items())

NameError: name 'class_mapping' is not defined

### Import Class Mapping

In [37]:
import json

# Define the path to the JSON file
json_file_path = 'nnUNet_raw_data_base/nnUNet_raw_data/Task006_PancreasUHN/class_mapping.json'

# Open the JSON file and load it into a dictionary
with open(json_file_path, 'r') as f:
    class_mapping = json.load(f)

In [5]:
class_mapping = {k.replace('_0000.nii.gz',''):v for k,v in class_mapping.items()}

In [40]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np


In [42]:
y = list(class_mapping.values())
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)    

In [43]:
class_weights

array([1.35211268, 0.79338843, 1.        ])

In [6]:
df['Case ID'] = df['Renamed Path'].apply(lambda x: x.split('/')[-1].split('.')[0]) 
df['Class Label'] = df['Case ID'].map(class_mapping)

In [7]:
df.to_csv('nnUNet_raw_data_base/nnUNet_raw_data/Task006_PancreasUHN/class_mapping.csv')

In [8]:
df[df['Case ID'] == 'case_001']['Original Path'].values[0]

'original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_416_0000.nii.gz'

In [9]:
df[df['Case ID'] == 'case_001']['Class Label'].values[0]

2.0

In [10]:
df_train = df[df['Original Path'].str.contains('train')]
df_test = df[df['Original Path'].str.contains('test')]
df_val = df[df['Original Path'].str.contains('validation')]

In [15]:
df_train

Unnamed: 0,Original Path,Renamed Path,Case ID,Class Label
0,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_416_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_001,case_001,2.0
1,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_040_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_002,case_002,2.0
2,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_493_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_003,case_003,2.0
3,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_321_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_004,case_004,2.0
4,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_002_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_005,case_005,2.0
5,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_501_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_006,case_006,2.0
6,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_387_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_007,case_007,2.0
7,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_004_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_008,case_008,2.0
8,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_051_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_009,case_009,2.0
9,original_data/UHN-MedImg3D-ML-quiz/train/subtype2/quiz_2_058_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_010,case_010,2.0


In [16]:
df_val

Unnamed: 0,Original Path,Renamed Path,Case ID,Class Label
252,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_191_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_253,case_253,2.0
253,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_241_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_254,case_254,2.0
254,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_084_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_255,case_255,2.0
255,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_377_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_256,case_256,2.0
256,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_098_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_257,case_257,2.0
257,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_089_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_258,case_258,2.0
258,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_085_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_259,case_259,2.0
259,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_080_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_260,case_260,2.0
260,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_074_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_261,case_261,2.0
261,original_data/UHN-MedImg3D-ML-quiz/validation/subtype2/quiz_2_088_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTr/case_262,case_262,2.0


In [17]:
df_test

Unnamed: 0,Original Path,Renamed Path,Case ID,Class Label
288,original_data/UHN-MedImg3D-ML-quiz/test/quiz_037_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_289,case_289,
289,original_data/UHN-MedImg3D-ML-quiz/test/quiz_512_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_290,case_290,
290,original_data/UHN-MedImg3D-ML-quiz/test/quiz_045_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_291,case_291,
291,original_data/UHN-MedImg3D-ML-quiz/test/quiz_391_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_292,case_292,
292,original_data/UHN-MedImg3D-ML-quiz/test/quiz_056_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_293,case_293,
293,original_data/UHN-MedImg3D-ML-quiz/test/quiz_361_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_294,case_294,
294,original_data/UHN-MedImg3D-ML-quiz/test/quiz_499_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_295,case_295,
295,original_data/UHN-MedImg3D-ML-quiz/test/quiz_282_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_296,case_296,
296,original_data/UHN-MedImg3D-ML-quiz/test/quiz_373_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_297,case_297,
297,original_data/UHN-MedImg3D-ML-quiz/test/quiz_141_0000.nii.gz,original_data/Task006_PancreasUHN/imagesTs/case_298,case_298,


In [138]:
class_counts = df_train['Class Label'].value_counts()

In [139]:
class_counts

Class Label
1.0    106
2.0     84
0.0     62
Name: count, dtype: int64

In [20]:
c_counts = df_train['Class Label'].value_counts()

In [11]:
import os
import pickle
import numpy as np
from collections import OrderedDict

# Paths to the data
source_train = 'original_data/UHN-MedImg3D-ML-quiz/train'
source_val = 'original_data/UHN-MedImg3D-ML-quiz/validation'


# Function to create splits
def create_splits(source, case_id=0):
    """Generate consecutive ``case_XXX`` IDs for the images under ``source``.

    Every immediate sub-directory of ``source`` is scanned, and each file in
    it whose name ends in ``0000.nii.gz`` receives the next case number,
    starting at ``case_id``.

    Returns:
        A ``(cases, case_id)`` tuple: the list of generated IDs and the next
        unused case number.
    """
    cases = []
    candidate_dirs = (os.path.join(source, entry) for entry in os.listdir(source))
    for directory in candidate_dirs:
        if not os.path.isdir(directory):
            continue
        # The file names themselves are not used, only how many images exist.
        n_images = sum(name.endswith('0000.nii.gz') for name in os.listdir(directory))
        next_id = case_id + n_images
        cases.extend(f"case_{number:03d}" for number in range(case_id, next_id))
        case_id = next_id
    return cases, case_id

# Create training and validation splits
train_cases, case_id = create_splits(source_train, case_id=1)
val_cases, _ = create_splits(source_val, case_id)

### Try Overfitting

In [12]:
train_0 = df_train[df_train['Class Label'] == 0.0]['Case ID'].tolist()
val_0 = df_val[df_val['Class Label'] == 0.0]['Case ID'].tolist()

train_1 = df_train[df_train['Class Label'] == 1.0]['Case ID'].tolist()
val_1 = df_val[df_val['Class Label'] == 1.0]['Case ID'].tolist()

train_2 = df_train[df_train['Class Label'] == 2.0]['Case ID'].tolist()
val_2 = df_val[df_val['Class Label'] == 2.0]['Case ID'].tolist()

### 0. True (Original) Split

In [275]:
splits = []

In [276]:
# Create the splits dictionary
splits = [
    OrderedDict([
        ('train', np.array(train_cases, dtype='<U8')),
        ('val', np.array(val_cases, dtype='<U8'))
    ])
]

### 1. Only Class 1 Split

In [277]:
splits.append(
    OrderedDict([
        ('train', np.array(train_1, dtype='<U8')),
        ('val', np.array(val_1, dtype='<U8'))
    ])
)

### 2. Tiny Subset Split

In [278]:
import random

eq_train_subset_18 = train_0[:6] + train_1[:6] + train_2[:6]
random.shuffle(eq_train_subset_18)

eq_val_subset_18 = val_0[:6] + val_1[:6] + val_2[:6]
random.shuffle(eq_val_subset_18)

In [279]:
for case_id in eq_train_subset_18:
    print(f"{case_id}: {df[df['Case ID'] == case_id]['Class Label'].values[0]}")

case_195: 0.0
case_002: 2.0
case_003: 2.0
case_085: 1.0
case_191: 0.0
case_194: 0.0
case_001: 2.0
case_090: 1.0
case_087: 1.0
case_088: 1.0
case_005: 2.0
case_006: 2.0
case_086: 1.0
case_192: 0.0
case_004: 2.0
case_196: 0.0
case_089: 1.0
case_193: 0.0


In [280]:
for case_id in eq_val_subset_18:
    print(f"{case_id}: {df[df['Case ID'] == case_id]['Class Label'].values[0]}")

case_256: 2.0
case_253: 2.0
case_281: 0.0
case_266: 1.0
case_254: 2.0
case_283: 0.0
case_282: 0.0
case_285: 0.0
case_284: 0.0
case_280: 0.0
case_269: 1.0
case_267: 1.0
case_257: 2.0
case_265: 1.0
case_268: 1.0
case_270: 1.0
case_255: 2.0
case_258: 2.0


In [281]:
splits.append(
    OrderedDict([
        ('train', np.array(eq_train_subset_18, dtype='<U8')),
        ('val', np.array(eq_val_subset_18, dtype='<U8'))
    ])
)

### 3. Atomic Dataset

In [282]:
import random

# One case per class: the smallest possible dataset for a sanity check.
# Shuffle the 3-case lists themselves (not the 18-case lists built earlier).
eq_train_subset_3 = train_0[:1] + train_1[:1] + train_2[:1]
random.shuffle(eq_train_subset_3)

eq_val_subset_3 = val_0[:1] + val_1[:1] + val_2[:1]
random.shuffle(eq_val_subset_3)

In [283]:
for case_id in eq_train_subset_3:
    print(f"{case_id}: {df[df['Case ID'] == case_id]['Class Label'].values[0]}")

case_191: 0.0
case_085: 1.0
case_001: 2.0


In [284]:
for case_id in eq_val_subset_3:
    print(f"{case_id}: {df[df['Case ID'] == case_id]['Class Label'].values[0]}")

case_280: 0.0
case_265: 1.0
case_253: 2.0


In [285]:
splits.append(
    OrderedDict([
        ('train', np.array(eq_train_subset_3, dtype='<U8')),
        ('val', np.array(eq_val_subset_3, dtype='<U8'))
    ])
)

### 4. Overfitting 15

In [286]:
import random

eq_train_subset_15 = train_0[:5] + train_1[:5] + train_2[:5]
random.shuffle(eq_train_subset_15)

In [287]:
# Make train set the same as val set to eval overfitting
splits.append(
    OrderedDict([
        ('train', np.array(eq_train_subset_15, dtype='<U8')),
        ('val', np.array(eq_train_subset_15, dtype='<U8'))
    ])
)

### 5. Duplicate Original (Concurrent Experiments)

In [288]:
splits.append(
    OrderedDict([
        ('train', np.array(train_cases, dtype='<U8')),
        ('val', np.array(val_cases, dtype='<U8'))
    ])
)

### 6. Duplicate Original (Concurrent Experiments)

In [329]:
splits.append(
    OrderedDict([
        ('train', np.array(train_cases, dtype='<U8')),
        ('val', np.array(val_cases, dtype='<U8'))
    ])
)

### Verify Splits

In [330]:
len(splits)

7

In [331]:
splits[0]

OrderedDict([('train',
              array(['case_001', 'case_002', 'case_003', 'case_004', 'case_005',
                     'case_006', 'case_007', 'case_008', 'case_009', 'case_010',
                     'case_011', 'case_012', 'case_013', 'case_014', 'case_015',
                     'case_016', 'case_017', 'case_018', 'case_019', 'case_020',
                     'case_021', 'case_022', 'case_023', 'case_024', 'case_025',
                     'case_026', 'case_027', 'case_028', 'case_029', 'case_030',
                     'case_031', 'case_032', 'case_033', 'case_034', 'case_035',
                     'case_036', 'case_037', 'case_038', 'case_039', 'case_040',
                     'case_041', 'case_042', 'case_043', 'case_044', 'case_045',
                     'case_046', 'case_047', 'case_048', 'case_049', 'case_050',
                     'case_051', 'case_052', 'case_053', 'case_054', 'case_055',
                     'case_056', 'case_057', 'case_058', 'case_059', 'case_060',
     

In [332]:
splits[1]

OrderedDict([('train',
              array(['case_085', 'case_086', 'case_087', 'case_088', 'case_089',
                     'case_090', 'case_091', 'case_092', 'case_093', 'case_094',
                     'case_095', 'case_096', 'case_097', 'case_098', 'case_099',
                     'case_100', 'case_101', 'case_102', 'case_103', 'case_104',
                     'case_105', 'case_106', 'case_107', 'case_108', 'case_109',
                     'case_110', 'case_111', 'case_112', 'case_113', 'case_114',
                     'case_115', 'case_116', 'case_117', 'case_118', 'case_119',
                     'case_120', 'case_121', 'case_122', 'case_123', 'case_124',
                     'case_125', 'case_126', 'case_127', 'case_128', 'case_129',
                     'case_130', 'case_131', 'case_132', 'case_133', 'case_134',
                     'case_135', 'case_136', 'case_137', 'case_138', 'case_139',
                     'case_140', 'case_141', 'case_142', 'case_143', 'case_144',
     

In [333]:
splits[2]

OrderedDict([('train',
              array(['case_195', 'case_002', 'case_003', 'case_085', 'case_191',
                     'case_194', 'case_001', 'case_090', 'case_087', 'case_088',
                     'case_005', 'case_006', 'case_086', 'case_192', 'case_004',
                     'case_196', 'case_089', 'case_193'], dtype='<U8')),
             ('val',
              array(['case_256', 'case_253', 'case_281', 'case_266', 'case_254',
                     'case_283', 'case_282', 'case_285', 'case_284', 'case_280',
                     'case_269', 'case_267', 'case_257', 'case_265', 'case_268',
                     'case_270', 'case_255', 'case_258'], dtype='<U8'))])

In [334]:
splits[3]

OrderedDict([('train',
              array(['case_191', 'case_085', 'case_001'], dtype='<U8')),
             ('val',
              array(['case_280', 'case_265', 'case_253'], dtype='<U8'))])

In [335]:
splits[4]

OrderedDict([('train',
              array(['case_001', 'case_085', 'case_195', 'case_003', 'case_193',
                     'case_088', 'case_194', 'case_089', 'case_086', 'case_192',
                     'case_087', 'case_005', 'case_002', 'case_191', 'case_004'],
                    dtype='<U8')),
             ('val',
              array(['case_001', 'case_085', 'case_195', 'case_003', 'case_193',
                     'case_088', 'case_194', 'case_089', 'case_086', 'case_192',
                     'case_087', 'case_005', 'case_002', 'case_191', 'case_004'],
                    dtype='<U8'))])

In [336]:
splits[5]

OrderedDict([('train',
              array(['case_001', 'case_002', 'case_003', 'case_004', 'case_005',
                     'case_006', 'case_007', 'case_008', 'case_009', 'case_010',
                     'case_011', 'case_012', 'case_013', 'case_014', 'case_015',
                     'case_016', 'case_017', 'case_018', 'case_019', 'case_020',
                     'case_021', 'case_022', 'case_023', 'case_024', 'case_025',
                     'case_026', 'case_027', 'case_028', 'case_029', 'case_030',
                     'case_031', 'case_032', 'case_033', 'case_034', 'case_035',
                     'case_036', 'case_037', 'case_038', 'case_039', 'case_040',
                     'case_041', 'case_042', 'case_043', 'case_044', 'case_045',
                     'case_046', 'case_047', 'case_048', 'case_049', 'case_050',
                     'case_051', 'case_052', 'case_053', 'case_054', 'case_055',
                     'case_056', 'case_057', 'case_058', 'case_059', 'case_060',
     

In [337]:
splits[6]

OrderedDict([('train',
              array(['case_001', 'case_002', 'case_003', 'case_004', 'case_005',
                     'case_006', 'case_007', 'case_008', 'case_009', 'case_010',
                     'case_011', 'case_012', 'case_013', 'case_014', 'case_015',
                     'case_016', 'case_017', 'case_018', 'case_019', 'case_020',
                     'case_021', 'case_022', 'case_023', 'case_024', 'case_025',
                     'case_026', 'case_027', 'case_028', 'case_029', 'case_030',
                     'case_031', 'case_032', 'case_033', 'case_034', 'case_035',
                     'case_036', 'case_037', 'case_038', 'case_039', 'case_040',
                     'case_041', 'case_042', 'case_043', 'case_044', 'case_045',
                     'case_046', 'case_047', 'case_048', 'case_049', 'case_050',
                     'case_051', 'case_052', 'case_053', 'case_054', 'case_055',
                     'case_056', 'case_057', 'case_058', 'case_059', 'case_060',
     

### Save Splits

In [338]:
split_file_path = 'nnUNet_preprocessed/Task006_PancreasUHN/splits_custom_uhn.pkl'

# Save the splits to a pickle file
with open(split_file_path, 'wb') as f:
    pickle.dump(splits, f)

print(f"{len(splits)} splits saved to", split_file_path)

7 splits saved to nnUNet_preprocessed/Task006_PancreasUHN/splits_custom_uhn.pkl


In [339]:
# If running with original nnunetv1
split_file_path = 'nnUNet_preprocessed/Task006_PancreasUHN/splits_final.pkl'

# Save the splits to a pickle file
with open(split_file_path, 'wb') as f:
    pickle.dump(splits, f)

print(f"{len(splits)} splits saved to", split_file_path)

7 splits saved to nnUNet_preprocessed/Task006_PancreasUHN/splits_final.pkl


### Plans
https://github.com/MIC-DKFZ/nnUNet/blob/9cd9d80ab3d9542138422f33154ee58421a92088/documentation/explanation_plans_files.md

In [257]:
import pickle

plan_file_path = 'nnUNet_trained_models/nnUNet/3d_fullres/Task006_PancreasUHN/nnUNetTrainerV2_Custom__nnUNetPlansv2.1/plans.pkl'

with open(plan_file_path, 'rb') as f:
    plans = pickle.load(f)

In [258]:
plans.keys()

dict_keys(['num_stages', 'num_modalities', 'modalities', 'normalization_schemes', 'dataset_properties', 'list_of_npz_files', 'original_spacings', 'original_sizes', 'preprocessed_data_folder', 'num_classes', 'all_classes', 'base_num_features', 'use_mask_for_norm', 'keep_only_largest_region', 'min_region_size_per_class', 'min_size_per_class', 'transpose_forward', 'transpose_backward', 'data_identifier', 'plans_per_stage', 'preprocessor_name', 'conv_per_stage'])

In [259]:
plans['plans_per_stage']

{0: {'batch_size': 8,
  'num_pool_per_axis': [4, 5, 5],
  'patch_size': array([ 64, 128, 192]),
  'median_patient_size_in_voxels': array([ 59, 117, 181]),
  'current_spacing': array([2.        , 0.73242188, 0.73242188]),
  'original_spacing': array([2.        , 0.73242188, 0.73242188]),
  'do_dummy_2D_data_aug': False,
  'pool_op_kernel_sizes': [[1, 2, 2],
   [2, 2, 2],
   [2, 2, 2],
   [2, 2, 2],
   [2, 2, 2]],
  'conv_kernel_sizes': [[1, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3]]}}

In [260]:
plans['plans_per_stage'][0]['batch_size'] = 8

In [261]:
import pickle

plan_file_path = 'nnUNet_trained_models/nnUNet/3d_fullres/Task006_PancreasUHN/nnUNetTrainerV2_Custom__nnUNetPlansv2.1/plans.pkl'

with open(plan_file_path, 'wb') as f:
    pickle.dump(plans, f)

### 3D Plans

In [327]:
import pickle

plan_file_path = 'nnUNet_preprocessed/Task006_PancreasUHN/nnUNetPlansv2.1_plans_3D.pkl'

with open(plan_file_path, 'rb') as f:
    plans_3d = pickle.load(f)

In [328]:
plans_3d['plans_per_stage']

{0: {'batch_size': 6,
  'num_pool_per_axis': [4, 5, 5],
  'patch_size': array([ 64, 128, 192]),
  'median_patient_size_in_voxels': array([ 59, 117, 181]),
  'current_spacing': array([2.        , 0.73242188, 0.73242188]),
  'original_spacing': array([2.        , 0.73242188, 0.73242188]),
  'do_dummy_2D_data_aug': False,
  'pool_op_kernel_sizes': [[1, 2, 2],
   [2, 2, 2],
   [2, 2, 2],
   [2, 2, 2],
   [2, 2, 2]],
  'conv_kernel_sizes': [[1, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3],
   [3, 3, 3]]}}

In [325]:
plans_3d['plans_per_stage'][0]['batch_size'] = 6

In [326]:
with open(plan_file_path, 'wb') as f:
    pickle.dump(plans_3d, f)

### 2D Plans

In [224]:
import pickle

plan_file_path = 'nnUNet_preprocessed/Task006_PancreasUHN/nnUNetPlansv2.1_plans_2D.pkl'

with open(plan_file_path, 'rb') as f:
    plans_2d = pickle.load(f)

In [225]:
plans_2d['plans_per_stage']

{0: {'batch_size': 134,
  'num_pool_per_axis': [5, 5],
  'patch_size': array([128, 192]),
  'median_patient_size_in_voxels': array([ 59, 117, 181]),
  'current_spacing': array([2.        , 0.73242188, 0.73242188]),
  'original_spacing': array([2.        , 0.73242188, 0.73242188]),
  'pool_op_kernel_sizes': [[2, 2], [2, 2], [2, 2], [2, 2], [2, 2]],
  'conv_kernel_sizes': [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]],
  'do_dummy_2D_data_aug': False}}