# Hippocampus Dataset

In [83]:
def make_if_dont_exist(folder_path, overwrite=False):
    """
    Make sure a folder exists, optionally recreating it from scratch.

    input:
    folder_path : path of the folder to create (missing parent folders are created too)
    overwrite : (default: False) if True and the folder already exists, it is
                deleted together with its contents and created again empty
    """
    # Nothing there yet: create it and report.
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"{folder_path} created!")
        return

    # Already there and the caller asked for a clean folder: wipe and recreate.
    if overwrite:
        print(f"{folder_path} overwritten")
        shutil.rmtree(folder_path)
        os.makedirs(folder_path)
        return

    # Already there and must be kept as it is.
    print(f"{folder_path} exists.")

# Idea: keeping the preprocessed data on faster local storage may speed training up noticeably.
print("Current Working Directory {}".format(os.getcwd()))

# Environment variable name -> folder below the current working directory.
# The first three folders are named exactly like their environment variable.
# (One experiment with this nnUNet_preprocessed location: 1 epoch took 112s.)
path_dict = {
    name: os.path.join(os.getcwd(), name)
    for name in ("nnUNet_raw", "nnUNet_preprocessed", "nnUNet_results")
}
# Kept here only for convenience (not necessary for nnU-Net)!
path_dict["RAW_DATA_PATH"] = os.path.join(os.getcwd(), "original_data")

# Export every path as an environment variable.
os.environ.update(path_dict)

# Read each variable back, report any mismatch, and make sure its folder exists.
for env_var, path in path_dict.items():
    if os.getenv(env_var) != path:
        print(
            "Error:",
            "Environment Variable {} is not set correctly!".format(env_var),
            "Should be {}".format(path),
            "Variable is {}".format(os.getenv(env_var)),
            sep="\n",
        )
    make_if_dont_exist(path, overwrite=False)

print("If No Error Occured Continue Forward. =)")

Current Working Directory /scratch/alif/nnUNet
/scratch/alif/nnUNet/nnUNet_raw exists.
/scratch/alif/nnUNet/nnUNet_preprocessed exists.
/scratch/alif/nnUNet/nnUNet_results exists.
/scratch/alif/nnUNet/original_data exists.
If No Error Occured Continue Forward. =)


In [90]:
# Point the nnU-Net environment variables at the folders under /scratch/alif/nnUNet.
# Disabled alternative for the raw-data base: '/scratch/alif/nnUNet/original_data'
os.environ.update({
    'nnUNet_raw_data_base': '/scratch/alif/nnUNet/nnUNet_raw_data_base',
    'nnUNet_preprocessed': '/scratch/alif/nnUNet/nnUNet_preprocessed',
    'RESULTS_FOLDER': '/scratch/alif/nnUNet/nnUNet_trained_models',
})

In [102]:
# Training and test image folders of the Hippocampus task.
train_dir, test_dir = (
    '/scratch/alif/nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTr',
    '/scratch/alif/nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTs',
)

In [111]:
import os
def remove_suffix(images_dir):
    """
    Strip the `_0000` suffix from the file names in a folder.

    `name_0000.<ext>` is renamed to `name.<ext>`. Only the last `_0000.` in a
    name is removed, so an earlier `_0000.` that belongs to the case name is
    kept. Entries without `_0000.` are not touched.

    input:
    images_dir : folder whose files are renamed in place
    """
    marker = '_0000.'

    for img_file in sorted(os.listdir(images_dir)):
        # rpartition splits on the LAST marker; `found` is empty when it is absent.
        stem, found, extension = img_file.rpartition(marker)
        if not found:
            continue  # nothing to strip, skip the pointless rename-to-self

        new_image_name = stem + '.' + extension
        os.rename(os.path.join(images_dir, img_file), os.path.join(images_dir, new_image_name))
        
# Renames the files inside train_dir on disk; the test_dir call below is left disabled.
# NOTE(review): the "Examples" notes further down show imagesTr files carrying `_0000` —
# confirm that stripping it from train_dir is still intended before re-running this cell.
remove_suffix(train_dir)
# remove_suffix(test_dir)

In [128]:
import os
def add_suffix(images_dir):
    """
    Append the `_0000` suffix to every `.nii.gz` file in a folder.

    `name.nii.gz` is renamed to `name_0000.nii.gz`. Files that already end in
    `_0000.nii.gz` and entries that do not end in `.nii.gz` are left untouched,
    so running this more than once does not stack suffixes.

    input:
    images_dir : folder whose files are renamed in place
    """
    extension = '.nii.gz'
    suffixed_extension = '_0000' + extension

    for img_file in sorted(os.listdir(images_dir)):
        # Skip non-NIfTI entries and files that already carry the suffix.
        if not img_file.endswith(extension) or img_file.endswith(suffixed_extension):
            continue

        # Swap only the trailing extension; a '.nii.gz' elsewhere in the name is kept.
        new_image_name = img_file[:-len(extension)] + suffixed_extension
        os.rename(os.path.join(images_dir, img_file), os.path.join(images_dir, new_image_name))
        
# Renames the files inside test_dir on disk; the train_dir call is left disabled.
# add_suffix(train_dir)
add_suffix(test_dir)

In [118]:
# Delete entries in imagesTs whose names start with "._" (path is relative to the kernel's working directory).
! rm nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTs/._*

rm: cannot remove 'nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTs/._*': No such file or directory


In [119]:
# Count the entries in imagesTr and then in imagesTs (one number is printed per folder).
! ls nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTr | wc -l
! ls nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTs | wc -l

260
130


**Examples** <br>
Only the image files in `imagesTr` and `imagesTs` carry the `_0000` suffix; the label files in `labelsTr` do not.
- nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTr/hippocampus_001_0000.nii.gz
- nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/labelsTr/hippocampus_001.nii.gz
- nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus/imagesTs/hippocampus_196_0000.nii.gz

<br>

**JSON** <br>
The paths listed in `dataset.json` should not contain the `_0000` suffix.

```json
{ 
 "name": "Hippocampus", 
 "description": "Left and right hippocampus segmentation",
"reference": " Vanderbilt University Medical Center",
"licence":"CC-BY-SA 4.0",
"release":"1.0 04/05/2018",
"tensorImageSize": "3D",
"modality": { 
   "0": "MRI"
 }, 
 "labels": { 
   "0": "background", 
   "1": "Anterior", 
   "2": "Posterior"
 }, 
 "numTraining": 260, 
 "numTest": 130,
 "training":[{"image":"./imagesTr/hippocampus_367.nii.gz","label":"./labelsTr/hippocampus_367.nii.gz"},{"image":"./imagesTr/hippocampus_304.nii.gz","label":"./labelsTr/hippocampus_304.nii.gz"},{"image":"./imagesTr/hippocampus_204.nii.gz","label":"./labelsTr/hippocampus_204.nii.gz"},{"image":"./imagesTr/hippocampus_279.nii.gz","label":"./labelsTr/hippocampus_279.nii.gz"},{"image":"./imagesTr/hippocampus_308.nii.gz","label":"./labelsTr/hippocampus_308.nii.gz"},{"image":"./imagesTr/hippocampus_375.nii.gz","label":"./labelsTr/hippocampus_375.nii.gz"},
             ...],
 "test":["./imagesTs/hippocampus_267.nii.gz","./imagesTs/hippocampus_379.nii.gz","./imagesTs/hippocampus_208.nii.gz","./imagesTs/hippocampus_275.nii.gz", ...]
}

```

In [125]:
import os
import json

def _strip_channel_suffix(file_name):
    """Return file_name with a trailing `_0000.nii.gz` shortened to `.nii.gz`."""
    suffixed_extension = '_0000.nii.gz'
    if file_name.endswith(suffixed_extension):
        return file_name[:-len(suffixed_extension)] + '.nii.gz'
    return file_name


def _list_visible_files(folder):
    """Return the sorted entries of folder, skipping hidden names such as `._*` files."""
    return sorted(name for name in os.listdir(folder) if not name.startswith('.'))


def create_dataset_json(base_dir, task_name, num_training, num_test):
    """
    Write `dataset.json` for a task folder from the files found on disk.

    The first `num_training` entries of `imagesTr` and the first `num_test`
    entries of `imagesTs` (both in sorted order) are listed. Every listed path
    has a trailing `_0000.nii.gz` shortened to `.nii.gz`, for training and test
    entries alike. Hidden entries (names starting with '.') are ignored.

    input:
    base_dir : folder that contains the task folder
    task_name : name of the task folder inside base_dir
    num_training : maximum number of training cases to list
    num_test : maximum number of test cases to list

    The label path of a training case is derived from its image file name;
    the `labelsTr` folder itself is not inspected.
    The file is written to `<base_dir>/<task_name>/dataset.json`; an existing
    file is overwritten.
    """
    task_dir = os.path.join(base_dir, task_name)

    # Collect the candidate image files of both splits.
    training_images = _list_visible_files(os.path.join(task_dir, 'imagesTr'))
    test_images = _list_visible_files(os.path.join(task_dir, 'imagesTs'))

    # Never list more cases than there are files.
    num_training = min(num_training, len(training_images))
    num_test = min(num_test, len(test_images))

    training_cases = [_strip_channel_suffix(name) for name in training_images[:num_training]]
    test_cases = [_strip_channel_suffix(name) for name in test_images[:num_test]]

    dataset = {
        "name": "Hippocampus",
        "description": "Left and right hippocampus segmentation",
        "tensorImageSize": "3D",
        "reference": "",
        "licence": "",
        "release": "0.0",
        "modality": {
            "0": "MRI"
        },
        "labels": {
            "0": "background",
            "1": "Anterior",
            "2": "Posterior"
            # Add other structures as needed
        },
        "numTraining": num_training,
        "numTest": num_test,
        # Image and label share the same suffix-free file name.
        "training": [
            {"image": f"./imagesTr/{case}", "label": f"./labelsTr/{case}"}
            for case in training_cases
        ],
        "test": [f"./imagesTs/{case}" for case in test_cases]
    }

    # Save the dataset.json file
    with open(os.path.join(task_dir, 'dataset.json'), 'w') as f:
        json.dump(dataset, f, indent=4)

# Location of the task folder and the number of cases to list per split.
base_dir = '/scratch/alif/nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/'
task_name = 'Task004_Hippocampus'  # task folder name, including the task number
num_training, num_test = 260, 130  # training / test samples to list

# Write dataset.json into the task folder.
create_dataset_json(
    base_dir=base_dir,
    task_name=task_name,
    num_training=num_training,
    num_test=num_test,
)
print('Done')

Done


**WARNING** <br>
The raw data should be located in `nnUNet/nnUNet_raw_data_base/nnUNet_raw_data/Task004_Hippocampus`. <br>
The task ID in the folder name must be written with 3 digits (e.g. `Task004`).

In [129]:
# Run nnU-Net planning and preprocessing for task ID 04, with the dataset integrity check enabled.
! nnUNet_plan_and_preprocess -t 04 --verify_dataset_integrity



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

Verifying training set
checking case hippocampus_367
checking case hippocampus_304
checking case hippocampus_204
checking case hippocampus_279
checking case hippocampus_308
checking case hippocampus_375
checking case hippocampus_216
checking case hippocampus_316
checking case hippocampus_089
checking case hippocampus_189
checking case hippocampus_243
checking case hippocampus_343
checking case hippocampus_220
checking case hippocampus_097
checking case hippocampus_320
checking case hippocampus_197
checking case hippocampus_351
checking case hippocampus_251
checking case hippocampus_185
checking case hippocampus_332
checking case h

In [131]:
# ! nnUNet_plan_and_preprocess -t 04

In [133]:
# Start nnU-Net training: configuration 3d_fullres, trainer class nnUNetTrainerV2, task Task004_Hippocampus.
# NOTE(review): the positional `0` is presumably the fold index; confirm it and the effect of `--npz` against the nnUNet_train help.
! nnUNet_train 3d_fullres nnUNetTrainerV2 Task004_Hippocampus 0 --npz



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  2
modalities:  {0: 'MRI'}
use_mask_for_norm OrderedDict([(0, False)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT')])
stages...

stage:  0
{'batch_size': 9, 'num_pool_per_axis': [3, 3, 3], 'patch_size': array([40, 56, 40]), 'median_patient_size_in_voxels': array([36, 50, 35]), 'current_spaci