In [None]:
import os
import nibabel as nib
import numpy as np 
from collections import OrderedDict
import json
from pathlib import Path

from utils.helper import convert_nrrd_to_nifti, create_folder, plot_all_slices, plot_histogram, generate_binary_image, adjust_affine_for_spacing_and_origin, save_binary_image_with_adjusted_origin, make_if_dont_exist
from utils.metrics import dice_score_per_class, hausdorff_distance_per_class, ravd_per_class

In [None]:
# Dataset locations. Everything is resolved relative to the directory the
# notebook is started from, so the paths below are absolute.
BASE_PATH = Path('./').resolve()
DATA_PATH = BASE_PATH / 'dataset'

project_name = 'HPC'  # change here for a different task name
task_name = f'Dataset003_{project_name}'

# Root folders named after the nnU-Net settings they correspond to.
# NOTE(review): these are plain Python variables; nothing in this cell exports
# them to os.environ -- confirm that a later cell does so if nnU-Net needs it.
nnUNet_raw = DATA_PATH / 'nnUNet_raw_data'
nnUNet_preprocessed = DATA_PATH / 'nnUNet_preprocessed'
nnUNet_results = DATA_PATH / 'nnUNet_results'

# Per-task folders: images and ground-truth labels for the train and test splits.
TASK_PATH = nnUNet_raw / task_name
TRAINING_DATASET_PATH = TASK_PATH / 'imagesTr'
GT_TRAINING_DATASET_PATH = TASK_PATH / 'labelsTr'
TEST_DATASET_PATH = TASK_PATH / 'imagesTs'
GT_TEST_DATASET_PATH = TASK_PATH / 'labelsTs'

# Destination folder for this task's prediction results.
PREDICTION_RESULTS_PATH = DATA_PATH / 'nnUNet_Prediction_Results' / task_name


In [None]:
# Prepare the working folders through the project helper `make_if_dont_exist`
# (imported from utils.helper; presumably it creates a folder when it is
# missing -- TODO confirm, including the default value of `overwrite`).
# Only the training-image folder is passed an explicit `overwrite=False`;
# every other folder relies on the helper's default.
make_if_dont_exist(TRAINING_DATASET_PATH, overwrite=False)

for _folder in (
    GT_TRAINING_DATASET_PATH,
    TEST_DATASET_PATH,
    GT_TEST_DATASET_PATH,
    PREDICTION_RESULTS_PATH,
    nnUNet_preprocessed,
    nnUNet_results,
):
    make_if_dont_exist(_folder)

In [None]:
from typing import Tuple
import json
from os.path import join

def save_json(data, file_path, sort_keys=False):
    """Serialize ``data`` as JSON into ``file_path``, indented by four spaces.

    Args:
        data: JSON-serializable object to store.
        file_path: destination file; it is opened in text write mode, so any
            existing content is replaced.
        sort_keys: when True, dictionary keys are written in sorted order.
    """
    with open(file_path, mode='w') as out_stream:
        json.dump(data, out_stream, sort_keys=sort_keys, indent=4)

def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          regions_class_order: Tuple[int, ...] = None,
                          dataset_name: str = None, reference: str = None, release: str = None, license: str = None,
                          description: str = None,
                          overwrite_image_reader_writer: str = None, **kwargs):
    """Assemble the content of a ``dataset.json`` file and write it into ``output_folder``.

    The caller's ``channel_names`` and ``labels`` dictionaries are not modified;
    normalized copies are built and written instead.

    Args:
        output_folder: folder that receives ``dataset.json``.
        channel_names: mapping of channel identifier to channel name. Keys that are
            not strings are converted with ``str``.
        labels: mapping of label name to label value. Scalar values are converted
            with ``int``; tuple/list values are converted element-wise to a tuple
            of ints.
        num_training_cases: stored under ``numTraining``.
        file_ending: stored under ``file_ending``.
        regions_class_order: required as soon as any label value is a tuple/list
            with more than one element; stored only when not None.
        dataset_name, reference, release, license, description,
        overwrite_image_reader_writer: optional metadata, each stored only when
            not None (``dataset_name`` under ``name``, ``license`` under ``licence``).
        **kwargs: extra entries merged into the result last, so they override
            any key set above.

    Raises:
        AssertionError: if a label has a multi-element tuple/list value but
            ``regions_class_order`` is None.
    """
    has_regions: bool = any(isinstance(i, (tuple, list)) and len(i) > 1 for i in labels.values())
    if has_regions:
        assert regions_class_order is not None, f"You have defined regions but regions_class_order is not set. " \
                                                f"You need that."

    # channel names need strings as keys; build a copy so the caller's dict is untouched
    normalized_channels = {str(key): name for key, name in channel_names.items()}

    # labels need ints as values; again work on a copy rather than the argument
    normalized_labels = {}
    for label_name, value in labels.items():
        if isinstance(value, (tuple, list)):
            normalized_labels[label_name] = tuple(int(i) for i in value)
        else:
            normalized_labels[label_name] = int(value)

    dataset_json = {
        'channel_names': normalized_channels,  # previously this was called 'modality'. I didn't like this so this is
        # channel_names now. Live with it.
        'labels': normalized_labels,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # Optional metadata is only written when the caller supplied it.
    optional_entries = (
        ('name', dataset_name),
        ('reference', reference),
        ('release', release),
        ('licence', license),
        ('description', description),
        ('overwrite_image_reader_writer', overwrite_image_reader_writer),
        ('regions_class_order', regions_class_order),
    )
    for key, value in optional_entries:
        if value is not None:
            dataset_json[key] = value

    dataset_json.update(kwargs)

    save_json(dataset_json, join(output_folder, 'dataset.json'), sort_keys=False)
    

# Collect the case files that make up the dataset.
# os.listdir returns entries in arbitrary order and includes hidden entries
# (for example .ipynb_checkpoints or .DS_Store), so only visible entries with
# the expected ending are kept and every list is sorted.
file_ending = ".nii.gz"


def _list_case_files(folder, ending):
    """Return the sorted names of the visible entries in ``folder`` that end with ``ending``."""
    return sorted(
        name for name in os.listdir(folder)
        if name.endswith(ending) and not name.startswith('.')
    )


def _strip_channel_suffix(image_name, ending):
    """Drop a trailing ``_0000`` channel marker: ``case_0000.nii.gz`` -> ``case.nii.gz``.

    Names that do not end with ``_0000`` + ``ending`` are returned unchanged
    (slicing at the -1 that ``str.find`` returns on a miss would silently drop
    the last character).
    """
    channel_suffix = "_0000" + ending
    if image_name.endswith(channel_suffix):
        return image_name[:-len(channel_suffix)] + ending
    return image_name


image_files = _list_case_files(TRAINING_DATASET_PATH, file_ending)
label_files = _list_case_files(GT_TRAINING_DATASET_PATH, file_ending)
test_ids = _list_case_files(TEST_DATASET_PATH, file_ending)

# Images and labels are paired by sorted position below, which is only valid
# when both folders hold the same number of cases; zip would otherwise
# truncate silently and numTraining would be wrong.
if len(image_files) != len(label_files):
    raise ValueError(
        f"Found {len(image_files)} training images in {TRAINING_DATASET_PATH} but "
        f"{len(label_files)} labels in {GT_TRAINING_DATASET_PATH}; they must match."
    )

channel_names = {"0": "microscopic"}
num_training_cases = len(image_files)

generate_dataset_json(
    output_folder=TASK_PATH,
    channel_names=channel_names,
    labels={"background": 0, "Hippocampus": 1},
    num_training_cases=num_training_cases,
    file_ending=file_ending,
    dataset_name="Mouse Brain Segmentation",
    description="Mouse Brain Segmentation",
    reference="",
    release="0.0",
    license="",
    # Extra keyword arguments are merged verbatim into dataset.json.
    training=[{'image': f"./imagesTr/{image_file}", 'label': f"./labelsTr/{label_file}"}
              for image_file, label_file in zip(image_files, label_files)],
    test=[f"./imagesTs/{_strip_channel_suffix(i, file_ending)}" for i in test_ids]
)
