In [8]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import json, os
import seaborn as sns   
import pandas as pd
import copy
from ipywidgets import interact, IntSlider, Select, HBox
from utilities import apply_windowing, resize_to_user_resolution
from dicom_tools import DicomToolbox   
from tqdm import tqdm

In [9]:
# --- Notebook configuration --------------------------------------------------
# Root folder of the anonymized DICOM data. The NECK_CTA_DATA_DIR environment
# variable overrides the original workstation path, so the notebook can be run
# on another machine without editing this cell. When the variable is unset the
# value is exactly the path that was hard-coded before.
DATA_DIRECTORY = os.environ.get(
    "NECK_CTA_DATA_DIR",
    "/home/ivazquez/Documents/REPOS/nnUNet/raw_data/neck_cta/anonymized",
)
# Short name of the dataset being prepared.
DATASET_NAME = 'neck_cta'
# Resampling switch and target resolution.
# NOTE(review): resolution is presumably (x, y, z) spacing in mm — confirm.
RESAMPLING = {'apply': True, 'resolution': (1.0, 1.0, 1.0)}
# Data kinds the DICOM toolbox is told to expect (assigned to dt.expected_data).
EXPECTED_DATA = ['ct', 'rtstruct']
# Structure names of interest.
LABELS = ["common carotid lt"]

In [10]:
# Build the DICOM helper on top of the configured data directory.
dt = DicomToolbox(patient_data_directory = DATA_DIRECTORY)
# Tell the toolbox which data kinds to expect (the EXPECTED_DATA list from the config cell).
dt.expected_data = EXPECTED_DATA
# NOTE(review): presumably relaxes a uniform-slice-thickness requirement — confirm in dicom_tools.
dt.uniform_slice_thickness = False

# Patient identifiers reported by the toolbox; the cells below iterate over this.
all_pat_ids = dt.identify_patient_files()

No user inputs were provided. Setting default values.


In [None]:
# for p in all_patient_ids:
#     dt.parse_dicom_files(p, mask_resolution='ct', mask_names_only=True)
#     if contour not in dt.contours:
#         print(f"Patient {p} does not have {contour}")

In [None]:
# Patient IDs skipped by this pass.
# The previous filter was `pat not in ['2' and '21']`. The expression
# `'2' and '21'` evaluates to '21' (`and` returns its last operand when the
# first is truthy), so the list was just ['21'] and patient '2' was never
# excluded. Listing both IDs explicitly restores the intended filter.
# NOTE(review): the reason these two patients are excluded is not recorded here.
EXCLUDED_PAT_IDS = ('2', '21')

for p in [pat for pat in all_pat_ids if pat not in EXCLUDED_PAT_IDS]:
    # Names-only parse of every remaining patient at CT mask resolution.
    dt.parse_dicom_files(p, mask_resolution='ct', mask_names_only=True)

In [None]:
# For every patient, look up each requested structure and parse its contour.
for pat_id in tqdm(all_pat_ids, desc='Processing Patients'):
    # The loop previously iterated over `TARGETS`, which no visible cell defines
    # (NameError on a fresh kernel). LABELS from the configuration cell holds the
    # structure names, so it is used instead.
    for label in LABELS:
        # NOTE(review): the next three calls do not use `label`; they could probably
        # be hoisted to the patient loop if DicomToolbox keeps no per-label state — confirm.
        dt.dicom_files = dt.run_initial_check(pat_id)
        dt.ct = dt.parse_ct_study_files(dt.dicom_files['ct'])
        all_contours = dt.parse_structure_files(
            files=sorted(dt.dicom_files['structures']),
            patient_id=pat_id,
            names_only=True,
        )
        if label not in all_contours:
            # Report the missing structure and move on to the next label.
            print(f"Patient {pat_id} does not have {label}")
            continue
        # `self` does not exist at notebook top level (NameError); the toolbox
        # instance is `dt`, so its file listing is read from there.
        contour = dt.parse_structure_files(
            files=sorted(dt.dicom_files['structures']),
            patient_id=pat_id,
            mask_names=label,
            resolution='ct',
        )
        
        
            
        
    


In [None]:
# Gather, patient by patient, whatever the names-only parse returns.
all_contours = [
    dt.parse_dicom_files(pat, mask_resolution='ct', mask_names_only=True)
    for pat in all_pat_ids
]

# Tally the distinct values found across all patients.
# NOTE(review): this treats all_contours as a flat list of contour names. If
# parse_dicom_files returns a list per patient (or nothing), the input to
# np.unique is not what this cell assumes — confirm the return value.
unique_contours, counts = np.unique(all_contours, return_counts=True)

# One output line per distinct value, followed by how often it occurred.
for contour, count in zip(unique_contours, counts):
    print(f"{contour}, Count: {count}")

In [None]:
import nibabel as nib
import numpy as np

# Placeholder volume of shape 64x64x32 filled with random values; swap in real data.
array_data = np.random.rand(64, 64, 32)

# Voxel spacing in [x, y, z] order.
voxel_spacing = [2.0, 2.0, 2.5]

# 4x4 diagonal affine: the first three diagonal entries carry the x/y/z spacing,
# the last one stays 1 and every off-diagonal entry is 0.
affine = np.diag(voxel_spacing + [1.0])

# Wrap the array and affine in a NIfTI-1 image and write it to disk.
nifti_image = nib.Nifti1Image(array_data, affine=affine)
nib.save(nifti_image, 'my_nifti_file_with_spacing.nii.gz')

In [None]:
from typing import Tuple

from batchgenerators.utilities.file_and_folder_operations import save_json, join


def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          regions_class_order: Tuple[int, ...] = None,
                          dataset_name: str = None, reference: str = None, release: str = None, license: str = None,
                          description: str = None,
                          overwrite_image_reader_writer: str = None, **kwargs):
    """
    Generates a dataset.json file in the output folder.

    The dictionaries passed as ``channel_names`` and ``labels`` are NOT modified;
    normalized copies are written to the file instead.

    output_folder: folder in which 'dataset.json' is written.

    channel_names:
        Channel names must map the index to the name of the channel, example:
        {
            0: 'T1',
            1: 'CT'
        }
        Keys are converted to strings in the written file.
        Note that the channel names may influence the normalization scheme!! Learn more in the documentation.

    labels:
        This will tell nnU-Net what labels to expect. Important: This will also determine whether you use region-based training or not.
        Example regular labels:
        {
            'background': 0,
            'left atrium': 1,
            'some other label': 2
        }
        Example region-based training:
        {
            'background': 0,
            'whole tumor': (1, 2, 3),
            'tumor core': (2, 3),
            'enhancing tumor': 3
        }
        Values are converted to int (or to a tuple of ints for tuple/list values).

        Remember that nnU-Net expects consecutive values for labels! nnU-Net also expects 0 to be background!

    num_training_cases: is used to double check all cases are there!

    file_ending: needed for finding the files correctly. IMPORTANT! File endings must match between images and
    segmentations!

    regions_class_order: required whenever any label value is a tuple/list with more than one entry.

    dataset_name, reference, release, license, description: self-explanatory and not used by nnU-Net. Just for
    completeness and as a reminder that these would be great! They are only written when not None; ``license`` is
    stored under the key 'licence'.

    overwrite_image_reader_writer: If you need a special IO class for your dataset you can derive it from
    BaseReaderWriter, place it into nnunet.imageio and reference it here by name

    kwargs: whatever you put here will be placed in the dataset.json as well (applied last, so it can override
    the entries above)

    Returns None. Raises AssertionError if regions are defined without regions_class_order; int() conversion
    errors propagate for label values that cannot be converted.
    """
    has_regions: bool = any(isinstance(value, (tuple, list)) and len(value) > 1 for value in labels.values())
    if has_regions:
        assert regions_class_order is not None, f"You have defined regions but regions_class_order is not set. " \
                                                f"You need that."

    # channel names need strings as keys; build a new dict so the caller's mapping stays untouched
    channel_names = {str(index): name for index, name in channel_names.items()}

    # labels need ints as values (tuples of ints for regions); again work on a copy
    labels = {
        name: tuple(int(i) for i in value) if isinstance(value, (tuple, list)) else int(value)
        for name, value in labels.items()
    }

    dataset_json = {
        'channel_names': channel_names,  # previously this was called 'modality'
        'labels': labels,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # Optional entries, in the order they are written; None means "leave the key out".
    optional_entries = {
        'name': dataset_name,
        'reference': reference,
        'release': release,
        'licence': license,
        'description': description,
        'overwrite_image_reader_writer': overwrite_image_reader_writer,
        'regions_class_order': regions_class_order,
    }
    dataset_json.update({key: value for key, value in optional_entries.items() if value is not None})

    dataset_json.update(kwargs)

    save_json(dataset_json, join(output_folder, 'dataset.json'), sort_keys=False)

The scheme introduced above results in the following folder structure. Given is an example for the first Dataset of the MSD: BrainTumour. This dataset has four input channels: FLAIR (0000), T1w (0001), T1gd (0002) and T2w (0003). Note that the imagesTs folder is optional and does not have to be present.

```
nnUNet_raw/Dataset001_BrainTumour/
├── dataset.json
├── imagesTr
│   ├── BRATS_001_0000.nii.gz
│   ├── BRATS_001_0001.nii.gz
│   ├── BRATS_001_0002.nii.gz
│   ├── BRATS_001_0003.nii.gz
│   ├── BRATS_002_0000.nii.gz
│   ├── BRATS_002_0001.nii.gz
│   ├── BRATS_002_0002.nii.gz
│   ├── BRATS_002_0003.nii.gz
│   ├── ...
├── imagesTs
│   ├── BRATS_485_0000.nii.gz
│   ├── BRATS_485_0001.nii.gz
│   ├── BRATS_485_0002.nii.gz
│   ├── BRATS_485_0003.nii.gz
│   ├── BRATS_486_0000.nii.gz
│   ├── BRATS_486_0001.nii.gz
│   ├── BRATS_486_0002.nii.gz
│   ├── BRATS_486_0003.nii.gz
│   ├── ...
└── labelsTr
    ├── BRATS_001.nii.gz
    ├── BRATS_002.nii.gz
    ├── ...
```

Example of data arrangement

```
nnUNet_raw/Dataset002_Heart/
├── dataset.json
├── imagesTr
│   ├── la_003_0000.nii.gz
│   ├── la_004_0000.nii.gz
│   ├── ...
├── imagesTs
│   ├── la_001_0000.nii.gz
│   ├── la_002_0000.nii.gz
│   ├── ...
└── labelsTr
    ├── la_003.nii.gz
    ├── la_004.nii.gz
    ├── ...
```


```json
{ 
 "channel_names": {  # formerly modalities
   "0": "T2", 
   "1": "ADC"
 }, 
 "labels": {  # THIS IS DIFFERENT NOW!
   "background": 0,
   "PZ": 1,
   "TZ": 2
 }, 
 "numTraining": 32, 
 "file_ending": ".nii.gz",
 "overwrite_image_reader_writer": "SimpleITKIO"  # optional! If not provided nnU-Net will automatically determine the ReaderWriter
 }
 ```
 The channel_names determine the normalization used by nnU-Net. If a channel is marked as 'CT', then a global normalization based on the intensities in the foreground pixels will be used. If it is something else, per-channel z-scoring will be used. Refer to the methods section in our paper for more details. nnU-Net v2 introduces a few more normalization schemes to choose from and allows you to define your own, see here for more information.