# Convert a DICOM Dataset to MHA Files

In [1]:
import os
import pydicom
import itk
import SimpleITK as sitk

In [2]:
def get_series_description(generator, series_id):
    """Return the Series Description stored in the first file of a series.

    Args:
        generator: Object exposing ``GetFileNames(series_id)``.
        series_id: Identifier of the series whose files are looked up.

    Returns:
        The metadata entry for DICOM tag (0008,103E) of the first file
        returned by the generator, or ``None`` when the generator returns
        no files for ``series_id``.
    """
    series_files = generator.GetFileNames(series_id)
    if series_files:
        # Only the first file is opened; its metadata dictionary carries
        # the Series Description tag.
        first_image = itk.imread(series_files[0])
        return first_image.GetMetaDataDictionary()['0008|103e']
    return None

In [6]:
def save_series_to_mha(root_dir, patient_folder, series_description, save_file_path):
    """Find a DICOM series by its description and write it as one ``.mha`` file.

    The patient's directory tree is walked and the description of every
    series found is compared (case-insensitively, ignoring surrounding
    whitespace) with ``series_description``. If several series match, the one
    encountered last during the walk is exported.

    Args:
        root_dir: Directory that contains the patient folders.
        patient_folder: Name of the patient's folder inside ``root_dir``.
        series_description: Series Description to look for; also used as the
            output file name (``<series_description>.mha``).
        save_file_path: Directory the ``.mha`` file is written to. It is not
            created by this function.

    Returns:
        ``True`` if a matching series was found and written, ``False`` if no
        series matched.
    """
    patient_dir = os.path.join(root_dir, patient_folder)
    # Normalise the target once instead of on every comparison.
    wanted = series_description.lower().strip()

    # Object that lists the files belonging to each series in a directory.
    names_generator = itk.GDCMSeriesFileNames.New()
    names_generator.SetUseSeriesDetails(True)
    names_generator.SetGlobalWarningDisplay(False)

    path_to_series = None
    series_id = None
    for subdir, _dirs, _files in os.walk(patient_dir):
        try:
            names_generator.SetDirectory(subdir)
            series_uids = names_generator.GetSeriesUIDs()
        except Exception:
            # Best effort: a directory that cannot be scanned is skipped.
            continue

        for uid in series_uids:
            # Handle failures per series so that one series without a
            # readable description does not hide the other series that live
            # in the same directory.
            try:
                description = get_series_description(names_generator, uid)
                is_match = (
                    description is not None
                    and description.lower().strip() == wanted
                )
            except Exception:
                continue
            if is_match:
                path_to_series = subdir
                series_id = uid

    # No series with the requested description exists for this patient.
    if series_id is None:
        return False

    # Read the whole series as a 3-D volume of float pixels.
    ImageType = itk.Image[itk.ctype('float'), 3]

    names_generator.SetDirectory(path_to_series)
    file_names = names_generator.GetFileNames(series_id)
    reader = itk.ImageSeriesReader[ImageType].New()
    reader.SetFileNames(file_names)
    reader.Update()

    output_file = os.path.join(save_file_path, series_description + ".mha")
    itk.imwrite(reader.GetOutput(), output_file)
    return True

### Iterate over all patients and convert the selected DICOM series to .mha files

In [7]:
# Input: directory whose entries are the patient folders to convert.
root_dir = "../../../data/ProstateData/BREST patients/"
# Output: directory that receives one sub-folder of .mha files per patient.
# Alternative output location: mha_images_path = './mha_raw_images'
mha_images_path = "/local_ssd/practical_wise24/prostate_cancer/mha_raw_images"

In [8]:
# Series Descriptions to export for every patient. Each entry is matched
# case-insensitively against the descriptions found in the patient's DICOM
# data and is also used as the name of the resulting .mha file.
series_to_register = [
    'Pelvis_t2_haste_fs_db_tra_p2_320',
    'Pelvis_t2_spc_rst_tra_p2_iso',
    '*MRAC_PET_mlaa_siemens_4BP TK_AC Images',
    '*Pelvis_MRAC_PET_mlaa_siemens_Becken_1BP_15min_LM_AC Images',
    '*Pelvis_MRAC_PET_siemens_Becken_1BP_15min_LM_AC Images',
    '*MRAC_PET_siemens_4BP TK_AC Images',
    'Pelvis_ep2d_diff_tra_ADC',
    'Pelvis_ep2d_diff_tra',
    'Pelvis_t1_tse_cor_p2',
]

for patient_folder in os.listdir(root_dir):
    save_files_path = os.path.join(mha_images_path, patient_folder)
    # exist_ok replaces the separate existence check and is safe to re-run.
    os.makedirs(save_files_path, exist_ok=True)

    # save_series_to_mha returns False when the series is absent; collect
    # those instead of silently dropping the result.
    missing = []
    for description in series_to_register:
        if not save_series_to_mha(root_dir, patient_folder, description, save_files_path):
            missing.append(description)

    if missing:
        print(f"{patient_folder}: series not found: {missing}")


### Check all generated .mha files are readable

In [9]:
# Try to load every exported file and report the ones that cannot be read.
for patient_folder in os.listdir(root_dir):
    files_path = os.path.join(mha_images_path, patient_folder)
    if not os.path.isdir(files_path):
        # No output directory exists for this entry, so there is nothing to
        # verify; skipping avoids a FileNotFoundError from os.listdir.
        continue
    for mha_file_name in os.listdir(files_path):
        file_path = os.path.join(files_path, mha_file_name)
        try:
            sitk.ReadImage(file_path, sitk.sitkFloat32)
        except Exception as e:
            # Keep going so all unreadable files are listed, and include the
            # reason so the failure can be diagnosed.
            print(f"{file_path}: {e}")