In [1]:
import os

# Relative file names used for the CT volume and the pancreas segmentation
ct_filename = "ct.nii.gz"
segmentations_dir = "segmentations"
pancreas_segmentations_filename = os.path.join(segmentations_dir, "pancreas.nii.gz")

# Local dataset layout: one sub-folder per cohort below the data root
data_folder = "./data"
healthy_pancreas_data_path = os.path.join(data_folder, "healthy-pancreas")
unhealthy_pancreas_data_path = os.path.join(data_folder, "pancreatic-tumor")

# Make sure both cohort folders exist (no error if they are already there)
for _cohort_dir in (healthy_pancreas_data_path, unhealthy_pancreas_data_path):
    os.makedirs(_cohort_dir, exist_ok=True)

In [2]:
# Display the folder configured for the healthy-pancreas cohort.
healthy_pancreas_data_path

'./data/healthy-pancreas'

In [3]:
import os
import nibabel as nib
import pydicom
from pydicom.uid import generate_uid
import numpy as np

# Function to load data and get patient folder names
def Load_Data(data_dir):
    """
    Find CT NIfTI volumes below a directory.

    Walks ``data_dir`` recursively and collects every file whose name ends
    with ``ct.nii.gz``. The files themselves are not opened; only their paths
    are gathered.

    Args:
        data_dir (str): Root directory to search.

    Returns:
        list of tuples: ``(nifti_file_path, patient_folder_name)`` pairs sorted
        by path, where ``patient_folder_name`` is the name of the directory
        that directly contains the file. Empty if nothing matches.
    """
    nifti_files = [
        (os.path.join(root, file), os.path.basename(root))
        for root, _dirs, files in os.walk(data_dir)
        for file in files
        if file.endswith('ct.nii.gz')
    ]
    # os.walk yields entries in filesystem order, which can differ between
    # machines; sort so positional indexing into the result is reproducible.
    nifti_files.sort()
    return nifti_files

# Cohort input folders (aliases of the paths configured earlier)
healthy_data_dir = healthy_pancreas_data_path
unhealthy_data_dir = unhealthy_pancreas_data_path

# Collect (nifti_file_path, patient_folder_name) pairs for each cohort
healthy_nifti_files = Load_Data(healthy_data_dir)
unhealthy_nifti_files = Load_Data(unhealthy_data_dir)

# DICOM output goes below <data_folder>/dicom, one sub-tree per cohort
healthy_output_dir = os.path.join(data_folder, 'dicom/healthy')
unhealthy_output_dir = os.path.join(data_folder, 'dicom/unhealthy')

# Create both output trees up front; existing folders are left alone
for _output_dir in (healthy_output_dir, unhealthy_output_dir):
    os.makedirs(_output_dir, exist_ok=True)


In [4]:
# Number of CT volumes Load_Data found for the healthy cohort.
len(healthy_nifti_files)

42

In [18]:
def convert_nifti_to_dicom(nifti_file, output_dir, patient_name='Anonymous', patient_id='123456'):
    """
    Convert a 3-D NIfTI volume into a series of single-slice CT DICOM files.

    One file is written per index along the third voxel axis. Files are named
    ``<stem>_slice_<NNNN>.dcm`` inside ``output_dir``, where ``<stem>`` is the
    input base name with its last extension removed (``ct.nii`` for
    ``ct.nii.gz``). All slices share freshly generated Study, Series and
    Frame-of-Reference UIDs; every slice gets its own SOP Instance UID.

    Geometry tags (ImagePositionPatient, ImageOrientationPatient,
    PixelSpacing, SliceThickness) are derived from the NIfTI affine after
    converting it from NIfTI's RAS+ world coordinates to DICOM's LPS patient
    coordinates. Voxel values are rounded to the nearest integer and clipped
    to the signed 16-bit range before being stored.

    Args:
        nifti_file (str): Path to the NIfTI file to convert.
        output_dir (str): Directory receiving the ``.dcm`` files; created if
            it does not exist.
        patient_name (str): Value written to the PatientName tag.
        patient_id (str): Value written to the PatientID tag.

    Raises:
        ValueError: If the NIfTI image is not three-dimensional.
    """
    import os
    import nibabel as nib
    import numpy as np
    import pydicom
    from pydicom.uid import generate_uid, PYDICOM_IMPLEMENTATION_UID

    def format_ds(value):
        """Format a number as a DICOM Decimal String (at most 16 characters)."""
        return ('%.10g' % value)[:16]

    # Load NIfTI file
    nifti_image = nib.load(nifti_file)
    image_data = nifti_image.get_fdata()
    if image_data.ndim != 3:
        raise ValueError(
            f'Expected a 3-D volume in {nifti_file}, got shape {image_data.shape}'
        )
    affine = np.array(nifti_image.affine, dtype=float)

    # Mirror the voxel grid along its first axis (same pixel layout as before)
    # and update the affine for that SAME axis: the new index 0 sits where the
    # old last index was, and the axis now points the opposite way.
    first_axis_len = image_data.shape[0]
    image_data = np.flip(image_data, axis=0)
    affine[:3, 3] += affine[:3, 0] * (first_axis_len - 1)
    affine[:3, 0] *= -1

    # NIfTI world coordinates are RAS+, DICOM patient coordinates are LPS:
    # negate the world x and y components (rows 0 and 1 of the affine).
    affine[:2, :] *= -1

    # Get the number of slices
    num_slices = image_data.shape[2]

    # Generate UIDs shared by every slice of this series
    study_instance_uid = generate_uid()
    series_instance_uid = generate_uid()
    frame_of_reference_uid = generate_uid()

    # Voxel step sizes along each array axis
    delta_i = np.linalg.norm(affine[:3, 0])  # step along axis 0 (DICOM row index)
    delta_j = np.linalg.norm(affine[:3, 1])  # step along axis 1 (DICOM column index)
    delta_k = np.linalg.norm(affine[:3, 2])  # step between slices

    # Direction cosines. Each slice is stored with axis 0 as the row index and
    # axis 1 as the column index, so moving along a DICOM row follows axis 1.
    row_cosines = affine[:3, 1] / delta_j     # direction of increasing column index
    column_cosines = affine[:3, 0] / delta_i  # direction of increasing row index

    # PixelSpacing is [spacing between adjacent rows, spacing between adjacent columns]
    pixel_spacing = [format_ds(delta_i), format_ds(delta_j)]
    slice_thickness = format_ds(delta_k)

    # ImageOrientationPatient: row direction first, then column direction
    image_orientation_patient = [
        format_ds(row_cosines[0]), format_ds(row_cosines[1]), format_ds(row_cosines[2]),
        format_ds(column_cosines[0]), format_ds(column_cosines[1]), format_ds(column_cosines[2])
    ]

    os.makedirs(output_dir, exist_ok=True)
    # splitext only strips the final extension, so 'ct.nii.gz' becomes 'ct.nii';
    # kept as-is so output file names stay the same as before.
    base_filename = os.path.splitext(os.path.basename(nifti_file))[0]
    int16_limits = np.iinfo(np.int16)

    # Loop over slices
    for i in range(num_slices):
        slice_data = image_data[:, :, i]

        # Create a new DICOM dataset
        ds = pydicom.Dataset()

        # File Meta Information
        file_meta = pydicom.Dataset()
        file_meta.MediaStorageSOPClassUID = pydicom.uid.CTImageStorage
        sop_instance_uid = generate_uid()
        file_meta.MediaStorageSOPInstanceUID = sop_instance_uid
        file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
        file_meta.ImplementationClassUID = PYDICOM_IMPLEMENTATION_UID
        ds.file_meta = file_meta

        # Set the transfer syntax
        # NOTE(review): newer pydicom releases deprecate these two attributes
        # and `write_like_original` -- confirm against the installed version.
        ds.is_little_endian = True
        ds.is_implicit_VR = False

        # Populate required values
        ds.SOPClassUID = file_meta.MediaStorageSOPClassUID
        ds.SOPInstanceUID = sop_instance_uid

        # Patient Module
        ds.PatientName = patient_name
        ds.PatientID = patient_id

        # General Study Module
        ds.StudyInstanceUID = study_instance_uid

        # General Series Module
        ds.Modality = 'CT'
        ds.SeriesInstanceUID = series_instance_uid
        ds.SeriesNumber = 1

        # Frame of Reference Module
        ds.FrameOfReferenceUID = frame_of_reference_uid

        # Image Plane Module: position of the first stored pixel of this slice
        voxel_coord = np.array([0, 0, i, 1])
        position = affine @ voxel_coord
        ds.ImagePositionPatient = [
            format_ds(position[0]), format_ds(position[1]), format_ds(position[2])
        ]
        ds.ImageOrientationPatient = image_orientation_patient
        ds.PixelSpacing = pixel_spacing
        ds.SliceThickness = slice_thickness

        # Image Pixel Module
        ds.SamplesPerPixel = 1
        ds.PhotometricInterpretation = 'MONOCHROME2'
        ds.Rows, ds.Columns = slice_data.shape
        ds.BitsAllocated = 16
        ds.BitsStored = 16
        ds.HighBit = 15
        ds.PixelRepresentation = 1  # 0 for unsigned, 1 for signed data

        # CT Image Module: stored values are written unscaled, so the
        # stored-value -> output-value mapping is the identity.
        ds.RescaleIntercept = '0'
        ds.RescaleSlope = '1'

        # Instance Module
        ds.InstanceNumber = i + 1

        # Set Pixel Data: round instead of truncating toward zero, and clip so
        # out-of-range values saturate rather than wrap around in int16.
        pixel_array = np.clip(
            np.rint(slice_data), int16_limits.min, int16_limits.max
        ).astype(np.int16)
        ds.PixelData = pixel_array.tobytes()

        # Save DICOM file
        filename = os.path.join(output_dir, f'{base_filename}_slice_{i+1:04d}.dcm')
        ds.save_as(filename, write_like_original=False)

In [19]:
# Peek at the first entry: a (nifti_file_path, patient_folder_name) tuple.
healthy_nifti_files[0]

('./data/healthy-pancreas/BDMAP_00000682/ct.nii.gz', 'BDMAP_00000682')

In [20]:
# Patient folder name (second tuple element) of the second healthy entry.
healthy_nifti_files[1][1]

'BDMAP_00004480'

In [21]:
# Print the axis codes nibabel derives from the first healthy volume's affine.
print(nib.aff2axcodes(nib.load(healthy_nifti_files[0][0]).affine))

('R', 'A', 'S')


In [22]:
# Every healthy volume is expected to have ('R', 'A', 'S') axis codes; stop at
# the first one that does not and report which file it was.
for f in healthy_nifti_files:
    axcodes = nib.aff2axcodes(nib.load(f[0]).affine)
    assert axcodes == ('R', 'A', 'S'), f"{f[0]}: unexpected orientation {axcodes}"

In [23]:
import os
import shutil

# Give every healthy patient an empty DICOM output folder, discarding whatever
# an earlier run left behind.
for patient_id in (entry[1] for entry in healthy_nifti_files):
    dir_path = os.path.join(healthy_output_dir, patient_id)

    # Remove a pre-existing folder together with its contents
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)

    # Start over with a fresh, empty folder
    os.makedirs(dir_path)

In [24]:
import os
import shutil

# Give every unhealthy patient an empty DICOM output folder, discarding
# whatever an earlier run left behind.
for patient_id in (entry[1] for entry in unhealthy_nifti_files):
    dir_path = os.path.join(unhealthy_output_dir, patient_id)

    # Remove a pre-existing folder together with its contents
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)

    # Start over with a fresh, empty folder
    os.makedirs(dir_path)

In [25]:
from tqdm import tqdm

# Convert each healthy volume into its own per-patient DICOM folder; the
# patient folder name doubles as both PatientName and PatientID.
for nifti_file, patient_id in tqdm(healthy_nifti_files):
    convert_nifti_to_dicom(
        nifti_file,
        os.path.join(healthy_output_dir, patient_id),
        patient_name=patient_id,
        patient_id=patient_id,
    )

100%|██████████| 42/42 [00:23<00:00,  1.79it/s]


In [None]:
# from tqdm import tqdm

# for patient in tqdm(unhealthy_nifti_files):
#     nifti_file = patient[0]
#     patient_id = patient[1]
#     convert_nifti_to_dicom(nifti_file, os.path.join(unhealthy_output_dir, patient_id), patient_name=patient_id, patient_id=patient_id)
# # convert_nifti_to_dicom(healthy_nifti_files[0][0], os.path.join(healthy_output_dir, "BDMAP_00000002"), patient_name='Healthy', patient_id='BDMAP_00000002')