In [2]:
!pip install pydicom

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m242.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os

# Define the HuggingFace repository and local path
ct_filename = "ct.nii.gz"
segmentations_dir = "segmentations"
pancreas_segmentations_filename = os.path.join(segmentations_dir, "pancreas.nii.gz")

data_folder = "./data"
healthy_pancreas_data_path = os.path.join(data_folder, "healthy-pancreas")
unhealthy_pancreas_data_path = os.path.join(data_folder, "pancreatic-tumor")

os.makedirs(healthy_pancreas_data_path, exist_ok=True)
os.makedirs(unhealthy_pancreas_data_path, exist_ok=True)

In [17]:
import os
import nibabel as nib
import pydicom
from pydicom.uid import generate_uid
import numpy as np

# Function to load data and get patient folder names
def Load_Data(data_dir):
    """
    Loads NIfTI files from the specified directory.
    Args:
        data_dir (str): Path to the directory containing NIfTI files.
    Returns:
        list of tuples: List of (nifti_file_path, patient_folder_name)
    """
    nifti_files = []
    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith('ct.nii.gz'):
                nifti_file_path = os.path.join(root, file)
                patient_folder_name = os.path.basename(root)
                nifti_files.append((nifti_file_path, patient_folder_name))
    return nifti_files

# Define the paths to your data directories
healthy_data_dir = healthy_pancreas_data_path
unhealthy_data_dir = unhealthy_pancreas_data_path

# Load NIfTI files using your Load_Data function
healthy_nifti_files = Load_Data(healthy_data_dir)
unhealthy_nifti_files = Load_Data(unhealthy_data_dir)

# Define the output directories
healthy_output_dir = os.path.join(data_folder, 'dicom/healthy')
unhealthy_output_dir = os.path.join(data_folder, 'dicom/unhealthy')

# Create output directories if they don't exist
os.makedirs(healthy_output_dir, exist_ok=True)
os.makedirs(unhealthy_output_dir, exist_ok=True)

import os
import nibabel as nib
import pydicom
from pydicom.uid import generate_uid, PYDICOM_IMPLEMENTATION_UID
import numpy as np

def convert_nifti_to_dicom(nifti_file, output_dir, patient_name='Anonymous', patient_id='123456'):
    import os
    import nibabel as nib
    import pydicom
    from pydicom.uid import generate_uid, PYDICOM_IMPLEMENTATION_UID
    import numpy as np

    # Load NIfTI file
    nifti_image = nib.load(nifti_file)
    image_data = nifti_image.get_fdata()
    affine = nifti_image.affine
    header = nifti_image.header

    # Get the number of slices
    num_slices = image_data.shape[2]

    # Generate UIDs once per NIfTI file
    study_instance_uid = generate_uid()
    series_instance_uid = generate_uid()
    frame_of_reference_uid = generate_uid()

    # Extract pixel spacing and slice thickness
    spacing = header.get_zooms()  # (pixdim[1], pixdim[2], pixdim[3])
    pixel_spacing = [str(spacing[0]), str(spacing[1])]
    slice_thickness = str(spacing[2])

    # Extract direction cosines from the affine matrix
    row_cosines = affine[:3, 0]
    column_cosines = affine[:3, 1]

    # Normalize the direction cosines
    row_cosines /= np.linalg.norm(row_cosines)
    column_cosines /= np.linalg.norm(column_cosines)

    image_orientation_patient = [
        str(row_cosines[0]), str(row_cosines[1]), str(row_cosines[2]),
        str(column_cosines[0]), str(column_cosines[1]), str(column_cosines[2])
    ]

    # Loop over slices
    for i in range(num_slices):
        slice_data = image_data[:, :, i]

        # Create a new DICOM dataset
        ds = pydicom.Dataset()

        # File Meta Information
        file_meta = pydicom.Dataset()
        file_meta.MediaStorageSOPClassUID = pydicom.uid.CTImageStorage
        sop_instance_uid = generate_uid()
        file_meta.MediaStorageSOPInstanceUID = sop_instance_uid
        file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
        file_meta.ImplementationClassUID = PYDICOM_IMPLEMENTATION_UID
        ds.file_meta = file_meta

        # Set the transfer syntax
        ds.is_little_endian = True
        ds.is_implicit_VR = False  # Explicit VR

        # Populate required values
        ds.SOPClassUID = file_meta.MediaStorageSOPClassUID
        ds.SOPInstanceUID = sop_instance_uid

        # Patient Module
        ds.PatientName = patient_name
        ds.PatientID = patient_id

        # General Study Module
        ds.StudyInstanceUID = study_instance_uid
        ds.StudyDate = ''  # Optional
        ds.StudyTime = ''  # Optional
        ds.ReferringPhysicianName = ''  # Optional

        # General Series Module
        ds.Modality = 'CT'
        ds.SeriesInstanceUID = series_instance_uid
        ds.SeriesNumber = 1
        ds.ProtocolName = ''  # Optional

        # Frame of Reference Module
        ds.FrameOfReferenceUID = frame_of_reference_uid

        # General Equipment Module
        ds.Manufacturer = ''  # Optional

        # Image Plane Module
        # Compute ImagePositionPatient for each slice
        voxel_coord = np.array([0, 0, i, 1])  # Homogeneous coordinate
        position = affine @ voxel_coord
        ds.ImagePositionPatient = [str(position[0]), str(position[1]), str(position[2])]

        ds.ImageOrientationPatient = image_orientation_patient
        ds.PixelSpacing = pixel_spacing
        ds.SliceThickness = slice_thickness

        # Image Pixel Module
        ds.SamplesPerPixel = 1
        ds.PhotometricInterpretation = 'MONOCHROME2'
        ds.Rows, ds.Columns = slice_data.shape
        ds.BitsAllocated = 16
        ds.BitsStored = 16
        ds.HighBit = 15
        ds.PixelRepresentation = 1  # 0 for unsigned, 1 for signed data

        # Content Date and Time
        ds.ContentDate = ''  # Optional
        ds.ContentTime = ''  # Optional

        # Instance Module
        ds.InstanceNumber = i + 1

        # Set Pixel Data
        pixel_array = slice_data.astype(np.int16)
        ds.PixelData = pixel_array.tobytes()

        # Save DICOM file
        base_filename = os.path.splitext(os.path.basename(nifti_file))[0]
        filename = os.path.join(
            output_dir,
            f'{base_filename}_slice_{i+1:04d}.dcm'
        )
        ds.save_as(filename, write_like_original=False)

In [7]:
# Example usage
nifti_file = '/path/to/your/ct.nii.gz'
output_dir = '/path/to/output/directory'
patient_name = 'Patient001'
patient_id = '001'

convert_nifti_to_dicom(nifti_file, output_dir, patient_name, patient_id)

'./data/healthy-pancreas'

In [5]:
healthy_nifti_files[0]

('./data/healthy-pancreas/BDMAP_00000682/ct.nii.gz', 'BDMAP_00000682')

In [11]:
healthy_nifti_files[1][1]

'BDMAP_00004480'

In [20]:
for patient_id in healthy_nifti_files:
    patient_id = patient_id[1]
    os.makedirs(os.path.join(healthy_output_dir, patient_id), exist_ok=True)

In [22]:
for patient_id in unhealthy_nifti_files:
    patient_id = patient_id[1]
    os.makedirs(os.path.join(unhealthy_output_dir, patient_id), exist_ok=True)

In [21]:
for patient in healthy_nifti_files:
    nifti_file = patient[0]
    patient_id = patient[1]
    convert_nifti_to_dicom(nifti_file, os.path.join(healthy_output_dir, patient_id), patient_name=patient_id, patient_id=patient_id)
# convert_nifti_to_dicom(healthy_nifti_files[0][0], os.path.join(healthy_output_dir, "BDMAP_00000002"), patient_name='Healthy', patient_id='BDMAP_00000002')

  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)


In [23]:
from tqdm import tqdm

for patient in tqdm(unhealthy_nifti_files):
    nifti_file = patient[0]
    patient_id = patient[1]
    convert_nifti_to_dicom(nifti_file, os.path.join(unhealthy_output_dir, patient_id), patient_name=patient_id, patient_id=patient_id)
# convert_nifti_to_dicom(healthy_nifti_files[0][0], os.path.join(healthy_output_dir, "BDMAP_00000002"), patient_name='Healthy', patient_id='BDMAP_00000002')

  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)
  warn_and_log(msg)


In [8]:
import os

def count_subfolders(directory):
    # List all entries in the directory
    entries = os.listdir(directory)
    
    # Filter out files, only count subdirectories
    subfolders = [entry for entry in entries if os.path.isdir(os.path.join(directory, entry))]
    
    return len(subfolders)

# Example usage
directory_path = healthy_pancreas_data_path
num_subfolders = count_subfolders(directory_path)
print(f"Number of subfolders: {num_subfolders}")

Number of subfolders: 43
