In [2]:
!pip install pydicom

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m242.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
import os

# File names expected inside each patient folder
ct_filename = "ct.nii.gz"
segmentations_dir = "segmentations"
pancreas_segmentations_filename = os.path.join(segmentations_dir, "pancreas.nii.gz")

# Local dataset layout: one sub-folder per cohort under the data folder
data_folder = "./data"
healthy_pancreas_data_path, unhealthy_pancreas_data_path = (
    os.path.join(data_folder, cohort)
    for cohort in ("healthy-pancreas", "pancreatic-tumor")
)

# Make sure both cohort folders exist (no error if they are already there)
for cohort_path in (healthy_pancreas_data_path, unhealthy_pancreas_data_path):
    os.makedirs(cohort_path, exist_ok=True)

In [None]:
import os
import nibabel as nib
import pydicom
from pydicom.uid import generate_uid
import numpy as np

# Function to load data and get patient folder names
def Load_Data(data_dir):
    """
    Find CT NIfTI files under a directory tree.

    Walks `data_dir` recursively and collects every file whose name ends with
    'ct.nii.gz'. Only paths are gathered here; no image data is read.
    Directories and files are visited in sorted order so the returned list is
    the same on every run, regardless of the order the filesystem lists entries.

    Args:
        data_dir (str): Path to the directory to search.
    Returns:
        list of tuples: List of (nifti_file_path, patient_folder_name), where
        patient_folder_name is the name of the directory that directly contains
        the file. The list is empty when nothing matches or `data_dir` does not
        exist (os.walk ignores listing errors by default).
    """
    nifti_files = []
    for root, dirs, files in os.walk(data_dir):
        # Sorting `dirs` in place controls the order in which os.walk descends
        dirs.sort()
        for file in sorted(files):
            if file.endswith('ct.nii.gz'):
                nifti_file_path = os.path.join(root, file)
                patient_folder_name = os.path.basename(root)
                nifti_files.append((nifti_file_path, patient_folder_name))
    return nifti_files

# Input directories, one per cohort
healthy_data_dir = healthy_pancreas_data_path
unhealthy_data_dir = unhealthy_pancreas_data_path

# Collect (CT file path, patient folder name) pairs for each cohort
healthy_nifti_files = Load_Data(healthy_data_dir)
unhealthy_nifti_files = Load_Data(unhealthy_data_dir)

# Root folders that will receive the converted DICOM files
healthy_output_dir = os.path.join(data_folder, 'dicom/healthy')
unhealthy_output_dir = os.path.join(data_folder, 'dicom/unhealthy')

# Create both output roots up front (no error if they already exist)
for output_root in (healthy_output_dir, unhealthy_output_dir):
    os.makedirs(output_root, exist_ok=True)

# Helpers and entry point for converting a NIfTI volume into a DICOM CT series
def _format_ds(value):
    """Format a number as a DICOM Decimal String (DS), which may be at most 16 characters."""
    # 10 significant digits keeps even "-1.234567891e-05" within the 16-character limit
    return f"{float(value):.10g}"


def convert_nifti_to_dicom(nifti_file, output_dir, patient_name='Anonymous', patient_id='123456'):
    """
    Write every slice along the third axis of a 3D NIfTI volume as a CT DICOM file.

    All slices share one Study, Series and Frame of Reference UID so that DICOM
    readers treat the output as a single volume. Geometry tags are derived from
    the NIfTI affine, converted to the DICOM patient coordinate system (LPS),
    and describe the pixel layout that is actually stored.

    Args:
        nifti_file (str): Path to the NIfTI file to convert.
        output_dir (str): Existing directory that receives one .dcm file per slice.
        patient_name (str): Value written to PatientName.
        patient_id (str): Value written to PatientID.
    Returns:
        None. Files are written as '<base>_slice_<NNNN>.dcm', where <base> is the
        input file name with only its last extension removed (so 'ct.nii.gz'
        yields 'ct.nii_slice_0001.dcm').
    Raises:
        ValueError: If the image is not 3D or its affine has a zero-length axis.

    Note:
        Writing uses `enforce_file_format=True`, which requires pydicom >= 3.0.
    """
    # Load NIfTI file
    nifti_image = nib.load(nifti_file)
    image_data = nifti_image.get_fdata()
    if image_data.ndim != 3:
        raise ValueError(
            f"Expected a 3D volume, got shape {image_data.shape} for {nifti_file}"
        )

    num_rows, num_cols, num_slices = image_data.shape

    # nibabel affines map voxel indices to RAS+ millimetres; DICOM patient
    # coordinates are LPS+, so the x and y world axes are negated.
    affine_lps = np.diag([-1.0, -1.0, 1.0, 1.0]) @ nifti_image.affine
    step_i, step_j, step_k = (affine_lps[:3, axis] for axis in range(3))
    spacing_i, spacing_j, spacing_k = (
        float(np.linalg.norm(step)) for step in (step_i, step_j, step_k)
    )
    if min(spacing_i, spacing_j, spacing_k) == 0:
        raise ValueError(f"Affine of {nifti_file} has a zero-length voxel axis")

    # Stored layout (unchanged from the pixel data written below): each slice is
    # flipped along the first voxel axis, so stored row r holds voxel index
    # i = num_rows - 1 - r and stored column c holds voxel index j = c.
    # Hence columns advance along +j and rows advance along -i.
    row_direction = step_j / spacing_j    # direction of increasing column index
    col_direction = -step_i / spacing_i   # direction of increasing row index
    image_orientation = [_format_ds(v) for v in (*row_direction, *col_direction)]
    # PixelSpacing is [spacing between rows, spacing between columns]
    pixel_spacing = [_format_ds(spacing_i), _format_ds(spacing_j)]

    # These UIDs identify the whole volume and must be identical for every slice
    study_instance_uid = generate_uid()
    series_instance_uid = generate_uid()
    frame_of_reference_uid = generate_uid()

    int16_limits = np.iinfo(np.int16)
    base_filename = os.path.splitext(os.path.basename(nifti_file))[0]

    # Loop over slices
    for i in range(num_slices):
        slice_data = np.flipud(image_data[:, :, i])

        # File meta information
        file_meta = pydicom.dataset.FileMetaDataset()
        file_meta.MediaStorageSOPClassUID = pydicom.uid.CTImageStorage
        file_meta.MediaStorageSOPInstanceUID = generate_uid()
        file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian

        # Create a new DICOM dataset
        ds = pydicom.Dataset()
        ds.file_meta = file_meta

        # Identification
        ds.SOPClassUID = file_meta.MediaStorageSOPClassUID
        ds.SOPInstanceUID = file_meta.MediaStorageSOPInstanceUID
        ds.StudyInstanceUID = study_instance_uid
        ds.SeriesInstanceUID = series_instance_uid
        ds.FrameOfReferenceUID = frame_of_reference_uid
        ds.PatientName = patient_name
        ds.PatientID = patient_id
        ds.Modality = 'CT'
        ds.InstanceNumber = i + 1
        ds.ImagesInAcquisition = str(num_slices)

        # Geometry
        ds.Rows, ds.Columns = num_rows, num_cols
        ds.PixelSpacing = pixel_spacing
        ds.SliceThickness = _format_ds(spacing_k)
        ds.ImageOrientationPatient = image_orientation
        # Centre of the first stored pixel, i.e. voxel (num_rows - 1, 0, i)
        first_pixel = affine_lps @ np.array([num_rows - 1, 0, i, 1.0])
        ds.ImagePositionPatient = [_format_ds(v) for v in first_pixel[:3]]

        # Pixel encoding: signed 16-bit, stored values used as-is
        ds.PixelRepresentation = 1  # 0 for unsigned, 1 for signed data
        ds.SamplesPerPixel = 1
        ds.PhotometricInterpretation = 'MONOCHROME2'
        ds.HighBit = 15
        ds.BitsStored = 16
        ds.BitsAllocated = 16
        ds.RescaleIntercept = '0'
        ds.RescaleSlope = '1'

        # Round instead of truncating, clip instead of wrapping around, and
        # force little-endian bytes to match the declared transfer syntax.
        pixel_array = np.clip(
            np.rint(slice_data), int16_limits.min, int16_limits.max
        ).astype('<i2')
        ds.PixelData = pixel_array.tobytes()

        # Save DICOM file
        filename = os.path.join(
            output_dir,
            f'{base_filename}_slice_{i+1:04d}.dcm'
        )
        # enforce_file_format=True writes the 128-byte preamble, the 'DICM'
        # prefix and complete file meta information (pydicom >= 3.0).
        ds.save_as(filename, enforce_file_format=True)

# Convert both cohorts with one shared loop; each patient gets its own
# sub-folder (named after the patient folder) under the cohort's output root.
for cohort_files, cohort_output_dir in (
    (healthy_nifti_files, healthy_output_dir),      # Process healthy files
    (unhealthy_nifti_files, unhealthy_output_dir),  # Process unhealthy files
):
    for nifti_file, patient_folder_name in cohort_files:
        print(f"Converting {nifti_file} to DICOM...")
        # Create a patient-specific output directory
        patient_output_dir = os.path.join(cohort_output_dir, patient_folder_name)
        os.makedirs(patient_output_dir, exist_ok=True)
        # The patient folder name doubles as both PatientName and PatientID
        convert_nifti_to_dicom(
            nifti_file,
            patient_output_dir,
            patient_name=patient_folder_name,
            patient_id=patient_folder_name
        )

print("Conversion complete.")

In [7]:
# Display the healthy-cohort data directory defined in the configuration cell
healthy_pancreas_data_path

'./data/healthy-pancreas'

In [8]:
import os

def count_subfolders(directory):
    """
    Count the immediate sub-directories of a directory.

    Args:
        directory (str): Path of the directory to inspect.
    Returns:
        int: Number of entries directly inside `directory` that are
        directories; regular files are not counted and nested levels are
        not descended into.
    """
    return sum(
        1
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    )

# Example usage: count the folders directly under the healthy-cohort data directory
# (presumably one folder per patient — verify against the dataset layout)
directory_path = healthy_pancreas_data_path
num_subfolders = count_subfolders(directory_path)
print(f"Number of subfolders: {num_subfolders}")

Number of subfolders: 43
