In [1]:
!pip install pydicom

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/2.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/2.4 MB[0m [31m16.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-3.0.1


In [2]:
import pydicom
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import shutil
from concurrent.futures import ProcessPoolExecutor


In [12]:
def _write_resized_jpeg(image, output_path, target_resolution, quality=85):
    """Resize ``image`` to ``target_resolution`` and save it as a JPEG.

    Returns the boolean reported by ``cv2.imwrite``; it is False when the
    file could not be written.
    """
    resized_image = cv2.resize(image, target_resolution, interpolation=cv2.INTER_LINEAR)
    return cv2.imwrite(output_path, resized_image, [cv2.IMWRITE_JPEG_QUALITY, quality])


def process_dicom(filepath, target_resolution=(1024, 1024), output_root="/content/processed_patients", input_root=None):
    """Convert one DICOM file into resized JPEG image(s).

    Output goes to ``<output_root>/<directory of filepath relative to
    input_root>/processed_resized``, which is created if missing.

    * 4D pixel arrays are split along axis 0 and written as
      ``<name>_slice_<k>.jpg`` (``k`` starts at 1).
    * 3D pixel arrays are resized and written as a single ``<name>.jpg``.
    * Any other dimensionality is reported and skipped.

    Args:
        filepath: Path of the DICOM file to read.
        target_resolution: Size tuple passed to ``cv2.resize``.
        output_root: Root directory of the mirrored output tree.
        input_root: Directory that ``filepath`` is made relative to. When
            None, the notebook-level ``folder_path`` variable is used.

    Returns:
        None. Every exception is caught and printed so that one bad file
        does not stop a batch run.
    """
    try:
        # Keep the historical behaviour of reading the notebook global when
        # the caller does not say where the input tree starts.
        if input_root is None:
            input_root = folder_path

        relative_path = os.path.relpath(filepath, input_root)
        output_file_path = os.path.join(output_root, os.path.dirname(relative_path), "processed_resized")
        os.makedirs(output_file_path, exist_ok=True)

        filename = os.path.basename(filepath)
        dataset = pydicom.dcmread(filepath)

        if not hasattr(dataset, "pixel_array"):
            print(f"Skipping {filepath}: No pixel data found.")
            return

        image_data = dataset.pixel_array
        if image_data is None or image_data.size == 0:
            print(f"Skipping {filepath}: Empty pixel array.")
            return

        if image_data.ndim == 4:
            # One JPEG per entry along the first axis.
            targets = [
                (image_data[slice_idx], f"{filename}_slice_{slice_idx + 1}.jpg")
                for slice_idx in range(image_data.shape[0])
            ]
        elif image_data.ndim == 3:
            # NOTE(review): the whole 3D array is resized as one image, i.e. it
            # is presumably (height, width, channels) — confirm this is not a
            # (frames, height, width) stack for the files being processed.
            targets = [(image_data, f"{filename}.jpg")]
        else:
            # Previously such files produced no output and no message at all.
            print(f"Skipping {filepath}: unsupported pixel array with {image_data.ndim} dimensions.")
            return

        for image, output_filename in targets:
            output_path = os.path.join(output_file_path, output_filename)
            # cv2.imwrite signals failure through its return value, not an exception.
            if not _write_resized_jpeg(image, output_path, target_resolution):
                print(f"Error processing {filepath}: could not write {output_path}")

    except Exception as e:
        print(f"Error processing {filepath}: {e}")


# Function to split files into chunks
def chunk_list(data, chunk_size):
    """Lazily yield consecutive slices of ``data`` holding at most ``chunk_size`` items each."""
    starts = range(0, len(data), chunk_size)
    yield from (data[begin:begin + chunk_size] for begin in starts)

# Function to run with ProcessPoolExecutor
def run_with_processes(dicom_files, target_resolution, output_root, max_workers=8):
    """Run ``process_dicom`` on every file using a pool of worker processes.

    Args:
        dicom_files: Iterable of DICOM file paths.
        target_resolution: Forwarded to ``process_dicom``.
        output_root: Forwarded to ``process_dicom``.
        max_workers: Number of worker processes (defaults to the previous
            hard-coded value of 8).

    Returns:
        None. An exception raised by an individual task is printed and the
        remaining tasks are still awaited.
    """
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_dicom, file, target_resolution, output_root)
            for file in dicom_files
        ]
        for future in futures:
            try:
                future.result()
            except Exception as e:
                print(f"Error in parallel processing: {e}")

# Function to run with ThreadPoolExecutor (fallback)
def run_with_threads(dicom_files, target_resolution, output_root, max_workers=8):
    """Run ``process_dicom`` on every file using a pool of worker threads.

    Args:
        dicom_files: Iterable of DICOM file paths.
        target_resolution: Forwarded to ``process_dicom``.
        output_root: Forwarded to ``process_dicom``.
        max_workers: Number of worker threads (defaults to the previous
            hard-coded value of 8).

    Returns:
        None. An exception raised by an individual task is printed and the
        remaining tasks are still awaited.
    """
    # Imported here because the notebook's import cell only brings in
    # ProcessPoolExecutor; without this the fallback fails with a NameError.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_dicom, file, target_resolution, output_root)
            for file in dicom_files
        ]
        # Collect every result so a failing task is reported instead of being
        # silently discarded.
        for future in futures:
            try:
                future.result()
            except Exception as e:
                print(f"Error in parallel processing: {e}")

# # Step 1: Copy folder to local storage
# src_folder = "/content/drive/MyDrive/patients"
# dst_folder = "/content/patients"

# print("Copying files to local storage...")
#shutil.copytree(src_folder, dst_folder)
# print("Copy complete!")

# Step 2: Locate the input tree and configure the run
folder_path = "/content/patients"
output_root = "/content/processed_patients"
target_resolution = (1024, 1024)

# Full path of every file under folder_path whose name ends in ".dcm"
dicom_files = [
    os.path.join(root, filename)
    for root, _, files in os.walk(folder_path)
    for filename in files
    if filename.endswith(".dcm")
]

print(f"Total DICOM files found: {len(dicom_files)}")

# Step 3: Convert in parallel; try processes first, then threads if that raises
try:
    print("Starting parallel processing with processes...")
    run_with_processes(dicom_files, target_resolution, output_root)
except Exception as e:
    print(f"Process-based parallelization failed: {e}")
    print("Falling back to threads...")
    run_with_threads(dicom_files, target_resolution, output_root)

# Step 4: Pack the whole output tree into a single zip archive
print("Zipping processed files...")
output_zip_path = "/content/processed_results"
shutil.make_archive(output_zip_path, 'zip', output_root)
print("Zipping complete!")

# Step 5: Move the archive onto Google Drive
print("Saving zip file to Google Drive...")
shutil.move(f"{output_zip_path}.zip", "/content/drive/MyDrive/processed_results.zip")
print("Upload complete! All processing done.")


Total DICOM files found: 4319
Starting parallel processing with processes...
Zipping processed files...
Zipping complete!
Saving zip file to Google Drive...
Upload complete! All processing done.


In [4]:
import os
import cv2
import pydicom

# Target resolution for resizing
target_resolution = (224, 224)

# Walk the annotations tree; every folder that holds .dcm files gets a
# "processed_resized_224" subfolder with one JPEG per slice.
for root, _, files in os.walk("/content/drive/MyDrive/annotations"):
    # Skip folders without DICOM files
    dicom_files = [f for f in files if f.endswith(".dcm")]
    if not dicom_files:
        continue

    output_folder = os.path.join(root, "processed_resized_224")
    os.makedirs(output_folder, exist_ok=True)

    for filename in dicom_files:
        filepath = os.path.join(root, filename)
        # Handle failures per file so one unreadable DICOM does not abort the
        # rest of the traversal.
        try:
            dataset = pydicom.dcmread(filepath)
            image_data = dataset.pixel_array

            # 3D and 4D arrays were handled by two identical branches; both are
            # iterated along axis 0.
            # NOTE(review): this assumes axis 0 indexes slices/frames — confirm
            # no single-frame colour images of shape (height, width, 3) occur.
            if image_data.ndim not in (3, 4):
                print(f"Skipping {filepath}: unsupported pixel array with {image_data.ndim} dimensions.")
                continue

            for slice_idx in range(image_data.shape[0]):
                slice_image = image_data[slice_idx]
                resized_image = cv2.resize(slice_image, target_resolution, interpolation=cv2.INTER_LINEAR)
                output_path = os.path.join(output_folder, f"{filename}_slice_{slice_idx + 1}.jpg")
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(output_path, resized_image, [cv2.IMWRITE_JPEG_QUALITY, 95]):
                    print(f"Error processing {filepath}: could not write {output_path}")
        except Exception as e:
            print(f"Error processing {filepath}: {e}")
