In [4]:
"""

@author: jakedkim

Simple script to convert the VinDr DICOM files to PNG files.

"""
# 1. environment Setup
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pydicom
import pylibjpeg
import cv2


# The VinDR image input is split across 2 folders - train and test.
# Root locations (both end with a trailing slash so they can be used as prefixes).
image_source = '/srv/store/jkim/peds_cxr/'  # edit this: where the images are located
parent = '/home/jkim/research/peds_cxr/'  # edit this: the parent folder


# Everything below is derived from the two roots above.
vindr_input_path = f'{image_source}original_data/VinDR_PCXR_Peds_Chest_X-Ray_Data/vindr-pcxr-an-open-large-scale-pediatric-chest-x-ray-dataset-for-interpretation-of-common-thoracic-diseases-1.0.0/'
vindr_output_path = f'{image_source}vindr_pcxr/'
metadata_paths = f'{parent}peds_cxr_metadata/raw_metadata/vindr_raw_metadata_jk.csv'

def do_stuff(path):
  """Convert one DICOM image to an intensity-inverted 8-bit PNG.

  The input file is read from ``vindr_input_path + path + '.dicom'`` and the
  result is written to ``vindr_output_path + path + '.png'`` (any text from a
  '.dcm' marker onwards is dropped from the output name). Missing output
  directories are created.

  Args:
    path: Image location relative to the dataset root, without extension
      (the caller builds it as '<Set>/<image_id>').

  Raises:
    IOError: If OpenCV reports that the PNG could not be written.
  """
  # Use image path to determine input and output paths
  output_path = vindr_output_path + path.split('.dcm')[0] + '.png'
  input_path = vindr_input_path + path + '.dicom'

  # Create parent directory structure
  os.makedirs(os.path.dirname(output_path), exist_ok=True)

  # Read DICOM file and extract pixel array
  img = pydicom.dcmread(input_path).pixel_array

  # Min-max scale to the range 0 to 255, then flip the intensities (0 <-> 255).
  # NOTE(review): the flip is applied to every image, regardless of the DICOM
  # PhotometricInterpretation tag - confirm that all source images need it.
  scaled = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)
  img = np.abs(scaled.astype(float) - 255).astype(np.uint8)

  # Save the new image (no resizing is done here). cv2.imwrite signals a
  # failed write through its return value, so check it instead of silently
  # ending up with a missing PNG.
  if not cv2.imwrite(output_path, img):
    raise IOError(f'Failed to write {output_path}')


# Load CSV labels. We only want to convert images that are in the VinDR dataset.
metadata = pd.read_csv(metadata_paths)
metadata['path'] = metadata['Set'] + '/' + metadata['image_id']
paths = metadata['path'].tolist()

# Convert every image only once, in case the metadata lists an image on more
# than one row: duplicates would repeat the work and could let two workers
# write the same PNG at the same time. dict.fromkeys keeps the first-seen order.
unique_paths = list(dict.fromkeys(paths))

# Use one worker process per CPU core to speed things up, otherwise we'd be
# waiting for a month
count = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=count) as pool:
  pool.map(do_stuff, unique_paths)




In [5]:
# Merge the converted train and test folders directly into the vindr_pcxr folder.

import os
import shutil

source_base_path = image_source + 'vindr_pcxr'
source_folders = [os.path.join(source_base_path, 'train'), os.path.join(source_base_path, 'test')]

# Define absolute path to destination folder
destination_folder = image_source + 'vindr_pcxr'

# Create the destination folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# Iterate through the source folders
for source_folder in source_folders:
    # A source folder that no longer exists (e.g. it was merged and deleted by
    # an earlier run of this cell) is skipped, so the cell can be re-run
    # instead of failing in os.listdir().
    if not os.path.isdir(source_folder):
        continue

    # List all files in the source folder
    files = os.listdir(source_folder)

    # Move each file to the destination folder
    for file in files:
        source = os.path.join(source_folder, file)
        destination = os.path.join(destination_folder, file)

        # If a file with the same name already exists in the destination folder,
        # shutil.move() will overwrite it.
        shutil.move(source, destination)

    # Delete the source folder after moving all files
    shutil.rmtree(source_folder)

print(f'Contents of "train" and "test" folders have been merged into "{destination_folder}" folder and the original folders have been deleted.')

Contents of "train" and "test" folders have been merged into "/srv/store/jkim/peds_cxr/vindr_pcxr" folder and the original folders have been deleted.


In [6]:
# Copy every converted VinDR image into the shared aggregate folder.
import os
import shutil

# Where the files come from and where the copies go
source_folder = f'{image_source}vindr_pcxr/'
target_folder = f'{image_source}aggregate'

# Make sure the target folder is there before copying into it
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Walk over the entries of the source folder one by one
for file_name in os.listdir(source_folder):

    # Same entry name on both sides: build the source and target paths together
    source, target = (
        os.path.join(folder, file_name)
        for folder in (source_folder, target_folder)
    )

    # copy2 copies the file contents and also tries to preserve file metadata
    shutil.copy2(source, target)
    