### Convert the images from DICOM to PNG format 

The CNN models in this study do not support the DICOM format, so converting the images to PNG is the first necessary step in data pre-processing. The script iterates through the images and systematically converts them to .png files in a new folder. It also preserves the original folder structure, so that the associated categorical data in the accompanying CSV files remains accessible as intended. As part of this process, each image is individually normalized to stretch its range of pixel intensities, improving contrast and readability.

In [None]:
import os
import cv2
import pydicom

# Input folder - DICOM files
input_folder = r'E:\vindr-mammo-1.0.0\images'
# Output folder - PNG files
output_folder = r'E:\vindr-mammo-1.0.0\png_images'


def normalize_to_uint8(img, invert=False):
    """Return *img* as an 8-bit array suitable for writing to PNG.

    Args:
        img: Pixel data as a NumPy array (as returned by
            ``pydicom``'s ``pixel_array``).
        invert: If true, flip the intensities (``255 - value``) after
            conversion. Used for MONOCHROME1 images, where the lowest
            stored value is meant to be displayed as white.

    Returns:
        A ``uint8`` array with the same shape as *img*.

    Arrays that are already ``uint8`` are passed through unchanged (apart
    from the optional inversion). Any other dtype is min-max stretched to
    the full 0-255 range, so dark high-bit-depth images are normalized too
    instead of being written out in their original dtype.
    """
    if img.dtype == 'uint8':
        result = img
    else:
        # Work in float64 so that `pixels - low` / `high - low` cannot
        # overflow or wrap around in a narrow (possibly signed) integer dtype.
        pixels = img.astype('float64')
        low = pixels.min()
        high = pixels.max()
        if high > low:
            # Normalize images (stretch range of pixel intensities)
            result = ((pixels - low) / (high - low) * 255).astype('uint8')
        else:
            # Constant image: there is no range to stretch. Emit all zeros
            # rather than dividing by zero and casting NaN to uint8.
            result = (pixels * 0).astype('uint8')

    if invert:
        result = 255 - result
    return result


def convert_dicom_tree(input_folder, output_folder):
    """Convert every ``.dicom`` file under *input_folder* to an 8-bit PNG.

    The sub-folder layout below *input_folder* is reproduced below
    *output_folder*; each output file keeps its base name and gets a
    ``.png`` extension.

    Args:
        input_folder: Root directory that is walked for ``.dicom`` files.
        output_folder: Root directory that receives the PNG files.

    Raises:
        OSError: If OpenCV reports that a PNG could not be written.
    """
    for root, _folders, files in os.walk(input_folder):
        dicom_names = [name for name in files if name.endswith('.dicom')]
        if not dicom_names:
            continue

        # Maintain original folder structure; create each output
        # sub-folder once per directory rather than once per file.
        relative_path = os.path.relpath(root, input_folder)
        output_subfolder = os.path.join(output_folder, relative_path)
        os.makedirs(output_subfolder, exist_ok=True)

        for name in dicom_names:
            dicom_file = pydicom.dcmread(os.path.join(root, name))

            # MONOCHROME1 stores "white" as the minimum value; PNG viewers
            # and downstream models assume the opposite, so flip those.
            photometric = getattr(dicom_file, 'PhotometricInterpretation', '')
            img = normalize_to_uint8(
                dicom_file.pixel_array,
                invert=(photometric == 'MONOCHROME1'),
            )

            # Swap only the extension; str.replace would also rewrite any
            # earlier '.dicom' occurrence inside the file name.
            png_name = os.path.splitext(name)[0] + '.png'
            output_path = os.path.join(output_subfolder, png_name)

            # cv2.imwrite signals failure by returning False, not by raising.
            if not cv2.imwrite(output_path, img):  # Save as PNG
                raise OSError('Failed to write PNG: ' + output_path)


# Iterate through sub-folders and files
convert_dicom_tree(input_folder, output_folder)