# Setup

In [None]:
!pip install pydicom -q

In [None]:
# General imports.
import os
from multiprocessing import Pool

import pydicom

import cv2
import pandas as pd
import numpy as np

# Specific imports.
from pydicom.pixel_data_handlers.util import apply_voi_lut

# Converting to PNGs and Extracting Meta DataFrames

In [None]:
# Competition data: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/data.

# Sub-directory of the competition data to convert, and the base name
# (without extension) of the metadata CSV written at the end.
mode = "test"
meta_df_name = "test_meta"

# Input location of the DICOM files.
comp_data_root = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/"

# Output locations for the PNG images and the metadata CSV.
png_image_path_root = "./images/"
meta_df_root = "./"

# Make sure both output directories exist before anything is written.
for _out_dir in (png_image_path_root, meta_df_root):
    os.makedirs(_out_dir, exist_ok=True)

class ME:
    """Plain record describing one DICOM file queued for conversion.

    Attributes are stored exactly as passed in:
        file_path: Path of the DICOM file.
        ImageID: Identifier of the image.
        PatientID: Identifier of the patient.
        mpMRI_type: mpMRI scan type of the image.
    """

    def __init__(self, file_path, ImageID, PatientID, mpMRI_type):
        self.file_path, self.ImageID = file_path, ImageID
        self.PatientID, self.mpMRI_type = PatientID, mpMRI_type

        
def dicom2image(ele):
    """Convert one DICOM file to an 8-bit PNG and return its metadata row.

    Args:
        ele: An ``ME`` record providing ``file_path``, ``ImageID``,
            ``PatientID`` and ``mpMRI_type`` for the file to convert.

    Returns:
        A list ``[dicom_filepath, png_filepath, PatientID, SeriesDescription,
        ImageID, StudyInstanceUID, SeriesInstanceUID]``.

    Raises:
        AssertionError: If the PatientID or SeriesDescription stored in the
            DICOM header differs from the value carried by ``ele``.
    """
    # `dcmread` is the supported reader; `read_file` is a legacy alias that
    # newer pydicom releases no longer provide.
    dcm_file = pydicom.dcmread(ele.file_path)

    PatientID = dcm_file.PatientID
    StudyInstanceUID = dcm_file.StudyInstanceUID
    SeriesInstanceUID = dcm_file.SeriesInstanceUID
    SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.

    assert PatientID == ele.PatientID, "DCM Image patientid and file path patientid do not match!"
    assert SeriesDescription == ele.mpMRI_type, "SeriesDescription and mpMRI scan type do not match!"

    pixels = apply_voi_lut(dcm_file.pixel_array, dcm_file)

    # Flip MONOCHROME1 intensities so both photometric interpretations end up
    # with the same orientation in the PNG.
    if dcm_file.PhotometricInterpretation == "MONOCHROME1":
        pixels = np.amax(pixels) - pixels

    # Rescale to [0, 255]. A constant image has a zero range after the shift;
    # skip the division in that case so it becomes all zeros instead of
    # producing NaN from 0 / 0 before the uint8 cast.
    pixels = pixels - np.min(pixels)
    peak = np.max(pixels)
    if peak > 0:
        pixels = pixels / peak
    pixels = (pixels * 255).astype(np.uint8)

    image_path = os.path.join(png_image_path_root, f"{PatientID}_{SeriesDescription}_{ele.ImageID}.png")
    cv2.imwrite(image_path, pixels)

    return [ele.file_path, image_path, PatientID, SeriesDescription, ele.ImageID, StudyInstanceUID, SeriesInstanceUID]

# Collect one ME record per DICOM file found under the selected `mode`
# directory of the competition data.
images_meta = []
dicom_suffixes = (".dcm", ".dicom")
for root, dirs, files in os.walk(os.path.join(comp_data_root, f"{mode}/")):
    for file in files:
        # Test every file rather than only the first one in the directory, so
        # a stray non-DICOM file is skipped instead of being parsed (or
        # hiding the whole directory).
        if not file.endswith(dicom_suffixes):
            continue

        full_path = os.path.join(root, file)
        ImageID = file.split(".")[0]  # File name up to the first dot.

        # Only header tags are needed here, so stop before the pixel data;
        # the pixels are read later when the file is converted.
        dcm_file = pydicom.dcmread(full_path, stop_before_pixels=True)
        PatientID = dcm_file.PatientID
        SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.

        images_meta.append(ME(full_path, ImageID, PatientID, SeriesDescription))
    
# Convert all files in parallel; every call returns one metadata row.
# The context manager shuts the worker processes down once `map` has returned.
with Pool(16) as p:
    results = p.map(func=dicom2image, iterable=images_meta)

# Build the frame directly from the list of rows so that an empty result still
# yields an empty frame with the expected columns.
meta_df = pd.DataFrame(
    data=results,
    columns=["dicom_filepath", "png_filepath", "PatientID", "SeriesDescription", "ImageID", "StudyInstanceUID", "SeriesInstanceUID"],
)

# Overwrite PatientID with the third-from-last component of the DICOM path
# (the directory two levels above the file).
# NOTE(review): the original author added this because PatientIDs were
# observed to be "turned into ints"; the exact cause is not visible here.
patientids = [x.split("/")[-3] for x in meta_df.dicom_filepath.values]
meta_df["PatientID"] = patientids

meta_df.to_csv(os.path.join(meta_df_root, f"{meta_df_name}.csv"), index=False)

In [None]:
from IPython.display import FileLink, FileLinks
# Link to the CSV at the same path it was written to, so the link stays valid
# if `meta_df_root` or `meta_df_name` is changed.
FileLink(os.path.join(meta_df_root, f"{meta_df_name}.csv"))