# Setup

In [None]:
!pip install pydicom -q

In [None]:
# General imports.
import os
import pydicom

import cv2
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# Specific imports.
from glob import glob
from multiprocessing import Pool
from pydicom.pixel_data_handlers.util import apply_voi_lut

# Define a LUT Function

In [None]:
# Make a simple linear VOI LUT from the raw (stored) pixel data
def make_lut(pixels, width, center, p_i):
    """Build a linear VOI look-up table mapping stored pixel values to 0-255.

    Args:
        pixels: Array of raw (stored) pixel values. Only its minimum and
            maximum are used.
        width: Window width in stored-pixel units. Must be positive unless
            the image contains no positive values (e.g. a blank slice).
        center: Window center in stored-pixel units.
        p_i: The DICOM PhotometricInterpretation string. "MONOCHROME1"
            inverts the output; any other value mirrors the center instead.

    Returns:
        A list of ``max(pixels) + 1`` ints in the range 0-255, indexed by
        stored pixel value. Entries below ``max(min(pixels), 0)`` stay 0.

    Raises:
        ValueError: If ``width`` is not positive while the image contains
            positive pixel values.
    """
    # Slope and intercept are fixed at 1 and 0 for MR. Read them from the DICOM
    # tags instead when using a modality that requires them (CT, PT etc).
    slope = 1.0
    intercept = 0.0
    min_pixel = int(np.amin(pixels))
    max_pixel = int(np.amax(pixels))

    # One LUT slot per stored value from 0 up to and including the maximum.
    lut = [0] * (max_pixel + 1)

    # MONOCHROME1 output is inverted. For every other interpretation the center
    # is mirrored within the pixel range instead.
    # NOTE(review): the original comment said the center is inverted *for*
    # MONOCHROME1, but the code has always done it in the other branch. The
    # behavior is kept as-is here; confirm which one is intended.
    invert = p_i == "MONOCHROME1"
    if not invert:
        center = (max_pixel - min_pixel) - center

    if width <= 0:
        if max_pixel > 0:
            raise ValueError(f"window width must be positive, got {width}")
        # Blank slice: there is nothing to map, keep the LUT all black.
        return lut

    # Start at 0 at the lowest: a negative index would wrap around and clobber
    # entries at the top of the list. The range includes max_pixel so that the
    # brightest stored value gets a real LUT entry instead of staying 0.
    for pix_value in range(max(min_pixel, 0), max_pixel + 1):
        lut_value = pix_value * slope + intercept
        voi_value = ((lut_value - center) / width + 0.5) * 255.0
        clamped_value = min(max(voi_value, 0), 255)
        if invert:
            lut[pix_value] = int(round(255 - clamped_value))
        else:
            lut[pix_value] = int(round(clamped_value))

    return lut

In [None]:
# Apply the LUT to a pixel array
def apply_lut(pixels_in, lut):
    """Map every stored pixel value through ``lut``.

    Args:
        pixels_in: Integer array of stored pixel values (any shape).
        lut: Sequence indexed by stored pixel value, e.g. the list returned
            by ``make_lut``.

    Returns:
        An integer ndarray with the same shape as ``pixels_in``. Pixels whose
        value is <= 0 are left at 0 and never looked up in ``lut``.

    Raises:
        IndexError: If a positive pixel value is not a valid index into ``lut``.
    """
    pixels = np.asarray(pixels_in)
    table = np.asarray(lut, dtype=int)

    # Vectorized lookup: only strictly positive pixels are translated, the rest
    # keep the zero they were initialized with.
    pixels_out = np.zeros(pixels.shape, dtype=int)
    positive = pixels > 0
    pixels_out[positive] = table[pixels[positive]]
    return pixels_out

# Converting to PNGs and Extracting Meta DataFrames

In [None]:
# Competition data: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/data.

# Dataset split label; it is used to name the metadata CSV.
mode = "train"
meta_df_name = f"{mode}_meta"

# Input location of the competition data and output locations for PNGs / CSV.
comp_data_root = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/"
png_image_path_root = "./images/"
meta_df_root = "./"

# Make sure both output directories exist before anything is written to them.
for _output_dir in (png_image_path_root, meta_df_root):
    os.makedirs(_output_dir, exist_ok=True)

class ME:
    """Plain record describing one DICOM slice queued for conversion.

    Attributes are stored exactly as passed in: the DICOM file path, the image
    id, the patient id and the mpMRI scan type.
    """

    def __init__(self, file_path, ImageID, PatientID, mpMRI_type):
        self.file_path, self.ImageID = file_path, ImageID
        self.PatientID, self.mpMRI_type = PatientID, mpMRI_type

class cnt:
    """Minimal mutable counter holding a single integer in ``count``."""

    def __init__(self):
        # Counting starts at zero.
        self.count = 0
        
# Module-level counter instance; dicom2image prints and increments it.
# When dicom2image runs in multiprocessing.Pool workers, each worker process
# updates its own copy of this object, so the value is per-worker.
c = cnt()
        
def dicom2image(ele):
    """Convert one DICOM slice to an 8-bit PNG and return its metadata row.

    Args:
        ele: Record with ``file_path``, ``ImageID``, ``PatientID`` and
            ``mpMRI_type`` attributes (an ``ME`` instance).

    Returns:
        list: ``[dicom path, png path, PatientID, SeriesDescription, ImageID,
        StudyInstanceUID, SeriesInstanceUID]``.

    Raises:
        AssertionError: If the PatientID or SeriesDescription stored in the
            DICOM header does not match the values carried by ``ele``.
        OSError: If OpenCV reports that the PNG could not be written.
    """
    # `c` is a module-level counter. Inside multiprocessing.Pool workers every
    # process increments its own copy, so this is per-worker progress.
    # NOTE(review): 348641 is presumably the total number of slices - confirm.
    print(f"cnt: {c.count}/348641", end="\r")

    dcm_file = pydicom.dcmread(ele.file_path)
    raw_pixels = dcm_file.pixel_array

    PatientID = dcm_file.PatientID
    StudyInstanceUID = dcm_file.StudyInstanceUID
    SeriesInstanceUID = dcm_file.SeriesInstanceUID
    SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.

    # Validate the header against the index before doing the expensive LUT work.
    assert int(PatientID) == ele.PatientID, f"DCM Image patientid {PatientID} and file path patientid {ele.PatientID} do not match!"
    assert SeriesDescription == ele.mpMRI_type, f"SeriesDescription {SeriesDescription} and mpMRI scan type {ele.mpMRI_type} do not match!"

    # Auto-window from the pixel statistics: the width is the maximum stored
    # value (not max - min) and the center is the midpoint of the value range.
    min_pixel = np.min(raw_pixels)
    max_pixel = np.max(raw_pixels)
    auto_lut_window_width = max_pixel
    auto_lut_window_center = (max_pixel - min_pixel) / 2 + min_pixel

    lut = make_lut(raw_pixels, auto_lut_window_width, auto_lut_window_center, dcm_file.PhotometricInterpretation)
    # LUT entries are already within 0-255, so the cast to 8 bits is lossless
    # and hands OpenCV a dtype it can encode directly.
    data = apply_lut(raw_pixels, lut).astype(np.uint8)

    image_path = os.path.join(png_image_path_root, f"{PatientID}_{SeriesDescription}_{ele.ImageID}.png")
    # cv2.imwrite signals failure through its return value instead of raising.
    if not cv2.imwrite(image_path, data):
        raise OSError(f"Failed to write PNG for {ele.file_path} to {image_path}")

    c.count += 1

    return [ele.file_path, image_path, PatientID, SeriesDescription, ele.ImageID, StudyInstanceUID, SeriesInstanceUID]

# images_meta = []
# for root, dirs, files in os.walk(os.path.join(comp_data_root, f"{mode}/")):
#     if len(files) != 0 and (".dcm" in files[0] or ".dicom" in files[0]):
#         split = root.split("/")
#         patientid = split[-2]
#         mpMRI_type = split[-1]
#         for file in files:
#             full_path = os.path.join(root, file)
#             ImageID = file.split(".")[0]  # Get the image file name.
            
#             dcm_file = pydicom.read_file(full_path)
#             PatientID = dcm_file.PatientID
#             SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.
            
#             images_meta.append(ME(full_path, ImageID, PatientID, SeriesDescription))
    
# Load the pre-computed per-image index and turn every row into an ME record.
images_meta_df = pd.read_csv(r"../input/images-meta-df/images_meta_df.csv")

images_meta = [
    ME(record.full_paths, record.ImageIDs, record.PatientIDs, record.SeriesDescriptions)
    for _, record in images_meta_df.iterrows()
]

print("Starting dicom to png conversion...")

In [None]:
# Peek at the DICOM path stored in the first record.
images_meta[0].file_path

In [None]:
# Fan the conversion out over 16 worker processes. map() blocks until every
# slice has been processed; the context manager then shuts the pool down even
# if one of the workers raised.
with Pool(16) as p:
    results = p.map(func=dicom2image, iterable=images_meta)

In [None]:
# One row per converted slice, in the order dicom2image returns its fields.
meta_columns = [
    "dicom_filepath",
    "png_filepath",
    "PatientID",
    "SeriesDescription",
    "ImageID",
    "StudyInstanceUID",
    "SeriesInstanceUID",
]
meta_df = pd.DataFrame(data=np.array(results), columns=meta_columns)

# Take the PatientID from the DICOM path (third component from the end) so it
# keeps the directory's string form even if the ids were turned into ints.
patientids = [path.split("/")[-3] for path in meta_df["dicom_filepath"].values]
meta_df["PatientID"] = patientids

meta_csv_path = os.path.join(meta_df_root, f"{meta_df_name}_mod.csv")
meta_df.to_csv(meta_csv_path, index=False)

In [None]:
%ls ../

In [None]:
# Rebuild the per-image index table with the four columns that the ME records
# are constructed from. This rebinds `images_meta_df`.
# NOTE(review): `full_paths`, `ImageIDs`, `PatientIDs` and `SeriesDescriptions`
# are not defined anywhere in the visible notebook, so this cell raises
# NameError unless they were created elsewhere - confirm where they come from.
images_meta_df = pd.DataFrame({
    "full_paths": full_paths,
    "ImageIDs": ImageIDs,
    "PatientIDs": PatientIDs,
    "SeriesDescriptions": SeriesDescriptions
})

In [None]:
# Save the index table to the working directory without the row-index column.
images_meta_df.to_csv("images_meta_df.csv", index=False)

In [None]:
from IPython.display import FileLink
# Display a link to images_meta_df.csv in the notebook output.
FileLink("images_meta_df.csv")

In [None]:
# -r recurses into ./images/ (without it only the directory entry is stored,
# not the PNGs); -q keeps zip from printing one line per archived file.
!zip -r -q ./rsna_miccai_pngs.zip ./images/

In [None]:
from IPython.display import FileLink, FileLinks
# Link to the metadata CSV using the same root and name it was written with,
# so the link keeps working when `mode` or `meta_df_root` changes.
FileLink(os.path.join(meta_df_root, f"{meta_df_name}_mod.csv"))

# Sanity Check and Small Test Case

In [None]:
# Ref: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/271524.

# Collect every file in patient 00998's FLAIR folder. `p` is rebound here; it
# previously named the worker pool. glob() does not guarantee any ordering, so
# p[10] is not necessarily the same slice on every run or filesystem.
p = glob(r"../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00998/FLAIR/*")

In [None]:
# Show the raw stored pixel values of one sample slice (default colormap).
plt.imshow(pydicom.dcmread(p[10]).pixel_array)

In [None]:
# Keep the raw pixel array of the same sample slice for the LUT check.
a = pydicom.dcmread(p[10]).pixel_array

In [None]:
# Derive the auto window from the sample's pixel range, then build and apply
# the LUT exactly as dicom2image does.
a_min = np.min(a)
a_max = np.max(a)
auto_lut_window_width = a_max
auto_lut_window_center = (a_max - a_min) / 2 + a_min

photometric = pydicom.dcmread(p[10]).PhotometricInterpretation
lut = make_lut(a, auto_lut_window_width, auto_lut_window_center, photometric)
image_autolut = apply_lut(a, lut)

In [None]:
# Show the auto-windowed image with the "bone" colormap.
plt.imshow(image_autolut, cmap=mpl.cm.bone)

In [None]:
# Same image with its values divided by 255.
plt.imshow(image_autolut/255.0, cmap=mpl.cm.bone)

In [None]:
# Reference conversion using pydicom's own VOI LUT, for comparison with the
# hand-made LUT. The file is parsed once and reused.
sample_dcm = pydicom.dcmread(p[10])
data = apply_voi_lut(sample_dcm.pixel_array, sample_dcm)

# MONOCHROME1 stores inverted intensities; flip them.
if sample_dcm.PhotometricInterpretation == "MONOCHROME1":
    data = np.amax(data) - data

# Min-max scale to 0-255. A flat image has a peak of 0 after the shift, so the
# division is skipped there to avoid dividing by zero.
data = data - np.min(data)
peak = np.max(data)
if peak > 0:
    data = data / peak
data = (data * 255).astype(np.uint8)

In [None]:
# Show the pydicom VOI-LUT result with the "bone" colormap.
plt.imshow(data, cmap=mpl.cm.bone)

In [None]:
# Same image with its values divided by 255.
plt.imshow(data/255.0, cmap=mpl.cm.bone)