# Converting the DICOM images to PNG

In [46]:
import pandas as pd
import numpy as np
import pydicom
import png
import math
from PIL import Image
from pathlib import Path
import boto3
import matplotlib.pyplot as plt

data_path = '/home/szelesteya/projects/EMBED_Open_Data/'
tables_path = data_path + 'tables/'
image_root_path = '/media/szelesteya/F824D4D024D492CC/EMBED-images/'
image_dcm_path_pos = image_root_path + 'dicom-positive/'
image_dcm_path_neg = image_root_path + 'dicom-negative/'
image_png_path_pos = image_root_path + 'positive-full'
image_png_path_neg = image_root_path + 'negative-full'

In [75]:
# Rescale the intensity of the image to get heterogene images with the bit depth of 14
def rescale_to_8bit(image_array):
    upper_percentile = np.percentile(image_array.flatten(), 98) # original_max = np.max(image_array)
    lower_percentile = np.percentile(image_array.flatten(), 2) # original_min = np.min(image_array)
    # max_on_14bit = 16383
    max = 255
    rescaled_array = (image_array - lower_percentile) / (upper_percentile - lower_percentile)
    rescaled_array[rescaled_array < 0] = 0
    rescaled_array[rescaled_array > 1] = 1
    # rescaled_array = np.round((image_array - original_min) / (original_max - original_min) * max_on_14bit).astype(int)
    return np.round(rescaled_array * 255).astype(np.uint8)

def generate_png_path(dcm_path):
    # Get new file name
    split_fn = dcm_path[:-4].split('/')
    new_fn = f"{split_fn[-1]}_conv.png"
    return image_path + new_fn

# Save DICOM pixel array as PNG
def save_dcm_image_as_png(image, png_filename, bitdepth=8):
    cut_length = []
    with open(png_filename, 'wb') as f:
        rescaled = rescale_to_8bit(image)
        # img_cut = cut_dark_part(rescaled)
        writer = png.Writer(height=rescaled.shape[0], 
                            width=rescaled.shape[1], 
                            bitdepth=bitdepth, 
                            greyscale=True)
        writer.write(f, rescaled.tolist())

        # cut_length = rescaled.shape[1] - img_cut.shape[1]

    return cut_length

def generate_png_path(acc_anon, side, png_dir):
    # Get new file name
    new_fn = f"{acc_anon}_conv.png"
    return f'{png_dir}/{new_fn}'

# Convert list of DICOMs to PNGs
def process_dcm_list(dcm_list, png_list):
    cut_lengths = []
    for i, dcm_path in enumerate(dcm_list):
        print(f"Processing DICOM #{i}...")
        
        dcm = pydicom.dcmread(dcm_path)
        img = dcm.pixel_array
                  
        cut_lengths.append(save_dcm_image_as_png(img, png_list[i]))

def cut_dark_part(image_array):
    cut_column = []
    cut_image = []
    for column in image_array.T:
        dark = np.sum(column == 0)
        ratio = dark / len(column)
        if ratio < 0.95:
            cut_image.append(column)

    return np.array(cut_image).T

def extract_images(df, dcm_dir, png_dir):
    # Provide a list of DICOM paths and a target directory
    dcm_list = []
    
    for index, row in df.iterrows():
        path = dcm_dir  + row['relative_dcm_path']
        if Path(path).exists():
            dcm_list.append(path)

    # Insert png path
    # print(df)
    png_paths = df.apply(lambda row: generate_png_path(row['acc_anon'], row['side'], png_dir))

    # Convert DICOMs
    cut_lengths = process_dcm_list(dcm_list, png_paths)

    return png_paths, cut_lengths

In [76]:
# Reading the DataFrame containing positive samples
df_pos = pd.read_csv(data_path + 'positive_empirical.csv')

# Convert, resize and cut images
paths = extract_images(df_pos[0:15], image_dcm_path_pos, image_png_path_pos)

# Save path
df_pos_path = df_pos.copy()
df_pos_path['png_path'] = paths

with open(data_path + 'positive_path', 'w') as f:
    df_pos_path.to_csv(f)
# df_pos['cut_lengths'] = cut_lengths

# print(cut_lengths)

KeyError: 'acc_anon'