In [69]:
import os
import re

import cv2
import numpy as np
import pandas as pd
import pydicom as dicom
from numpy import nan as Nan
from tqdm import tqdm

In [2]:
def split_mpr_name(mpr_name):
    """
    Strip whitespace and a fixed set of marker substrings from an MPR series name.

    All whitespace is removed first; then each marker is deleted, one after
    another, in the order listed below (the order matters because an earlier
    removal can create or destroy a later match).
    """
    cleaned = "".join(mpr_name.split())
    for marker in ('*', 'original', 'LIMA-', 'Branchof', 'TOPDA', 'PDATO'):
        cleaned = cleaned.replace(marker, '')
    return cleaned

def get_patient_dictionary(path_to_patient_folder):
    """
    Group the DICOM files of one patient folder by their SeriesDescription.

    Every entry returned by os.listdir for the folder is read with
    dicom.dcmread, so the folder is expected to contain only readable
    DICOM files.

    Returns:
        dict: key - SeriesDescription of the files; value - list of the DICOM
              datasets with that description, sorted in ascending order of
              their InstanceNumber.
    """
    series_by_description = {}

    for file_name in os.listdir(path_to_patient_folder):
        dataset = dicom.dcmread(os.path.join(path_to_patient_folder, file_name))
        series_by_description.setdefault(dataset.SeriesDescription, []).append(dataset)

    # order the slices of every series by their instance number
    for datasets in series_by_description.values():
        datasets.sort(key=lambda ds: ds.InstanceNumber)

    return series_by_description

def get_pixels_hu(list_of_imgs):
    """
    Convert a stack of DICOM slices into Hounsfield units (HU).

    Args:
        list_of_imgs: non-empty sequence of objects exposing `pixel_array`,
            `RescaleSlope` and `RescaleIntercept`. All pixel arrays must have
            the same shape so they can be stacked.

    Returns:
        numpy.ndarray of dtype int16 with one leading axis per slice, holding
        `pixel * slope + intercept`.

    Raises:
        ValueError: if `list_of_imgs` is empty.

    Notes:
        The slope and intercept of the FIRST slice are applied to the whole
        stack. Values are cast to int16, so this assumes both the raw pixels
        and the rescaled result fit into that range.
    """
    if len(list_of_imgs) == 0:
        raise ValueError("list_of_imgs must contain at least one slice")

    image = np.stack([s.pixel_array for s in list_of_imgs]).astype(np.int16)

    # Pixels flagged with the value -2000 are reset to 0 before rescaling.
    # NOTE(review): presumably -2000 is the scanner's outside-of-scan padding
    # value - confirm against the data.
    image[image == -2000] = 0

    # Convert to Hounsfield units (HU) using the first slice's rescale tags
    intercept = list_of_imgs[0].RescaleIntercept
    slope = list_of_imgs[0].RescaleSlope

    if slope != 1:
        # scale in floating point, then truncate back to int16
        image = (slope * image.astype(np.float64)).astype(np.int16)

    image += np.int16(intercept)

    return image

In [44]:
# Root folder of the source dataset; the export loop treats every entry in it as one patient folder.
path_to_data = r'D:\coronaryProject\dataset\binary_classification_MPR\images'
# Destination root for the exported images.
# NOTE: this is a raw string, so the trailing `\\` stays as two literal backslashes in the value.
path_to_new_data = r'E:\ONLY_LAD\\'

# Names of every entry directly under path_to_data, in whatever order os.listdir returns them.
list_of_patients = os.listdir(path_to_data)
# patient_dictionary = get_patient_dictionary(path_to_data + '\\'+ list_of_patients[0])

In [None]:
def _to_report_label(series_description):
    """Turn a DICOM SeriesDescription into the label used in the reports, e.g. 'LAD-D1 *' -> 'D-1', 'LAD *' -> 'LAD'."""
    return split_mpr_name(series_description).replace('LAD-', '').replace('D', 'D-').replace('AD-', 'AD')


# Export every LAD-related series of every patient as PNG images under
# <path_to_new_data>/<patient>/<label>/<patient>_<InstanceNumber>.png
for i in tqdm(range(len(list_of_patients))):
    patient_name = list_of_patients[i]
    patient_dictionary = get_patient_dictionary(os.path.join(path_to_data, patient_name))

    splited_mpr_names = [split_mpr_name(x) for x in patient_dictionary.keys()]
    # labels of the series whose cleaned description mentions 'LAD'
    splited_mpr_names_filtered = [_to_report_label(x) for x in patient_dictionary.keys()
                                  if 'LAD' in split_mpr_name(x)]

    # Change keys in the dict to the corresponding labels in the reports.
    # A new dict is built instead of "assign new key, delete old key": with the
    # in-place version a description that already equals its label (e.g. 'LAD')
    # was assigned to itself and then deleted, so that series was never exported.
    patient_dictionary = {_to_report_label(description): series
                          for description, series in patient_dictionary.items()}

    # the patient folder is created even when the patient has no LAD series
    patient_dir = os.path.join(path_to_new_data, patient_name)
    os.makedirs(patient_dir, exist_ok=True)

    for key in patient_dictionary.keys():
        if key not in splited_mpr_names_filtered:
            continue

        # one sub-folder per exported series, created once rather than per image
        series_dir = os.path.join(patient_dir, key)
        os.makedirs(series_dir, exist_ok=True)

        for dicom_file in patient_dictionary[key]:
            # each slice is min-max normalised to [0, 255] on its own before writing
            cv2.imwrite(os.path.join(series_dir,
                                     patient_name + '_' + str(dicom_file.InstanceNumber) + '.png'),
                        cv2.normalize(dicom_file.pixel_array, None, alpha=0,
                                      beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
                        )

    

 16%|████████████▍                                                                 | 118/743 [34:53<2:26:42, 14.08s/it]

In [75]:
# Inspect the raw pixel range of the last slice handled by the export loop.
# NOTE(review): the original cell read `pixel_array.()`, which is a syntax error;
# `.max()` is presumed from the recorded output (4095) - confirm.
dicom_file.pixel_array.max()

4095

# Rename patient folders to match the report names