In [1]:
import pandas as pd
import os
from tqdm import tqdm
import re
from numpy import nan as Nan
import pydicom as dicom
import cv2
import shutil
import numpy as np

In [2]:

# Root folder of the CardioVision data.
p_all = '/home/marichka/Documents/CardioVision/'

# Sub-folders of the root. Every path keeps its trailing slash because other
# cells append patient ids to these strings by plain concatenation.
# ONLY_LCX/ and ONLY_RCA/ do not have to exist beforehand; they are created later.
p_mpr, p_lad, p_lcx, p_rca = (
    p_all + folder_name
    for folder_name in ('ONLY_MPR/', 'ONLY_LAD/', 'ONLY_LCX/', 'ONLY_RCA/')
)

In [9]:
# Label tables generated beforehand, one spreadsheet per artery.
# Both files are looked up relative to the current working directory.
reports_lcx = pd.read_excel(io='lcx_labels_titlesreports.xlsx')
reports_rca = pd.read_excel(io='rca_labels_titlesreports.xlsx')

In [4]:
# CREATE DIRECTORIES FOR PATIENTS LCX and RCA data
# os.makedirs(..., exist_ok=True) creates the artery root and the patient folder
# (including missing parents) and does nothing when they already exist, so this
# cell can be re-run without an explicit existence check.
for artery_root, artery_reports in ((p_lcx, reports_lcx), (p_rca, reports_rca)):
    os.makedirs(artery_root, exist_ok=True)
    for patient in artery_reports['PATIENT_ID'].values:
        os.makedirs(os.path.join(artery_root, patient), exist_ok=True)

In [5]:
# Keep the second space-separated token of every MPR folder name; these tokens
# are compared against the REPORT_ID column below.
folders_ids = [folder_name.split(' ')[1] for folder_name in os.listdir(p_mpr)]
folders_ids_lad = os.listdir(p_lad)
# Displayed summary: number of MPR folders, number of LAD folders, and number of
# LCX report ids that also occur among the MPR folder ids.
tuple(map(len, (folders_ids, folders_ids_lad, np.intersect1d(reports_lcx['REPORT_ID'], folders_ids))))

(743, 720, 653)

In [6]:
def split_mpr_name(mpr_name):
    """
    Clean up an MPR series name.

    All whitespace is removed first. Afterwards the markers '*', 'original', 'LIMA-',
    'Branchof', 'TOPDA' and 'PDATO' are deleted one after another, in exactly that order.

    Returns:
        str: the cleaned name.
    """
    cleaned_name = "".join(mpr_name.split())
    # The order matters: removing one marker can expose the next one.
    for marker in ('*', 'original', 'LIMA-', 'Branchof', 'TOPDA', 'PDATO'):
        cleaned_name = cleaned_name.replace(marker, '')
    return cleaned_name


def get_patient_dictionary(path_to_patient_folder):
    """
    Read every entry of a patient's folder as a DICOM file and group the results by series.

    Args:
        path_to_patient_folder: directory whose entries are all passed to dicom.dcmread.

    Returns:
        dict: key - SeriesDescription of the images; value - list of the DICOM objects with that
                    description, sorted in ascending order of their InstanceNumber.
    """
    images_by_series = {}

    for file_name in os.listdir(path_to_patient_folder):
        dicom_obj = dicom.dcmread(os.path.join(path_to_patient_folder, file_name))
        images_by_series.setdefault(dicom_obj.SeriesDescription, []).append(dicom_obj)

    # put the images of every series into ascending InstanceNumber order
    for series_images in images_by_series.values():
        series_images.sort(key=lambda dicom_obj: dicom_obj.InstanceNumber)

    return images_by_series

## CREATE LCX DATASET

In [20]:
# Segment columns of the LCX label table that are converted into binary labels.
possible_labels = ['PROXIMAL', 'DISTAL', 'OM', 'OM1', 'OM2', 'OM3'] #, 'PDA', 'PLV']
skip_counter = 0   # number of labels for which no images were exported

# The MPR folder listing does not change while the loop runs, so read it once.
mpr_patients = os.listdir(p_mpr)

for index, row in tqdm(reports_lcx.iterrows(), total=len(reports_lcx)):
    labels_dict = {}
    patient = row['PATIENT_ID']
    matching_folders = [s for s in mpr_patients if patient in s]
    if not matching_folders:
        continue   # no according mpr folder
    mpr_path_patient = p_mpr + matching_folders[0]

    # RETRIEVING LABELS: '-' entries are skipped; an annotation containing
    # "normal" (case-insensitive) becomes 0, anything else becomes 1.
    for pos_label in possible_labels:
        annotation = row[pos_label]
        if annotation != '-':
            labels_dict[pos_label] = 0 if 'normal' in annotation.lower() else 1

    # MPR IMAGES, re-keyed by the normalised series name.
    # A fresh dict is built on purpose: renaming in place (assign the new key, then
    # delete the old key) removes a series whose name is already normalised, because
    # the new key equals the old key. If two series normalise to the same name, the
    # later one wins.
    patient_dictionary = {}
    for series_name, series_images in get_patient_dictionary(mpr_path_patient).items():
        normalised_name = split_mpr_name(series_name).replace('LCX-', '').replace('X','X-').replace('CX-', 'CX')
        patient_dictionary[normalised_name] = series_images

    # NOTE(review): this compares against the exact string 'NORMAL', unlike the
    # case-insensitive substring test used for the segment labels above, and a '-'
    # entry also yields 1 here - confirm that this is intended.
    labels_dict['LCX'] = 1 if row['PROXIMAL'] != 'NORMAL' or row['DISTAL'] != 'NORMAL' else 0

    for label_name, label_value in labels_dict.items():
        if label_name not in patient_dictionary:
            skip_counter += 1   # this patient has no series for the label
            continue

        path_to_label = os.path.join(p_lcx + patient, label_name)
        try:
            os.makedirs(path_to_label, exist_ok=True)
            for dicom_file in patient_dictionary[label_name]:
                file_name = patient + '_' + str(dicom_file.InstanceNumber) + '_label' + str(label_value) + '.png'
                # rescale the pixel data to the 0..255 range before writing the PNG
                normalised_image = cv2.normalize(dicom_file.pixel_array, None, alpha = 0,
                                                 beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
                cv2.imwrite(os.path.join(path_to_label, file_name), normalised_image)
        except Exception as err:
            # The export stays best-effort, but KeyboardInterrupt / SystemExit are no
            # longer swallowed and the reason for the skip is reported.
            skip_counter += 1
            tqdm.write('skipped {} / {}: {!r}'.format(patient, label_name, err))
            

## CREATE RCA DATASET

In [13]:
# Segment columns of the RCA label table that are converted into binary labels.
possible_labels = ['PROXIMAL', 'DISTAL', 'MID', ]   # 'PDA', 'PLV']
skip_counter_rca = 0   # number of labels for which no images were exported

# The MPR folder listing does not change while the loop runs, so read it once.
mpr_patients = os.listdir(p_mpr)

for index, row in tqdm(reports_rca.iterrows(), total=len(reports_rca)):
    labels_dict = {}
    patient = row['PATIENT_ID']
    matching_folders = [s for s in mpr_patients if patient in s]
    if not matching_folders:
        continue   # no according mpr folder
    mpr_path_patient = p_mpr + matching_folders[0]

    # RETRIEVING LABELS FOR BINARY CLASSIFICATION: '-' entries are skipped; an
    # annotation containing "normal" (case-insensitive) becomes 0, anything else 1.
    for pos_label in possible_labels:
        annotation = row[pos_label]
        if annotation != '-':
            labels_dict[pos_label] = 0 if 'normal' in annotation.lower() else 1

    # MPR IMAGES, re-keyed by the normalised series name.
    # A fresh dict is built on purpose: renaming in place (assign the new key, then
    # delete the old key) removes a series whose name is already normalised, because
    # the new key equals the old key. If two series normalise to the same name, the
    # later one wins.
    patient_dictionary = {}
    for series_name, series_images in get_patient_dictionary(mpr_path_patient).items():
        normalised_name = split_mpr_name(series_name).replace('RCA-', '').replace('A','A-').replace('CA-', 'CA')
        patient_dictionary[normalised_name] = series_images

    # NOTE(review): this compares against the exact string 'NORMAL', unlike the
    # case-insensitive substring test used for the segment labels above, and a '-'
    # entry also yields 1 here - confirm that this is intended.
    labels_dict['RCA'] = 1 if row['PROXIMAL'] != 'NORMAL' or row['DISTAL'] != 'NORMAL' or  row['MID'] != 'NORMAL' else 0

    for label_name, label_value in labels_dict.items():
        if label_name not in patient_dictionary:
            skip_counter_rca += 1   # this patient has no series for the label
            continue

        path_to_label = os.path.join(p_rca + patient, label_name)
        try:
            os.makedirs(path_to_label, exist_ok=True)
            for dicom_file in patient_dictionary[label_name]:
                file_name = patient + '_' + str(dicom_file.InstanceNumber) + '_label' + str(label_value) + '.png'
                # rescale the pixel data to the 0..255 range before writing the PNG
                normalised_image = cv2.normalize(dicom_file.pixel_array, None, alpha = 0,
                                                 beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
                cv2.imwrite(os.path.join(path_to_label, file_name), normalised_image)
        except Exception as err:
            # The export stays best-effort, but KeyboardInterrupt / SystemExit are no
            # longer swallowed and the reason for the skip is reported.
            skip_counter_rca += 1
            tqdm.write('skipped {} / {}: {!r}'.format(patient, label_name, err))