In [1]:
import os
import dicom2jpg
import shutil
from tqdm.notebook import tqdm
import numpy as np

In [2]:
# Dataset root directory. Set the CBIS_DDSM_ROOT environment variable to point
# the notebook at another copy of the dataset (for example './CBIS-DDSM/')
# without editing this cell; when it is unset the original Windows location
# is used.
root = os.environ.get('CBIS_DDSM_ROOT', 'D:\\CBIS-DDSM\\')

# Names of the two dataset splits; they are used as folder names below.
TRAIN = 'TRAIN'
TEST = 'TEST'

# Names of the three image categories; they are used as folder names under
# each split.
IMAGES = 'IMAGES'
MASKS = 'MASKS'
CROPS = 'CROPS'

# INPUT is listed for the source DICOM folders; OUTPUT is the target_root
# handed to dicom2jpg.
# Original note (translated): "Do not delete the output folder".
# NOTE(review): the folder-creation cell removes and recreates root_output on
# every run -- confirm which behaviour is intended.
root_input = os.path.join(root, 'INPUT')
root_output = os.path.join(root, 'OUTPUT')

# Final tree for the converted JPG files, one sub-tree per split.
root_out_jpg = os.path.join(root, 'CBIS-JPG')
root_out_jpg_train = os.path.join(root_out_jpg, TRAIN)
root_out_jpg_test = os.path.join(root_out_jpg, TEST)

# Final tree for the copied DICOM files, one sub-tree per split.
root_out_dcm = os.path.join(root, 'CBIS-DCM')
root_out_dcm_train = os.path.join(root_out_dcm, TRAIN)
root_out_dcm_test = os.path.join(root_out_dcm, TEST)



In [3]:
def listdir(dir):
    """Return the non-hidden entry names of a directory, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting makes every
    caller that iterates the result, or takes its first element, behave the
    same way on every run and platform.

    Args:
        dir: Path of the directory to list. (The name shadows the builtin
            ``dir`` but is kept so existing callers keep working.)

    Returns:
        A sorted list of entry names (not full paths) that do not start
        with ``'.'``.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the path is missing).
    """
    return sorted(name for name in os.listdir(dir) if not name.startswith('.'))


def subset(dcm_folder):
    """Pick the dataset split for a folder name or path.

    Args:
        dcm_folder: String to inspect.

    Returns:
        ``TRAIN`` when the string contains ``'Training'``, otherwise ``TEST``.
    """
    return TRAIN if 'Training' in dcm_folder else TEST
    
def next_listdir(folder):
    """Return the path of the first non-hidden entry inside ``folder``.

    "First" means the first name returned by ``listdir(folder)``.

    Args:
        folder: Path of the directory to descend into.

    Returns:
        ``folder`` joined with the first entry name.

    Raises:
        IndexError: If ``folder`` has no non-hidden entries.
    """
    entries = listdir(folder)
    if not entries:
        # Same exception type as the bare ``[0]`` lookup would raise, but the
        # message names the folder so a failing item is easy to locate when
        # thousands of folders are being processed.
        raise IndexError('no visible entries in folder: {!r}'.format(folder))
    return os.path.join(folder, entries[0])
    

def relocate_jpg(dcm_folder, img_type):
    """Move the JPG converted from ``dcm_folder`` into the final JPG tree.

    The source is found by descending from ``root_output``: first entry of
    ``root_output``, then ``<dcm_folder>_jpg``, then the first entry at each
    of the next two levels. It is moved to
    ``<root_out_jpg>/<split>/<img_type>/<name>.jpg``, where ``<name>`` is the
    second piece of ``dcm_folder.split('_P_')`` and ``<split>`` comes from
    ``subset(dcm_folder)``. The intermediate folder the JPG was found in is
    then deleted.

    Args:
        dcm_folder: Name of the source DICOM folder.
        img_type: Category folder name (``IMAGES``, ``MASKS`` or ``CROPS``).

    Raises:
        IndexError: If ``dcm_folder`` does not contain ``'_P_'`` or one of
            the traversed folders has no visible entries.
    """
    # Walk down to the converted file.
    converted_root = os.path.join(next_listdir(root_output), dcm_folder + '_jpg')
    subfolder = next_listdir(converted_root)
    source = next_listdir(subfolder)

    # Build the destination path from the folder name.
    stem = dcm_folder.split('_P_')[1]
    destination = os.path.join(
        root_out_jpg, subset(dcm_folder), img_type, stem + '.jpg'
    )

    shutil.move(source, destination)
    # Remove the now-unneeded folder so the next lookup under
    # ``<dcm_folder>_jpg`` does not pick it up again.
    shutil.rmtree(subfolder)
    
    
def copy_dcm(dcm_path, dcm_folder, img_type):
    """Copy a source DICOM file into the final DCM tree.

    The copy is written to ``<root_out_dcm>/<split>/<img_type>/<name>.dcm``,
    where ``<name>`` is the second piece of ``dcm_folder.split('_P_')``.

    Args:
        dcm_path: Path of the DICOM file to copy.
        dcm_folder: Name of the top-level folder the file came from.
        img_type: Category folder name (``IMAGES``, ``MASKS`` or ``CROPS``).

    Raises:
        IndexError: If ``dcm_folder`` does not contain ``'_P_'``.
        OSError: Propagated from ``shutil.copy``.
    """
    file_name = dcm_folder.split('_P_')[1]
    # Derive the split from the folder name, exactly as relocate_jpg does.
    # Using the full path would also match 'Training' appearing anywhere in
    # the dataset root, which would send every file to TRAIN and make the DCM
    # tree disagree with the JPG tree.
    split = subset(dcm_folder)
    dcm_img_final_path = os.path.join(root_out_dcm, split, img_type, file_name + '.dcm')
    shutil.copy(dcm_path, dcm_img_final_path)
    

In [4]:
# Create Folder Structure

# Parent folders come first so that, when a parent is wiped below, its
# children are recreated afterwards by their own entries.
folders_to_create = [
    root_output,
    root_out_jpg, root_out_jpg_train, root_out_jpg_test,
    root_out_dcm, root_out_dcm_train, root_out_dcm_test,
]

# One folder per image category under every split of both output trees.
folders_to_create += [
    os.path.join(split_root, category)
    for split_root in (
        root_out_jpg_train,
        root_out_jpg_test,
        root_out_dcm_train,
        root_out_dcm_test,
    )
    for category in (IMAGES, MASKS, CROPS)
]

for folder in folders_to_create:
    # Start from an empty folder: drop whatever a previous run left behind.
    already_there = os.path.exists(folder)
    if already_there:
        shutil.rmtree(folder)
    os.makedirs(folder, mode=0o777)


# DCM Folders

def _convert_and_store(dcm_path, dcm_folder, img_type):
    """Convert one DICOM file to JPG and file both versions under ``img_type``.

    Runs dicom2jpg with ``root_output`` as target, copies the original DICOM
    into the DCM tree, then moves the converted JPG into the JPG tree.
    """
    dicom2jpg.dicom2jpg(origin=dcm_path, target_root=root_output)
    copy_dcm(dcm_path, dcm_folder, img_type)
    relocate_jpg(dcm_folder, img_type)


all_folders = listdir(root_input)

for dcm_folder in tqdm(all_folders):
    dcm_folder_path = os.path.join(root_input, dcm_folder)

    # Folder names ending in 'CC' or 'MLO' are filed under IMAGES; every
    # other folder is treated as holding a mask and/or a crop.
    is_img = dcm_folder.endswith(('CC', 'MLO'))

    if is_img:

        # The DICOM file sits three levels down; take the first entry at
        # each level.
        first_folder = next_listdir(dcm_folder_path)
        second_folder = next_listdir(first_folder)
        dcm_path = next_listdir(second_folder)

        _convert_and_store(dcm_path, dcm_folder, IMAGES)

    else:

        subfolders = listdir(dcm_folder_path)
        mask_and_crop_mixed = (len(subfolders) == 1)

        if mask_and_crop_mixed:
            # A single sub-folder: mask and crop files share one directory
            # two levels down and are told apart by file size.
            first_folder = next_listdir(dcm_folder_path)
            second_folder = next_listdir(first_folder)
            dcm_files = listdir(second_folder)
            dcm_paths = [os.path.join(second_folder, f) for f in dcm_files]
            dcm_sizes = [os.path.getsize(p) for p in dcm_paths]

            # Loop-invariant threshold, computed once. The guard avoids
            # numpy's empty-slice warning; the loop below does not run for
            # an empty folder anyway.
            mean_size = np.mean(dcm_sizes) if dcm_sizes else 0

            for dcm_path, dcm_size in zip(dcm_paths, dcm_sizes):
                # Files smaller than the mean size are filed as crops, the
                # others as masks.
                img_type = CROPS if dcm_size < mean_size else MASKS
                _convert_and_store(dcm_path, dcm_folder, img_type)

        else:
            # Several sub-folders: each one is processed separately and the
            # category is read from the path of its inner folder.
            for f in subfolders:
                first_folder = os.path.join(dcm_folder_path, f)
                second_folder = next_listdir(first_folder)

                img_type = CROPS if 'cropped' in second_folder else MASKS

                dcm_path = next_listdir(second_folder)
                _convert_and_store(dcm_path, dcm_folder, img_type)

  0%|          | 0/3288 [00:00<?, ?it/s]