# 将从医生那里拿到的DICOM序列拆分成不同文件夹
## 所有*_all_*的函数都遍历下述的结构
- dir_name
    - patient_id
        - check_id
            - NC
            - ART
            - ...

## 添加新数据的一般步骤
### 从医院那里下载数据，格式如下：
    - Cyst
        - patient_id
            - check_id
                - DICOM series
        - ...
    - ...
### 接下来按照DICOM的序列的文件名的格式，分为0001~000x个子文件夹，使用split_dicom_series函数
### 接下来人为的将0001~000x子文件夹命名为NC、ART、PV、PVH等格式
### 接下来使用check_all_format函数检查每个check_id目录是否包含NC、ART、PV子文件夹，并创建 SignificantLayer、MHD、Mask 子文件夹（使用mk_dir / make_all_dir函数；make_all_dir只创建Mask）
### 将NC、ART、PV的DICOM序列转化为MHD格式的文件，使用conver_all_dir函数
### 接下来，人为的标记每幅图像，Mask文件存在 Mask 文件夹中，命名方式是PhaseName.upper()_Mask.mhd，如果包含其他类型的病灶，则其他类型的病灶命名方式是PhaseName.upper()_LesionName.upper().mhd
### 接下来根据Mask文件提取显著层数据，存入SignificantLayer文件夹中，使用extract_all_dir函数

In [1]:
import os
import numpy as np
import shutil
from glob import glob

In [17]:
# IPython magic: executes medicalImage.ipynb.
# NOTE(review): read_dicom_series, save_mhd_image, read_mhd_image and
# return_typeid_byname are used below but not defined in this notebook;
# presumably they come from medicalImage.ipynb -- TODO confirm.
run medicalImage.ipynb

In [3]:
def split_dicom_series(dir_path):
    """Sort the DICOM files of one check folder into per-series subfolders.

    Files are matched by the pattern ``<patient_id>-<check_id>-<NNNN>-*``,
    where ``check_id`` is the name of ``dir_path``, ``patient_id`` is the
    name of its parent folder and ``NNNN`` is a zero-padded series index
    starting at 0001.  Every match for series ``NNNN`` is moved into
    ``dir_path/NNNN`` (created when missing).  Scanning stops at the first
    index that has no matching file, so series after a gap are not touched.

    Prints ``dir_path`` once, then each series index after it was moved.

    Args:
        dir_path: path of the check folder (``.../patient_id/check_id``).
    """
    # normpath drops a trailing separator; without it basename() would
    # return '' and the prefix would be built from the wrong components.
    normalized = os.path.normpath(dir_path)
    check_id = os.path.basename(normalized)
    patient_id = os.path.basename(os.path.dirname(normalized))
    prefix = patient_id + '-' + check_id
    cur_index = 1
    print(dir_path)
    while True:
        cur_index_str = '%04d' % cur_index
        file_paths = glob(
            os.path.join(dir_path, prefix + '-' + cur_index_str + '-*')
        )
        if not file_paths:
            break
        save_dir = os.path.join(dir_path, cur_index_str)
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        for file_path in file_paths:
            shutil.move(
                file_path,
                os.path.join(save_dir, os.path.basename(file_path))
            )
        cur_index += 1
        print(cur_index_str)

In [11]:
# Split the DICOM files of a single check folder into per-series subfolders.
split_dicom_series(
    'F:\\FNH\\FNH\\1026414\\3472586'
)

F:\FNH\FNH\1026414\3472586


In [15]:
def split_all_pcid(dir_name):
    """Run split_dicom_series on every ``dir_name/<patient_id>/<check_id>``.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            split_dicom_series(os.path.join(patient_dir, check_id))

In [21]:
# Split the DICOM series of every patient/check folder under the HEM root.
split_all_pcid('F:\\HEM\\HEM\\')

F:\HEM\HEM\1011447\3675387
F:\HEM\HEM\1100989\3671713
F:\HEM\HEM\1510797\3471963
F:\HEM\HEM\1691892\3777607
0001
0002
0003
0004
0005
0006
F:\HEM\HEM\1920841\3495821
F:\HEM\HEM\1945955\3282452
F:\HEM\HEM\2390820\3531368
F:\HEM\HEM\2439389\3514941
F:\HEM\HEM\2452057\3528388
F:\HEM\HEM\2723866\3770829
F:\HEM\HEM\2835381\3745302
F:\HEM\HEM\2920009\3573265
F:\HEM\HEM\3186062\3645214
F:\HEM\HEM\3275969\3575777
F:\HEM\HEM\3295057\3670528
F:\HEM\HEM\3329491\3739733
F:\HEM\HEM\3475948\3591129
F:\HEM\HEM\3535113\3691092
F:\HEM\HEM\3674602\3692500
F:\HEM\HEM\3723017\3647711
F:\HEM\HEM\3819731\3693746
F:\HEM\HEM\3819731\ART
F:\HEM\HEM\3819731\NC
F:\HEM\HEM\3819731\PV
F:\HEM\HEM\3819731\PVH
F:\HEM\HEM\3837602\3516115
F:\HEM\HEM\3843010\3739766
F:\HEM\HEM\3845498\3526836
F:\HEM\HEM\3916006\3626978
F:\HEM\HEM\3933321\3650216
F:\HEM\HEM\3977301\3746721
F:\HEM\HEM\4028365\3788824
F:\HEM\HEM\792426\3546610
F:\HEM\HEM\8126959\3497605


In [3]:
def check_format(dir_name):
    """Check that a check folder contains every essential phase subfolder.

    The essential phases are NC, ART and PV.  When one of them is missing a
    single warning line naming ``dir_name`` is printed.

    Args:
        dir_name: path of the check folder to inspect.

    Returns:
        True when all essential phase subfolders exist, False otherwise.
    """
    essential_phase_names = ['NC', 'ART', 'PV']
    for phase_name in essential_phase_names:
        if not os.path.exists(os.path.join(dir_name, phase_name)):
            # Single-argument print() emits the same text on Python 2 and 3.
            print('Not the format in  %s' % (dir_name,))
            return False
    return True

In [4]:
def check_all_format(dir_name):
    """Run check_format on every ``dir_name/<patient_id>/<check_id>`` folder.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            check_format(os.path.join(patient_dir, check_id))

In [12]:
# Report every check folder under the METS root that lacks NC, ART or PV.
check_all_format('F:\\third_data\\METS\\')

Not the format in  F:\third_data\METS\2544579\3494702
Not the format in  F:\third_data\METS\3560291\3437977
Not the format in  F:\third_data\METS\3918800\3673706


In [13]:
def mk_dir(parent_dir, dir_name):
    """Create the subfolder ``dir_name`` inside ``parent_dir``.

    Nothing happens when that path already exists.

    Args:
        parent_dir: existing folder that will contain the new subfolder.
        dir_name: name of the subfolder to create.
    """
    target = os.path.join(parent_dir, dir_name)
    if os.path.exists(target):
        return
    os.mkdir(target)

In [38]:
def make_all_dir(dir_name):
    """Create a ``Mask`` subfolder in every ``dir_name/<patient_id>/<check_id>``.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            mk_dir(os.path.join(patient_dir, check_id), 'Mask')

In [40]:
# Create the Mask subfolder in every check folder under the FNH root.
make_all_dir('F:\\third_data\\ordered\\FNH')

In [33]:
def convert_DICOM2MHD(dir_name):
    """Convert the DICOM series of one check folder into MHD files.

    For each essential phase (NC, ART, PV) the series in
    ``dir_name/<phase>`` is read with read_dicom_series and written with
    save_mhd_image to ``dir_name/MHD/<phase>.mhd``.  One progress line is
    printed per phase.

    NOTE(review): read_dicom_series and save_mhd_image are not defined in
    this notebook; whether save_mhd_image creates a missing ``MHD`` folder
    cannot be told from here -- confirm.

    Args:
        dir_name: path of the check folder containing the phase subfolders.
    """
    essential_phase_names = ['NC', 'ART', 'PV']
    for phase_name in essential_phase_names:
        cur_dir = os.path.join(dir_name, phase_name)
        save_path = os.path.join(dir_name, 'MHD', phase_name + '.mhd')
        images = read_dicom_series(cur_dir)
        save_mhd_image(images, save_path)
        # Single-argument print() emits the same text on Python 2 and 3.
        print('saveing  %s' % (save_path,))

In [37]:
# Convert the NC/ART/PV DICOM series of a single check folder to MHD.
convert_DICOM2MHD('F:\\third_data\\ordered\\FNH\\3226933\\3502177')

image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\NC.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\ART.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\PV.mhd


In [30]:
def conver_all_dir(dir_name):
    """Run convert_DICOM2MHD on every ``dir_name/<patient_id>/<check_id>``.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            convert_DICOM2MHD(os.path.join(patient_dir, check_id))

In [32]:
# Convert the DICOM series of every check folder under the METS root to MHD.
conver_all_dir('F:\\third_data\\ordered\\METS')

image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\102067\3513875\MHD\NC.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\102067\3513875\MHD\ART.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\102067\3513875\MHD\PV.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2058569\3562470\MHD\NC.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2058569\3562470\MHD\ART.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2058569\3562470\MHD\PV.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2697013\3594662\MHD\NC.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2697013\3594662\MHD\ART.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\2697013\3594662\MHD\PV.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\METS\3180792\3634403\M

In [46]:
def mv_SignificantLayer2Mask(dir_name):
    """Move every entry of ``dir_name/SignificantLayer`` into ``dir_name/Mask``.

    Args:
        dir_name: path of a check folder holding both subfolders.
    """
    source_dir = os.path.join(dir_name, 'SignificantLayer')
    mask_dir = os.path.join(dir_name, 'Mask')
    for entry in os.listdir(source_dir):
        shutil.move(
            os.path.join(source_dir, entry),
            os.path.join(mask_dir, entry),
        )
def mv_all_dir(dir_name):
    """Run mv_SignificantLayer2Mask on every ``dir_name/<patient_id>/<check_id>``.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            mv_SignificantLayer2Mask(os.path.join(patient_dir, check_id))
# mv_all_dir('F:\\third_data\\ordered\\FNH')

In [13]:
def extract_SignificantLayer(dir_name):
    """Save every slice that contains mask voxels as its own MHD file.

    For each ``dir_name/Mask/<PHASE>_Mask.mhd`` the matching volume
    ``dir_name/MHD/<PHASE>.mhd`` is loaded.  Every z index at which the mask
    has a non-zero voxel yields two files in ``dir_name/SignificantLayer``:
    ``<PHASE>_Image_<i>.mhd`` and ``<PHASE>_Mask_<i>.mhd``, where ``i``
    counts the selected slices in ascending z order.

    NOTE(review): read_mhd_image and save_mhd_image are not defined in this
    notebook; the volumes are indexed as ``[z, :, :]`` and the three-way
    unpacking of np.where requires the mask to be 3-D.

    Args:
        dir_name: path of a check folder with ``MHD`` and ``Mask`` subfolders.
    """
    cur_dir = os.path.join(dir_name, 'SignificantLayer')
    mhd_dir = os.path.join(dir_name, 'MHD')
    mask_dir = os.path.join(dir_name, 'Mask')
    # Create the output folder up front in case it is missing.
    if not os.path.isdir(cur_dir):
        os.makedirs(cur_dir)
    for mask_path in glob(os.path.join(mask_dir, '*_Mask.mhd')):
        phase_name = os.path.basename(mask_path).split('_Mask')[0]
        mhd_image = read_mhd_image(os.path.join(mhd_dir, phase_name + '.mhd'))
        mask_image = read_mhd_image(mask_path)
        zs, _, _ = np.where(mask_image != 0)
        # np.unique returns the distinct z indices sorted ascending, so the
        # slice numbering no longer depends on set iteration order.
        for index, z in enumerate(np.unique(zs)):
            image_path = os.path.join(
                cur_dir, phase_name + '_Image_' + str(index) + '.mhd')
            mask_slice_path = os.path.join(
                cur_dir, phase_name + '_Mask_' + str(index) + '.mhd')
            save_mhd_image(mhd_image[z, :, :], image_path)
            save_mhd_image(mask_image[z, :, :], mask_slice_path)
            # Single-argument print() emits the same text on Python 2 and 3.
            print('saveing  %s' % (mask_slice_path,))

In [14]:
def extract_all_dir(dir_name):
    """Run extract_SignificantLayer on every ``dir_name/<patient_id>/<check_id>``.

    Every entry of ``dir_name`` is listed with os.listdir, so each one must
    itself be a directory.

    Args:
        dir_name: root folder holding one subfolder per patient.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        for check_id in os.listdir(patient_dir):
            extract_SignificantLayer(os.path.join(patient_dir, check_id))

In [16]:
# Extract the mask-bearing slices of every check folder under the FNH root.
extract_all_dir('F:\\third_data\\ordered\\FNH')

saveing  F:\third_data\ordered\FNH\1026414\3472586\SignificantLayer\ART_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\1026414\3472586\SignificantLayer\NC_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\1026414\3472586\SignificantLayer\PV_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2107239\3676523\SignificantLayer\ART_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2107239\3676523\SignificantLayer\NC_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2107239\3676523\SignificantLayer\PV_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2452057\3528388\SignificantLayer\ART_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2452057\3528388\SignificantLayer\NC_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2452057\3528388\SignificantLayer\PV_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2925508\3379669\SignificantLayer\ART_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2925508\3379669\SignificantLayer\NC_Mask_0.mhd
saveing  F:\third_data\ordered\FNH\2925508\3379669\SignificantLayer\PV_Mask_0.mhd
saveing  F:\

In [None]:
def mv_SL2target(dir_name, save_dir):
    """Copy the SignificantLayer files of one check folder into ``save_dir``.

    One subfolder per slice is created in ``save_dir``, named
    ``<patientid>_<checkid>_<lesionid>_<sliceid>_<typeid>``; the lesion id
    is always ``0``.  ``checkid``, ``patientid`` and the type name are the
    last three components of ``dir_name``.  The number of slices is the
    number of ``ART_Mask_*.mhd`` files in ``dir_name/SignificantLayer``.

    NOTE(review): return_typeid_byname is not defined in this notebook.

    Args:
        dir_name: check folder, ``.../<typename>/<patientid>/<checkid>``.
        save_dir: existing folder that receives the per-slice subfolders.
    """
    checkid = os.path.basename(dir_name)
    patientid = os.path.basename(os.path.dirname(dir_name))
    typename = os.path.basename(os.path.dirname(os.path.dirname(dir_name)))
    typeid = return_typeid_byname(typename)
    cur_path = os.path.join(dir_name, 'SignificantLayer')

    slice_num = len(glob(os.path.join(cur_path, 'ART_Mask_*.mhd')))
    for sliceid in range(slice_num):
        target_name = '_'.join(
            [patientid, checkid, '0', str(sliceid), str(typeid)])
        target_dir = os.path.join(save_dir, target_name)
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)
        # os.listdir() does not expand wildcards, so glob is required here.
        # The '.' after the index keeps slice 1 from also matching 10, 11...
        pattern = os.path.join(cur_path, '*_' + str(sliceid) + '.*')
        for src_path in glob(pattern):
            shutil.copy(
                src_path,
                os.path.join(target_dir, os.path.basename(src_path))
            )
        # Single-argument print() emits the same text on Python 2 and 3.
        print('%s  finish copying' % (target_dir,))