# 将从医生那里拿到的DICOM序列拆分成不同文件夹
## 所有*_all_*的函数都遍历下述的结构
- dir_name
    - patient_id
        - check_id
            - NC
            - ART
            - ...

## 添加新数据的一般步骤
### 从医院那里下载数据，格式如下：
    - Cyst
        - patient_id
            - check_id
                - DICOM series
        - ...
    - ...
### 接下来按照DICOM的序列的文件名的格式，分为0001~000x个子文件夹，使用split_dicom_series函数
### 接下来人为的将0001~000x子文件夹命名为NC、ART、PV、PVH等格式
### 接下来创建 SignificantLayers、MHD、Mask格式的子文件夹， 使用check_all_format函数
### 将NC、ART、PV的DICOM序列转化为MHD格式的文件，使用conver_all_dir函数
### 接下来，人为的标记每幅图像，Mask文件存在 Mask 文件夹中，命名方式是PhaseName.upper()_Mask.mhd，如果包含多个病灶，则复制该文件修改病例号和病人号，再次标注。如果包含其他类型的病灶，则其他类型的病灶命名方式是PhaseName.upper()_LesionName.upper().mhd
### 接下来根据Mask文件提取显著层数据，存入SignificantLayer文件夹中，使用extract_all_dir函数

In [54]:
import os
import numpy as np
import shutil
from glob import glob

In [55]:
run medicalImage.ipynb

In [16]:
def split_dicom_series(dir_path, prefix='', max_index=9):
    """Sort the DICOM files of one check directory into per-series sub-folders.

    Files directly inside ``dir_path`` whose names match
    ``<prefix>-<NNNN>-*`` are moved into a sub-folder called ``<NNNN>``
    (the series index zero-padded to four digits).  Indices 1 to
    ``max_index`` are tried; a sub-folder is only created when at least one
    file matches its index.  The directory path and every tried index are
    printed as progress output.

    Args:
        dir_path: Check directory that directly contains the DICOM files.
        prefix: Text in front of the first ``-`` of the file names.  The
            default ``''`` is for files without a patient/check id prefix;
            pass e.g. ``patient_id + '-' + check_id`` for files that have one.
        max_index: Highest series index to look for (inclusive).
    """
    print(dir_path)
    for cur_index in range(1, max_index + 1):
        cur_index_str = '%04d' % cur_index
        file_paths = glob(
            os.path.join(dir_path, prefix + '-' + cur_index_str + '-*')
        )
        save_dir = os.path.join(dir_path, cur_index_str)
        # Avoid leaving empty series folders behind for unused indices.
        if file_paths and not os.path.exists(save_dir):
            os.mkdir(save_dir)
        for file_path in file_paths:
            shutil.move(
                file_path,
                os.path.join(save_dir, os.path.basename(file_path))
            )
        print(cur_index_str)

In [20]:
# Split the DICOM files of a single check directory into per-series sub-folders.
split_dicom_series(
    'G:\\fourth_data\\FNH\\3616352\\3857878'
)

G:\fourth_data\FNH\3616352\3857878
0001
0002
0003
0004
0005
0006
0007
0008
0009


In [10]:
def split_all_pcid(dir_name):
    """Run ``split_dicom_series`` on every check directory below ``dir_name``.

    ``dir_name`` holds one folder per patient and each patient folder holds
    one entry per check.  Entries of ``dir_name`` that are not directories
    are ignored.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if os.path.isdir(patient_dir):
            for check_id in os.listdir(patient_dir):
                split_dicom_series(os.path.join(patient_dir, check_id))

In [22]:
# Split the DICOM files of every check directory under the HEM folder.
split_all_pcid('G:\\fourth_data\\HEM')

G:\fourth_data\HEM\1027687\4308893
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\1159786\4313878
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\1696681\4309065
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\1800995\4308948
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\183816\4303188
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2078812\4308941
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2193320\4313586
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2415334\4306619
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2487706\4311304
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2544817\4314830
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2557665\4312292
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2583698\4311319
0001
0002
0003
0004
0005
0006
0007
0008
0009
G:\fourth_data\HEM\2617591\4292118
0001
0

In [24]:
def check_format(dir_name):
    """Check that a check directory contains every essential phase entry.

    The essential phase names are ``NC``, ``ART`` and ``PV``.  When one of
    them is missing below ``dir_name`` a single message naming ``dir_name``
    is printed; further missing phases are not reported separately.

    Args:
        dir_name: Path of the check directory to inspect.

    Returns:
        True when all essential phase entries exist, False otherwise.
    """
    essential_phase_names = ['NC', 'ART', 'PV']
    for phase_name in essential_phase_names:
        if not os.path.exists(os.path.join(dir_name, phase_name)):
            # Formatted so the output is the same on Python 2 and Python 3.
            print('%s %s' % ('Not the format in ', dir_name))
            return False
    return True

In [32]:
def check_all_format(dir_name):
    """Run ``check_format`` on every check directory below ``dir_name``.

    ``dir_name`` holds one folder per patient and each patient folder holds
    one entry per check.  Entries of ``dir_name`` that are not directories
    are ignored.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if not os.path.isdir(patient_dir):
            continue
        check_dirs = [
            os.path.join(patient_dir, check_id)
            for check_id in os.listdir(patient_dir)
        ]
        for check_dir in check_dirs:
            check_format(check_dir)

In [35]:
# Report every check directory under the HEM folder that lacks an NC, ART or PV entry.
check_all_format('F:\\fourth_data\\order\\HEM\\')

Not the format in  F:\fourth_data\order\HEM\2583698\4311319
Not the format in  F:\fourth_data\order\HEM\3910785\4306658
Not the format in  F:\fourth_data\order\HEM\3913918\4314446
Not the format in  F:\fourth_data\order\HEM\4056663\4317490


In [66]:
def mk_dir(parent_dir, dir_name):
    """Create the sub-folder ``dir_name`` inside ``parent_dir`` if it is absent.

    Nothing happens when a path of that name already exists.
    """
    target = os.path.join(parent_dir, dir_name)
    if os.path.exists(target):
        return
    os.mkdir(target)

In [69]:
def make_all_dir(dir_name, sub_dir_names=('SignificantLayer',)):
    """Create working sub-folders in every check directory below ``dir_name``.

    ``dir_name`` holds one folder per patient and each patient folder holds
    one entry per check.  Entries of ``dir_name`` that are not directories,
    or whose name starts with ``'not'``, are skipped.

    Args:
        dir_name: Root folder containing the patient folders.
        sub_dir_names: Names of the folders to create (via ``mk_dir``) inside
            each check directory.  Defaults to only ``'SignificantLayer'``;
            pass e.g. ``('SignificantLayer', 'MHD', 'Mask')`` to create
            several folders in one pass.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if not os.path.isdir(patient_dir):
            continue
        if patient_id.startswith('not'):
            continue
        for check_id in os.listdir(patient_dir):
            check_dir = os.path.join(patient_dir, check_id)
            for sub_dir_name in sub_dir_names:
                mk_dir(check_dir, sub_dir_name)

In [73]:
# Create the SignificantLayer folder in every check directory under the CYST folder.
make_all_dir('F:\\fourth_data\\ordered\\CYST')

In [49]:
def convert_DICOM2MHD(dir_name):
    """Convert the DICOM series of one check directory into MHD files.

    For each of the phases ``NC``, ``ART`` and ``PV`` the series in
    ``<dir_name>/<phase>`` is read with ``read_dicom_series`` and written
    with ``save_mhd_image`` to ``<dir_name>/MHD/<phase>.mhd``.  A progress
    line is printed after each file is saved.
    """
    mhd_dir = os.path.join(dir_name, 'MHD')
    for phase_name in ('NC', 'ART', 'PV'):
        target_path = os.path.join(mhd_dir, phase_name + '.mhd')
        volume = read_dicom_series(os.path.join(dir_name, phase_name))
        save_mhd_image(volume, target_path)
        print('%s %s' % ('saveing ', target_path))

In [37]:
# convert_DICOM2MHD('F:\\third_data\\ordered\\FNH\\3226933\\3502177')

image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\NC.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\ART.mhd
image type is  <type 'numpy.ndarray'>
saveing  F:\third_data\ordered\FNH\3226933\3502177\MHD\PV.mhd


In [50]:
def conver_all_dir(dir_name):
    """Run ``convert_DICOM2MHD`` on every check directory below ``dir_name``.

    Entries of ``dir_name`` that are not directories, or whose name starts
    with ``'not'``, are skipped.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        wanted = os.path.isdir(patient_dir) and not patient_id.startswith('not')
        if wanted:
            for check_id in os.listdir(patient_dir):
                convert_DICOM2MHD(os.path.join(patient_dir, check_id))

In [53]:
# Convert the NC/ART/PV DICOM series of every check directory under the HEM folder to MHD files.
conver_all_dir('F:\\fourth_data\\ordered\\HEM')

saveing  F:\fourth_data\ordered\HEM\1027687\4308893\MHD\NC.mhd
saveing  F:\fourth_data\ordered\HEM\1027687\4308893\MHD\ART.mhd
saveing  F:\fourth_data\ordered\HEM\1027687\4308893\MHD\PV.mhd
saveing  F:\fourth_data\ordered\HEM\1159786\4313878\MHD\NC.mhd
saveing  F:\fourth_data\ordered\HEM\1159786\4313878\MHD\ART.mhd
saveing  F:\fourth_data\ordered\HEM\1159786\4313878\MHD\PV.mhd
saveing  F:\fourth_data\ordered\HEM\1696681\4309065\MHD\NC.mhd
saveing  F:\fourth_data\ordered\HEM\1696681\4309065\MHD\ART.mhd
saveing  F:\fourth_data\ordered\HEM\1696681\4309065\MHD\PV.mhd
saveing  F:\fourth_data\ordered\HEM\1800995\4308948\MHD\NC.mhd
saveing  F:\fourth_data\ordered\HEM\1800995\4308948\MHD\ART.mhd
saveing  F:\fourth_data\ordered\HEM\1800995\4308948\MHD\PV.mhd
saveing  F:\fourth_data\ordered\HEM\183816\4303188\MHD\NC.mhd
saveing  F:\fourth_data\ordered\HEM\183816\4303188\MHD\ART.mhd
saveing  F:\fourth_data\ordered\HEM\183816\4303188\MHD\PV.mhd
saveing  F:\fourth_data\ordered\HEM\2078812\4308941\M

In [14]:
def mv_SignificantLayer2Mask(dir_name):
    """Move every entry of ``<dir_name>/SignificantLayer`` into ``<dir_name>/Mask``.

    Entries keep their names; the move is done with ``shutil.move``.
    """
    source_dir = os.path.join(dir_name, 'SignificantLayer')
    mask_dir = os.path.join(dir_name, 'Mask')
    for entry in os.listdir(source_dir):
        shutil.move(
            os.path.join(source_dir, entry),
            os.path.join(mask_dir, entry)
        )
def mv_all_dir(dir_name):
    """Run ``mv_SignificantLayer2Mask`` on every check directory below ``dir_name``.

    ``dir_name`` holds one folder per patient and each patient folder holds
    one entry per check.  Entries of ``dir_name`` that are not directories
    are skipped, so a stray file next to the patient folders does not make
    ``os.listdir`` raise.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if not os.path.isdir(patient_dir):
            continue
        for check_id in os.listdir(patient_dir):
            mv_SignificantLayer2Mask(os.path.join(patient_dir, check_id))
# mv_all_dir('F:\\third_data\\ordered\\FNH')

In [59]:
def extract_SignificantLayer(dir_name):
    """Extract the slices marked by each phase mask of one check directory.

    For every ``<dir_name>/Mask/<PHASE>_Mask.mhd`` file the volume
    ``<dir_name>/MHD/<PHASE>.mhd`` is loaded as well.  For each index along
    the first axis at which the mask has a non-zero value, the image slice
    and the mask slice are saved to ``<dir_name>/SignificantLayer`` as
    ``<PHASE>_Image_<i>.mhd`` and ``<PHASE>_Mask_<i>.mhd``.  ``i`` numbers
    the selected slices from 0 in ascending slice order.

    Both volumes are read with ``read_mhd_image`` and written with
    ``save_mhd_image``; the mask must be 3-D because its non-zero positions
    are unpacked into three index arrays.
    """
    cur_dir = os.path.join(dir_name, 'SignificantLayer')
    mhd_dir = os.path.join(dir_name, 'MHD')
    mask_dir = os.path.join(dir_name, 'Mask')
    for mask_path in glob(os.path.join(mask_dir, '*_Mask.mhd')):
        mask_name = os.path.basename(mask_path)
        phase_name = mask_name.split('_Mask')[0]
        mhd_image = read_mhd_image(os.path.join(mhd_dir, phase_name + '.mhd'))
        mask_image = read_mhd_image(os.path.join(mask_dir, mask_name))
        zs, _, _ = np.where(mask_image != 0)
        # np.unique returns the distinct indices sorted, so the running
        # index in the file names always follows slice order (a plain set
        # gives no ordering guarantee).
        for index, z in enumerate(np.unique(zs)):
            mask_image_slice = mask_image[z, :, :]
            image_slice = mhd_image[z, :, :]
            image_save_path = os.path.join(
                cur_dir, phase_name + '_Image_' + str(index) + '.mhd'
            )
            mask_save_path = os.path.join(
                cur_dir, phase_name + '_Mask_' + str(index) + '.mhd'
            )
            save_mhd_image(image_slice, image_save_path)
            save_mhd_image(mask_image_slice, mask_save_path)
            print('%s %s' % ('saveing ', mask_save_path))

In [64]:
def extract_all_dir(dir_name):
    """Run ``extract_SignificantLayer`` on every check directory below ``dir_name``.

    Entries of ``dir_name`` that are not directories, or whose name starts
    with ``'not'``, are skipped.  The name of each processed patient folder
    is printed before its checks are handled.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if patient_id.startswith('not') or not os.path.isdir(patient_dir):
            continue
        print(patient_id)
        for check_id in os.listdir(patient_dir):
            extract_SignificantLayer(os.path.join(patient_dir, check_id))

In [78]:
# Extract the significant layers of every check directory under the CYST folder.
extract_all_dir('F:\\fourth_data\\ordered\\CYST')

183816
saveing  F:\fourth_data\ordered\CYST\183816\4303188\SignificantLayer\ART_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\183816\4303188\SignificantLayer\NC_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\183816\4303188\SignificantLayer\PV_Mask_0.mhd
2687491
saveing  F:\fourth_data\ordered\CYST\2687491\4311067\SignificantLayer\ART_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2687491\4311067\SignificantLayer\NC_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2687491\4311067\SignificantLayer\PV_Mask_0.mhd
2687492
saveing  F:\fourth_data\ordered\CYST\2687492\4311068\SignificantLayer\ART_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2687492\4311068\SignificantLayer\NC_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2687492\4311068\SignificantLayer\PV_Mask_0.mhd
2985674
saveing  F:\fourth_data\ordered\CYST\2985674\4307349\SignificantLayer\ART_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2985674\4307349\SignificantLayer\NC_Mask_0.mhd
saveing  F:\fourth_data\ordered\CYST\2985674

In [79]:
def mv_SL2target(dir_name, save_dir):
    """Copy the SignificantLayer files of one check into per-slice folders.

    ``dir_name`` is expected to be ``<type_name>/<patient_id>/<check_id>``:
    the last three path components supply the check id, the patient id and
    the lesion type name (mapped to an id by ``return_typeid_byname``).

    For every slice id ``s`` (one per ``ART_Mask_*.mhd`` file found in
    ``<dir_name>/SignificantLayer``) a folder named
    ``<patientid>_<checkid>_0_<s>_<typeid>`` is created in ``save_dir`` if
    missing, and every file named ``*_<s>.<ext>`` is copied into it.  The
    lesion id component is always ``0``.

    Args:
        dir_name: Check directory containing the ``SignificantLayer`` folder.
        save_dir: Existing directory that receives the per-slice folders.
    """
    checkid = os.path.basename(dir_name)
    patientid = os.path.basename(os.path.dirname(dir_name))
    typename = os.path.basename(os.path.dirname(os.path.dirname(dir_name)))
    typeid = return_typeid_byname(typename)
    cur_path = os.path.join(dir_name, 'SignificantLayer')

    slice_num = len(glob(os.path.join(cur_path, 'ART_Mask_*.mhd')))
    for sliceid in range(slice_num):
        folder_name = '_'.join(
            [patientid, checkid, '0', str(sliceid), str(typeid)]
        )
        target_dir = os.path.join(save_dir, folder_name)
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)
        # Anchor the slice id at the extension dot: an open-ended
        # '*_1*' would also pick up the files of slices 10-19.
        paths = glob(os.path.join(cur_path, '*_' + str(sliceid) + '.*'))
        for path in paths:
            shutil.copy(path, os.path.join(target_dir, os.path.basename(path)))
        print('%s %s' % (target_dir, ' finish copying'))

In [83]:
def mvSL2_all_dir(dir_name, save_dir):
    """Run ``mv_SL2target`` on every check directory below ``dir_name``.

    Entries of ``dir_name`` that are not directories, or whose name starts
    with ``'not'``, are skipped.  ``save_dir`` is passed through unchanged.
    """
    for patient_id in os.listdir(dir_name):
        patient_dir = os.path.join(dir_name, patient_id)
        if os.path.isdir(patient_dir) and not patient_id.startswith('not'):
            check_dirs = [
                os.path.join(patient_dir, check_id)
                for check_id in os.listdir(patient_dir)
            ]
            for check_dir in check_dirs:
                mv_SL2target(check_dir, save_dir)

In [87]:
# Copy the significant layers of every CYST check directory into per-slice
# folders below the second path.
mvSL2_all_dir(
    'F:\\fourth_data\\ordered\\CYST',
    'G:\\ICPR\\SignificantLayers2'
)

G:\ICPR\SignificantLayers2\183816_4303188_0_0_0  finish copying
G:\ICPR\SignificantLayers2\2687491_4311067_0_0_0  finish copying
G:\ICPR\SignificantLayers2\2687492_4311068_0_0_0  finish copying
G:\ICPR\SignificantLayers2\2985674_4307349_0_0_0  finish copying
G:\ICPR\SignificantLayers2\3410577_4295598_0_0_0  finish copying
G:\ICPR\SignificantLayers2\8008445_4290669_0_0_0  finish copying
G:\ICPR\SignificantLayers2\8144970_4316673_0_0_0  finish copying
