# 说明
* 功能: 预处理, 提取数据集中动脉瘤病灶的BBOX坐标
  * 序列/标注文件均以 PID、Study_UID、Series_UID 三级标识命名(例如 `<PID>_<Study_UID>_<Series_UID>_mask.nii.gz`)

In [1]:
import os
import numpy as np
import nibabel as nib
from skimage import measure
from joblib import Parallel, delayed
from multiprocessing import cpu_count

In [2]:
# Optional: switch the working directory to the dataset root before running.
# os.chdir('../0331_DOCJ')
# Directory that is scanned for the `*mask.nii.gz` annotation volumes.
mask_root = './mask'

In [3]:
# Collect the mask volumes to process. os.listdir() returns entries in
# arbitrary order, so the names are sorted to make the processing order (and
# therefore the order of the emitted bounding-box records) reproducible.
file_names = sorted(
    name for name in os.listdir(mask_root) if name.endswith('mask.nii.gz')
)
# Fail early, reporting the scanned directory, when no mask file is found.
assert len(file_names) > 0, mask_root
print('待处理MASk个数:', len(file_names))

待处理MASk个数: 1


In [4]:
def process_one(mask_path):
    """Extract one bounding-box record per connected lesion in a mask file.

    The case identifier is the file name with everything from the last
    underscore onwards removed (i.e. the trailing ``_mask.nii.gz``).

    The mask volume is labelled into connected components with
    ``connectivity=2`` and every component yields one tuple
    ``(pid, z1, x1, y1, z2 - z1, x2 - x1, y2 - y1)``, where ``x``, ``y`` and
    ``z`` name array axes 0, 1 and 2 of the loaded volume.

    Args:
        mask_path: Path to a ``*_mask.nii.gz`` file.

    Returns:
        A list of record tuples; empty when the mask contains no lesion,
        which is explicitly allowed.
    """
    file_name = os.path.basename(mask_path)
    pid = file_name.rsplit('_', maxsplit=1)[0]

    volume = np.asarray(nib.load(mask_path).dataobj)
    labeled = measure.label(volume, connectivity=2)
    regions = measure.regionprops(labeled)

    def _to_record(bbox):
        # bbox is half-open: the start corner is inside the region, the end
        # corner is one past it, so a lesion on a single Z slice spans
        # [1, 2) and has extent 2 - 1 = 1.
        x1, y1, z1, x2, y2, z2 = bbox
        return (pid, z1, x1, y1, z2 - z1, x2 - x1, y2 - y1)

    return [_to_record(region.bbox) for region in regions]

# test
# mask_path = os.path.join(mask_root, file_names[0])
# process_one(mask_path)

In [5]:
# Run process_one over every mask file in parallel worker processes.
# A quarter of the available CPUs is used, but never fewer than one worker:
# on machines with fewer than four cores `cpu_count() // 4` evaluates to 0,
# which joblib rejects as an invalid n_jobs value.
mask_box_infos = Parallel(
    n_jobs=max(1, cpu_count() // 4), verbose=10, backend='multiprocessing'
)(
    delayed(process_one)(os.path.join(mask_root, name)) for name in file_names
)

[Parallel(n_jobs=6)]: Using backend MultiprocessingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done   1 tasks      | elapsed:    0.8s
[Parallel(n_jobs=6)]: Done   1 out of   1 | elapsed:    0.8s finished


In [6]:
# Write every lesion record to a text file, one space-separated line per
# lesion (a string field followed by six integers), and echo the same text
# to stdout.
mask_box_info_save_path = './mask_bbox.txt'
with open(mask_box_info_save_path, 'w') as fout:
    # Flatten the per-mask result lists into formatted record lines.
    mask_box_info_format = [
        '%s %d %d %d %d %d %d' % record
        for per_mask in mask_box_infos
        for record in per_mask
    ]
    joined_records = '\n'.join(mask_box_info_format)
    print(joined_records)
    fout.write(joined_records)

PID0014297_1.2.276.0.7230010.3.1.2.396775365.6052.1379291461.6233_2.25.266836558535444967533867872517828591362 158 148 121 5 8 7


: 