### 下一单元格不可用。

In [None]:
import os
import shutil
import sys
import warnings
import numpy as np
import pandas as pd
import SimpleITK as sitk
import scipy
from scipy.ndimage.interpolation import zoom
from scipy.ndimage.morphology import binary_dilation,generate_binary_structure
from skimage import measure
from skimage.morphology import convex_hull_image
from multiprocessing import Pool
from functools import partial
from config import config

"""
以下函数中凡涉及图像的输入参数及返回值均为numpy数组，其形状均为：(z,y,x)
"""

def load_itk_image(filename):
    """
    Load an .mhd image and report whether its orientation matrix is non-identity.

    The header file is scanned as text for the ``TransformMatrix`` entry, and
    the voxel data is read through SimpleITK.

    Args:
        filename: path to the ``.mhd`` header file.

    Returns:
        A tuple ``(numpyImage, numpyOrigin, numpySpacing, isflip)``:
        the voxel array from ``sitk.GetArrayFromImage``; the origin and the
        spacing with SimpleITK's axis order reversed (z, y, x per the original
        author's note); and ``True`` when the rounded transform matrix differs
        from the identity.

    Raises:
        IndexError: if no header line starts with ``TransformMatrix``.
    """
    identity = np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])

    with open(filename) as header:
        header_lines = header.readlines()
        # Keep only the "TransformMatrix = ..." entry of the header.
        matrix_line = [row for row in header_lines if row.startswith('TransformMatrix')][0]
        raw_entries = matrix_line.split(' = ')[1].split(' ')
        # Round so that entries such as 0.99999999 compare equal to 1.
        matrix = np.round(np.array(raw_entries).astype('float'))
        # Any deviation from the identity is treated as a flipped scan.
        isflip = True if np.any(matrix != identity) else False

    itkimage = sitk.ReadImage(filename)
    numpyImage = sitk.GetArrayFromImage(itkimage)

    # SimpleITK reports origin/spacing in reversed axis order relative to the
    # array returned above, so both are reversed to line up with it.
    numpyOrigin = np.array(list(reversed(itkimage.GetOrigin())))
    numpySpacing = np.array(list(reversed(itkimage.GetSpacing())))

    return numpyImage, numpyOrigin, numpySpacing, isflip

def lumTrans(img):
    """
    Convert HU values to 8-bit gray levels using the window [-1200, 600].

    Values at or below -1200 map to 0, values at or above 600 map to 255, and
    values in between are scaled linearly and truncated when cast to uint8.
    """
    hu_low, hu_high = np.array([-1200., 600.])
    # Normalise to [0, 1] relative to the window, saturating outside it.
    unit = np.clip((img - hu_low) / (hu_high - hu_low), 0, 1)
    # Stretch to 0..255; the cast truncates the fractional part.
    return (unit * 255).astype('uint8')

def resample(imgs, spacing, new_spacing, order=2):
    """
    Resample a volume so that its voxel spacing approximates ``new_spacing``.

    Scans come with differing resolutions; this brings them to a common one.

    Args:
        imgs: 3-D array, or 4-D array whose last axis indexes channels.
        spacing: current spacing along the three spatial axes.
        new_spacing: requested spacing along the three spatial axes.
        order: spline interpolation order handed to ``zoom``. It is applied
            to every channel of a 4-D input as well.

    Returns:
        ``(resampled, true_spacing)``. ``true_spacing`` is the spacing that
        was actually achieved; it can differ slightly from ``new_spacing``
        because the output shape has to be integral.

    Raises:
        ValueError: if ``imgs`` is neither 3-D nor 4-D.
    """
    if len(imgs.shape) == 3:
        old_shape = np.array(imgs.shape)
        # Target shape, rounded because array dimensions must be integers.
        new_shape = np.round(old_shape * spacing / new_spacing)
        true_spacing = spacing * old_shape / new_shape
        resize_factor = new_shape / old_shape
        # mode='nearest' only controls how the borders are extended; the
        # interpolation itself is a spline of the requested order.
        imgs = zoom(imgs, resize_factor, mode='nearest', order=order)
        return imgs, true_spacing
    elif len(imgs.shape) == 4:
        channels = []
        for i in range(imgs.shape[-1]):
            # Forward ``order`` so every channel uses the caller's setting
            # instead of silently falling back to the default.
            resampled, true_spacing = resample(imgs[:, :, :, i], spacing, new_spacing, order=order)
            channels.append(resampled)
        # Channels were stacked on axis 0; move them back to the last axis.
        newimg = np.transpose(np.array(channels), [1, 2, 3, 0])
        return newimg, true_spacing
    else:
        raise ValueError('wrong shape')

def worldToVoxelCoord(worldCoord, origin, spacing):
    """
    Convert a world coordinate into an integer voxel index.

    The absolute offset from ``origin`` is divided by ``spacing`` and then
    truncated to ``int32``.
    """
    offset = np.absolute(worldCoord - origin)
    # Voxel indices must be integral; the cast drops the fractional part.
    return (offset / spacing).astype(np.int32)

def process_mask(mask):
    """
    Build a dilated lung mask from a per-slice convex hull.

    Each non-empty slice is replaced by its convex hull, unless the hull has
    more than 1.5 times as many pixels as the slice itself, in which case the
    slice is kept unchanged. The stack is then dilated in 3-D for 10
    iterations.
    """
    hull_stack = np.copy(mask)
    for idx, layer in enumerate(mask):
        # convex_hull_image is given a contiguous copy of the slice.
        layer = np.ascontiguousarray(layer)
        chosen = layer
        if np.sum(layer) > 0:
            hull = convex_hull_image(layer)
            # Accept the hull only if it does not inflate the area by more than 50%.
            if not (np.sum(hull) > 1.5 * np.sum(layer)):
                chosen = hull
        hull_stack[idx] = chosen

    # 3-D structuring element with connectivity 1.
    struct = generate_binary_structure(3, 1)
    return binary_dilation(hull_stack, structure=struct, iterations=10)

def extract_fake_nodule_from_luna(id, annos, filelist, luna_segment, luna_data, savepath):
    """
    Cut 3-slice patches around the annotated non-nodule candidates of one scan.

    Pipeline: load the scan and its lung mask, blank everything outside the
    dilated mask, convert HU to gray levels, resample to 1mm x 1mm x 1mm,
    convert annotation coordinates to voxel indices, and save a (3, 64, 64)
    uint8 patch per valid candidate.

    Args:
        id: index into ``filelist`` selecting the scan to process.
        annos: 2-D array of annotation rows laid out as
            (name, x_mm, y_mm, z_mm, class).
        filelist: scan names without the ``.mhd`` extension.
        luna_segment: directory holding the lung-mask ``.mhd`` files.
        luna_data: directory holding the raw CT ``.mhd`` files.
        savepath: directory the ``.npy`` patches are written to.

    Patches are saved as ``<name>_real_nodule_<k>.npy``, where ``k`` is the
    1-based position of the annotation among this scan's rows. Candidates
    whose centre is outside the volume, or on the first/last slice, are
    reported and skipped. A window that would cross the y/x border is shifted
    inward so that it stays 64 x 64.
    """
    # NOTE(review): the file name says "real" although these are non-nodule
    # candidates; kept unchanged because downstream code may rely on it.
    resolution = np.array([1, 1, 1])  # target spacing
    p = 64                            # patch edge length in y and x
    half = p // 2
    bone_thresh = 210
    pad_value = 170                   # gray level of 0 HU under the [-1200, 600] window

    name = filelist[id]
    full_name = os.path.join(luna_data, name + ".mhd")

    # 1. Load the raw scan and the lung mask. origin/spacing/isflip end up
    #    holding the values read from the mask header.
    sliceim, origin, spacing, isflip = load_itk_image(full_name)
    Mask, origin, spacing, isflip = load_itk_image(os.path.join(luna_segment, name + ".mhd"))

    if isflip:
        # Undo the flip along y and x.
        Mask = Mask[:, ::-1, ::-1]
    # Mask labels 3 and 4 mark the two lungs (per the original author's note).
    m1 = Mask == 3
    m2 = Mask == 4
    Mask = m1 + m2

    # 2. Dilate each lung separately, blank everything outside the dilated
    #    mask and convert HU to gray levels.
    dm1 = process_mask(m1)
    dm2 = process_mask(m2)
    dilatedMask = dm1 + dm2
    # Region added by the dilation (dilated XOR original).
    extramask = dilatedMask ^ Mask

    if isflip:
        sliceim = sliceim[:, ::-1, ::-1]
        print("flip!")
    sliceim = lumTrans(sliceim)
    # Outside the dilated mask the image is filled with pad_value.
    sliceim = sliceim*dilatedMask+pad_value*(1-dilatedMask).astype("uint8")
    # Bright voxels inside the added rim are filled with pad_value too.
    bones = (sliceim*extramask)>bone_thresh
    sliceim[bones] = pad_value

    # 3. Resample to the target spacing; sliceim1 generally has a new shape.
    sliceim1, _ = resample(sliceim, spacing, resolution, order=1)

    # 4. Convert this scan's annotations to voxel indices in the resampled
    #    volume. Rows are (name, x, y, z, class), so the coordinates are
    #    reversed to (z, y, x) first.
    this_annos = np.copy(annos[annos[:,0]==(name)])
    pix_pos = []
    for c in this_annos:
        pos = worldToVoxelCoord(c[1:4][::-1], origin=origin, spacing=resolution)
        if isflip:
            pos[1:] = sliceim1.shape[1:3]-pos[1:]
        pix_pos.append(pos)
    pix_pos = np.array(pix_pos).astype(np.int32)

    # 5. Cut one patch per candidate.
    num_z, height, width = sliceim1.shape

    if len(pix_pos) > 0 and (height < p or width < p):
        # A full p x p window cannot be cut from this volume. Skip the scan
        # instead of saving a partially filled buffer.
        print("slice smaller than patch, skipped:", full_name, "height: ", height, "width: ", width)
        return

    for ids, (nodepos, old_nodepos) in enumerate(zip(pix_pos, this_annos), start=1):
        center_z, center_y, center_x = nodepos[0], nodepos[1], nodepos[2]
        try:
            # One slice is needed on each side of the centre slice.
            if center_z <= 0 or center_z >= num_z-1:
                print("坐标z不符合要求：",full_name, "old_z: ", old_nodepos[3], "new_z: ", center_z, "new_z_num: ", num_z)
                continue
            if center_y<0 or center_y>= height or center_x<0 or center_x>= width:
                print("坐标x或y不符合要求：",full_name, "old_z: ", old_nodepos[3], "center_x: ", center_x, "center_y: ", center_y)
                continue

            # Clamp the window start so that the window stays inside the
            # slice; a window crossing a border is shifted inward.
            y0 = min(max(center_y - half, 0), height - p)
            x0 = min(max(center_x - half, 0), width - p)
            node = sliceim1[center_z-1:center_z+2, y0:y0+p, x0:x0+p].astype(np.uint8)

            np.save(os.path.join(savepath,name+"_real_nodule_"+str(ids)+".npy"), node)
        except Exception as e:
            # Best effort: report the failure and go on with the next candidate.
            print("process images %s error..."%str(name))
            print(type(e).__name__,":", e)

def preprocess_luna_fake():
    """
    Extract non-nodule patches for all ten LUNA16 subsets.

    Reads the paths from ``config``, keeps the annotation rows whose
    ``class`` column is 0, and processes ``subset0`` .. ``subset9`` with a
    process pool, one scan per task. The work is skipped when the marker file
    ``.flag_preprocessluna`` already exists in the working directory; the
    marker is (re)written at the end of every successful call.
    """
    luna_data = config["luna_data"]              # raw LUNA16 scans
    luna_segment = config["luna_segment"]        # lung segmentation masks
    savepath = config["fake_result_path"]        # output directory for the patches
    luna_non_nodule = config["luna_non_nodule"]  # candidate annotation CSV

    finished_flag = ".flag_preprocessluna"
    print("starting preprocessing luna fake nodule...")

    if not os.path.exists(finished_flag):
        df_node = pd.read_csv(luna_non_nodule)
        # Keep complete rows of class 0; columns are (name, x, y, z, class).
        annos = np.array(df_node[df_node["class"]==0].dropna())

        os.makedirs(savepath, exist_ok=True)

        # Pool() starts worker processes (not threads).
        pool = Pool()
        try:
            for setidx in range(10):
                print("process subset", setidx)
                subset = "subset"+str(setidx)
                subset_data = os.path.join(luna_data, subset)
                subset_save = os.path.join(savepath, subset)

                filelist = [f.split(".mhd")[0] for f in os.listdir(subset_data) if f.endswith(".mhd")]
                os.makedirs(subset_save, exist_ok=True)

                worker = partial(extract_fake_nodule_from_luna, annos=annos,
                                 filelist=filelist, luna_segment=luna_segment,
                                 luna_data=subset_data,
                                 savepath=subset_save)
                # Each task receives an index into filelist.
                pool.map(worker, range(len(filelist)))
        finally:
            # Release the workers even when a subset fails.
            pool.close()
            pool.join()

    print("end preprocessing luna fake nodule")
    # Create/truncate the marker file and close the handle right away.
    with open(finished_flag,"w+"):
        pass

# Run the preprocessing only when this file is executed as a script.
if __name__ == "__main__":
    preprocess_luna_fake()

### 以下可用

In [None]:
import os
import shutil
import sys
import warnings
import numpy as np
import pandas as pd
import SimpleITK as sitk
import scipy
from scipy.ndimage.interpolation import zoom
from scipy.ndimage.morphology import binary_dilation,generate_binary_structure
from skimage import measure
from skimage.morphology import convex_hull_image
from multiprocessing import Pool
from functools import partial
from config import config

"""
以下函数中凡涉及图像的输入参数及返回值均为numpy数组，其形状均为：(z,y,x)
最终保存的patch形状为(H,W,D)。
"""

def load_itk_image(filename):
    """
    Read an .mhd image together with its origin, spacing and flip flag.

    Args:
        filename: path to the ``.mhd`` header file.

    Returns:
        ``(numpyImage, numpyOrigin, numpySpacing, isflip)``. The image is the
        array from ``sitk.GetArrayFromImage``. Origin and spacing are
        SimpleITK's values in reversed axis order (z, y, x per the original
        author's note). ``isflip`` is ``True`` when the rounded
        ``TransformMatrix`` of the header is not the identity.

    Raises:
        IndexError: if the header has no ``TransformMatrix`` line.
    """
    with open(filename) as mhd:
        contents = mhd.readlines()
        # First header line that starts with "TransformMatrix".
        candidates = [entry for entry in contents if entry.startswith('TransformMatrix')]
        fields = candidates[0].split(' = ')[1].split(' ')
        transform = np.array(fields).astype('float')
        # Rounding turns near-integers such as 0.99999999 into exact values.
        transform = np.round(transform)
        expected = np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])
        if np.any(transform != expected):
            isflip = True
        else:
            isflip = False

    itkimage = sitk.ReadImage(filename)
    numpyImage = sitk.GetArrayFromImage(itkimage)

    # Reverse SimpleITK's axis order so these line up with the array axes.
    numpyOrigin = np.array(list(reversed(itkimage.GetOrigin())))
    numpySpacing = np.array(list(reversed(itkimage.GetSpacing())))

    return numpyImage, numpyOrigin, numpySpacing, isflip

def lumTrans(img):
    """
    Convert HU values to 8-bit gray levels using the window [-1000, 400].

    Values outside the window saturate at 0 or 255; values inside are scaled
    linearly and truncated when cast to uint8.
    """
    window = np.array([-1000., 400.])
    span = window[1] - window[0]
    # Position inside the window as a fraction, limited to [0, 1].
    fraction = np.clip((img - window[0]) / span, 0, 1)
    gray = fraction * 255
    return gray.astype('uint8')

def resample(imgs, spacing, new_spacing, order=2):
    """
    Resample a volume to (approximately) the requested voxel spacing.

    Args:
        imgs: 3-D array, or 4-D array with the channels on the last axis.
        spacing: current spacing along the three spatial axes.
        new_spacing: requested spacing along the three spatial axes.
        order: spline interpolation order passed to ``zoom``; it is also
            used for every channel of a 4-D input.

    Returns:
        ``(resampled, true_spacing)``. Because the output shape must be
        integral, ``true_spacing`` may deviate slightly from ``new_spacing``.

    Raises:
        ValueError: if ``imgs`` has neither 3 nor 4 dimensions.
    """
    n_dims = len(imgs.shape)
    if n_dims == 3:
        shape = np.array(imgs.shape)
        # Rounded because the output shape has to be a whole number of voxels.
        new_shape = np.round(shape * spacing / new_spacing)
        true_spacing = spacing * shape / new_shape
        resize_factor = new_shape / shape
        # ``mode`` selects the border extension; ``order`` selects the
        # spline degree (0 would be nearest-neighbour).
        imgs = zoom(imgs, resize_factor, mode='nearest', order=order)
        return imgs, true_spacing
    if n_dims == 4:
        resampled_channels = []
        for i in range(imgs.shape[-1]):
            # Pass ``order`` on; otherwise the per-channel call would fall
            # back to the default order regardless of the caller's choice.
            channel, true_spacing = resample(imgs[:, :, :, i], spacing, new_spacing, order=order)
            resampled_channels.append(channel)
        # Stacking put the channels on axis 0; move them to the last axis.
        newimg = np.transpose(np.array(resampled_channels), [1, 2, 3, 0])
        return newimg, true_spacing
    raise ValueError('wrong shape')

def worldToVoxelCoord(worldCoord, origin, spacing):
    """
    Map a world coordinate to an integer voxel coordinate.

    Computes ``|worldCoord - origin| / spacing`` and truncates the result to
    ``int32``.
    """
    distance = np.absolute(worldCoord - origin)
    in_voxels = distance / spacing
    # Voxel coordinates are used as indices, hence the integer cast.
    return in_voxels.astype(np.int32)

def process_mask(mask):
    """
    Build a dilated lung mask from a per-slice convex hull.

    Each non-empty slice is swapped for its convex hull, unless the hull has
    more than 1.5 times as many pixels as the slice, in which case the slice
    is kept. The stack is then dilated in 3-D for 5 iterations.
    """
    n_layers = mask.shape[0]
    convex_mask = np.copy(mask)
    for z in range(n_layers):
        # convex_hull_image is given a contiguous copy of the slice.
        original = np.ascontiguousarray(mask[z])
        replacement = original
        if np.sum(original) > 0:
            hull = convex_hull_image(original)
            hull_too_large = np.sum(hull) > 1.5 * np.sum(original)
            if not hull_too_large:
                replacement = hull
        convex_mask[z] = replacement

    # 3-D structuring element with connectivity 1.
    struct = generate_binary_structure(3, 1)
    dilatedMask = binary_dilation(convex_mask, structure=struct, iterations=5)
    return dilatedMask

def extract_fake_nodule_from_luna(id, annos, filelist, margin, luna_segment, luna_data, savepath):
    """
    Cut 3-slice patches around the annotated non-nodule candidates of one scan.

    Pipeline: load the scan and its lung mask, blank everything outside the
    dilated mask, convert HU to gray levels, resample to 1mm x 1mm x 1mm,
    convert annotation coordinates to voxel indices, and save one patch per
    valid candidate.

    Args:
        id: index into ``filelist`` selecting the scan to process.
        annos: 2-D array of annotation rows laid out as
            (name, x_mm, y_mm, z_mm, class).
        filelist: scan names without the ``.mhd`` extension.
        margin: half edge length of the patch in y and x, in voxels.
        luna_segment: directory holding the lung-mask ``.mhd`` files.
        luna_data: directory holding the raw CT ``.mhd`` files.
        savepath: directory the ``.npy`` patches are written to.

    Each patch is saved as ``<name>_fake_nodule_<k>.npy`` in (y, x, z) order
    with shape (2*margin, 2*margin, 3). ``k`` is the 1-based position of the
    annotation among this scan's rows. A window crossing the y/x border is
    padded with the constant 182 so every patch has the same size. Candidates
    whose centre is outside the volume, or on the first/last slice, are
    reported and skipped.
    """
    resolution = np.array([1, 1, 1])  # target spacing
    # NOTE(review): 170 and 210 look calibrated for a [-1200, 600] window,
    # while lumTrans in this cell uses [-1000, 400] (0 HU -> 182, the value
    # used for padding below). Left unchanged; please confirm.
    bone_thresh = 210
    pad_value = 170
    border_value = 182

    name = filelist[id]
    full_name = os.path.join(luna_data, name + ".mhd")

    # 1. Load the raw scan and the lung mask. origin/spacing/isflip end up
    #    holding the values read from the mask header.
    sliceim, origin, spacing, isflip = load_itk_image(full_name)
    Mask, origin, spacing, isflip = load_itk_image(os.path.join(luna_segment, name + ".mhd"))

    if isflip:
        # Undo the flip along y and x.
        Mask = Mask[:, ::-1, ::-1]
    # Mask labels 3 and 4 mark the two lungs (per the original author's note).
    m1 = Mask == 3
    m2 = Mask == 4
    Mask = m1 + m2

    # 2. Dilate each lung separately, blank everything outside the dilated
    #    mask and convert HU to gray levels with lumTrans.
    dm1 = process_mask(m1)
    dm2 = process_mask(m2)
    dilatedMask = dm1 + dm2
    # Region added by the dilation (dilated XOR original).
    extramask = dilatedMask ^ Mask

    if isflip:
        sliceim = sliceim[:, ::-1, ::-1]
        print("flip!")
    sliceim = lumTrans(sliceim)
    # Outside the dilated mask the image is filled with pad_value.
    sliceim = sliceim*dilatedMask+pad_value*(1-dilatedMask).astype("uint8")
    # Bright voxels inside the added rim are filled with pad_value too.
    bones = (sliceim*extramask)>bone_thresh
    sliceim[bones] = pad_value

    # 3. Resample to the target spacing; sliceim1 generally has a new shape.
    sliceim1, _ = resample(sliceim, spacing, resolution, order=1)

    # 4. Convert this scan's annotations to voxel indices in the resampled
    #    volume. Rows are (name, x, y, z, class), so the coordinates are
    #    reversed to (z, y, x) first.
    this_annos = np.copy(annos[annos[:,0]==(name)])
    pix_pos = []
    for c in this_annos:
        pos = worldToVoxelCoord(c[1:4][::-1], origin=origin, spacing=resolution)
        if isflip:
            pos[1:] = sliceim1.shape[1:3]-pos[1:]
        pix_pos.append(pos)
    pix_pos = np.array(pix_pos).astype(np.int32)

    # 5. Cut one patch per candidate.
    num_z, height, width = sliceim1.shape

    for ids, (nodepos, old_nodepos) in enumerate(zip(pix_pos, this_annos), start=1):
        center_z, center_y, center_x = nodepos[0], nodepos[1], nodepos[2]

        # Requested window; it may extend beyond the slice.
        up_y = int(center_y - margin)
        down_y = int(center_y + margin)
        left_x = int(center_x - margin)
        right_x = int(center_x + margin)

        # Part of the window that lies inside the slice.
        H_min = max(up_y, 0)
        H_max = min(down_y, height)
        W_min = max(left_x, 0)
        W_max = min(right_x, width)

        try:
            # One slice is needed on each side of the centre slice.
            if center_z <= 0 or center_z >= num_z-1:
                print("坐标z不符合要求：",full_name, "old_z: ", old_nodepos[3], "new_z: ", center_z, "new_z_num: ", num_z)
                continue
            if center_y<0 or center_y>= height or center_x<0 or center_x>= width:
                print("坐标x或y不符合要求：",full_name, "old_z: ", old_nodepos[3], "center_x: ", center_x, "center_y: ", center_y)
                continue

            node = sliceim1[center_z-1:center_z+2, H_min:H_max, W_min:W_max]
            # Pad each side by exactly the amount the window sticks out of
            # the slice (zero where it does not), so every patch ends up
            # with the same y/x size.
            pad_y = (H_min - up_y, down_y - H_max)
            pad_x = (W_min - left_x, right_x - W_max)
            node = np.pad(node, ((0, 0), pad_y, pad_x), "constant", constant_values=border_value)

            # Reorder the axes from (z, y, x) to (y, x, z) before saving.
            node = node.transpose((1,2,0))
            np.save(os.path.join(savepath,name+"_fake_nodule_"+str(ids)+".npy"), node)
        except Exception as e:
            # Best effort: report the failure and go on with the next candidate.
            print("process images %s error..."%str(name))
            print(type(e).__name__,":", e)

def preprocess_luna_fake():
    """
    Extract non-nodule patches for all ten LUNA16 subsets.

    Reads the paths from ``config``, keeps the annotation rows whose
    ``class`` column is 0, and processes ``subset0`` .. ``subset9`` with a
    process pool, one scan per task, using a patch margin of 32.
    """
    luna_data = config["luna_data"]                # raw LUNA16 scans
    luna_segment = config["luna_segment"]          # lung segmentation masks
    savepath = config["fake_result_path"]          # output directory for the patches
    luna_non_nodule = config["luna_class_nodule"]  # annotation CSV with a "class" column

    print("starting preprocessing luna fake nodule...")

    df_node = pd.read_csv(luna_non_nodule)
    # Keep complete rows of class 0 only.
    annos = np.array(df_node[df_node["class"]==0].dropna())

    os.makedirs(savepath, exist_ok=True)

    # Pool() starts worker processes (not threads).
    pool = Pool()
    try:
        for setidx in range(10):
            print("process subset", setidx)
            subset = "subset"+str(setidx)
            subset_data = os.path.join(luna_data, subset)
            subset_save = os.path.join(savepath, subset)

            filelist = [f.split(".mhd")[0] for f in os.listdir(subset_data) if f.endswith(".mhd")]
            os.makedirs(subset_save, exist_ok=True)

            worker = partial(extract_fake_nodule_from_luna, annos=annos,
                             filelist=filelist, margin=32, luna_segment=luna_segment,
                             luna_data=subset_data,
                             savepath=subset_save)
            # Each task receives an index into filelist.
            pool.map(worker, range(len(filelist)))
    finally:
        # Release the workers even when a subset fails.
        pool.close()
        pool.join()

    print("end preprocessing luna fake nodule")

# Run the preprocessing only when this file is executed as a script.
if __name__ == "__main__":
    preprocess_luna_fake()