In [57]:
# Root folder of the raw KiTS download; later cells glob "case_*/imaging.nii.gz" and
# "case_*/segmentation.nii.gz" directly beneath it.
# Both paths are combined with sub-paths by plain string concatenation, so keep the trailing "/".
datadownloadpath = './datadownload/KiTS/'
# Output root; the 'Image', 'Label', 'processed' and 'mask' sub-folders live under it.
datasavepath = './datapreprocessed/KiTS/'

In [58]:
import glob
import SimpleITK as sitk
import numpy as np
from scipy.ndimage import label, binary_fill_holes
import os

In [59]:
# Create each output folder independently (including any missing parent folders).
# Checking only 'Image' would skip the siblings of a partially created tree, and
# os.mkdir would fail when the output root itself does not exist yet.
for subfolder in ('Image', 'Label', 'processed', 'mask'):
    os.makedirs(datasavepath + subfolder, exist_ok=True)

In [60]:
# helper functions copy pasted
def resample_by_res(mov_img_obj, new_spacing, interpolator = sitk.sitkLinear, logging = True):
    """
    Resample a SimpleITK image onto a grid with the requested voxel spacing.

    The output grid keeps the direction and origin of ``mov_img_obj``. Its size along each axis
    is ``int(old_size * old_spacing / new_spacing + 1)``. Samples that fall outside the input
    are filled by nearest-neighbour extrapolation.

    :param mov_img_obj: SimpleITK image to resample.
    :param new_spacing: target spacing, one value per image axis.
    :param interpolator: SimpleITK interpolator constant (default ``sitk.sitkLinear``).
    :param logging: when true, print the spacing change and the (fractional) size change.
    :return: the resampled SimpleITK image.
    """
    old_spacing = mov_img_obj.GetSpacing()
    old_size = mov_img_obj.GetSize()

    # Per-axis scale factor between the two grids, and the resulting fractional size.
    scale = np.array(old_spacing) * 1.0 / np.array(new_spacing)
    new_size = np.array(old_size) * scale

    resampler = sitk.ResampleImageFilter()
    resampler.SetInterpolator(interpolator)
    # Direction is copied unchanged from the input image.
    resampler.SetOutputDirection(mov_img_obj.GetDirection())
    resampler.SetOutputOrigin(mov_img_obj.GetOrigin())
    resampler.SetUseNearestNeighborExtrapolator(True)
    resampler.SetOutputSpacing(new_spacing)
    # Truncate-and-add-one: the grid always gets one more sample than floor(new_size).
    resampler.SetSize([int(extent + 1) for extent in new_size])

    if logging:
        print("Spacing: {} -> {}".format(old_spacing, new_spacing))
        print("Size {} -> {}".format(old_size, new_size))

    return resampler.Execute(mov_img_obj)

def resample_lb_by_res(mov_lb_obj, new_spacing, interpolator = sitk.sitkLinear, ref_img = None, logging = True):
    """
    Resample a label image to ``new_spacing`` one label value at a time.

    Each distinct value in the label image is turned into a float 0/1 mask, resampled with
    ``resample_by_res`` and rounded back to 0/1, so interpolation never blends label values.
    The per-label results are painted into one volume in ascending label order; where rounded
    masks overlap, the larger label value wins.

    :param mov_lb_obj: SimpleITK label image.
    :param new_spacing: target spacing, one value per image axis.
    :param interpolator: SimpleITK interpolator used on the 0/1 masks (default ``sitk.sitkLinear``).
    :param ref_img: optional SimpleITK image whose geometry (via ``CopyInformation``) is stamped
        onto the result; it must have the same size as the resampled labels.
    :param logging: forwarded to ``resample_by_res``; also prints the label values found.
    :return: SimpleITK image holding the resampled labels.
    :raises ValueError: if the label image contains no voxels.
    """
    src_mat = sitk.GetArrayFromImage(mov_lb_obj)
    lbvs = np.unique(src_mat)
    if logging:
        print("Label values: {}".format(lbvs))

    out_vol = None
    _tar_curr_obj = None
    for lbv in lbvs:
        _src_curr_mat = np.float32(src_mat == lbv)
        _src_curr_obj = sitk.GetImageFromArray(_src_curr_mat)
        _src_curr_obj.CopyInformation(mov_lb_obj)
        _tar_curr_obj = resample_by_res( _src_curr_obj, new_spacing, interpolator, logging )
        # 1 where this label survives resampling, 0 elsewhere.
        _tar_curr_bin = np.rint(sitk.GetArrayFromImage(_tar_curr_obj))
        if out_vol is None:
            out_vol = _tar_curr_bin * lbv
        else:
            # Select on the rounded mask itself rather than on `mask * lbv == lbv`: the latter is
            # true everywhere when lbv == 0 and would erase labels painted earlier.
            out_vol[_tar_curr_bin == 1] = lbv

    if out_vol is None:
        raise ValueError("label image contains no voxels")

    out_obj = sitk.GetImageFromArray(out_vol)
    # Carry over the full geometry of the resampled grid (spacing, origin and direction),
    # so the result is correctly placed even when no reference image is given.
    out_obj.CopyInformation(_tar_curr_obj)
    if ref_img is not None:
        out_obj.CopyInformation(ref_img)
    return out_obj

def remove_all_but_the_largest_connected_component(image: np.ndarray, for_which_classes: list, volume_per_voxel: float,
                                                   minimum_valid_object_size: dict = None):
    """
    Removes all but the largest connected component, individually for each class.
    ``image`` is modified in place (removed voxels are set to 0) and also returned.

    :param image: label array.
    :param for_which_classes: can be None (then every value > 0 present in image is processed).
    Should be list of int. Can also be something like [(1, 2), 2, 4].
    Here (1, 2) will be treated as a joint region, not individual classes (example LiTS here we can use (1, 2)
    to use all foreground classes together)
    :param volume_per_voxel: physical volume of one voxel. A sequence of per-axis spacings is also accepted
    and is reduced to its product.
    :param minimum_valid_object_size: Only objects smaller than minimum_valid_object_size will be removed.
    Keys in minimum_valid_object_size must match entries in for_which_classes
    :return: (image, largest_removed, kept_size). Both dicts are keyed by class (joint regions by tuple);
    largest_removed holds the size of the biggest removed object (None if nothing was removed) and
    kept_size the size of the largest object (None if the class has no objects).
    """
    if for_which_classes is None:
        for_which_classes = np.unique(image)
        for_which_classes = for_which_classes[for_which_classes > 0]

    assert 0 not in for_which_classes, "cannot remove background"

    # Accept either a scalar volume or per-axis spacings; object sizes must be scalars to be comparable.
    voxel_volume = float(np.prod(volume_per_voxel))

    largest_removed = {}
    kept_size = {}
    for c in for_which_classes:
        if isinstance(c, (list, tuple)):
            c = tuple(c)  # otherwise it cant be used as key in the dict
            mask = np.isin(image, c)
        else:
            mask = image == c
        # get labelmap and number of objects
        lmap, num_objects = label(mask.astype(int))

        largest_removed[c] = None
        kept_size[c] = None
        if num_objects == 0:
            continue

        # sizes of objects 1..num_objects in a single pass over the label map
        object_sizes = np.bincount(lmap.ravel(), minlength=num_objects + 1)[1:] * voxel_volume

        # we always keep the largest object (and any object tied with it in size)
        maximum_size = object_sizes.max()
        kept_size[c] = float(maximum_size)

        remove = object_sizes != maximum_size
        if minimum_valid_object_size is not None and remove.any():
            # we only remove objects that are smaller than minimum_valid_object_size
            remove &= object_sizes < minimum_valid_object_size[c]

        if remove.any():
            # lookup table indexed by object id (entry 0 is the background of the label map)
            lookup = np.concatenate(([False], remove))
            image[lookup[lmap]] = 0
            largest_removed[c] = float(object_sizes[remove].max())
    return image, largest_removed, kept_size

In [61]:
IMG_FOLDER = datadownloadpath
SEG_FOLDER = datadownloadpath

# Build the three lists case by case so that imgs[i], segs[i] and pids[i] always refer to the
# same case. Globbing images and segmentations separately and pairing them by position would
# silently misalign every pair after the first case that lacks a segmentation.
imgs, segs, pids = [], [], []
for img_path in sorted(glob.glob(IMG_FOLDER + "case_*/imaging.nii.gz")):
    case_name = os.path.basename(os.path.dirname(img_path))  # e.g. "case_00000"
    seg_path = SEG_FOLDER + case_name + "/segmentation.nii.gz"
    if not os.path.exists(seg_path):
        # Cases without a segmentation cannot be used by the label-resampling step.
        continue
    imgs.append(img_path)
    segs.append(seg_path)
    # Case id is the part of the case folder name after the last underscore.
    pids.append(case_name.split("_")[-1])

In [62]:
# Display the discovered image paths as a sanity check.
imgs

['./datadownload/KiTS/case_00000/imaging.nii.gz',
 './datadownload/KiTS/case_00001/imaging.nii.gz',
 './datadownload/KiTS/case_00002/imaging.nii.gz',
 './datadownload/KiTS/case_00003/imaging.nii.gz',
 './datadownload/KiTS/case_00004/imaging.nii.gz',
 './datadownload/KiTS/case_00005/imaging.nii.gz',
 './datadownload/KiTS/case_00006/imaging.nii.gz',
 './datadownload/KiTS/case_00007/imaging.nii.gz',
 './datadownload/KiTS/case_00008/imaging.nii.gz',
 './datadownload/KiTS/case_00009/imaging.nii.gz',
 './datadownload/KiTS/case_00010/imaging.nii.gz',
 './datadownload/KiTS/case_00011/imaging.nii.gz',
 './datadownload/KiTS/case_00012/imaging.nii.gz',
 './datadownload/KiTS/case_00013/imaging.nii.gz',
 './datadownload/KiTS/case_00014/imaging.nii.gz',
 './datadownload/KiTS/case_00015/imaging.nii.gz',
 './datadownload/KiTS/case_00016/imaging.nii.gz',
 './datadownload/KiTS/case_00017/imaging.nii.gz',
 './datadownload/KiTS/case_00018/imaging.nii.gz',
 './datadownload/KiTS/case_00019/imaging.nii.gz',


In [63]:
# Display the discovered segmentation paths; they should line up one-to-one with the image paths.
segs

['./datadownload/KiTS/case_00000/segmentation.nii.gz',
 './datadownload/KiTS/case_00001/segmentation.nii.gz',
 './datadownload/KiTS/case_00002/segmentation.nii.gz',
 './datadownload/KiTS/case_00003/segmentation.nii.gz',
 './datadownload/KiTS/case_00004/segmentation.nii.gz',
 './datadownload/KiTS/case_00005/segmentation.nii.gz',
 './datadownload/KiTS/case_00006/segmentation.nii.gz',
 './datadownload/KiTS/case_00007/segmentation.nii.gz',
 './datadownload/KiTS/case_00008/segmentation.nii.gz',
 './datadownload/KiTS/case_00009/segmentation.nii.gz',
 './datadownload/KiTS/case_00010/segmentation.nii.gz',
 './datadownload/KiTS/case_00011/segmentation.nii.gz',
 './datadownload/KiTS/case_00012/segmentation.nii.gz',
 './datadownload/KiTS/case_00013/segmentation.nii.gz',
 './datadownload/KiTS/case_00014/segmentation.nii.gz',
 './datadownload/KiTS/case_00015/segmentation.nii.gz',
 './datadownload/KiTS/case_00016/segmentation.nii.gz',
 './datadownload/KiTS/case_00017/segmentation.nii.gz',
 './datado

In [64]:
# Display the case ids parsed from the image paths.
pids

['00000',
 '00001',
 '00002',
 '00003',
 '00004',
 '00005',
 '00006',
 '00007',
 '00008',
 '00009',
 '00010',
 '00011',
 '00012',
 '00013',
 '00014',
 '00015',
 '00016',
 '00017',
 '00018',
 '00019',
 '00020',
 '00021',
 '00022',
 '00023',
 '00024',
 '00025',
 '00026',
 '00027',
 '00028',
 '00029',
 '00030',
 '00031',
 '00032',
 '00033',
 '00034',
 '00035',
 '00036',
 '00037',
 '00038',
 '00039',
 '00040',
 '00041',
 '00042',
 '00043',
 '00044',
 '00045',
 '00046',
 '00047',
 '00048',
 '00049',
 '00050',
 '00051',
 '00052',
 '00053',
 '00054',
 '00055',
 '00056',
 '00057',
 '00058',
 '00059',
 '00060',
 '00061',
 '00062',
 '00063',
 '00064',
 '00065',
 '00066',
 '00067',
 '00068',
 '00069',
 '00070',
 '00071',
 '00072',
 '00073',
 '00074',
 '00075',
 '00076',
 '00077',
 '00078',
 '00079',
 '00080',
 '00081',
 '00082',
 '00083',
 '00084',
 '00085',
 '00086',
 '00087',
 '00088',
 '00089',
 '00090',
 '00091',
 '00092',
 '00093',
 '00094',
 '00095',
 '00096',
 '00097',
 '00098',
 '00099',


## Step 1, Data resampling

In [65]:
# Target voxel spacing handed to resample_by_res / resample_lb_by_res as `new_spacing`.
# The values are in SimpleITK axis order of the images *after* the axes have been permuted
# with order [2, 1, 0] in the resampling loop.
# NOTE(review): units are presumably millimetres — confirm against the dataset headers.
targetspacing = [1.6, 1.6, 3.2]

In [66]:
# Scratch step: load the first case on its own. In the visible notebook these three names are
# re-bound by the resampling loop in the next cell before they are read again.
pid = pids[0]
imgfile = imgs[0]
img_obj = sitk.ReadImage( imgfile )

In [67]:
# A single axis-permutation filter is configured once and reused for every image and label.
pa = sitk.PermuteAxesImageFilter()
pa.SetOrder([2,1,0])

for imgfile, lblfile, pid in zip(imgs, segs, pids):
    savename = 'kidney_' + pid + '.nii.gz'

    # Read image and segmentation and bring both into the permuted axis order.
    img_obj = pa.Execute(sitk.ReadImage( imgfile ))
    seg_obj = pa.Execute(sitk.ReadImage( lblfile ))

    img_spa_ori = img_obj.GetSpacing()

    # Resample the image first; the label is resampled per label value and then takes
    # its geometry from the resampled image.
    res_img_o = resample_by_res(img_obj, targetspacing,
                                interpolator = sitk.sitkLinear, logging = True)
    res_lb_o = resample_lb_by_res(seg_obj, targetspacing,
                                  interpolator = sitk.sitkLinear,
                                  ref_img = res_img_o, logging = True)

    # Flip both volumes along the third axis.
    res_img_o = sitk.Flip(res_img_o, [False, False, True])
    res_lb_o = sitk.Flip(res_lb_o, [False, False, True])

    # Third positional argument enables compression.
    sitk.WriteImage(res_img_o, datasavepath + 'Image/' + savename, True)
    sitk.WriteImage(res_lb_o, datasavepath + 'Label/' + savename, True)

Spacing: (0.919921875, 0.919921875, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 611) -> [294.375   294.375    95.46875]
Label values: [0 1 2]
Spacing: (0.919921875, 0.919921875, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 611) -> [294.375   294.375    95.46875]
Spacing: (0.919921875, 0.919921875, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 611) -> [294.375   294.375    95.46875]
Spacing: (0.919921875, 0.919921875, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 611) -> [294.375   294.375    95.46875]
Spacing: (0.798828125, 0.798828125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 602) -> [255.625  255.625   94.0625]
Label values: [0 1 2]
Spacing: (0.798828125, 0.798828125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 602) -> [255.625  255.625   94.0625]
Spacing: (0.798828125, 0.798828125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 602) -> [255.625  255.625   94.0625]
Spacing: (0.798828125, 0.798828125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 602) -> [255.625  255.625   94.0625]
Spacing: (0.939453125, 0.939453125, 1.0)

Spacing: (0.8613280057907104, 0.8613280057907104, 2.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 121) -> [275.62496185 275.62496185  94.53125   ]
Label values: [0 1 2]
Spacing: (0.8613280057907104, 0.8613280057907104, 2.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 121) -> [275.62496185 275.62496185  94.53125   ]
Spacing: (0.8613280057907104, 0.8613280057907104, 2.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 121) -> [275.62496185 275.62496185  94.53125   ]
Spacing: (0.8613280057907104, 0.8613280057907104, 2.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 121) -> [275.62496185 275.62496185  94.53125   ]
Spacing: (0.9375, 0.9375, 3.75) -> [1.6, 1.6, 3.2]
Size (512, 512, 129) -> [300.       300.       151.171875]
Label values: [0 1 2]
Spacing: (0.9375, 0.9375, 3.75) -> [1.6, 1.6, 3.2]
Size (512, 512, 129) -> [300.       300.       151.171875]
Spacing: (0.9375, 0.9375, 3.75) -> [1.6, 1.6, 3.2]
Size (512, 512, 129) -> [300.       300.       151.171875]
Spacing: (0.9375, 0.9375, 3.75) -> [1.6, 1.6, 3.2]
Size (512, 512, 129

Spacing: (0.96484375, 0.96484375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 98) -> [308.75  308.75  153.125]
Label values: [0 1 2]
Spacing: (0.96484375, 0.96484375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 98) -> [308.75  308.75  153.125]
Spacing: (0.96484375, 0.96484375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 98) -> [308.75  308.75  153.125]
Spacing: (0.96484375, 0.96484375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 98) -> [308.75  308.75  153.125]
Spacing: (0.798828125, 0.798828125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 163) -> [255.625  255.625  152.8125]
Label values: [0 1 2]
Spacing: (0.798828125, 0.798828125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 163) -> [255.625  255.625  152.8125]
Spacing: (0.798828125, 0.798828125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 163) -> [255.625  255.625  152.8125]
Spacing: (0.798828125, 0.798828125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 163) -> [255.625  255.625  152.8125]
Spacing: (0.9375, 0.9375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 97) -> [30

Spacing: (0.95703125, 0.95703125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 673) -> [306.25    306.25    105.15625]
Label values: [0 1 2]
Spacing: (0.95703125, 0.95703125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 673) -> [306.25    306.25    105.15625]
Spacing: (0.95703125, 0.95703125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 673) -> [306.25    306.25    105.15625]
Spacing: (0.95703125, 0.95703125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 673) -> [306.25    306.25    105.15625]
Spacing: (0.7734375, 0.7734375, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 553) -> [247.5     247.5      86.40625]
Label values: [0 1 2]
Spacing: (0.7734375, 0.7734375, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 553) -> [247.5     247.5      86.40625]
Spacing: (0.7734375, 0.7734375, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 553) -> [247.5     247.5      86.40625]
Spacing: (0.7734375, 0.7734375, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 553) -> [247.5     247.5      86.40625]
Spacing: (0.9765625, 0.9765625, 5.0) -> [1.6, 1.6, 3

Label values: [0 1 2]
Spacing: (0.68359375, 0.68359375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 90) -> [218.75  218.75  140.625]
Spacing: (0.68359375, 0.68359375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 90) -> [218.75  218.75  140.625]
Spacing: (0.68359375, 0.68359375, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 90) -> [218.75  218.75  140.625]
Spacing: (0.732421875, 0.732421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 57) -> [234.375  234.375   89.0625]
Label values: [0 1 2]
Spacing: (0.732421875, 0.732421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 57) -> [234.375  234.375   89.0625]
Spacing: (0.732421875, 0.732421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 57) -> [234.375  234.375   89.0625]
Spacing: (0.732421875, 0.732421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 57) -> [234.375  234.375   89.0625]
Spacing: (0.703125, 0.703125, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 612) -> [225.    225.     95.625]
Label values: [0 1 2]
Spacing: (0.703125, 0.703125, 0.5) -> [1.6, 1.6, 3.2]
Size (5

Spacing: (0.7890620231628418, 0.7890620231628418, 2.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 199) -> [252.49984741 252.49984741 155.46875   ]
Spacing: (0.69140625, 0.69140625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 49) -> [221.25   221.25    76.5625]
Label values: [0 1 2]
Spacing: (0.69140625, 0.69140625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 49) -> [221.25   221.25    76.5625]
Spacing: (0.69140625, 0.69140625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 49) -> [221.25   221.25    76.5625]
Spacing: (0.69140625, 0.69140625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 49) -> [221.25   221.25    76.5625]
Spacing: (0.4375, 0.4375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 99) -> [140.     140.      92.8125]
Label values: [0 1 2]
Spacing: (0.4375, 0.4375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 99) -> [140.     140.      92.8125]
Spacing: (0.4375, 0.4375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 99) -> [140.     140.      92.8125]
Spacing: (0.4375, 0.4375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 99) -

Label values: [0 1 2]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 114) -> [250.    250.    178.125]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 114) -> [250.    250.    178.125]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 114) -> [250.    250.    178.125]
Spacing: (0.892578125, 0.892578125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 153) -> [285.625  285.625  143.4375]
Label values: [0 1 2]
Spacing: (0.892578125, 0.892578125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 153) -> [285.625  285.625  143.4375]
Spacing: (0.892578125, 0.892578125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 153) -> [285.625  285.625  143.4375]
Spacing: (0.892578125, 0.892578125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 153) -> [285.625  285.625  143.4375]
Spacing: (0.806640625, 0.806640625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 102) -> [258.125 258.125 159.375]
Label values: [0 1 2]
Spacing: (0.806640625, 0.806640625, 5.0) -> [1.6, 1.6, 3.2]
Size (

Label values: [0 1 2]
Spacing: (0.6859999895095825, 0.6859999895095825, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 79) -> [219.51999664 219.51999664 123.4375    ]
Spacing: (0.6859999895095825, 0.6859999895095825, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 79) -> [219.51999664 219.51999664 123.4375    ]
Spacing: (0.6859999895095825, 0.6859999895095825, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 79) -> [219.51999664 219.51999664 123.4375    ]
Spacing: (0.72265625, 0.72265625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 50) -> [231.25  231.25   78.125]
Label values: [0 1 2]
Spacing: (0.72265625, 0.72265625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 50) -> [231.25  231.25   78.125]
Spacing: (0.72265625, 0.72265625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 50) -> [231.25  231.25   78.125]
Spacing: (0.72265625, 0.72265625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 50) -> [231.25  231.25   78.125]
Spacing: (0.8691409826278687, 0.8691409826278687, 1.25) -> [1.6, 1.6, 3.2]
Size (512, 512, 389) -> [278.12511444 2

Spacing: (0.68359375, 0.68359375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 76) -> [218.75 218.75  71.25]
Label values: [0 1 2]
Spacing: (0.68359375, 0.68359375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 76) -> [218.75 218.75  71.25]
Spacing: (0.68359375, 0.68359375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 76) -> [218.75 218.75  71.25]
Spacing: (0.68359375, 0.68359375, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 76) -> [218.75 218.75  71.25]
Spacing: (0.80078125, 0.80078125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 103) -> [256.25   256.25   160.9375]
Label values: [0 1 2]
Spacing: (0.80078125, 0.80078125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 103) -> [256.25   256.25   160.9375]
Spacing: (0.80078125, 0.80078125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 103) -> [256.25   256.25   160.9375]
Spacing: (0.80078125, 0.80078125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 103) -> [256.25   256.25   160.9375]
Spacing: (0.82421875, 0.82421875, 1.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 279) -> [263.75   263.

Spacing: (0.810546875, 0.810546875, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 538) -> [259.375  259.375   84.0625]
Spacing: (0.9765625, 0.9765625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 987) -> [312.5     312.5     154.21875]
Label values: [0 1 2]
Spacing: (0.9765625, 0.9765625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 987) -> [312.5     312.5     154.21875]
Spacing: (0.9765625, 0.9765625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 987) -> [312.5     312.5     154.21875]
Spacing: (0.9765625, 0.9765625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 987) -> [312.5     312.5     154.21875]
Spacing: (0.8515625, 0.8515625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 548) -> [272.5   272.5    85.625]
Label values: [0 1 2]
Spacing: (0.8515625, 0.8515625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 548) -> [272.5   272.5    85.625]
Spacing: (0.8515625, 0.8515625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 548) -> [272.5   272.5    85.625]
Spacing: (0.8515625, 0.8515625, 0.5) -> [1.6, 1.6, 3.2]
Size (512, 512, 548) 

Spacing: (0.765999972820282, 0.765999972820282, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 91) -> [245.1199913 245.1199913 142.1875   ]
Spacing: (0.82421875, 0.82421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 97) -> [263.75   263.75   151.5625]
Label values: [0 1 2]
Spacing: (0.82421875, 0.82421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 97) -> [263.75   263.75   151.5625]
Spacing: (0.82421875, 0.82421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 97) -> [263.75   263.75   151.5625]
Spacing: (0.82421875, 0.82421875, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 97) -> [263.75   263.75   151.5625]
Spacing: (0.736328125, 0.736328125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 69) -> [235.625  235.625   64.6875]
Label values: [0 1 2]
Spacing: (0.736328125, 0.736328125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 69) -> [235.625  235.625   64.6875]
Spacing: (0.736328125, 0.736328125, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 69) -> [235.625  235.625   64.6875]
Spacing: (0.736328125, 0.736328125, 3.0) -> [1.

Label values: [0 1 2]
Spacing: (0.72265625, 0.72265625, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 161) -> [231.25   231.25   150.9375]
Spacing: (0.72265625, 0.72265625, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 161) -> [231.25   231.25   150.9375]
Spacing: (0.72265625, 0.72265625, 3.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 161) -> [231.25   231.25   150.9375]
Spacing: (0.7421879768371582, 0.7421879768371582, 1.25) -> [1.6, 1.6, 3.2]
Size (512, 512, 349) -> [237.50015259 237.50015259 136.328125  ]
Label values: [0 1 2]
Spacing: (0.7421879768371582, 0.7421879768371582, 1.25) -> [1.6, 1.6, 3.2]
Size (512, 512, 349) -> [237.50015259 237.50015259 136.328125  ]
Spacing: (0.7421879768371582, 0.7421879768371582, 1.25) -> [1.6, 1.6, 3.2]
Size (512, 512, 349) -> [237.50015259 237.50015259 136.328125  ]
Spacing: (0.7421879768371582, 0.7421879768371582, 1.25) -> [1.6, 1.6, 3.2]
Size (512, 512, 349) -> [237.50015259 237.50015259 136.328125  ]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 

Spacing: (0.884765625, 0.884765625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 89) -> [283.125  283.125  139.0625]
Label values: [0 1 2]
Spacing: (0.884765625, 0.884765625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 89) -> [283.125  283.125  139.0625]
Spacing: (0.884765625, 0.884765625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 89) -> [283.125  283.125  139.0625]
Spacing: (0.884765625, 0.884765625, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 89) -> [283.125  283.125  139.0625]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 102) -> [250.    250.    159.375]
Label values: [0 1 2]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 102) -> [250.    250.    159.375]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 102) -> [250.    250.    159.375]
Spacing: (0.78125, 0.78125, 5.0) -> [1.6, 1.6, 3.2]
Size (512, 512, 102) -> [250.    250.    159.375]


## Step 2, Intensity Normalization

In [68]:
# Collect the resampled volumes written to the 'Image' folder, in sorted order.
IMG_FOLDER_SAVE = datasavepath + 'Image/'
imgs = sorted(glob.glob(IMG_FOLDER_SAVE + "/kidney_*.nii.gz"))

In [69]:
# Fixed intensity statistics used by the normalization loop: voxels are clipped to
# [intensity_05percent, intensity_995percent] and then standardized as
# (value - pixel_mean) / pixel_std.
# NOTE(review): the names suggest dataset-wide 0.5th / 99.5th percentiles and a mean/std,
# but how these numbers were derived is not shown in this notebook — confirm their source.
pixel_mean = 101
pixel_std = 76.9
intensity_05percent = -79
intensity_995percent = 304

In [70]:
for imgfile in imgs:
    # Take the output name and case id from the file actually being read. Pairing the globbed
    # files positionally with the earlier `pids` list could mislabel or silently drop volumes
    # whenever the two lists differ (stale files, partial runs).
    savename = os.path.basename(imgfile)
    pid = savename[len('kidney_'):-len('.nii.gz')]
    img_obj = sitk.ReadImage( imgfile )

    array = sitk.GetArrayFromImage(img_obj)

    # Clip to the fixed intensity window, then standardize with the fixed mean / std.
    array = np.clip(array, intensity_05percent, intensity_995percent)
    array = (array - pixel_mean) / pixel_std

    normalized_img = sitk.GetImageFromArray(array)
    # Keep the geometry of the source volume.
    normalized_img.CopyInformation(img_obj)

    sitk.WriteImage(normalized_img, datasavepath + 'processed/' + savename, True)
    print('Id ' + pid + ' normalized')

Id 00000 normalized
Id 00001 normalized
Id 00002 normalized
Id 00003 normalized
Id 00004 normalized
Id 00005 normalized
Id 00006 normalized
Id 00007 normalized
Id 00008 normalized
Id 00009 normalized
Id 00010 normalized
Id 00011 normalized
Id 00012 normalized
Id 00013 normalized
Id 00014 normalized
Id 00015 normalized
Id 00016 normalized
Id 00017 normalized
Id 00018 normalized
Id 00019 normalized
Id 00020 normalized
Id 00021 normalized
Id 00022 normalized
Id 00023 normalized
Id 00024 normalized
Id 00025 normalized
Id 00026 normalized
Id 00027 normalized
Id 00028 normalized
Id 00029 normalized
Id 00030 normalized
Id 00031 normalized
Id 00032 normalized
Id 00033 normalized
Id 00034 normalized
Id 00035 normalized
Id 00036 normalized
Id 00037 normalized
Id 00038 normalized
Id 00039 normalized
Id 00040 normalized
Id 00041 normalized
Id 00042 normalized
Id 00043 normalized
Id 00044 normalized
Id 00045 normalized
Id 00046 normalized
Id 00047 normalized
Id 00048 normalized
Id 00049 normalized


## Step 3, Generate Body Masks

In [71]:
# Collect the resampled (not normalized) volumes from the 'Image' folder, in sorted order.
IMG_FOLDER_SAVE = datasavepath + 'Image/'
imgs = sorted(glob.glob(IMG_FOLDER_SAVE + "/kidney_*.nii.gz"))

In [72]:
# Voxels with intensity strictly greater than this value are treated as foreground when
# building the body mask. It is applied to the resampled, un-normalized volumes.
# NOTE(review): presumably in Hounsfield units — confirm.
threshold = -200

In [73]:
# The connected-component helper expects the volume of one voxel as a scalar.
# Passing the spacing list itself does not produce scalar object sizes.
voxel_volume = float(np.prod(targetspacing))

for imgfile in imgs:
    # Name the mask after the file actually being read instead of pairing the globbed files
    # positionally with the earlier `pids` list, which can mislabel or drop volumes.
    savename = os.path.basename(imgfile)
    img_obj = sitk.ReadImage( imgfile )
    array = sitk.GetArrayFromImage(img_obj)
    # Binary foreground: everything brighter than the threshold.
    array = array > threshold

    # Keep only the largest connected foreground component (the array is modified in place).
    array_component = remove_all_but_the_largest_connected_component(array, [1], voxel_volume)

    array_mask = array_component[0].astype(int)
    # Fill enclosed holes so the mask is solid.
    array_mask = binary_fill_holes(array_mask).astype(float)

    mask_img = sitk.GetImageFromArray(array_mask)
    # Keep the geometry of the source volume.
    mask_img.CopyInformation(img_obj)

    sitk.WriteImage(mask_img, datasavepath + 'mask/' + savename, True)