In [2]:
import os, glob
import nibabel as nib
import numpy as np
import cv2
import shutil

from patchify import patchify
from PIL import Image

In [37]:
'''
Folders structure:

basepath contains train and test folders

in my case .../basepath/code/current_code_folder

CT - contains CT .nii files
slices_path - contains slices 512x512 png files
patches_path - contains patches 128x128 png files
_msk - contains corresponding CT/slices/patches masks files

train_msk_path / test_msk_path - contain only the masks that have white pixels
train_img_path / test_img_path - contain the corresponding images

You can skip the slices and produce patches directly;
this will use less memory, but the slices make it easier
to visualize and understand the process.
'''

# Project root: two directory levels above the current working directory,
# with backslashes turned into forward slashes and a trailing '/' appended.
basepath = os.path.dirname(
    os.path.dirname(os.getcwd().replace('\\', '/'))
) + '/'

# --- train split ---
# CT volumes and their mask volumes
train_CT_path = f'{basepath}train/CT/'
train_CT_msk_path = f'{basepath}train/CT_msk/'

# 2-D slices cut from the volumes
train_slices_path = f'{basepath}train/slices/'
train_slices_msk_path = f'{basepath}train/slices_msk/'

# patches cut from the slices
train_patches_path = f'{basepath}train/patches/'
train_patches_msk_path = f'{basepath}train/patches_msk/'

# 'img/' sub-folders nested inside the two patch folders
train_img_path = f'{train_patches_path}img/'
train_msk_path = f'{train_patches_msk_path}img/'

# --- test split (same layout) ---
test_CT_path = f'{basepath}test/CT/'
test_CT_msk_path = f'{basepath}test/CT_msk/'

test_slices_path = f'{basepath}test/slices/'
test_slices_msk_path = f'{basepath}test/slices_msk/'

test_patches_path = f'{basepath}test/patches/'
test_patches_msk_path = f'{basepath}test/patches_msk/'

test_img_path = f'{test_patches_path}img/'
test_msk_path = f'{test_patches_msk_path}img/'

In [41]:
'''
Declaring constants.
Hounsfield scale Range
and Patch Sizes
'''
# Intensity window: normalizeImageIntensityRange clamps values to
# [HOUNSFIELD_MIN, HOUNSFIELD_MAX] and divides by HOUNSFIELD_RANGE.
HOUNSFIELD_MIN, HOUNSFIELD_MAX = -200, 2000
HOUNSFIELD_RANGE = HOUNSFIELD_MAX - HOUNSFIELD_MIN

# Axes along which sliceAndSaveVolumeImage writes 2-D slices.
SLICE_X, SLICE_Y, SLICE_Z = False, False, True

# Patch size / step constants.
# NOTE(review): split_slices_to_patches_128 hard-codes 128 instead of reading these.
IMG_SIZE = 128
PATCH_STEP = 128

# Zero-padding width of the index embedded in slice / patch file names.
SLICE_DECIMATE_IDENTIFIER = 3

In [52]:
def read_nii(filepath):
    '''
    Load a .nii file and return its data as a numpy array.

    The data comes from nibabel's get_fdata() and is passed through
    np.rot90 with default arguments before being returned.
    '''
    volume = np.array(nib.load(filepath).get_fdata())
    return np.rot90(volume)

def normalizeImageIntensityRange(img, hu_min=None, hu_max=None):
    '''
    CT normalization before slicing.

    Clamps the intensities to [hu_min, hu_max] and rescales them linearly
    so that hu_min maps to 0.0 and hu_max maps to 1.0.

    img    - numpy array of intensities; it is NOT modified
    hu_min - lower bound of the window, defaults to HOUNSFIELD_MIN
    hu_max - upper bound of the window, defaults to HOUNSFIELD_MAX

    Returns a new array with values in [0, 1].
    Raises ValueError if hu_max is not greater than hu_min.
    '''
    if hu_min is None:
        hu_min = HOUNSFIELD_MIN
    if hu_max is None:
        hu_max = HOUNSFIELD_MAX
    if hu_max <= hu_min:
        raise ValueError(f'hu_max ({hu_max}) must be greater than hu_min ({hu_min})')

    # np.clip returns a new array, so the caller's volume keeps its raw values
    # (the previous boolean-mask assignments overwrote the input in place).
    clipped = np.clip(img, hu_min, hu_max)
    return (clipped - hu_min) / (hu_max - hu_min)

def readImageVolume(imgPath, normalize = False):
    '''
    Read a .nii volume through read_nii.

    normalize - True: pass the volume through normalizeImageIntensityRange;
                False: return it untouched (use this for masks).
    Return numpy.ndarray
    '''
    volume = read_nii(imgPath)
    # To reduce the dataset, a z-crop such as volume[:, :, 30:-10] can be applied here.
    return normalizeImageIntensityRange(volume) if normalize else volume
    
def saveSlice(img, fname, path):
    '''
    Save slice as png
    img - image array (slice); it is multiplied by 255 and cast to uint8
    fname - file name (without extension) that will be used for saving
    path - saving dir, created if it does not exist yet

    Raises IOError when OpenCV reports that the file was not written.
    '''
    if path:
        os.makedirs(path, exist_ok=True)
    fout = os.path.join(path, f'{fname}.png')

    # NOTE(review): values outside [0, 1] do not fit into uint8 after scaling;
    # confirm that masks only contain 0 and 1.
    pixels = np.uint8(img * 255)

    # cv2.imwrite signals failure through its return value instead of raising,
    # so without this check a failed write would still be reported as saved.
    if not cv2.imwrite(fout, pixels):
        raise IOError(f'Could not write slice: {fout}')
    print(f'[+] Slice saved: {fout}', end='\r')
    
def sliceAndSaveVolumeImage(vol, fname, path):
    '''
    Cut a 3d image into 2d slices and save each one with saveSlice.

    vol - 3d array of the initial image
    fname - name of output slice without counter
    path - output path

    The SLICE_X / SLICE_Y / SLICE_Z flags select the axes to slice along.
    Returns the total number of slices written.
    '''
    dimx, dimy, dimz = vol.shape
    print(dimx, dimy, dimz)

    # (enabled, axis index, axis length, progress message, file-name suffix)
    axis_plan = (
        (SLICE_X, 0, dimx, 'Slicing X: ', '_x'),
        (SLICE_Y, 1, dimy, 'Slicing Y: ', '_y'),
        (SLICE_Z, 2, dimz, 'Slicing Z: ', '_z'),
    )

    total = 0
    for enabled, axis, length, message, suffix in axis_plan:
        if not enabled:
            continue
        total += length
        print(message)
        for pos in range(length):
            # full slices on the leading axes, a fixed index on `axis`
            selector = (slice(None),) * axis + (pos,)
            index_tag = str(pos).zfill(SLICE_DECIMATE_IDENTIFIER)
            saveSlice(vol[selector], fname + f'-slice{index_tag}' + suffix, path)
    return total
               
def calcium_msks(path):
    '''
    Return the file names of the masks in `path` that have white pixels.

    path - folder with mask images; sub-folders inside it are skipped

    A mask is kept when at least one of its pixels is non-zero. The earlier
    test (exactly two distinct pixel values) missed masks that are entirely
    white or that contain more than two intensity levels.

    Returns a list of file names (not full paths), in sorted order.
    '''
    white = []
    for name in sorted(os.listdir(path)):
        mask_file = os.path.join(path, name)
        if not os.path.isfile(mask_file):
            continue
        # open the image and copy its pixels out before the file is closed
        with Image.open(mask_file) as image:
            pixels = np.array(image)
        if np.any(pixels):
            white.append(name)
    return white

def slice_ct(input_path, output_path, normalize = False):
    '''
    Slice every .nii volume found in input_path into output_path.

    normalize = True for CT volumes
    normalize = False for masks

    Output files are prefixed with 'lungh' plus the position of the volume
    in the sorted file listing, not with the original file name.
    '''
    volume_files = sorted(glob.iglob(input_path+'*.nii'))
    for volume_no, volume_file in enumerate(volume_files):
        volume = readImageVolume(volume_file, normalize)
        stats = (volume.shape, np.sum(volume.shape), np.min(volume), np.max(volume))
        print(volume_file, *stats)
        prefix = 'lungh'+str(volume_no)
        slice_total = sliceAndSaveVolumeImage(volume, prefix, output_path)
        print(f'\n{volume_file}, {slice_total} slices created \n')
        
def split_slices_to_patches_128(input_path, output_path):
    '''
    Cut every slice image in input_path into 128x128 patches saved to output_path.

    This will facilitate the learning process for the GPU.
    And it will allow you to cut off the parts of the CT that do not contain the chest.
    For example, for all 512x512 slices, there is a black stripe on top, it's just air.

    input_path  - folder with slice images; sub-folders are skipped
    output_path - folder for the patches, created if it does not exist yet

    Each patch is named 'image_<counter>_-slice<row><col>_z.png', where
    <counter> is the position of the source slice in the sorted listing of
    input_path and <row><col> is the patch position, zero-padded.
    '''
    size = 128
    step = 128
    crop = 64  # rows removed from the top and from the bottom of every slice

    if output_path:
        os.makedirs(output_path, exist_ok=True)

    counter = 0
    # Sorted on purpose: os.listdir returns names in arbitrary order, and the
    # image and mask folders are processed by separate calls, so the counter
    # only pairs an image patch with its mask patch if both calls walk the
    # files in the same order.
    for name in sorted(os.listdir(input_path)):
        slice_file = os.path.join(input_path, name)
        if not os.path.isfile(slice_file):
            continue
        # open the image and copy its pixels out before the file is closed
        with Image.open(slice_file) as image:
            pixels = np.array(image)
        # Symmetric trim of `crop` rows on each side (64:448 for a 512-row
        # slice); the previous 63:447 window was shifted up by one row.
        pixels = pixels[crop:pixels.shape[0] - crop, :]
        patches_img = patchify(pixels, (size, size), step=step)
        for i in range(patches_img.shape[0]):
            for j in range(patches_img.shape[1]):
                patch = Image.fromarray(patches_img[i, j, :, :])
                patch_name = ('image_' + str(counter) + '_'
                              + f'-slice{(str(i)+str(j)).zfill(SLICE_DECIMATE_IDENTIFIER)}_z'
                              + '.png')
                patch.save(os.path.join(output_path, patch_name))
        counter = counter + 1
            
def whitedataset(input_img_path, input_msk_path, output_img_path, output_msk_path):
    '''
    Copy every mask that has white pixels, and the image with the same
    file name, into the output folders.

    input_img_path  - folder with the images
    input_msk_path  - folder with the masks; calcium_msks picks the names to copy
    output_img_path - destination folder for the selected images
    output_msk_path - destination folder for the selected masks

    Both destination folders are created if they do not exist yet; otherwise
    shutil.copy would treat a missing folder as a target file name.
    '''
    for folder in (output_img_path, output_msk_path):
        os.makedirs(folder, exist_ok=True)

    # save all white masks together with their images
    for name in calcium_msks(input_msk_path):
        shutil.copy(os.path.join(input_img_path, name), output_img_path)
        shutil.copy(os.path.join(input_msk_path, name), output_msk_path)

In [48]:
# Slice all volumes: CT volumes are normalized, mask volumes are not.
for volume_dir, slice_dir, is_ct in (
    (train_CT_path, train_slices_path, True),
    (train_CT_msk_path, train_slices_msk_path, False),
    (test_CT_path, test_slices_path, True),
    (test_CT_msk_path, test_slices_msk_path, False),
):
    slice_ct(volume_dir, slice_dir, normalize = is_ct)

C:/Users/exebeche/Desktop/basepath/train/CT\024.nii (512, 512, 141) 1165 0.0 1.0
512 512 141
Slicing Z: 
[+] Slice saved: C:/Users/exebeche/Desktop/basepath/train/slices/lungh0-slice140_z.png
C:/Users/exebeche/Desktop/basepath/train/CT\024.nii, 141 slices created 

C:/Users/exebeche/Desktop/basepath/train/CT\102.nii (512, 512, 151) 1175 0.0 1.0
512 512 151
Slicing Z: 
[+] Slice saved: C:/Users/exebeche/Desktop/basepath/train/slices/lungh1-slice150_z.png
C:/Users/exebeche/Desktop/basepath/train/CT\102.nii, 151 slices created 

C:/Users/exebeche/Desktop/basepath/train/CT\109.nii (512, 512, 138) 1162 0.0 1.0
512 512 138
Slicing Z: 
[+] Slice saved: C:/Users/exebeche/Desktop/basepath/train/slices/lungh2-slice137_z.png
C:/Users/exebeche/Desktop/basepath/train/CT\109.nii, 138 slices created 

C:/Users/exebeche/Desktop/basepath/train/CT\113.nii (512, 512, 124) 1148 0.0 1.0
512 512 124
Slicing Z: 
[+] Slice saved: C:/Users/exebeche/Desktop/basepath/train/slices/lungh3-slice123_z.png
C:/Users/e

In [51]:
# Cut the slices into patches: images first, then masks, for train and test.
for slice_dir, patch_dir in (
    (train_slices_path, train_patches_path),
    (train_slices_msk_path, train_patches_msk_path),
    (test_slices_path, test_patches_path),
    (test_slices_msk_path, test_patches_msk_path),
):
    split_slices_to_patches_128(slice_dir, patch_dir)

In [53]:
# Keep only the patches whose mask has white pixels, for train and then test.
for folders in (
    (train_patches_path, train_patches_msk_path, train_img_path, train_msk_path),
    (test_patches_path, test_patches_msk_path, test_img_path, test_msk_path),
):
    whitedataset(*folders)

In [57]:
# sanity check: print the number of entries in every pipeline folder,
# train folders first, then test folders, image/mask pairs side by side
for split_folders in (
    (
        train_CT_path, train_CT_msk_path,
        train_slices_path, train_slices_msk_path,
        train_patches_path, train_patches_msk_path,
        train_img_path, train_msk_path,
    ),
    (
        test_CT_path, test_CT_msk_path,
        test_slices_path, test_slices_msk_path,
        test_patches_path, test_patches_msk_path,
        test_img_path, test_msk_path,
    ),
):
    for folder in split_folders:
        print(len(os.listdir(folder)))

16
16
2097
2097
25165
25165
1952
1952
4
4
512
512
6145
6145
508
508
