Data from the Medical Segmentation Decathlon (http://medicaldecathlon.com/), Task06_Lung.

First add a shortcut to the dataset in your Google Drive. Then replace the archive path in the extraction cell below with the path to your copy of the dataset.

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

In [None]:
!tar -xvf "/content/drive/MyDrive/FYP_colab/Task06_Lung.tar" -C "/content/Lung"     #[run this cell to extract tar files]

In [None]:
# Install the third-party packages imported in the next cell:
# SimpleITK and MONAI (the latter provides the CropForeground transform).
!pip install SimpleITK
!pip install monai

In [None]:
import SimpleITK as sitk  #We can also use other libraries. e.g., NiBabel
from monai.transforms import CropForeground

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io 
import keras
from tensorflow.keras.layers.experimental import preprocessing
import os,cv2
from tensorflow.keras.utils import normalize

from skimage.transform import resize
from tqdm import tqdm

# Data Acquisition

There is no need to run the cells in this section every time: once the volumes have been saved, load the saved data volumes directly next time.

In [None]:
def threshold_at_one(x):
    """Return the result of comparing ``x >= 1``.

    For array inputs the comparison is element-wise, so the result is a
    boolean mask with the same shape as ``x``.
    """
    is_foreground = x >= 1
    return is_foreground

# Build the MONAI CropForeground transform used by the cropping cell below:
# foreground is whatever threshold_at_one selects, and margin=0 adds no extra
# border around the bounding box.
cropper = CropForeground(select_fn=threshold_at_one, margin=0) # Instantiate CropForeground

In [None]:
import glob
import os

# Collect, from every training scan, only the slices whose mask is non-empty.
# Slices are gathered in lists and concatenated once at the end; growing the
# arrays with np.concatenate inside the loop re-copies everything collected so
# far on each iteration and needs a dummy first slice to be stripped afterwards.
image_chunks = []
mask_chunks = []

# sorted() makes the slice order reproducible: glob returns paths in
# arbitrary, filesystem-dependent order.
for image_path in tqdm(sorted(glob.glob('/content/Lung/Task06_Lung/imagesTr/*'))):
    # The label volume is looked up under the same file name as the image
    # volume. basename() avoids assuming a fixed file-name length.
    filename = os.path.basename(image_path)
    mask_path = os.path.join('/content/Lung/Task06_Lung/labelsTr', filename)

    # Read both volumes and convert them to numpy arrays.
    image = sitk.GetArrayFromImage(sitk.ReadImage(image_path))
    mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_path))

    # Boolean selector over the first axis: True where the mask slice sums to
    # more than zero, i.e. contains labelled voxels.
    has_label = mask.sum(axis=(1, 2)) > 0

    # float64 matches the dtype the result had when it was seeded with np.ones.
    image_chunks.append(image[has_label].astype(np.float64))  # (N,512,512)
    mask_chunks.append(mask[has_label].astype(np.float64))    # (N,512,512)

if image_chunks:
    final_images = np.concatenate(image_chunks, axis=0)
    final_masks = np.concatenate(mask_chunks, axis=0)
else:
    # No scans found: keep the empty (0,512,512) result instead of raising.
    final_images = np.empty((0, 512, 512))
    final_masks = np.empty((0, 512, 512))

In [None]:
# Foreground crop and resize.
# Each image slice is cropped to the bounding box computed by `cropper`, the
# matching mask slice is cropped with the same box so the pair stays aligned,
# and both are resized to 256x256. Results are collected in lists and
# concatenated once at the end rather than re-copying the growing arrays on
# every iteration.
resized_image_slices = []
resized_mask_slices = []

for image_slice, mask_slice in tqdm(zip(final_images, final_masks),
                                    total=final_images.shape[0]):
    # Add a leading axis of length 1, giving (1, H, W) without hard-coding
    # the slice size.
    ToCropi = image_slice[np.newaxis]
    ToCropm = mask_slice[np.newaxis]

    # The box is computed from the image only and reused for the mask.
    bbox = cropper.compute_bounding_box(ToCropi)

    cropped_i = cropper.crop_pad(ToCropi, bbox[0], bbox[1], mode=None)
    cropped_m = cropper.crop_pad(ToCropm, bbox[0], bbox[1], mode=None)

    cropped_resized_i = resize(cropped_i, (1, 256, 256),
                        anti_aliasing=True)
    # Masks hold labels, not intensities: nearest-neighbour interpolation
    # (order=0) without Gaussian smoothing keeps the original label values
    # instead of producing blurred fractional values along the edges.
    # preserve_range=True prevents any rescaling of the label values.
    cropped_resized_m = resize(cropped_m, (1, 256, 256),
                        order=0, anti_aliasing=False, preserve_range=True)

    resized_image_slices.append(cropped_resized_i)
    resized_mask_slices.append(cropped_resized_m)

if resized_image_slices:
    # astype(..., copy=False) only guarantees the float64 dtype the arrays had
    # when they were seeded with np.ones; it does not copy float64 data.
    cropped_images = np.concatenate(resized_image_slices, axis=0).astype(np.float64, copy=False)
    cropped_masks = np.concatenate(resized_mask_slices, axis=0).astype(np.float64, copy=False)
else:
    # Nothing to process: keep the empty (0,256,256) result instead of raising.
    cropped_images = np.empty((0, 256, 256))
    cropped_masks = np.empty((0, 256, 256))

In [None]:
import random

# Show one randomly chosen mask/image pair side by side as a sanity check.
# random.randrange excludes the upper bound, so the index is always valid;
# random.randint(0, n) may also return n, which is out of range.
# The length is taken from cropped_masks because that is the array indexed below.
image_number = random.randrange(len(cropped_masks))

fig,ax = plt.subplots(1,2,figsize=(5,3))

# Left: mask, right: image, both without axes.
ax[0].imshow(cropped_masks[image_number,:,:], cmap=plt.cm.gray)
ax[0].axis('off')

ax[1].imshow(cropped_images[image_number,:,:], cmap=plt.cm.gray)
ax[1].axis('off')

plt.tight_layout()
#plt.savefig("../images/mask_bbox.png", bbox_inches="tight")
plt.show()

# Save data

In [None]:
# Write the cropped image and mask volumes to disk with np.save, which appends
# the ".npy" extension when the file name does not already end with it.
for volume_name, volume in (('Lung_image_volume', cropped_images),
                            ('Lung_mask_volume', cropped_masks)):
    np.save(volume_name, volume)