In [None]:
# Filesystem helpers and logging (standard library).
import os
from os import listdir
from os.path import join
import logging
# Plotting and array handling.
import matplotlib.pyplot as plt
import numpy as np
# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount is redone even if one is already present. The AWS credentials file
# used further down is read from this mount.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
# Helpers used below for DICOM -> NRRD conversion and for resampling
# (presumably project-local modules - confirm they are on the import path).
from dcm_to_nrrd import dcm_to_nrrd
from interpolate import interpolate
import SimpleITK as sitk

# Print NumPy arrays in full instead of summarising long ones with "...".
np.set_printoptions(threshold=np.inf)

In [None]:
!cat /content/drive/My\ Drive/config/awscli.ini
!export AWS_SHARED_CREDENTIALS_FILE=/content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"
os.environ['AWS_SHARED_CREDENTIALS_FILE'] = path

!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/train.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/val.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/test.zip .

In [None]:
!unzip train.zip
!unzip val.zip
!unzip test.zip

In [None]:
def convert_split_to_nrrd(split):
    """
    Converts dcm files to nrrd file format

    Every entry of the ``split`` folder is treated as one patient. The
    patient's ``images`` sub-folder is handed to ``dcm_to_nrrd`` together with
    ``<split>-nrrd`` as the last argument (presumably the output folder).

    Args:
        split (str): train, test or val - specifies dataset to convert
    Returns:
        None
    Raises:
        OSError: if ``<split>-nrrd`` cannot be created, e.g. a regular file
            with that name is in the way or permission is denied
    """
    output_dir = "{}-nrrd".format(split)
    if os.path.isdir(output_dir):
        logging.debug("split already created")
    # exist_ok tolerates only a pre-existing folder; any other failure is
    # raised here instead of being hidden by a catch-all except.
    os.makedirs(output_dir, exist_ok=True)
    for patient in listdir(split):
        dcm_to_nrrd(split, "LCTSC", patient, "CT", join(split, patient, "images"),
                    output_dir)

In [None]:
# Convert each dataset split from DICOM to NRRD, in train / val / test order.
for split_name in ("train", "val", "test"):
    convert_split_to_nrrd(split_name)

In [None]:
def save_resampled_nrrd(path, dims):
    """
    Resamples nrrd files to new voxel spacing determined by dims

    Every entry of ``path`` is treated as a patient folder. Files whose name
    contains "image" are passed to ``interpolate`` as images; every other file
    in the patient folder is passed as a mask. ``<path>-resampled/<patient>``
    is passed as the save location argument.

    Args:
        path (str): path to folder containing nrrd files
        dims (tuple): length 3 tuple containing resampled nrrd pixel dims (x, y, z)
    Returns:
        None
    Raises:
        OSError: if ``<path>-resampled`` cannot be created, e.g. a regular file
            with that name is in the way or permission is denied
    """
    output_root = "{}-resampled".format(path)
    if os.path.isdir(output_root):
        logging.debug('directory already exists')
    # exist_ok tolerates only a pre-existing folder; any other failure is
    # raised here instead of being hidden by a catch-all except.
    os.makedirs(output_root, exist_ok=True)
    for patient in listdir(path):
        logging.info("resampling:" + patient)
        patient_in = join(path, patient)
        patient_out = join(output_root, patient)
        for file in listdir(patient_in):
            is_image = "image" in file
            # Images and masks differ only in the kind label and in the final
            # boolean flag (False for images, True for masks).
            interpolate("LCTSC", patient, "CT", join(patient_in, file), "linear", dims,
                        "sitk_object", "image" if is_image else "mask", patient_out,
                        not is_image)

# Resample every split to the same (x, y, z) voxel dims.
for nrrd_dir in ("train-nrrd", "val-nrrd", "test-nrrd"):
    save_resampled_nrrd(nrrd_dir, (1, 1, 3))

In [None]:
!rm train-nrrd-resampled/ --recursive
!rm test-nrrd-resampled/ --recursive
!rm val-nrrd-resampled/ --recursive

In [None]:
!zip -r train-nrrd.zip train-nrrd
!zip -r test-nrrd.zip test-nrrd
!zip -r val-nrrd.zip val-nrrd

!zip -r train-nrrd-resampled.zip train-nrrd-resampled
!zip -r test-nrrd-resampled.zip test-nrrd-resampled
!zip -r val-nrrd-resampled.zip val-nrrd-resampled

In [None]:
!aws s3 cp train-nrrd.zip s3://medical-image-segmentation/lungs/smaller-resampled/
!aws s3 cp test-nrrd.zip s3://medical-image-segmentation/lungs/smaller-resampled/
!aws s3 cp val-nrrd.zip s3://medical-image-segmentation/lungs/smaller-resampled/

!aws s3 cp train-nrrd-resampled.zip s3://medical-image-segmentation/lungs/smaller-resampled/
!aws s3 cp test-nrrd-resampled.zip s3://medical-image-segmentation/lungs/smaller-resampled/
!aws s3 cp val-nrrd-resampled.zip s3://medical-image-segmentation/lungs/smaller-resampled/

## Helper functions for debugging
- `show_nrrd` displays all slices of every patient in a folder of NRRD files.
- `convert_nrrd_to_png` creates a PNG image for each slice of an NRRD file, discarding empty image slices; the PNGs can be used to train a 2D model.

In [None]:
def show_nrrd(path):
    """
    Displays nrrd files in grid format

    For every patient folder under ``path`` one figure is drawn in which each
    image slice is placed directly left of the mask slice with the same index,
    ten image/mask pairs per row.

    Args:
        path (str): path to folder containing nrrd files
    Returns:
        None
    """
    for patient in listdir(path):
        logging.info('showing patient: ' + patient)
        img_data = sitk.GetArrayFromImage(sitk.ReadImage(join(path, patient, "image.nrrd")))
        mask_data = sitk.GetArrayFromImage(sitk.ReadImage(join(path, patient, "mask.nrrd")))

        # 20 columns hold 10 image/mask pairs, so this row count always leaves
        # room for every slice.
        rows = len(img_data) // 10 + 1
        # The figure is created before anything touches pyplot's "current
        # axes"; calling plt.axis() first would implicitly create a second,
        # blank figure for every patient.
        fig = plt.figure(figsize=(30, 20), dpi=100)
        for i, img_slice in enumerate(img_data):
            ax_img = fig.add_subplot(rows, 20, i * 2 + 1)
            ax_img.imshow(img_slice, cmap="gray")
            ax_img.set_axis_off()
            ax_mask = fig.add_subplot(rows, 20, i * 2 + 2)
            ax_mask.imshow(mask_data[i], cmap="gray")
            ax_mask.set_axis_off()
        plt.show()
        # Release the figure so memory does not grow with the patient count.
        plt.close(fig)

In [None]:
# Visual check: draw every image/mask slice pair of the resampled test split.
show_nrrd("test-nrrd-resampled")

In [None]:
def convert_nrrd_to_png(path, size=704):
    """
    Converts nrrd file to multiple png files which can be used in 2D training

    For every patient folder under ``path`` the ``image.nrrd`` and
    ``mask.nrrd`` volumes are read. Each image slice whose maximum value is not
    0, and the mask slice with the same index, is centred on a zero-filled
    ``size`` x ``size`` canvas and saved as
    ``<path>-output/images/lung_l/<patient>-<i>.png`` and
    ``<path>-output/masks/lung_l/<patient>-<i>.png``.

    Args:
        path (str): path to folder containing nrrd files
        size (int): side length in pixels of the square output images
    Returns:
        None
    Raises:
        ValueError: if a slice is larger than ``size`` in either dimension
        OSError: if an output folder cannot be created
    """
    output_folder = path + '-output'
    logging.info("output folder: %s", output_folder)
    # makedirs creates the intermediate "images" / "masks" folders as well and
    # tolerates folders left over from an earlier run.
    for kind in ("images", "masks"):
        os.makedirs(join(output_folder, kind, "lung_l"), exist_ok=True)

    for patient in listdir(path):
        logging.info("saving: %s", patient)
        img_data = sitk.GetArrayFromImage(sitk.ReadImage(join(path, patient, "image.nrrd")))
        mask_data = sitk.GetArrayFromImage(sitk.ReadImage(join(path, patient, "mask.nrrd")))
        for i in range(len(img_data)):
            if np.amax(img_data[i]) == 0:
                logging.debug("not saving empty image")
                continue
            new_array = _center_on_canvas(img_data[i], size)
            new_mask = _center_on_canvas(mask_data[i], size)
            plt.imsave(f"{output_folder}/images/lung_l/{patient}-{i}.png", new_array, cmap="gray")
            plt.imsave(f"{output_folder}/masks/lung_l/{patient}-{i}.png", new_mask, cmap="gray")


def _center_on_canvas(array, size):
    """Returns the 2D ``array`` centred on a zero-filled ``size`` x ``size`` canvas."""
    height, width = array.shape
    if height > size or width > size:
        raise ValueError(
            "slice of shape {} does not fit on a {}x{} canvas".format(array.shape, size, size))
    # Rows and columns get their own offset so non-square slices are centred
    # along both axes.
    top = (size - height) // 2
    left = (size - width) // 2
    canvas = np.zeros((size, size))
    canvas[top:top + height, left:left + width] = array
    return canvas
    

In [None]:
# Export PNG slices for each resampled split, in train / val / test order.
for resampled_dir in ("train-nrrd-resampled", "val-nrrd-resampled", "test-nrrd-resampled"):
    convert_nrrd_to_png(resampled_dir)

In [None]:
!zip train-nrrd-resampled-output.zip train-nrrd-resampled-output -r
!zip val-nrrd-resampled-output.zip val-nrrd-resampled-output -r
!zip test-nrrd-resampled-output.zip test-nrrd-resampled-output -r

In [None]:
!aws s3 cp train-nrrd-resampled-output.zip s3://medical-image-segmentation/lungs/70-10-20-resample/
!aws s3 cp test-nrrd-resampled-output.zip s3://medical-image-segmentation/lungs/70-10-20-resample/
!aws s3 cp val-nrrd-resampled-output.zip s3://medical-image-segmentation/lungs/70-10-20-resample/