In [None]:
# Install the AWS CLI so the `aws s3 cp` commands used in later cells are available.
!pip install awscli
import awscli
from google.colab import drive
# pydicom is imported by the DICOM-conversion cell further down.
!pip install pydicom
# Mount Google Drive at /content/drive; later cells read the AWS credentials
# file from "My Drive/config/awscli.ini" under this mount point.
# NOTE(review): force_remount=True presumably re-mounts an already mounted
# drive -- confirm against the Colab documentation.
drive.mount("/content/drive", force_remount=True)

In [None]:
!cat /content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"

import os
!export AWS_SHARED_CREDENTIALS_FILE=/content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"
os.environ['AWS_SHARED_CREDENTIALS_FILE'] = path

!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/train.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/val.zip .
!aws s3 cp s3://medical-image-segmentation/lungs/70-10-20-resample/test.zip .

In [None]:
# Extract the three archives downloaded by the previous cell into the
# current working directory.
!unzip train.zip
!unzip val.zip
!unzip test.zip

In [None]:
!pip install SimpleITK==1.2.4
!pip install pydicom
import pydicom
import sys, os, glob
import SimpleITK as sitk
from pydicom import dcmread
import numpy as np
import logging
from helpers import fetch_contour_sop_instance_uid, contour_to_pixels, build_mask

# Load the scans in given folder path
def load_dicom(slice_list, split, patient_id):
    """
    Reads the DICOM slices of one patient and builds a mask for every slice.
    Args:
        slice_list (list): Paths of the slice .dcm files.
        split (str): Split folder name; the contour metadata is read from
            <split>/<patient_id>/metadata/1-1.dcm.
        patient_id (str): Patient folder name inside the split folder.
    Returns:
        A tuple (slices, img_spacing, img_direction, img_origin, mask) where
        slices are sorted by ImagePositionPatient[2] and mask is an int16 array
        stacked in the same order. If the slices cannot be sorted or the slice
        thickness cannot be derived, the error is printed and an empty list is
        returned instead (callers must check for it).
    """
    metadata = dcmread(os.path.join(split, patient_id, "metadata", '1-1.dcm'))
    # Each slice is kept together with its own mask so that the z-sort below
    # reorders both at once. Sorting only the slices (as was done before)
    # leaves the masks in file order and misaligns them with the image volume
    # whenever file order differs from z order.
    slice_mask_pairs = []
    for s in slice_list:
        new_slice = pydicom.dcmread(s)
        sop_uid = new_slice.SOPInstanceUID
        contour_list = fetch_contour_sop_instance_uid(metadata, sop_uid)

        if len(contour_list) == 0:
            logging.info("Image file SOPInstanceUID %s has no ROI data, using empty mask...", sop_uid)
            # NOTE(review): the empty mask is fixed at 512x512; this presumably
            # matches what build_mask returns -- confirm against helpers.
            contour_mask = np.zeros((512, 512), np.uint8)
        else:
            contour_pixels = []
            for c in contour_list:
                contour_pixels.extend(contour_to_pixels(c.ContourData, new_slice))
            contour_mask = build_mask(contour_pixels)

        slice_mask_pairs.append((new_slice, contour_mask))

    try:
        slice_mask_pairs.sort(key=lambda pair: float(pair[0].ImagePositionPatient[2]))
        slices = [pair[0] for pair in slice_mask_pairs]
        mask_arrays = [pair[1] for pair in slice_mask_pairs]
        try:
            slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
        except Exception:
            # Fall back to SliceLocation when the position attribute is unusable.
            slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
    except Exception as e:
        # Also reached for fewer than two slices (no neighbour to measure against).
        print (e)
        return []
    for s in slices:
        s.SliceThickness = slice_thickness

    img_spacing = [float(slices[0].PixelSpacing[0]),
                   float(slices[0].PixelSpacing[1]), slice_thickness]
    # NOTE(review): int() truncates the orientation cosines, so a value such as
    # 0.9999 becomes 0 -- confirm that all inputs are exactly axis-aligned.
    img_direction = [int(i) for i in slices[0].ImageOrientationPatient] + [0, 0, 1]
    img_origin = slices[0].ImagePositionPatient

    mask = np.stack(mask_arrays).astype(np.int16)

    return slices, img_spacing, img_direction, img_origin, mask


def getPixelArray(slices):
    """
    Stacks the pixel data of all slices into one int16 volume and applies each
    slice's rescale slope and intercept (Hounsfield unit conversion).
    Args:
        slices (list): Slice objects exposing pixel_array, RescaleSlope and
            RescaleIntercept.
    Returns:
        An int16 numpy array with one entry per slice along the first axis.
    """
    # Cast to int16; values are expected to stay low enough (<32k) for that.
    volume = np.stack([ds.pixel_array for ds in slices]).astype(np.int16)
    print("pre: " + str(np.amax(volume)))

    # Outside-of-scan pixels are marked with -2000; reset them to 0 so that,
    # with the usual intercept of -1024, they end up close to air.
    outside_scan = volume == -2000
    volume[outside_scan] = 0

    # Per-slice conversion to Hounsfield units (HU).
    for idx, ds in enumerate(slices):
        intercept = ds.RescaleIntercept
        slope = ds.RescaleSlope
        if slope != 1:
            scaled = slope * volume[idx].astype(np.float64)
            volume[idx] = scaled.astype(np.int16)
        volume[idx] += np.int16(intercept)

    print("post: " + str(np.amax(volume)))
    return np.array(volume, dtype=np.int16)


def run_core(dicom_dir, split, patient_id):
    """
    Loads all .dcm files of one patient and builds the SimpleITK image and mask.
    Args:
        dicom_dir (str): Folder containing the patient's slice .dcm files.
        split (str): Split folder name, forwarded to load_dicom.
        patient_id (str): Patient folder name, forwarded to load_dicom.
    Returns:
        A tuple (imgSitk, maskSitk) sharing spacing, direction and origin.
    Raises:
        ValueError: if load_dicom returns no data, or if any spacing component
            is zero. (The zero-spacing branch previously referenced an
            undefined name and failed with a NameError.)
    """
    print ('Processing patient ', dicom_dir)
    dicomFiles = sorted(glob.glob(dicom_dir + '/*.dcm'))
    loaded = load_dicom(dicomFiles, split, patient_id)
    if not loaded:
        # load_dicom signals failure with an empty list; unpacking it directly
        # would only produce an opaque "not enough values to unpack" error.
        raise ValueError(
            "Could not load DICOM slices for patient {} from {}".format(patient_id, dicom_dir))
    slices, img_spacing, img_direction, img_origin, mask = loaded

    if 0.0 in img_spacing:
        raise ValueError(
            "Zero spacing found for patient {}: {}".format(patient_id, img_spacing))

    imgCube = getPixelArray(slices)

    # Build the SITK nrrd image
    imgSitk = sitk.GetImageFromArray(imgCube)
    imgSitk.SetSpacing(img_spacing)
    imgSitk.SetDirection(img_direction)
    imgSitk.SetOrigin(img_origin)

    # The mask carries the same geometry as the image.
    maskSitk = sitk.GetImageFromArray(mask)
    maskSitk.SetSpacing(img_spacing)
    maskSitk.SetDirection(img_direction)
    maskSitk.SetOrigin(img_origin)

    return imgSitk, maskSitk


def _write_compressed_nrrd(image, file_path):
    """Writes a sitk image to file_path with compression enabled."""
    writer = sitk.ImageFileWriter()
    writer.SetFileName(file_path)
    writer.SetUseCompression(True)
    writer.Execute(image)


def dcm_to_nrrd(split, dataset, patient_id, data_type, input_dir, output_dir, save=True):
    """
    Converts a stack of slices into an image .nrrd and a mask .nrrd and saves them
    as <output_dir>/<patient_id>/image.nrrd and <output_dir>/<patient_id>/mask.nrrd.
    Args:
        split (str): Split folder name, forwarded to run_core.
        dataset (str): Name of dataset (only used in the progress message).
        patient_id (str): Unique patient id.
        data_type (str): Type of data (e.g., ct, pet, mri..). Currently unused.
        input_dir (str): Path to folder containing slices.
        output_dir (str): Path to folder where the patient folder will be created.
        save (bool): If True, the nrrd files are saved
    Returns:
        A tuple (image sitk object, mask sitk object).
    Raises:
        Whatever run_core or the file writer raises; nothing is swallowed here.
    """
    patient_dir = os.path.join(output_dir, patient_id)
    sitk_object, mask_object = run_core(input_dir, split, patient_id)
    if save:
        # makedirs(exist_ok=True) tolerates an existing folder, creates missing
        # parents, and still surfaces real errors (e.g. permissions) instead of
        # hiding them behind a bare except.
        os.makedirs(patient_dir, exist_ok=True)
        _write_compressed_nrrd(sitk_object, os.path.join(patient_dir, "image.nrrd"))
        _write_compressed_nrrd(mask_object, os.path.join(patient_dir, "mask.nrrd"))

    print ("dataset:{} patient_id:{} done!".format(dataset, patient_id))
    return sitk_object, mask_object


In [None]:
import SimpleITK as sitk
import sys
import os

def sitk_interpolation(path_to_nrrd, interpolation_type, new_spacing):
    """
    Reads an nrrd file and resamples it to the requested voxel spacing.
    Args:
        path_to_nrrd (str): Path to the nrrd file to read.
        interpolation_type: 'linear', 'bspline' or 'nearest_neighbor'. Non-string
            values are forwarded to the resampler unchanged, so an sitk
            interpolator constant can still be passed directly.
        new_spacing (sequence): Target voxel spacing, one value per image axis.
    Returns:
        The resampled sitk image; origin, direction and pixel type are taken
        from the input image.
    Raises:
        ValueError: if interpolation_type is a string other than the three
            supported names.
    """
    data = sitk.ReadImage(path_to_nrrd)
    original_spacing = data.GetSpacing()
    original_size = data.GetSize()
    print('{} {}'.format('original size: ', original_size))
    print('{} {}'.format('original spacing: ', original_spacing))

    target_spacing = [float(x) for x in new_spacing]

    # Keep the physical extent: voxel count scales with old/new spacing.
    new_size = [int(round((size * spacing) / target))
                for size, spacing, target in zip(original_size, original_spacing, target_spacing)]

    print('{} {}'.format('new size: ', new_size))

    # http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/20_Expand_With_Interpolators.html
    if isinstance(interpolation_type, str):
        interpolators = {
            'linear': sitk.sitkLinear,
            'bspline': sitk.sitkBSpline,
            'nearest_neighbor': sitk.sitkNearestNeighbor,
        }
        if interpolation_type not in interpolators:
            # Previously an unknown name was handed to the resampler as-is.
            raise ValueError("Unknown interpolation_type '{}', expected one of {}".format(
                interpolation_type, sorted(interpolators)))
        interpolation_type = interpolators[interpolation_type]

    resampleImageFilter = sitk.ResampleImageFilter()
    new_image = resampleImageFilter.Execute(data,
                                            new_size,
                                            sitk.Transform(),
                                            interpolation_type,
                                            data.GetOrigin(),
                                            target_spacing,
                                            data.GetDirection(),
                                            0,
                                            data.GetPixelIDValue())
    # Use the same float spacing that was given to the resampler.
    new_image.SetSpacing(target_spacing)
    return new_image
    
def interpolate(dataset, patient_id, data_type, path_to_nrrd, interpolation_type, new_spacing, return_type, image_type, output_dir = ""):
    """
    Interpolates a given nrrd file to a given voxel spacing.
    Args:
        dataset (str): Name of dataset (only used in the error message).
        patient_id (str): Unique patient id (only used in the error message).
        data_type (str): Type of data (e.g., ct, pet, mri, lung(mask), heart(mask)..). Currently unused.
        path_to_nrrd (str): Path to nrrd file.
        interpolation_type (str): Either 'linear' (for images with continuous values), 'bspline' (also for images but will mess up the range of the values), or 'nearest_neighbor' (for masks with discrete values).
        new_spacing (tuple): Tuple containing 3 values for voxel spacing to interpolate to: (x,y,z).
        return_type (str): Either 'sitk_object' or 'numpy_array'.
        image_type (str): Base name of the saved file, written as "<image_type>.nrrd".
        output_dir (str): Optional. If provided, nrrd file will be saved there. If not provided, file will not be saved.
    Returns:
        Either a sitk image object or a numpy array derived from it (depending on 'return_type').
        None if any error occurred.
    Raises:
        Nothing: every exception is caught, reported with print, and None is returned.
    """
    try:
        new_sitk_object = sitk_interpolation(path_to_nrrd, interpolation_type, new_spacing)
        if output_dir != "":
            # write new nrrd
            # makedirs(exist_ok=True) creates missing parents and accepts an
            # existing folder, without reporting unrelated failures as
            # "directory already exists" the way the bare except did.
            os.makedirs(output_dir, exist_ok=True)

            writer = sitk.ImageFileWriter()
            writer.SetFileName(os.path.join(output_dir, "{}.nrrd".format(image_type)))
            writer.SetUseCompression(True)
            writer.Execute(new_sitk_object)

        if return_type == "sitk_object":
            return new_sitk_object
        elif return_type == "numpy_array":
            return sitk.GetArrayFromImage(new_sitk_object)
        # An unknown return_type used to fall through silently; report it via
        # the handler below so the None result is explained.
        raise ValueError("unknown return_type '{}'".format(return_type))

    except Exception as e:
        print ("Error in {}_{}, {}".format(dataset, patient_id, e))


In [None]:
from os import listdir
from os.path import join
import os

def convert_split_to_nrrd(split):
    """
    Converts every patient folder of a split to nrrd files under "<split>-nrrd".
    Args:
        split (str): Split folder name (e.g. "train"); each entry inside it is
            treated as a patient folder containing an "images" subfolder.
    """
    output_dir = "{}-nrrd".format(split)
    # exist_ok=True makes re-runs harmless while still surfacing real errors
    # (the bare except used before reported every failure as "already created").
    os.makedirs(output_dir, exist_ok=True)
    # Sorted so patients are processed in a reproducible order.
    for patient in sorted(listdir(split)):
        dcm_to_nrrd(split, "LCTSC", patient, "CT", join(split, patient, "images"), join(output_dir))

In [None]:
# Convert the three splits, in the same order as before: train, val, test.
for split_name in ("train", "val", "test"):
    convert_split_to_nrrd(split_name)

In [None]:
import os
from os import listdir
from os.path import join

def save_resampled_nrrd(path, new_spacing=(1, 1, 3)):
    """
    Resamples every nrrd file of every patient folder in `path` and writes the
    results to "<path>-resampled/<patient>/".
    Args:
        path (str): Folder with one subfolder per patient holding the nrrd files.
        new_spacing (tuple): Target voxel spacing (x, y, z). Defaults to (1, 1, 3).
    """
    output_root = "{}-resampled".format(path)
    # exist_ok=True keeps re-runs working without a bare except that would
    # also hide real errors.
    os.makedirs(output_root, exist_ok=True)
    for patient in sorted(listdir(path)):
        for file in sorted(listdir(join(path, patient))):
            if "image" in file:
                # Continuous intensities: linear interpolation.
                interpolation_type, image_type = "linear", "image"
            else:
                # Masks hold discrete labels; linear interpolation would blend
                # them at the boundaries, so nearest neighbour is required.
                interpolation_type, image_type = "nearest_neighbor", "mask"
            interpolate("LCTSC", patient, "CT", join(path, patient, file), interpolation_type,
                        new_spacing, "sitk_object", image_type, join(output_root, patient))

# Resample the converted splits, in the same order as before: train, val, test.
for nrrd_dir in ("train-nrrd", "val-nrrd", "test-nrrd"):
    save_resampled_nrrd(nrrd_dir)

In [None]:
# Archive the converted split folders (the output of convert_split_to_nrrd).
# NOTE(review): the resampled files are written to "<split>-nrrd-resampled"
# by save_resampled_nrrd and are not included in these archives -- confirm
# whether the resampled folders were meant to be zipped and uploaded instead.
!zip -r train-nrrd.zip train-nrrd
!zip -r test-nrrd.zip test-nrrd
!zip -r val-nrrd.zip val-nrrd

In [None]:
!cat /content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"

import os
!export AWS_SHARED_CREDENTIALS_FILE=/content/drive/My\ Drive/config/awscli.ini
path = "/content/drive/My Drive/config/awscli.ini"
os.environ['AWS_SHARED_CREDENTIALS_FILE'] = path

!aws s3 cp train-nrrd.zip s3://medical-image-segmentation/lungs/70-10-20-resample/
!aws s3 cp test-nrrd.zip s3://medical-image-segmentation/lungs/70-10-20-resample/
!aws s3 cp val-nrrd.zip s3://medical-image-segmentation/lungs/70-10-20-resample/