In [1]:
from __future__ import division
import SimpleITK as sitk
import numpy
import os
import glob
import matplotlib.pyplot as plt
from skimage import transform as tf
import math 
from hurry.filesize import size
from hurry.filesize import alternative
from sys import getsizeof
import json
%matplotlib inline

Define location of Images to Train on

In [2]:
#locationImages = ['/Users/gattia/Data/mri/ski10Dataset/TrainingData-A/', '/Users/gattia/Data/mri/ski10Dataset/TrainingData-C/']
locationImages = ['/vol/data/TrainingData-A/', '/vol/data/TrainingData-C/']
location2SaveNumpy = '/vol/data/'

In [3]:
def importImageExtractArray(imageName):
    flipper = sitk.FlipImageFilter()
    flipper.SetFlipAxes([True, False, False])
    image = sitk.ReadImage(imageName)
    flippedImage = flipper.Execute(image)
    imageArray = sitk.GetArrayFromImage(image)
    flippedImageArray = sitk.GetArrayFromImage(flippedImage)
    return(imageArray, flippedImageArray)

def padImage(image, desiredShape):
    shapeOriginal = image.shape
    differenceX = desiredShape[0] - shapeOriginal[0]
    differenceY = desiredShape[1] - shapeOriginal[1]
    differenceZ = desiredShape[2] - shapeOriginal[2]
    halfDiffX = differenceX/2
    halfDiffY = differenceY/2
    halfDiffZ = differenceZ/2
    #Pad x- dimension
    if (differenceX % 2 == 0): 
        paddedArray = numpy.pad(image, [[int(halfDiffX), int(halfDiffX)], [0,0], [0,0]], 'constant', constant_values=(0))
    else: 
        paddedArray = numpy.pad(image, [[int(math.ceil(halfDiffX)), int(math.floor(halfDiffX))], [0,0], [0,0]], 'constant', constant_values=(0))
    
    #pad y-dimension
    if (differenceY % 2 == 0): 
        paddedArray = numpy.pad(paddedArray, [[0,0],[int(halfDiffY), int(halfDiffY)], [0,0]], 'constant', constant_values=(0))
    else: 
        paddedArray = numpy.pad(paddedArray, [[0,0],[int(math.ceil(halfDiffY)), int(math.floor(halfDiffY))], [0,0]], 'constant', constant_values=(0))
    
    #pad z-dimension
    if (differenceZ % 2 == 0): 
        paddedArray = numpy.pad(paddedArray, [[0,0], [0,0], [int(halfDiffZ), int(halfDiffZ)]], 'constant', constant_values=(0))
    else: 
        paddedArray = numpy.pad(paddedArray, [[0,0], [0,0], [int(math.ceil(halfDiffZ)), int(math.floor(halfDiffZ))]], 'constant', constant_values=(0))
    
    return(paddedArray)

Import images and labels from the two folders that contain them. Flip each image in the AP direction. This will double the sample. 

* Save all images including flipped in imagesDictionary. 
* Save all labels in labelsDictionary.

In [4]:
def memoryDictionary(dictionary):
    memory = 0
    for item in dictionary:
        memory += dictionary[item].nbytes
    sensicleMemory = size(memory, system=alternative)
    return(sensicleMemory)

In [5]:
imagesDictionary = {}
os.chdir(locationImages[0])
imageNames = glob.glob('image-*.mhd')
for imageName in imageNames:
    imagesDictionary[imageName[:9]], imagesDictionary[imageName[:9] + '-Flipped'] = importImageExtractArray(imageName)

# os.chdir(locationImages[1])
# imageNames = glob.glob('image-*.mhd')
# for imageName in imageNames: 
#     imagesDictionary[imageName[:9]], imagesDictionary[imageName[:9] + '-Flipped'] = importImageExtractArray(imageName)

In [6]:
imagesDictionary[imageName[:9]].dtype

dtype('int16')

In [7]:
print(memoryDictionary(imagesDictionary))

1 GB


In [8]:
labelsDictionary = {}
os.chdir(locationImages[0])
labelNames = glob.glob('labels-*.mhd')
for labelName in labelNames:
    labelsDictionary[labelName[:10]], labelsDictionary[labelName[:10] + '-Flipped'] = importImageExtractArray(labelName)

# os.chdir(locationImages[1])
# labelNames = glob.glob('labels-*.mhd')
# for labelName in labelNames:
#     labelsDictionary[labelName[:10]], labelsDictionary[labelName[:10] + '-Flipped'] = importImageExtractArray(labelName)    

In [9]:
print(memoryDictionary(labelsDictionary))

842 MB


In [10]:
labelsDictionary[labelName[:10]].dtype

dtype('uint8')

In [11]:
print(imagesDictionary[imageName[:9]].max())
print(labelsDictionary[labelName[:10]].max())

201
4


In [12]:
shape = labelsDictionary[labelName[:10]].shape
shape

(104, 356, 269)

In [13]:
smallest = [1000,1000,1000]
largest = [0,0,0]
for labelName in labelNames:
    shape = labelsDictionary[labelName[:10]].shape
    smallest = [min([smallest[0], shape[0]]),  min([smallest[1], shape[1]]), min([smallest[2], shape[2]])]
    largest = [max([largest[0], shape[0]]),  max([largest[1], shape[1]]), max([largest[2], shape[2]])]
print('Smallest Dimensions are: ' + str(smallest))
print('Largest Dimensions are: ' + str(largest))

Smallest Dimensions are: [92, 316, 259]
Largest Dimensions are: [120, 396, 343]


Code was run on this dataset to get the minimum and maximum dimensions. The result was: 

- MinSize is: [92, 314, 247]
- MaxSize is: [120, 437, 343]

This was used in determining how big to pad the images to be. 

Pad every slice of each image so that the resulting slices are of shape 450,350. This is slightly bigger than the biggest in plane resolution. I didnt want to register each image as I thought this might make the algorithm more robust to differences in alignment etc. We'll see how it works out.

The padding is done for both image and labels. 

In [14]:
img_rows, img_cols = 320, 256

In [17]:
imageDimensions

NameError: name 'imageDimensions' is not defined

In [18]:
for image in imagesDictionary:
    imageDimensions = imagesDictionary[image].shape
    imagesDictionary[image] = tf.resize(imagesDictionary[image], (imageDimensions[0], imageDimensions[1]/1.35, imageDimensions[2]/1.35), order=3)
    imagesDictionary[image] = padImage(imagesDictionary[image], [len(imagesDictionary[image][:,1,1]), img_rows, img_cols])
                                        
    labelName = ('labels-' + image[6:])
    labelsDictionary[labelName] = tf.resize(labelsDictionary[labelName], (imageDimensions[0], imageDimensions[1]/1.35, imageDimensions[2]/1.35), order=0)
    labelsDictionary[labelName] = padImage(labelsDictionary[labelName], [len(labelsDictionary[labelName][:,1,1]), img_rows, img_cols])

In [19]:
print(memoryDictionary(imagesDictionary))
print(memoryDictionary(labelsDictionary))

5 GB
5 GB


In [20]:
noSlices = numpy.zeros([1])
for image in imagesDictionary:
    noSlices = noSlices + len(imagesDictionary[image][:,1,1])
print(noSlices)

[ 8692.]


In [21]:
print(imagesDictionary[image].dtype)
print(labelsDictionary[labelName].dtype)

float64
float64


In [22]:
train_images = numpy.ndarray((noSlices, img_rows, img_cols), dtype=numpy.float32)
train_labels = numpy.ndarray((noSlices, img_rows, img_cols), dtype=numpy.uint8)
index=0
for image in imagesDictionary:
    imageSlices = len(imagesDictionary[image][:,1,1])
#     for imageSlice in range(len(imagesDictionary[image][:,1,1])):
    train_images[index:index+imageSlices] = imagesDictionary[image]
    train_labels[index:index+imageSlices] = labelsDictionary[('labels-' + image[6:])]
    index +=imageSlices
    imagesDictionary[image] = None
    labelsDictionary[('labels-' + image[6:])] = None
# numpy.save(location2SaveNumpy + 'train_images.npy', train_images)
# numpy.save(location2SaveNumpy + 'train_labels.npy', train_labels)
# print('Saving .npy files done')

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


In [None]:

gc.collect()
train_images = numpy.ndarray((noSlices, img_rows/2, img_cols/2), dtype=imagesDictionary[image].dtype)

In [None]:
import psutil

In [None]:
print(noSlices)
print(img_rows)
print(img_cols)


In [None]:
print(train_images.shape)
print(train_labels.shape)
imagesDictionary = None
labelsDictionary = None

In [None]:
(train_images.shape[0],1,train_images.shape[1], train_images.shape[2])

In [None]:
train_images = numpy.expand_dims(train_images, axis=1)
train_labels = numpy.expand_dims(train_labels, axis=1)
print(train_images.shape)
print(train_labels.shape)


In [None]:
numpy.save(location2SaveNumpy + 'train_images.npy', train_images)
numpy.save(location2SaveNumpy + 'train_labels.npy', train_labels)
print('Saving .npy files done')