Add data generation, training and prediction code for nodule segmentation
WGierke committed Oct 24, 2017
1 parent fc25fb0 commit 8d412a0
Showing 559 changed files with 536 additions and 93 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -4,6 +4,7 @@ test/assets/test_image_data/full/**/* filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.hd5 filter=lfs diff=lfs merge=lfs -text
+*.hdf5 filter=lfs diff=lfs merge=lfs -text
*.mhd filter=lfs diff=lfs merge=lfs -text
*.raw filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions compose/prediction/Dockerfile-dev
@@ -5,6 +5,7 @@ RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python3.6 get-pip.py
RUN ln -s /usr/bin/python3.6 /usr/local/bin/python
# Requirements have to be pulled and installed here, otherwise caching won't work
+COPY ./prediction/.pylidcrc /root/.pylidcrc
COPY ./prediction/requirements/local.txt /requirements/prediction/local.txt
RUN pip install -r /requirements/prediction/local.txt
COPY ./prediction/requirements/torch.txt /requirements/prediction/torch.txt
3 changes: 3 additions & 0 deletions prediction/.pylidcrc
@@ -0,0 +1,3 @@
[dicom]
path = /images_full
warn = True
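
With this config in place, pylidc resolves the DICOM archive under /images_full. A minimal sketch of querying a scan and building its voxel volume with pylidc 0.1.9, using the same calls as the new data_generation.py below — the patient ID is just an example:

import pylidc as pl

# pylidc reads the [dicom] path from .pylidcrc to locate the scans
scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == 'LIDC-IDRI-0001').first()
if scan is not None:
    vol = scan.to_volume(verbose=False)  # stacked CT slices as a numpy array
    print(vol.shape)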
10 changes: 8 additions & 2 deletions prediction/config.py
@@ -4,12 +4,18 @@
Provides the flask config options
"""
-from os import getenv
+import os


class Config(object):
-    PROD_SERVER = getenv('PRODUCTION', False)
+    PROD_SERVER = os.getenv('PRODUCTION', False)
    DEBUG = False
+    # The following paths are expanded at runtime
+    CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
+    SEGMENT_ASSETS_DIR = os.path.abspath(os.path.join(CURRENT_DIR, 'src', 'algorithms', 'segment', 'assets'))
+    DICOM_PATHS_DOCKER_WILDCARD = os.path.join('/images_full', 'LIDC-IDRI-*', '**', '**')
+    DICOM_PATHS_LOCAL_WILDCARD = os.path.join(CURRENT_DIR, 'src', 'tests', 'assets',
+                                              'test_image_data', 'full', 'LIDC-IDRI-*', '**', '**')


class Production(Config):
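The two wildcard constants are meant to be handed to glob. A minimal sketch of resolving them, assuming the docker volume layout used by the compose files (one matched directory per DICOM series):

import glob

from config import Config

# Matches /images_full/LIDC-IDRI-*/<study>/<series> inside the container
dicom_paths = sorted(glob.glob(Config.DICOM_PATHS_DOCKER_WILDCARD))
print(len(dicom_paths), "DICOM series found")
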
2 changes: 1 addition & 1 deletion prediction/requirements/local.txt
@@ -1,3 +1,3 @@
flake8==3.3.0
pytest==3.1.3
-pylidc==0.1.8
+pylidc==0.1.9  # Fixes UnicodeDecodeError in to_volume() (https://github.com/pylidc/pylidc/issues/9)
66 changes: 0 additions & 66 deletions prediction/src/algorithms/identify/helpers.py

This file was deleted.

11 changes: 6 additions & 5 deletions prediction/src/algorithms/identify/prediction.py
@@ -13,7 +13,7 @@
from keras.models import Model
from keras.optimizers import SGD

-from . import helpers
+from ...preprocess.lung_segmentation import rescale_patient_images

CUBE_SIZE = 32
CROP_SIZE = 32
@@ -160,11 +160,11 @@ def prepare_data(patient_id, magnification=1):
"""
    patient_img = load_patient_images(patient_id, wildcard="*_i.png", exclude_wildcards=[])
    if magnification != 1:
-        patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)
+        patient_img = rescale_patient_images(patient_img, (1, 1, 1), magnification)

    patient_mask = load_patient_images(patient_id, wildcard="*_m.png", exclude_wildcards=[])
    if magnification != 1:
-        patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)
+        patient_mask = rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

    predict_volume_shape_list = [0, 0, 0]
    for dim in range(3):
@@ -225,7 +225,6 @@ def predict_cubes(model_path, patient_id, magnification=1, ext_name=""):

    for patient_index, patient_id in enumerate(reversed(patient_ids)):
        logging.info(patient_index, ": ", patient_id)
-
        patient_img, patient_mask, predict_volume = prepare_data(patient_id, magnification)
        patient_predictions_csv = annotate(model, predict_volume, patient_img, patient_mask)

@@ -281,8 +280,10 @@ def annotate(model, predict_volume, patient_img, patient_mask):
                continue

            if CROP_SIZE != CUBE_SIZE:
-                cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))
+                cube_img = rescale_patient_images(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

+            # if you want to consider CROP_SIZE != CUBE_SIZE, see PR #147 for rescale_patient_images2 which
+            # rescales input images to support this case
            batch_list_coords.append((z, y, x))
            img_prep = prepare_image_for_net3D(cube_img)
            batch_list.append(img_prep)
3 changes: 3 additions & 0 deletions prediction/src/algorithms/segment/assets/best_model.hdf5
Git LFS file not shown
3 changes: 3 additions & 0 deletions prediction/src/algorithms/segment/assets/lung-mask.npy
Git LFS file not shown
60 changes: 60 additions & 0 deletions prediction/src/algorithms/segment/src/data_generation.py
@@ -0,0 +1,60 @@
import glob
import os

import numpy as np
import pylidc as pl
from config import Config


def prepare_training_data(in_docker=True):
    """Save a boolean mask of each DICOM scan at ../assets/segmented_lung_patient_{LIDC-ID}.npy that indicates
    whether a pixel was annotated by an expert as being of at least intermediate malignancy.

    Args:
        in_docker: whether this method is invoked from within docker or from the prediction directory
    """
    INTERMEDIATE_MALIGNANCY = 3  # LIDC malignancy ratings range from 1 to 5
    assets_dir = Config.SEGMENT_ASSETS_DIR
    dicom_wildcard = Config.DICOM_PATHS_DOCKER_WILDCARD

    dicom_paths = sorted(glob.glob(dicom_wildcard))
    for path in dicom_paths:
        directories = path.split(os.path.sep)
        lidc_id_path_index = 2 if in_docker else 5
        lidc_id = directories[lidc_id_path_index]
        lung_patient_file = os.path.join(assets_dir, "segmented_lung_patient_{}.npy".format(lidc_id))

        if os.path.isfile(lung_patient_file):
            continue

        # Compute and save a binary mask indicating whether a pixel is cancerous
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == lidc_id).first()
        if scan is None:
            print("Scan for path '{}' was not found".format(path))
            continue
        vol = scan.to_volume(verbose=False)  # Leading zeros have to be removed from the DICOM file names

        # mask_vol is a boolean indicator volume for the annotations of the scan.
        mask_vol = np.zeros(vol.shape, dtype=np.bool)

        # Load DICOM files and obtain the z-coordinate of each slice, so we can index into the volume.
        dicoms = scan.load_all_dicom_images(verbose=False)
        zs = [float(img.ImagePositionPatient[2]) for img in dicoms]

        cancerous_annotations = pl.query(pl.Annotation).filter(pl.Annotation.malignancy >= INTERMEDIATE_MALIGNANCY,
                                                               pl.Annotation.scan_id == scan.id).all()

        for annotation in cancerous_annotations:
            mask, bbox = annotation.get_boolean_mask(return_bbox=True)

            # Obtain indices of `mask` into `mask_vol`
            i1, i2 = bbox[0].astype(np.int)
            j1, j2 = bbox[1].astype(np.int)

            k1 = zs.index(bbox[2, 0])
            k2 = zs.index(bbox[2, 1])

            # If the area was already segmented, don't overwrite it; merge in the new annotated segmentation
            annotation_area = np.index_exp[i1:i2 + 1, j1:j2 + 1, k1:k2 + 1]
            mask_vol[annotation_area] = np.logical_or(mask, mask_vol[annotation_area])
        np.save(lung_patient_file, mask_vol)
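
Once prepare_training_data() has run, each mask can be loaded straight from the assets directory. A minimal sketch — the LIDC ID is an example, and the file only exists if the corresponding scan was found:

import os

import numpy as np
from config import Config

mask_path = os.path.join(Config.SEGMENT_ASSETS_DIR, 'segmented_lung_patient_LIDC-IDRI-0001.npy')
mask = np.load(mask_path)
print(mask.shape, mask.dtype, int(mask.sum()))  # volume shape, bool, number of nodule voxels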
144 changes: 144 additions & 0 deletions prediction/src/algorithms/segment/src/model.py
@@ -0,0 +1,144 @@
import numpy as np
from keras import backend as K
from keras.engine import Input, Model
from keras.layers import Conv3D, MaxPooling3D, UpSampling3D, Activation
from keras.layers.merge import concatenate
from keras.optimizers import Adam


def simple_model_3d(input_shape, downsize_filters_factor=32, pool_size=(2, 2, 2), n_labels=1,
                    initial_learning_rate=0.01):
    """
    Builds a simple 3D classification model.

    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Factor by which to reduce the number of filters. Making this value larger will
    reduce the amount of memory the model needs during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :return: Untrained simple 3D model
    """
    inputs = Input(input_shape)
    conv1 = Conv3D(int(32 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(inputs)
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)
    conv2 = Conv3D(int(64 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool1)
    up1 = UpSampling3D(size=pool_size)(conv2)
    conv8 = Conv3D(n_labels, (1, 1, 1))(up1)
    act = Activation('sigmoid')(conv8)
    model = Model(inputs=inputs, outputs=act)

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])

    return model


def unet_model_3d(input_shape, downsize_filters_factor=1, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.01, deconvolution=False):
    """
    Builds the 3D U-Net Keras model.

    The [U-Net](https://arxiv.org/abs/1505.04597) uses a fully-convolutional architecture consisting of an encoder
    and a decoder. The encoder captures contextual information while the decoder enables precise localization.
    Due to the large number of parameters, the input shape has to be small: for images of shape 144x144x144, the
    model already consumes 32 GB of memory.

    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Factor by which to reduce the number of filters. Making this value larger will
    reduce the amount of memory the model needs during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :param deconvolution: If set to True, will use transposed convolution (deconvolution) instead of upsampling.
    This increases the amount of memory required during training.
    :return: Untrained 3D U-Net model
    """
    inputs = Input(input_shape)
    conv1 = Conv3D(int(32 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv3D(int(64 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)

    conv2 = Conv3D(int(64 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv3D(int(128 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling3D(pool_size=pool_size)(conv2)

    conv3 = Conv3D(int(128 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv3D(int(256 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling3D(pool_size=pool_size)(conv3)

    conv4 = Conv3D(int(256 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv3D(int(512 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv4)

    up5 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=2,
                     nb_filters=int(512 / downsize_filters_factor), image_shape=input_shape[-3:])(conv4)
    up5 = concatenate([up5, conv3], axis=4)
    conv5 = Conv3D(int(256 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up5)
    conv5 = Conv3D(int(256 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv5)

    up6 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=1,
                     nb_filters=int(256 / downsize_filters_factor), image_shape=input_shape[-3:])(conv5)
    up6 = concatenate([up6, conv2], axis=4)
    conv6 = Conv3D(int(128 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv3D(int(128 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv6)

    up7 = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=0,
                     nb_filters=int(128 / downsize_filters_factor), image_shape=input_shape[-3:])(conv6)
    up7 = concatenate([up7, conv1], axis=4)
    conv7 = Conv3D(int(64 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv3D(int(64 / downsize_filters_factor), (3, 3, 3), activation='relu', padding='same')(conv7)

    conv8 = Conv3D(n_labels, (1, 1, 1))(conv7)
    act = Activation('sigmoid')(conv8)
    model = Model(inputs=inputs, outputs=act)

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])

    return model


def dice_coef(y_true, y_pred, smooth=1.):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)


def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)
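
dice_coef implements the (smoothed) Sørensen-Dice overlap 2|X∩Y| / (|X| + |Y|); the negated loss makes the optimizer maximize it. A quick NumPy check of the same formula on toy masks:

import numpy as np

def dice_np(y_true, y_pred, smooth=1.):
    # Same arithmetic as dice_coef above, on flat numpy arrays
    intersection = np.sum(y_true * y_pred)
    return (2. * intersection + smooth) / (np.sum(y_true) + np.sum(y_pred) + smooth)

print(dice_np(np.array([0., 1., 1., 0.]), np.array([0., 1., 0., 0.])))  # (2*1+1)/(2+1+1) = 0.75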


def compute_level_output_shape(filters, depth, pool_size, image_shape):
    """
    Each level has a particular output shape based on the number of filters used in that level and the depth or
    number of max pooling operations that have been done on the data at that point.

    :param image_shape: shape of the 3d image.
    :param pool_size: the pool_size parameter used in the max pooling operation.
    :param filters: Number of filters used by the last node in a given level.
    :param depth: The number of levels down in the U-shaped model a given node is.
    :return: 5D vector of the shape of the output node
    """
    if depth != 0:
        output_image_shape = np.divide(image_shape, np.multiply(pool_size, depth)).tolist()
    else:
        output_image_shape = image_shape
    return tuple([None, filters] + [int(x) for x in output_image_shape])
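
For instance, at depth 1 with the default pool size, a 64³ volume has been halved once along each axis:

# compute_level_output_shape(filters=256, depth=1, pool_size=(2, 2, 2), image_shape=(64, 64, 64))
# -> (None, 256, 32, 32, 32)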


def get_upconv(depth, nb_filters, pool_size, image_shape, kernel_size=(2, 2, 2), strides=(2, 2, 2),
               deconvolution=False):
    if deconvolution:
        try:
            from keras_contrib.layers import Deconvolution3D
        except ImportError:
            raise ImportError("Install keras_contrib in order to use deconvolution. Otherwise set deconvolution=False.")

        return Deconvolution3D(filters=nb_filters, kernel_size=kernel_size,
                               output_shape=compute_level_output_shape(filters=nb_filters, depth=depth,
                                                                       pool_size=pool_size, image_shape=image_shape),
                               strides=strides, input_shape=compute_level_output_shape(filters=nb_filters,
                                                                                       depth=depth + 1,
                                                                                       pool_size=pool_size,
                                                                                       image_shape=image_shape))
    else:
        return UpSampling3D(size=pool_size)
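
A minimal end-to-end sketch of the smaller model on random stand-in data; the shapes and factor are chosen for a CPU smoke test, and the import path is an assumption, not part of this commit:

import numpy as np

from model import simple_model_3d  # assumed import path within segment/src

# Keras channels-last: (x, y, z, n_channels); 32 is divisible by the pool size
model = simple_model_3d(input_shape=(32, 32, 32, 1), downsize_filters_factor=32)

x = np.random.rand(2, 32, 32, 32, 1).astype(np.float32)
y = (np.random.rand(2, 32, 32, 32, 1) > 0.5).astype(np.float32)
model.fit(x, y, batch_size=1, epochs=1)  # real training uses the masks from data_generation.py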