Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
add data generation, training and prediction code for nodule segmenta…
Browse files Browse the repository at this point in the history
…tion using keras
  • Loading branch information
WGierke committed Oct 2, 2017
1 parent aed9fd5 commit 707e012
Show file tree
Hide file tree
Showing 559 changed files with 505 additions and 106 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ tests/assets/test_image_data/small/**/*.dcm -filter=lfs -diff=lfs -merge=lfs -te
test/assets/* filter=lfs diff=lfs merge=lfs -text
*.dcm filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.hd5 filter=lfs diff=lfs merge=lfs -text
*.hdf5 filter=lfs diff=lfs merge=lfs -text
*.mhd filter=lfs diff=lfs merge=lfs -text
*.raw filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions compose/prediction/Dockerfile-dev
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ RUN ln -s /usr/bin/python3.6 /usr/local/bin/python
# Requirements have to be pulled and installed here, otherwise caching won't work
COPY ./prediction/requirements /requirements
RUN pip install -r /requirements/local.txt
COPY ./prediction/.pylidcrc /root/.pylidcrc

WORKDIR /app
3 changes: 3 additions & 0 deletions prediction/.pylidcrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[dicom]
path = /images_full
warn = True
2 changes: 1 addition & 1 deletion prediction/requirements/local.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-r base.txt
flake8==3.3.0
pytest==3.1.3
pylidc==0.1.8
pylidc==0.1.9
66 changes: 0 additions & 66 deletions prediction/src/algorithms/identify/helpers.py

This file was deleted.

10 changes: 3 additions & 7 deletions prediction/src/algorithms/identify/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from keras.metrics import binary_accuracy, binary_crossentropy, mean_absolute_error
from keras.models import Model
from keras.optimizers import SGD

from . import helpers
from src.preprocess.lung_segmentation import rescale_patient_images

CUBE_SIZE = 32
MEAN_PIXEL_VALUE = 41
Expand Down Expand Up @@ -180,11 +179,11 @@ def predict_cubes(model_path, patient_id, magnification=1, ext_name=""): # noqa

patient_img = load_patient_images(patient_id, wildcard="*_i.png", exclude_wildcards=[])
if magnification != 1:
patient_img = helpers.rescale_patient_images(patient_img, (1, 1, 1), magnification)
patient_img = rescale_patient_images(patient_img, (1, 1, 1), magnification)

patient_mask = load_patient_images(patient_id, wildcard="*_m.png", exclude_wildcards=[])
if magnification != 1:
patient_mask = helpers.rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)
patient_mask = rescale_patient_images(patient_mask, (1, 1, 1), magnification, is_mask_image=True)

step = PREDICT_STEP
CROP_SIZE = CUBE_SIZE
Expand Down Expand Up @@ -221,9 +220,6 @@ def predict_cubes(model_path, patient_id, magnification=1, ext_name=""): # noqa
if cube_mask.sum() < 2000:
skipped_count += 1
else:
if CROP_SIZE != CUBE_SIZE:
cube_img = helpers.rescale_patient_images2(cube_img, (CUBE_SIZE, CUBE_SIZE, CUBE_SIZE))

img_prep = prepare_image_for_net3D(cube_img)
batch_list.append(img_prep)
batch_list_coords.append((z, y, x))
Expand Down
3 changes: 3 additions & 0 deletions prediction/src/algorithms/segment/assets/best_model.hdf5
Git LFS file not shown
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
61 changes: 61 additions & 0 deletions prediction/src/algorithms/segment/src/data_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import glob
import os

import numpy as np
import pylidc as pl


def get_dicom_paths(in_docker=True):
    """Return the paths of all LIDC series directories below the image data root.

    The glob pattern is not evaluated recursively, so each ``**`` matches exactly one path level and the
    result contains entries two levels below every ``LIDC-IDRI-*`` directory, e.g.
    ``../images_full/LIDC-IDRI-0001/<study uid>/<series uid>``.

    :param in_docker: If True, search ``../images_full``; otherwise search the test assets under
        ``../tests/assets/test_image_data/full``. Both roots are relative to the current working directory.
    :return: List of matching paths (empty if nothing matches).
    """
    pattern = ("../images_full/LIDC-IDRI-*/**/**" if in_docker
               else "../tests/assets/test_image_data/full/LIDC-IDRI-*/**/**")
    return glob.glob(pattern)


def prepare_training_data():
    """Compute and store a boolean nodule mask volume for every LIDC scan found by ``get_dicom_paths``.

    For each DICOM path the patient id is read from the ``LIDC-IDRI-*`` path component and the first
    pylidc scan of that patient is loaded. All annotations of that scan with ``malignancy >= 3`` are
    OR-ed into one boolean volume that has the shape of the scan volume. The result is written with
    ``np.save`` to ``../assets/segmented_lung_patient_<patient id>.npy`` (relative to this file).
    Patients whose mask file already exists are skipped, as are paths for which no scan is found.

    :return: None
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    assets_dir = os.path.abspath(os.path.join(current_dir, '../assets'))

    for path in sorted(get_dicom_paths()):
        directories = path.split('/')
        # Find the patient directory by its name rather than by a fixed position, so the lookup does not
        # depend on how deep the data root is. Fall back to the historical position if nothing matches.
        lidc_id = next((part for part in directories if part.startswith('LIDC-IDRI-')), None)
        if lidc_id is None:
            lidc_id = directories[2]
        lung_patient_file = os.path.join(assets_dir, "segmented_lung_patient_{}".format(lidc_id))

        # np.save appends ".npy" to names without that extension, so the file that ends up on disk is
        # "<lung_patient_file>.npy". Checking only the bare name would never detect earlier results.
        if os.path.isfile(lung_patient_file + ".npy") or os.path.isfile(lung_patient_file):
            continue

        # Compute and save binary mask with information whether pixel is cancerous
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == lidc_id).first()
        if scan is None:
            print("Scan for path '{}' was not found".format(path))
            continue
        vol = scan.to_volume(verbose=False)

        # mask_vol is a boolean indicator volume that accumulates all selected annotations of the scan.
        # The builtin bool is used because the np.bool alias no longer exists in current NumPy releases.
        mask_vol = np.zeros(vol.shape, dtype=bool)

        # Load DICOM files and obtain z-coords for each slice, so we can index into them.
        dicoms = scan.load_all_dicom_images(verbose=False)
        zs = [float(img.ImagePositionPatient[2]) for img in dicoms]

        cancerous_annotations = pl.query(pl.Annotation).filter(pl.Annotation.malignancy >= 3,
                                                               pl.Annotation.scan_id == scan.id).all()

        for annotation in cancerous_annotations:
            mask, bbox = annotation.get_boolean_mask(return_bbox=True)

            # Obtain indexes of `mask` into `mask_vol` (builtin int replaces the removed np.int alias)
            i1, i2 = bbox[0].astype(int)
            j1, j2 = bbox[1].astype(int)

            # The third bbox row holds z-coordinates; translate them into slice indexes.
            k1 = zs.index(bbox[2, 0])
            k2 = zs.index(bbox[2, 1])

            # In case the area already was segmented, don't overwrite it but add the annotated segmentation
            annotation_area = np.index_exp[i1:i2 + 1, j1:j2 + 1, k1:k2 + 1]
            mask_vol[annotation_area] = np.logical_or(mask, mask_vol[annotation_area])
        np.save(lung_patient_file, mask_vol)
139 changes: 139 additions & 0 deletions prediction/src/algorithms/segment/src/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import numpy as np
from keras import backend as K
from keras.engine import Input, Model
from keras.layers import Conv3D, MaxPooling3D, UpSampling3D, Activation
from keras.layers.merge import concatenate
from keras.optimizers import Adam


def simple_model_3d(input_shape, downsize_filters_factor=32, pool_size=(2, 2, 2), n_labels=1,
                    initial_learning_rate=0.01):
    """
    Builds and compiles a small 3D convolutional model: one convolution, one max pooling, a second
    convolution, one upsampling step and a final 1x1x1 convolution followed by a sigmoid.
    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Divisor applied to the base filter counts (32 and 64). A larger value
    means fewer filters and therefore less memory during training.
    :param pool_size: Pool size for the max pooling operation; the same size is used for upsampling.
    :param n_labels: Number of binary labels, i.e. number of output channels.
    :param initial_learning_rate: Learning rate handed to the Adam optimizer.
    :return: Compiled, untrained simple 3D Model using the dice loss and dice metric
    """
    encoder_filters = int(32 / downsize_filters_factor)
    bottleneck_filters = int(64 / downsize_filters_factor)
    conv_options = dict(activation='relu', padding='same')

    net_input = Input(input_shape)
    encoded = Conv3D(encoder_filters, (3, 3, 3), **conv_options)(net_input)
    downsampled = MaxPooling3D(pool_size=pool_size)(encoded)
    bottleneck = Conv3D(bottleneck_filters, (3, 3, 3), **conv_options)(downsampled)
    upsampled = UpSampling3D(size=pool_size)(bottleneck)

    # One output channel per label, squashed to (0, 1) by the sigmoid
    logits = Conv3D(n_labels, (1, 1, 1))(upsampled)
    probabilities = Activation('sigmoid')(logits)

    model = Model(inputs=net_input, outputs=probabilities)
    optimizer = Adam(lr=initial_learning_rate)
    model.compile(optimizer=optimizer, loss=dice_coef_loss, metrics=[dice_coef])
    return model


def unet_model_3d(input_shape, downsize_filters_factor=1, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.01, deconvolution=False):
    """
    Builds and compiles the 3D UNet Keras model: three down-sampling levels, a bottom level and three
    up-sampling levels whose inputs are concatenated with the matching down-sampling outputs.
    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Factor to which to reduce the number of filters. Making this value larger will
    reduce the amount of memory the model will need during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Learning rate handed to the Adam optimizer.
    :param deconvolution: If set to True, will use transpose convolution (deconvolution) instead of upsampling. This
    increases the amount of memory required during training.
    :return: Compiled, untrained 3D UNet Model using the dice loss and dice metric
    """
    def scaled(base_filters):
        # Number of filters after applying the memory-saving divisor
        return int(base_filters / downsize_filters_factor)

    def double_conv(tensor, first_filters, second_filters):
        # Two consecutive 3x3x3 ReLU convolutions that keep the spatial size
        for base_filters in (first_filters, second_filters):
            tensor = Conv3D(scaled(base_filters), (3, 3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def up_and_merge(tensor, skip, depth, base_filters):
        # Upsample `tensor` and append the skip connection along the last (channel) axis.
        # NOTE(review): image_shape is only used when deconvolution=True. With the channels-last
        # input_shape described above, input_shape[-3:] would include the channel axis - confirm
        # before enabling deconvolution.
        upsampled = get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=depth,
                               nb_filters=scaled(base_filters), image_shape=input_shape[-3:])(tensor)
        return concatenate([upsampled, skip], axis=4)

    inputs = Input(input_shape)

    # Contracting path
    conv1 = double_conv(inputs, 32, 64)
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)

    conv2 = double_conv(pool1, 64, 128)
    pool2 = MaxPooling3D(pool_size=pool_size)(conv2)

    conv3 = double_conv(pool2, 128, 256)
    pool3 = MaxPooling3D(pool_size=pool_size)(conv3)

    # Bottom of the U
    conv4 = double_conv(pool3, 256, 512)

    # Expanding path
    up5 = up_and_merge(conv4, conv3, depth=2, base_filters=512)
    conv5 = double_conv(up5, 256, 256)

    up6 = up_and_merge(conv5, conv2, depth=1, base_filters=256)
    conv6 = double_conv(up6, 128, 128)

    up7 = up_and_merge(conv6, conv1, depth=0, base_filters=128)
    conv7 = double_conv(up7, 64, 64)

    # One output channel per label, squashed to (0, 1) by the sigmoid
    conv8 = Conv3D(n_labels, (1, 1, 1))(conv7)
    act = Activation('sigmoid')(conv8)
    model = Model(inputs=inputs, outputs=act)

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])

    return model


def dice_coef(y_true, y_pred, smooth=1.):
    """
    Smoothed Dice coefficient of two tensors, computed over all of their elements.
    :param y_true: Ground truth tensor.
    :param y_pred: Predicted tensor.
    :param smooth: Constant added to numerator and denominator, which keeps the ratio defined when both
    tensors sum to zero.
    :return: Scalar tensor (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator


def dice_coef_loss(y_true, y_pred):
    """
    Loss derived from the Dice coefficient: its negation, so that minimizing the loss maximizes the overlap.
    :param y_true: Ground truth tensor.
    :param y_pred: Predicted tensor.
    :return: Negated result of dice_coef(y_true, y_pred)
    """
    overlap_score = dice_coef(y_true, y_pred)
    return -overlap_score


def compute_level_output_shape(filters, depth, pool_size, image_shape):
    """
    Each level has a particular output shape based on the number of filters used in that level and the depth or number
    of max pooling operations that have been done on the data at that point.
    :param image_shape: shape of the 3d image.
    :param pool_size: the pool_size parameter used in the max pooling operation.
    :param filters: Number of filters used by the last node in a given level.
    :param depth: The number of levels down in the U-shaped model a given node is.
    :return: 5D vector of the shape of the output node: (None, filters, dim1, dim2, dim3)
    """
    # Every pooling step divides each dimension by pool_size again, so after `depth` steps the total
    # reduction is pool_size ** depth (not pool_size * depth). For depth 0 the factor is 1 and the
    # image shape is returned unchanged.
    reduction = np.power(pool_size, depth)
    output_image_shape = np.divide(image_shape, reduction).tolist()
    return tuple([None, filters] + [int(x) for x in output_image_shape])


def get_upconv(depth, nb_filters, pool_size, image_shape, kernel_size=(2, 2, 2), strides=(2, 2, 2),
               deconvolution=False):
    """
    Creates the layer that enlarges the data again on the way up the U-shaped model.
    :param depth: The number of levels down in the model at which the layer's output lives.
    :param nb_filters: Number of filters of the transpose convolution (unused for plain upsampling).
    :param pool_size: Pool size of the max pooling operation that is being undone.
    :param image_shape: shape of the 3d image, used to derive the transpose convolution's shapes.
    :param kernel_size: Kernel size of the transpose convolution.
    :param strides: Strides of the transpose convolution.
    :param deconvolution: If True, return a keras_contrib Deconvolution3D layer, otherwise an UpSampling3D layer.
    :return: Layer that still has to be applied to a tensor
    :raises ImportError: If deconvolution is requested but keras_contrib is not installed.
    """
    if not deconvolution:
        return UpSampling3D(size=pool_size)

    try:
        from keras_contrib.layers import Deconvolution3D
    except ImportError:
        raise ImportError("Install keras_contrib in order to use deconvolution. Otherwise set deconvolution=False.")

    shape_options = dict(filters=nb_filters, pool_size=pool_size, image_shape=image_shape)
    # The layer outputs data at `depth` and consumes data from one level further down
    level_output_shape = compute_level_output_shape(depth=depth, **shape_options)
    level_input_shape = compute_level_output_shape(depth=depth + 1, **shape_options)
    return Deconvolution3D(filters=nb_filters, kernel_size=kernel_size, output_shape=level_output_shape,
                           strides=strides, input_shape=level_input_shape)
Loading

0 comments on commit 707e012

Please sign in to comment.