In [1]:
!gsutil -m rsync -r gs://elvos/multichannel_mip_data/ /home/amy/data/amy1

Building synchronization state...
Starting synchronization


In [2]:
!cd /home/amy/data/amy1

In [3]:
!ls /home/amy/data/amy1

client_secret.json    from_numpy  training_labels.csv
from_luke_training    tmp	  validation
from_luke_validation  training	  validation_labels.csv


In [1]:
import os
import csv
import random
import numpy as np
from scipy.ndimage.interpolation import zoom
from keras.preprocessing.image import ImageDataGenerator

from google.cloud import storage

BLACKLIST = ['LAUIHISOEZIM5ILF',
             '2018050121043822',
             '2018050120260258']

def normalize(image, lower_bound=None, upper_bound=None):
    # TODO: This is an issue, we can't zero center per image
    if lower_bound is None:
        lower_bound = image.min()
    if upper_bound is None:
        upper_bound = image.max()

    image[image > upper_bound] = upper_bound
    image[image < lower_bound] = lower_bound

    return (image - image.mean()) / image.std()

class MipGenerator(object):

    def __init__(self, dims=(120, 120, 1), batch_size=16,
                 shuffle=True,
                 validation=False,
                 split=0.2, extend_dims=True,
                 augment_data=True):
        self.dims = dims
        self.batch_size = batch_size
        self.extend_dims = extend_dims
        self.augment_data = augment_data
        self.validation = validation

        self.datagen = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.1,
            height_shift_range=0.1,
            zoom_range=0.1,
            horizontal_flip=True
        )

        # Delete all content in tmp/npy/
        filelist = [f for f in os.listdir('/home/amy/data/amy1/tmp/npy')]
        for f in filelist:
            os.remove(os.path.join('/home/amy/data/amy1/tmp/npy', f))

        # Get npy files from Google Cloud Storage
        gcs_client = storage.Client.from_service_account_json(
            '/home/amy/data/amy1/client_secret.json'
        )
        bucket = gcs_client.get_bucket('elvos')
        blobs = bucket.list_blobs(prefix='multichannel_mip_data/from_numpy/')

        files = []
        for blob in blobs:
            file = blob.name

            # Check blacklist
            blacklisted = False
            for each in BLACKLIST:
                if each in file:
                    blacklisted = True

            if not blacklisted:
                # Add all data augmentation methods
                files.append({
                    "name": file,
                })

                if self.augment_data and not self.validation:
                    self.__add_augmented(files, file)

        # Split based on validation
        if validation:
            files = files[:int(len(files) * split)]
        else:
            files = files[int(len(files) * split):]

        # Get label data from Google Cloud Storage
        blob = storage.Blob('labels.csv', bucket)
        blob.download_to_filename('/home/amy/data/amy1/tmp/labels.csv')
        label_data = {}
        with open('/home/amy/data/amy1/tmp/labels.csv', 'r') as pos_file:
            reader = csv.reader(pos_file, delimiter=',')
            for row in reader:
                if row[0] != 'patient_id':
                    label_data[row[0]] = int(row[1])

        labels = np.zeros(len(files))
        for i, file in enumerate(files):
            filename = file['name']
            filename = filename.split('/')[-1]
            filename = filename.split('.')[0]
            filename = filename.split('_')[0]
            labels[i] = label_data[filename]

        # Take into account shuffling
        if shuffle:
            tmp = list(zip(files, labels))
            random.shuffle(tmp)
            files, labels = zip(*tmp)
            labels = np.array(labels)

        self.files = files
        self.labels = labels
        self.bucket = bucket

    def __add_augmented(self, files, file):
        for i in range(1):
            files.append({
                "name": file,
            })

    def generate(self):
        steps = self.get_steps_per_epoch()
        while True:
            for i in range(steps):
                #print(i)
                x, y = self.__data_generation(i)
                yield x, y

    def get_steps_per_epoch(self):
        return len(self.files) // self.batch_size

    def __data_generation(self, i):
        bsz = self.batch_size
        files = self.files[i * bsz:(i + 1) * bsz]
        labels = self.labels[i * bsz:(i + 1) * bsz]
        images = []

        # Download files to tmp/npy/
        for i, file in enumerate(files):
            blob = self.bucket.get_blob(file['name'])
            file_id = file['name'].split('/')[-1]
            file_id = file_id.split('.')[0]
            blob.download_to_filename(
                '/home/amy/data/amy1/tmp/npy/{}.npy'.format(file_id)
            )
            img = np.load('/home/amy/data/amy1/tmp/npy/{}.npy'.format(file_id))
            os.remove('/home/amy/data/amy1/tmp/npy/{}.npy'.format(file_id))
            img = self.__transform_images(img)
            # print(np.shape(img))
            images.append(img)
        images = np.array(images)
        return images, labels

    def __transform_images(self, image):
        image = np.moveaxis(image, 0, -1)

        # Set bounds
        image[image < -40] = -40
        image[image > 400] = 400

        # Normalize image and expand dims
        image = normalize(image)
        if self.extend_dims:
            if len(self.dims) == 2:
                image = np.expand_dims(image, axis=-1)
            else:
                image = np.repeat(image[:, :, np.newaxis],
                                  self.dims[2], axis=2)

        # Data augmentation methods
        if self.augment_data and not self.validation:
            image = self.datagen.random_transform(image)

        # Interpolate axis to reduce to specified dimensions
        dims = np.shape(image)
        image = zoom(image, (self.dims[0] / dims[0],
                             self.dims[1] / dims[1],
                             1))
        return image

Using TensorFlow backend.


In [2]:
from keras.models import Model
from keras.layers import (
    Input, BatchNormalization,
    Dense, Flatten, Conv2DTranspose,
    Concatenate, concatenate, Cropping2D
)
from keras.layers.convolutional import Conv2D, MaxPooling2D


class SimpleUNetBuilder(object):

    @staticmethod
    def build(input_shape, num_classes=2):
        """Create a 3D Convolutional Autoencoder model.

        Parameters:
        - input_shape: Tuple of input shape in the format
            (conv_dim1, conv_dim2, conv_dim3, channels)
        - initial_filter: Initial filter size. This will be doubled
            for each hidden layer as it goes deeper.
        - num_encoding_layers: Number of encoding convolutional +
            pooling layers. The number of decoding
            layers will be the same.

        Returns:
        - A 3D CAD model that takes a 5D tensor (volumetric images
        in batch) as input and returns a 5D vector (prediction) as output.
        """

        if len(input_shape) != 3:
            raise ValueError("Input shape should be a tuple "
                             "(conv_dim1, conv_dim2, conv_dim3)")

        input_img = Input(shape=input_shape, name="cad_input")

        # Conv1 (Output n, n, 96)
        conv1 = Conv2D(96, (5, 5), activation='relu',
                       padding='same')(input_img)
        conv1 = Conv2D(96, (5, 5), activation='relu',
                       padding='same')(conv1)

        # Conv2 (Output n/2, n/2, 128)
        conv2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(conv1)
        conv2 = Conv2D(128, (5, 5), activation='relu',
                       padding='same')(conv2)
        conv2 = Conv2D(128, (5, 5), activation='relu',
                       padding='same')(conv2)

        # Conv3 (Output n/4, n/4, 256)
        conv3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(conv2)
        conv3 = Conv2D(256, (3, 3), activation='relu',
                       padding='same')(conv3)
        conv3 = Conv2D(256, (3, 3), activation='relu',
                       padding='same')(conv3)

        # Conv4 (Output n/8, n/8, 512)
        conv4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(conv3)
        conv4 = Conv2D(512, (3, 3), activation='relu',
                       padding='same')(conv4)
        conv4 = Conv2D(512, (3, 3), activation='relu',
                       padding='same')(conv4)

        # Conv5 (Output n/16, n/16, 1024)
        conv5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(conv4)
        conv5 = Conv2D(1024, (3, 3), activation='relu',
                       padding='same')(conv5)
        conv5 = Conv2D(1024, (3, 3), activation='relu',
                       padding='same')(conv5)

        # begin resizing attempt

        # deconv1 (Output n/8, n/8, 512)
        deconv1 = Conv2DTranspose(512, (3, 3), strides=(2, 2),
                                activation='relu', padding='same')(conv5)
        deconv1_1 = Cropping2D(((0, 1), (0, 1)))(conv4)
        both_1 = concatenate([deconv1, deconv1_1])

        # deconv2 (Output n/4, n/4, 256)
        print(type(both_1))
        deconv2 = Conv2DTranspose(256, (3, 3), strides=(2, 2),
                                activation='relu', padding='same')(both_1)
        deconv2_1 = Cropping2D(((0, 3), (0, 3)))(conv3)
        both_2 = concatenate([deconv2, deconv2_1])

        # deconv3 (Output n/2, n/2, 128)
        deconv3 = Conv2DTranspose(128, (5, 5), strides=(2, 2),
                                  activation='relu', padding='same')(both_2)
        deconv3_1 = Cropping2D(((0, 6), (0, 6)))(conv2)
        both_3 = concatenate([deconv3, deconv3_1])

        # deconv4 (Output n, n, 96)
        deconv4 = Conv2DTranspose(96, (5, 5), strides=(2, 2),
                                  activation='relu', padding='same')(both_3)
        deconv4_1 = Cropping2D(((0, 12), (0, 12)))(conv1)
        both_4 = concatenate([deconv4, deconv4_1])

        # Fully connected layers
        print("HELLO 1")
        dense1 = Dense(1024, activation='relu', use_bias=True)(both_4)
        print("HELLO 2")
        dense2 = Dense(1024, activation='relu', use_bias=True)(dense1)
        print("HELLO 3")
        print(dense2.shape)
        output_img = Dense(num_classes, activation='sigmoid',
                                    use_bias=True)(dense2)
        print("HELLO 4")
        print(output_img.shape)

        # begin unused code
        # Conv2 (Output 50 x 50 x 64)
        # x = Conv2D(256, (5, 5), activation='relu', padding='same')(x)
        # x = BatchNormalization()(x)
        # x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

        # # Conv3 (Output 12 x 12 x 96)
        # x = Conv2D(512, (3, 3), activation='relu',
        #            padding='same')(x)

        # # Conv4 (Output 6 x 6 x 128)
        # x = Conv2D(512, (3, 3), activation='relu', strides=(2, 2),
        #            padding='same')(x)

        # # Conv5 (Output 3 x 3 x 128)
        # x = Conv2D(1024, (3, 3), activation='relu', strides=(2, 2),
        #            padding='same')(x)

        # # Flatten
        # x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
        # x = Flatten()(x)

        # # Fully connected layers
        # x = Dense(1024, activation='relu', use_bias=True)(x)
        # x = Dense(1024, activation='relu', use_bias=True)(x)
        # output_img = Dense(num_classes, activation='sigmoid',
        #                    use_bias=True)(x)

        model = Model(inputs=input_img, outputs=output_img)
        return model


m = SimpleUNetBuilder.build((220, 220, 3))
m.summary()

<class 'tensorflow.python.framework.ops.Tensor'>
HELLO 1
HELLO 2
HELLO 3
(?, 208, 208, 1024)
HELLO 4
(?, 208, 208, 2)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
cad_input (InputLayer)          (None, 220, 220, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 220, 220, 96) 7296        cad_input[0][0]                  
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 220, 220, 96) 230496      conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 110, 110, 96) 0           conv2d_2[0][0]           

In [7]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D 
from keras.models import Model
from keras.optimizers import Adam

# Generators
training_generator = MipGenerator(dims=(220,220,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True)
validation_generator = MipGenerator(dims=(220,220,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True,
                                  validation=True)

m.compile(optimizer=Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-08,decay=0.0), loss='binary_crossentropy', metrics=['accuracy'])

m.fit_generator(generator=training_generator.generate(),
                    validation_data=validation_generator.generate(),
                    steps_per_epoch=training_generator.get_steps_per_epoch(),
                    validation_steps=validation_generator.get_steps_per_epoch(),
                    epochs = 10)

Epoch 1/10


ValueError: Error when checking target: expected dense_9 to have 4 dimensions, but got array with shape (10, 1)

In [1]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import (
    Input, BatchNormalization,
    Dense, Flatten, Conv2DTranspose,
    Concatenate, Cropping2D
)

# Datasets
#partition =  load_training_data() # IDs
#labels = load_labels()[1] # Labels

# Generators
training_generator = MipGenerator(dims=(220,220,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True)
validation_generator = MipGenerator(dims=(220,220,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True,
                                  validation=True)

# Design model
model = Sequential()

model.add(Dense(32, input_shape=(220,220,3)))
model.add(Dense(32, input_shape=(220,200,3)))
model.add(Flatten())
model.add(Dense(1))
model.summary()

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train model on dataset
model.fit_generator(generator=training_generator.generate(),
                    validation_data=validation_generator.generate(),
                    steps_per_epoch=training_generator.get_steps_per_epoch(),
                    validation_steps=validation_generator.get_steps_per_epoch(),
                    epochs = 10)

Using TensorFlow backend.


NameError: name 'MipGenerator' is not defined

In [3]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D 
from keras.models import Model
from keras.optimizers import Adam

# Generators
training_generator = MipGenerator(dims=(230,230,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True)
validation_generator = MipGenerator(dims=(230,230,3),
                                  extend_dims=False,
                                  batch_size=10,
                                  augment_data=True,
                                  validation=True)

# expected input shape: (160, 160, 3)
#base_model = applications.nasnet.NASNetMobile(input_shape=(220, 220, 3), include_top=False, weights='imagenet', input_tensor=None, pooling=None, classes=1000)

base_model = applications.densenet.DenseNet121(include_top=False, weights='imagenet', input_tensor=None, input_shape=(230,230,3), pooling=None, classes=1000)

# build a classifier model to put on top of the convolutional model
# This consists of a global average pooling layer and a fully connected layer with 256 nodes # Then apply dropout and sigmoid activation
model_top = Sequential()
model_top.add(GlobalAveragePooling2D(input_shape=base_model.output_shape[1:], data_format=None))
model_top.add(Dense(1024, activation='relu'))
model_top.add(Dropout(0.7))
model_top.add(Dense(1, activation='sigmoid'))
model = Model(inputs=base_model.input, outputs=model_top(base_model.output))
# Compile model using Adam optimizer with common values and binary cross entropy loss # Use low learning rate (lr) for transfer learning
model.compile(optimizer=Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-08,decay=0.0), loss='binary_crossentropy', metrics=['accuracy'])

# Train model on dataset
model.fit_generator(generator=training_generator.generate(),
                    validation_data=validation_generator.generate(),
                    steps_per_epoch=training_generator.get_steps_per_epoch(),
                    validation_steps=validation_generator.get_steps_per_epoch(),
                    epochs = 15)

Epoch 1/15




Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fe59ff45f98>

In [None]:
# import matplotlib library, and plot training cuve
import matplotlib.pyplot as plt 
print(history.history.keys())
plt.figure()
plt.plot(history.history['acc'], 'orange', label='Training accuracy') 
plt.plot(history.history['val_acc'], 'blue', label='Validation accuracy') 
plt.plot(history.history['loss'], 'red', label='Training loss') 
plt.plot(history.history['val_loss'], 'green', label='Validation loss') 
plt.legend()
plt.show()

In [None]:
model.save('model-densenet-06-18-18')