# Import libraries

In [1]:
import os
import sys
import warnings
import scipy.misc
import numpy as np
import math
import matplotlib.pyplot as plt
from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images, imsave
from skimage.transform import resize
import cv2
import matplotlib
from keras import regularizers
from scipy import ndimage
from keras.models import Model, load_model
from keras.layers import *
from keras import backend as K
from keras import optimizers
from keras.callbacks import EarlyStopping
from keras.utils import multi_gpu_model 
import tensorflow as tf
from keras.metrics import binary_crossentropy
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# define constants

In [4]:
# Spatial size every image and mask is resized to, and the model's input size.
IMG_WIDTH = 256
IMG_HEIGHT = 256
# NOTE(review): not referenced in the visible cells; the loaders and the model use a single channel.
IMG_CHANNELS = 3
# Directories read by load_train() / load_test(); masks are looked up as "<name>_mask<ext>".
TRAIN_IM = './train_im/'
TRAIN_MASK = './train_mask/'
TEST_IM = './test_im/'
TEST_MASK = './test_mask/'
# Number of mask channels kept by the loaders and number of model output channels.
NUM_CLASSES = 4
# The model is wrapped with multi_gpu_model only when this is greater than 1.
NUM_GPUS = 8

# Load training and test images

In [None]:
def load_train():
    """Load the training images and their masks into dense arrays.

    Every file in ``TRAIN_IM`` is read, channel index 2 is kept, and the result
    is resized to ``(IMG_HEIGHT, IMG_WIDTH, 1)``. The matching mask is read from
    ``TRAIN_MASK`` as ``<name>_mask<ext>``; its first ``NUM_CLASSES`` channels
    are resized to ``(IMG_HEIGHT, IMG_WIDTH)``.

    Returns:
        tuple: ``(X_train, Y_train)`` where ``X_train`` is a uint8 array of
        shape ``(N, IMG_HEIGHT, IMG_WIDTH, 1)`` and ``Y_train`` is a boolean
        array of shape ``(N, IMG_HEIGHT, IMG_WIDTH, NUM_CLASSES)``.
    """
    # List the directory once, sorted, so the array size always matches the
    # iteration and the sample order is reproducible across runs.
    filenames = sorted(os.listdir(TRAIN_IM))
    num_train = len(filenames)
    X_train = np.zeros((num_train, IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
    # Builtin bool instead of the np.bool alias, which NumPy 1.24 removed.
    Y_train = np.zeros((num_train, IMG_HEIGHT, IMG_WIDTH, NUM_CLASSES), dtype=bool)
    sys.stdout.flush()
    # load training images
    for count, filename in tqdm(enumerate(filenames), total=num_train):
        img = imread(os.path.join(TRAIN_IM, filename))[:, :, 2]
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH, 1), mode='constant', preserve_range=True)
        X_train[count] = img
        name, ext = os.path.splitext(filename)
        mask_name = name + '_mask' + ext
        mask = imread(os.path.join(TRAIN_MASK, mask_name))[:, :, :NUM_CLASSES]
        # NOTE(review): the resized mask is stored in a boolean array, so every
        # non-zero resized value becomes True — confirm this is the intended
        # binarisation at class borders.
        mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH))
        Y_train[count] = mask
    return X_train, Y_train
    
def load_test():
    """Load the hold-out images and their masks into dense arrays.

    Every file in ``TEST_IM`` is read, channel index 2 is kept, and the result
    is resized to ``(IMG_HEIGHT, IMG_WIDTH, 1)``. The matching mask is read from
    ``TEST_MASK`` as ``<name>_mask<ext>``; its first ``NUM_CLASSES`` channels
    are resized to ``(IMG_HEIGHT, IMG_WIDTH)``.

    Returns:
        tuple: ``(X_test, Y_test)`` where ``X_test`` is a uint8 array of shape
        ``(N, IMG_HEIGHT, IMG_WIDTH, 1)`` and ``Y_test`` is a boolean array of
        shape ``(N, IMG_HEIGHT, IMG_WIDTH, NUM_CLASSES)``.
    """
    # List the directory once, sorted, so the array size always matches the
    # iteration and the sample order is reproducible across runs.
    filenames = sorted(os.listdir(TEST_IM))
    num_test = len(filenames)
    X_test = np.zeros((num_test, IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
    # Builtin bool instead of the np.bool alias, which NumPy 1.24 removed.
    Y_test = np.zeros((num_test, IMG_HEIGHT, IMG_WIDTH, NUM_CLASSES), dtype=bool)
    sys.stdout.flush()
    for count, filename in tqdm(enumerate(filenames), total=num_test):
        img = imread(os.path.join(TEST_IM, filename))[:, :, 2]
        img = resize(img, (IMG_HEIGHT, IMG_WIDTH, 1), mode='constant', preserve_range=True)
        X_test[count] = img
        name, ext = os.path.splitext(filename)
        mask_name = name + '_mask' + ext
        mask = imread(os.path.join(TEST_MASK, mask_name))[:, :, :NUM_CLASSES]
        # NOTE(review): the resized mask is stored in a boolean array, so every
        # non-zero resized value becomes True — confirm this is the intended
        # binarisation at class borders.
        mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH))
        Y_test[count] = mask
    return X_test, Y_test
# Load the training set when this cell runs; X_train / Y_train are read by later cells
# (class-weight computation and model.fit).
X_train, Y_train = load_train()

# compute weight for each class

In [None]:
def comp_weights(masks=None, alpha=1):
    """Compute per-class loss weights from pixel counts in one-hot masks.

    Channels 0..3 of the masks are counted as background, nuclei, chromosomes
    and ecDNA respectively. The background weight is fixed to 1; every other
    class is weighted relative to the nuclei class as
    ``nuc_count**alpha / class_count**alpha``.

    Args:
        masks: array whose last axis holds at least four class channels.
            Defaults to the module-level ``Y_train``.
        alpha: exponent used to scale the count ratios (1 = plain ratio).

    Returns:
        list: ``[back_w, nuc_w, chrom_w, ec_w]``.

    Raises:
        ValueError: if the masks contain no nuclei pixels, since every weight
            is expressed relative to that class.
    """
    if masks is None:
        masks = Y_train
    masks = np.asarray(masks)

    # One vectorised reduction per channel instead of a Python loop over images;
    # int64 accumulation avoids overflow on platforms with a 32-bit default int.
    back_count, nuc_count, chrom_count, ec_count = (
        int(masks[..., channel].sum(dtype=np.int64)) for channel in range(4)
    )
    print("number of pixels for background, nuclei, chromosomes, ecDNA: ",
          back_count, nuc_count, chrom_count, ec_count)

    if nuc_count == 0:
        raise ValueError("cannot compute class weights: the masks contain no nuclei pixels")

    def _relative_weight(count):
        # A class with no pixels would otherwise get an infinite weight and
        # poison the loss with NaNs; give it a neutral weight instead.
        if count == 0:
            return 1.0
        return float(nuc_count) ** alpha / float(count) ** alpha

    back_w = 1
    nuc_w = _relative_weight(nuc_count)
    chrom_w = _relative_weight(chrom_count)
    ec_w = _relative_weight(ec_count)
    return [back_w, nuc_w, chrom_w, ec_w]

In [None]:
def weighted_loss(original_loss, weights_list):
    """Wrap a loss so each position is scaled by the weight of its true class.

    Args:
        original_loss: callable ``(true, pred) -> loss`` to be re-weighted.
        weights_list: one weight per class, indexed by class id.

    Returns:
        A ``(true, pred)`` loss function. The true class at each position is
        the argmax of ``true`` over the last axis; the wrapped loss is
        multiplied by the weight belonging to that class.
    """
    def lossFunc(true, pred):
        # For every class: a float indicator of "true class == this class",
        # multiplied by that class's weight.
        per_class_terms = []
        for class_index, class_weight in enumerate(weights_list):
            is_class = K.equal(tf.cast(class_index, tf.int64),
                               tf.cast(K.argmax(true, axis=-1), tf.int64))
            per_class_terms.append(K.cast(is_class, K.floatx()) * class_weight)

        # Exactly one indicator is non-zero per position, so the sum is the
        # weight of the true class there.
        scalar = per_class_terms[0]
        for term in per_class_terms[1:]:
            scalar = scalar + term

        return original_loss(true, pred) * scalar
    return lossFunc

# Custom loss functions and metric
def dice_coef(y_true, y_pred):
    """Return the Dice coefficient of two tensors, smoothed by +1.

    Both tensors are flattened, so the score is computed over all elements at
    once. The +1 in numerator and denominator keeps the ratio defined when
    both inputs sum to zero.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + 1.
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + 1.
    return numerator / denominator

def dice_loss(y_true, y_pred):
    """Return the Dice loss, i.e. one minus the Dice coefficient."""
    overlap_score = dice_coef(y_true, y_pred)
    return 1 - overlap_score

def BCE_loss(y_true, y_pred):
    """Return the binary cross-entropy between targets and predictions."""
    cross_entropy = binary_crossentropy(y_true, y_pred)
    return cross_entropy

def bce_dice(y_true, y_pred):
    """Return the sum of the binary cross-entropy loss and the Dice loss."""
    cross_entropy_term = BCE_loss(y_true, y_pred)
    overlap_term = dice_loss(y_true, y_pred)
    return cross_entropy_term + overlap_term

def mIoU(y_true, y_pred):
    """Mean IoU metric averaged over a sweep of prediction thresholds.

    The predictions are binarised at each threshold in 0.50, 0.55, ..., 0.95,
    a ``tf.metrics.mean_iou`` score is computed for each, and the scores are
    averaged.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted probability tensor.

    Returns:
        Scalar tensor holding the mean of the per-threshold IoU scores.
    """
    prec = []
    # The arguments were previously (0.05, 0.1, 0.5): start/stop/step were
    # swapped, so the loop ran exactly once with a threshold of 0.05.
    for t in np.arange(0.5, 1.0, 0.05):
        score, up_opt = tf.metrics.mean_iou(y_true, tf.to_int32(y_pred > t), num_classes=NUM_CLASSES)
        # The streaming metric keeps its state in local variables, which must
        # be initialised before the score can be evaluated.
        K.get_session().run(tf.local_variables_initializer())
        # Force the update op to run before the score is read.
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=-1)

# Progress marker: shown once the loss and metric definitions in this cell have been executed.
print('Weight functions compiled')

# build model function

In [2]:
def build_model(width=32, num_classes=4):
    """Assemble the encoder/decoder segmentation network.

    The input has shape ``(IMG_HEIGHT, IMG_WIDTH, 1)`` and is divided by 255.
    Four down-sampling stages (``width`` x 1, 2, 4, 8 filters) lead to a
    bottleneck with ``width * 16`` filters. Four up-sampling stages mirror
    them, each concatenating the transposed-convolution output with the
    encoder features of the same resolution.

    Args:
        width: number of filters in the first stage; deeper stages use
            multiples of it.
        num_classes: number of output channels of the final 1x1 convolution.

    Returns:
        An uncompiled Keras ``Model`` with sigmoid outputs.
    """
    def conv_block(x, width, k_reg=False):
        """Two 3x3 ELU convolutions; only the second is L2-regularised when ``k_reg``."""
        second_conv_reg = regularizers.l2(0.001) if k_reg else None
        first = Conv2D(width, (3, 3), activation='elu', padding='same') (x)
        return Conv2D(width, (3, 3), activation='elu', padding='same',
                      kernel_regularizer=second_conv_reg) (first)

    inputs = Input((IMG_HEIGHT, IMG_WIDTH, 1))
    scaled = Lambda(lambda x: x / 255) (inputs)

    # Encoder: each stage halves the resolution and doubles the filter count.
    enc1 = conv_block(scaled, width)
    down1 = MaxPooling2D((2, 2)) (enc1)

    enc2 = conv_block(down1, width*2, k_reg=True)
    down2 = MaxPooling2D((2, 2)) (enc2)

    enc3 = conv_block(down2, width*4, k_reg=True)
    down3 = MaxPooling2D((2, 2)) (enc3)

    enc4 = conv_block(down3, width*8, k_reg=True)
    down4 = MaxPooling2D((2, 2)) (enc4)

    bottleneck = conv_block(down4, width*16)

    # Decoder: upsample, join with the encoder features of the same resolution, convolve.
    up4 = Conv2DTranspose(width*8, (2, 2), strides=(2, 2), padding='same') (bottleneck)
    dec4 = conv_block(concatenate([up4, enc4]), width*8)

    up3 = Conv2DTranspose(width*4, (2, 2), strides=(2, 2), padding='same') (dec4)
    dec3 = conv_block(concatenate([up3, enc3]), width*4)

    up2 = Conv2DTranspose(width*2, (2, 2), strides=(2, 2), padding='same') (dec3)
    dec2 = conv_block(concatenate([up2, enc2]), width*2)

    up1 = Conv2DTranspose(width, (2, 2), strides=(2, 2), padding='same') (dec2)
    dec1 = conv_block(concatenate([up1, enc1], axis=3), width)

    # One sigmoid output channel per class.
    outputs = Conv2D(num_classes, (1, 1), activation='sigmoid') (dec1)
    return Model(inputs=[inputs], outputs=[outputs])

# build and compile model (multi-GPU support)

In [None]:
# Build the network; when more than one GPU is requested, wrap it with multi_gpu_model.
model = build_model(num_classes=NUM_CLASSES)
if(NUM_GPUS > 1):
    model = multi_gpu_model(model, gpus=NUM_GPUS)
# Per-class weights derived from the pixel counts in Y_train.
weights = comp_weights()
# Loss: class-weighted (binary cross-entropy + Dice); reported metric: mIoU.
model.compile(optimizer='Adamax', loss = weighted_loss(bce_dice, weights), metrics = [mIoU])
model.summary()

# train model

In [None]:
# Stop once the monitored validation value has not improved for 7 epochs.
earlystopper = EarlyStopping(patience=7, verbose=1)
# The last 25% of the samples are held out for validation.
history = model.fit(X_train, Y_train, validation_split=0.25, batch_size = 16, 
                             verbose=1, epochs=45, callbacks=[earlystopper])
# With multi_gpu_model the single-device template network sits at layers[-2]
# of the wrapper. Without the wrapper (NUM_GPUS <= 1), layers[-2] is an
# ordinary layer with no save_weights(), so `model` itself must be saved.
model_out = model.layers[-2] if NUM_GPUS > 1 else model
model_out.save_weights(filepath="./ecDNA_model.hdf5")

# plot training results

In [None]:
print(history.history.keys())

# One figure per tracked quantity: (history key, title, y-axis label, output file).
# The IoU curve was previously labelled 'accuracy' on its y-axis.
for metric_key, plot_title, y_label, out_file in (
        ('mIoU', 'ecDNA IoU score', 'mean IoU', 'IoU.png'),
        ('loss', 'ecDNA loss', 'loss', 'loss.png')):
    plt.figure()
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title(plot_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    # Save before show(): show() may release the figure.
    plt.savefig(out_file)
    plt.show()

# Also save the full model, not just a weights file

In [5]:
# Rebuild the single-device network with the same class count used for training,
# so the saved weights always match the layer shapes, then store architecture
# and weights together in one file.
model = build_model(num_classes=NUM_CLASSES)
model.load_weights("./ecDNA_model.hdf5")
model.save('ecDNA_model.h5')

Instructions for updating:
Colocations handled automatically by placer.


# predict on holdout set

In [None]:
from os import listdir
from os.path import isfile, join
X_test, Y_test = load_test()
# Regular files in the test image directory.
# NOTE(review): `onlyfiles` is not used in the visible cells — confirm whether it is still needed.
onlyfiles = [f for f in listdir(TEST_IM) if isfile(join(TEST_IM, f))]
for i in X_test:
    # Add a leading batch axis so a single image can be passed to predict().
    x = np.expand_dims(i, axis=0)
    comb_pred = np.squeeze(model.predict(x, verbose=0))
    # Channel 3 is the channel counted as ecDNA in comp_weights().
    plt.imshow(comb_pred[...,3])
    plt.show()
    # Only the first test image is displayed.
    break

In [6]:
# Record the Keras version this notebook was run with.
import keras; print(keras.__version__)

2.2.4
