In [1]:
import os
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("ggplot")
%matplotlib inline
import cv2
import random
import matplotlib.image as mpimg

from tqdm import tqdm_notebook, tnrange
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

import tensorflow as tf

from keras.models import Model, load_model
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D
from keras.layers.merge import concatenate, add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.python.client import device_lib

import imageio
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa

def get_available_gpus():
    """Return the names of all local devices that TensorFlow reports as GPUs.

    Returns:
        list: device names (as given by ``device_lib.list_local_devices()``)
        whose ``device_type`` is ``'GPU'``; empty when no GPU is listed.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names

Using TensorFlow backend.


In [2]:
class ImgMaskGenerator(object):
    """Loads image/mask pairs from ``<dirpath>/images`` and ``<dirpath>/masks``.

    A mask is looked up under the same file name as its image. Call
    ``prepare_data()`` first: it resolves the two folders and creates the
    train/validation split that every other method relies on.
    """

    def __init__(self,
                 dirpath='./',
                 img_w=1024, img_h=1024,
                 batch_size=3,
                 img_c = 3,
                 verbose=1):
        """Store the configuration.

        Args:
            dirpath: dataset root containing ``images/`` and ``masks/``.
            img_w, img_h: size every image and mask is resized to.
            batch_size: number of samples per generated batch.
            img_c: number of colour channels of the input images.
            verbose: accepted for compatibility; not used by the constructor.
        """
        # configurations
        self.HEIGHT     = img_h
        self.WIDTH      = img_w
        self.BATCH_SIZE = batch_size
        self.COLORS     = img_c
        self.DIRPATH    = dirpath

    @staticmethod
    def _steps_per_epoch(n_items, batch_size):
        """Number of batches needed to cover ``n_items`` (ceiling division)."""
        return (n_items + batch_size - 1) // batch_size

    def _save_pair(self, image, mask):
        """Write one image/mask pair (0-255 arrays) under a shared random name."""
        rhash = random.getrandbits(128)
        mpimg.imsave("augdataset/images/{}.jpeg".format(rhash), image/255)
        mpimg.imsave("augdataset/masks/{}.jpeg".format(rhash), mask/255)

    def _augment_and_save(self, images, segmaps):
        """Augment a chunk of images and masks with one shared pipeline and save them."""
        augmenters_imgs = [
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-45, 45),
                shear=(-16, 16),
                order=[0, 1]
            ),
            iaa.Crop(px=(0, 10)), # crop images from each side by 0 to 10px (randomly chosen)
            iaa.Fliplr(0.5), # horizontally flip 50% of the images
            iaa.GaussianBlur(sigma=(0, 3.0)) # blur images with a sigma of 0 to 3.0
        ]
        seq_imgs = iaa.Sequential(augmenters_imgs, random_order=False)
        # The deterministic copy is applied to both lists so that each mask is
        # transformed together with its image.
        seq_imgs_deterministic = seq_imgs.to_deterministic()

        imgs_aug = seq_imgs_deterministic.augment_images(images)
        masks_aug = seq_imgs_deterministic.augment_images(segmaps)

        for ima, ma in zip(imgs_aug, masks_aug):
            self._save_pair(ima, ma)

    def augaug(self, partSize = 100):
        """Write the original and an augmented copy of every pair to ``augdataset/``.

        Each mask is cleaned with a morphological open + close (15x15 ellipse)
        before saving. Augmentation runs in chunks of ``partSize`` pairs so only
        one chunk is held in memory; the final, shorter chunk is processed too.

        Args:
            partSize: number of image/mask pairs augmented per chunk.
        """
        ia.seed(1)
        images = []
        segmaps = []

        # imsave does not create missing folders.
        for out_dir in ("augdataset/images", "augdataset/masks"):
            os.makedirs(out_dir, exist_ok=True)

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)) # find coef

        self.ids = self.ids_train + self.ids_valid
        for n, _id in enumerate(self.ids):
            print(n, _id)
            # Load image
            x_img = img_to_array(load_img(os.path.join(self.IMAGES_DIR, _id)))

            # Load mask and remove small specks / holes
            mask = img_to_array(load_img(os.path.join(self.MASKS_DIR, _id)))
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

            # Keep the un-augmented pair in the output set as well
            self._save_pair(x_img, mask)

            images.append(x_img)
            segmaps.append(mask)

            if len(images) == partSize:
                self._augment_and_save(images, segmaps)
                images  = []
                segmaps = []

        # Pairs left over when the dataset size is not a multiple of partSize
        if images:
            self._augment_and_save(images, segmaps)

    def prepare_data(self, verbose=1):
        """Resolve the dataset folders and split the file names into train/valid.

        Sets ``IMAGES_DIR``, ``MASKS_DIR``, ``ids_train``, ``ids_valid`` and the
        per-epoch step counts ``N_train`` / ``N_valid``.

        Args:
            verbose: when truthy, print the number of steps per split.

        Raises:
            Exception: if the images or masks folder does not exist.
        """
        # check paths
        self.IMAGES_DIR = os.path.join(self.DIRPATH, "images")
        self.MASKS_DIR = os.path.join(self.DIRPATH, "masks")
        if not os.path.exists(self.IMAGES_DIR):
            raise Exception("Path not exists {}!".format(self.IMAGES_DIR))
        if not os.path.exists(self.MASKS_DIR):
            raise Exception("Path not exists {}!".format(self.MASKS_DIR))

        # dataset: names of all files directly inside the images folder.
        # Sorted because os.walk order is filesystem dependent, which would make
        # the seeded split below differ between machines.
        ids = sorted(next(os.walk(self.IMAGES_DIR))[2])

        # Split on train and valid
        self.ids_train, self.ids_valid = train_test_split(ids, test_size=0.1, random_state=66)
        self.N_train = self._steps_per_epoch(len(self.ids_train), self.BATCH_SIZE)
        self.N_valid = self._steps_per_epoch(len(self.ids_valid), self.BATCH_SIZE)
        if verbose:
            print("Train: ", self.N_train, " * ", self.BATCH_SIZE)
            print("Valid: ", self.N_valid, " * ", self.BATCH_SIZE)

    def normalize(self, x_img):
        """Resize an image to the configured (HEIGHT, WIDTH, COLORS) shape."""
        return resize(x_img, (self.HEIGHT, self.WIDTH, self.COLORS))

    def _load_pair(self, _id):
        """Load one image and its grayscale mask, resized and divided by 255."""
        img = load_img(os.path.join(self.IMAGES_DIR, _id))
        x_img = self.normalize(img_to_array(img))

        mask = img_to_array(load_img(os.path.join(self.MASKS_DIR, _id), grayscale=True))
        mask = resize(mask, (self.HEIGHT, self.WIDTH, 1), mode = 'constant', preserve_range = True)
        return x_img/255.0, mask/255.0

    def _load_batch(self, ids):
        """Load the given file names into freshly allocated ``(X, y)`` arrays."""
        X = np.zeros((len(ids), self.HEIGHT, self.WIDTH, self.COLORS), dtype=np.float32)
        y = np.zeros((len(ids), self.HEIGHT, self.WIDTH, 1), dtype=np.float32)
        for n, _id in enumerate(ids):
            X[n], y[n] = self._load_pair(_id)
        return X, y

    def load_all_data(self, mode="valid"):
        """Load a whole split into memory.

        Args:
            mode: ``"train"`` or ``"valid"``; selects which split is loaded.

        Returns:
            tuple: ``(X, y)`` float32 arrays of shape
            ``(n, HEIGHT, WIDTH, COLORS)`` and ``(n, HEIGHT, WIDTH, 1)``.
        """
        assert mode in ["train", "valid"]
        if mode == "train":
            self.ids = self.ids_train
        else:
            self.ids = self.ids_valid

        return self._load_batch(self.ids)

    def generator(self, partSize=20, mode="train"):
        """Endlessly yield ``(X, y)`` batches of ``BATCH_SIZE`` samples.

        Only full batches are produced; file names left over at the end of a
        pass are completed with names from the next pass. Batches are collected
        until more than ``partSize`` are ready and are then yielded one by one.
        A batch containing a file that cannot be loaded is reported and skipped.

        Args:
            partSize: number of batches buffered before yielding starts.
            mode: ``"train"`` or ``"valid"``; selects the split to iterate.

        Raises:
            ValueError: if the selected split is empty (nothing could ever be
                yielded).
        """
        assert mode in ["train", "valid"]

        # Iterate a local reference: self.ids is shared by every generator
        # created from this object, so reading it on each pass would make the
        # training generator walk the validation ids (and vice versa).
        ids = self.ids_train if mode == "train" else self.ids_valid
        self.ids = ids
        if not ids:
            raise ValueError("No images available for mode '{}'".format(mode))

        parts = []
        buff = []
        while True:
            for x in ids:
                buff.append(x)
                if len(buff) < self.BATCH_SIZE:
                    continue

                batch_ids, buff = buff, []
                try:
                    X, y = self._load_batch(batch_ids)
                except Exception as exc:
                    # Best effort: report the offending batch and move on
                    # instead of emitting a half-filled one.
                    print("Skipping batch {}: {}".format(batch_ids, exc))
                    continue

                parts.append((X, y))
                if len(parts) > partSize:
                    for part in parts:
                        yield part
                    parts = []

In [3]:
class UNET(ImgMaskGenerator):
    """U-Net segmentation model trained on the image/mask generators of the base class.

    Typical use: ``create_model()`` -> ``prepare(dirpath)`` -> ``fit()``;
    ``load_last()`` restores the best weights saved during training.
    """

    def __init__(self, config=None):
        """Read the configuration.

        Args:
            config: optional dict with any of ``img_h``, ``img_w``,
                ``batch_size``, ``img_c``, ``epochs`` and ``dirpath``;
                missing keys fall back to the defaults below.
        """
        config = config or {}
        # configurations
        self.HEIGHT     = config.get("img_h", 1024)
        self.WIDTH      = config.get("img_w", 1024 + 512)
        self.BATCH_SIZE = config.get("batch_size", 3)
        self.COLORS     = config.get("img_c", 3)
        self.DIRPATH    = config.get("dirpath", './')

        self.EPOCHS     = config.get("epochs", 50)

    def conv2d_block(self, input_tensor, n_filters, kernel_size = 3, batchnorm = True, colors=1):
        """Stack two Conv2D (+ optional BatchNormalization) + ReLU layers.

        NOTE: the second convolution now consumes the output of the first one.
        Previously it was wired to ``input_tensor``, which left the first
        convolution out of the graph; weight files saved from that graph do not
        match this architecture.

        Args:
            input_tensor: tensor the block is applied to.
            n_filters: number of filters of both convolutions.
            kernel_size: side length of the square kernel.
            batchnorm: insert BatchNormalization after each convolution.
            colors: unused; kept for signature compatibility.

        Returns:
            Output tensor of the second activation.
        """
        x = input_tensor
        for _ in range(2):
            x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),
                       kernel_initializer = 'he_normal', padding = 'same')(x)
            if batchnorm:
                x = BatchNormalization()(x)
            x = Activation('relu')(x)

        return x

    def get_unet(self, input_img, n_filters = 16, dropout = 0.1, batchnorm = True, colors=3):
        """Build the U-Net graph on top of ``input_img``.

        Four down-sampling stages (conv block, 2x2 max-pool, dropout), a
        bottleneck block, and four up-sampling stages (transposed conv,
        concatenation with the matching encoder output, dropout, conv block),
        finished by a 1x1 sigmoid convolution with a single output channel.

        Args:
            input_img: Keras input tensor.
            n_filters: filters of the first stage; doubled at every level.
            dropout: dropout rate used after every pooling / concatenation.
            batchnorm: forwarded to ``conv2d_block``.
            colors: forwarded to ``conv2d_block`` (where it is unused).

        Returns:
            keras ``Model`` mapping ``input_img`` to the predicted mask.
        """
        # Contracting Path
        skips = []
        x = input_img
        for mult in (1, 2, 4, 8):
            c = self.conv2d_block(x, n_filters * mult, kernel_size = 3, batchnorm = batchnorm, colors=colors)
            skips.append(c)
            x = MaxPooling2D((2, 2))(c)
            x = Dropout(dropout)(x)

        x = self.conv2d_block(x, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm, colors=colors)

        # Expansive Path: deepest skip connection first
        for mult, skip in zip((8, 4, 2, 1), reversed(skips)):
            u = Conv2DTranspose(n_filters * mult, (3, 3), strides = (2, 2), padding = 'same')(x)
            u = concatenate([u, skip])
            u = Dropout(dropout)(u)
            x = self.conv2d_block(u, n_filters * mult, kernel_size = 3, batchnorm = batchnorm)

        outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)
        model = Model(inputs=[input_img], outputs=[outputs])
        return model

    def detect(self, imgs):
        """Predict masks and return one filled convex-hull image per detected contour.

        Each prediction is thresholded at 0.5, cleaned with a morphological
        opening, resized back to the height/width of its source image, and every
        contour found is drawn as a filled convex hull on its own black canvas
        of the source image's shape.

        NOTE(review): inputs are only resized here, while the training loaders
        additionally divide by 255 -- confirm the value range callers pass in.

        Args:
            imgs: sequence of image arrays (anything ``normalize`` accepts).

        Returns:
            list: uint8 arrays, one per contour, over all input images.
        """
        res = []
        if len(imgs) == 0:
            return res

        X = np.zeros((len(imgs), self.HEIGHT, self.WIDTH, self.COLORS), dtype=np.float32)
        for i, img in enumerate(imgs):
            X[i] = self.normalize(img)
        preds = self.model.predict(X)

        preds_t = (preds > 0.5).astype(np.uint8)

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)) # find coef
        # Pair every prediction with its own source image so that sizes are
        # taken from the right input.
        for pred, img in zip(preds_t, imgs):
            pred = cv2.cvtColor(pred, cv2.COLOR_GRAY2RGB)*255

            # clear mask
            pred = cv2.morphologyEx(pred, cv2.MORPH_OPEN, kernel)

            pred = resize(pred, (img.shape[0], img.shape[1], 3)).astype(np.float32)

            pred = cv2.cvtColor(pred*255, cv2.COLOR_RGB2GRAY)

            _, thresh = cv2.threshold(pred.astype(np.uint8), 127, 255, 0)
            # findContours returns 3 values in OpenCV 3.x and 2 in 4.x;
            # the contours are second to last in both.
            contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

            for c in contours:
                new_img = np.zeros(img.shape, np.uint8)
                cv2.fillConvexPoly(new_img, np.array(cv2.convexHull(c), 'int32'), (255, 255, 255))
                res.append(new_img)

        return res

    def create_model(self):
        """Build and compile the model (Adam, binary cross-entropy, accuracy).

        Returns:
            The compiled model, also stored as ``self.model``.
        """
        input_img = Input((self.HEIGHT, self.WIDTH, self.COLORS), name='img')
        self.model = self.get_unet(input_img, n_filters=16, dropout=0.05, batchnorm=True)
        self.model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=["accuracy"])
        return self.model

    def prepare(self, dirpath, verbose=1):
        """Point the loader at ``dirpath`` and create the train/valid generators."""
        self.DIRPATH = dirpath
        self.prepare_data(verbose=verbose)

        self.train_gen = self.generator(mode="train")
        self.valid_gen = self.generator(mode="valid")

    def fit(self, tmp_path='./tmp_model.h5', epochs=None, verbose=1):
        """Train on the prepared generators.

        Uses early stopping (patience 20), learning-rate reduction on plateau
        (factor 0.1, patience 5) and saves the best weights to ``tmp_path``.

        Args:
            tmp_path: file the best weights are written to.
            epochs: number of epochs; falls back to the configured ``EPOCHS``.
            verbose: verbosity passed to Keras and the callbacks.

        Returns:
            The object returned by ``fit_generator`` (its ``history`` holds the
            per-epoch metrics).
        """
        self.callbacks = [
            EarlyStopping(patience=20, verbose=verbose),
            ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=verbose),
            ModelCheckpoint(tmp_path, verbose=verbose, save_best_only=True, save_weights_only=True)
        ]
        return self.model.fit_generator(    self.train_gen,
                      steps_per_epoch     = self.N_train,
                      epochs              = epochs or self.EPOCHS,
                      verbose             = verbose,
                      callbacks           = self.callbacks,
                      validation_data     = self.valid_gen,
                      validation_steps    = self.N_valid)

    def load_last(self, tmp_path='./tmp_model.h5'):
        """Load weights from ``tmp_path`` into the already created model."""
        self.model.load_weights(tmp_path)

In [4]:
# Dataset root; prepare_data() looks for `images/` and `masks/` sub-folders inside it.
DATASET_DIR = "./augdataset" 

# Only "epochs" is overridden (class default is 50); image size, batch size
# and channel count keep the UNET defaults.
unet = UNET({"epochs": 100})
# Builds and compiles the Keras model; it is stored on the instance as unet.model.
unet.create_model()

W0605 13:13:03.833971 140244803487360 deprecation_wrapper.py:119] From /usr/local/lib64/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0605 13:13:03.841641 140244803487360 deprecation_wrapper.py:119] From /usr/local/lib64/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0605 13:13:03.842151 140244803487360 deprecation_wrapper.py:119] From /usr/local/lib64/python3.7/site-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0605 13:13:03.855051 140244803487360 deprecation_wrapper.py:119] From /usr/local/lib64/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0605 13:13:0

InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [None]:
unet.prepare(DATASET_DIR)

In [None]:
#unet.augaug()

In [None]:
#raise Exception("STOP")

In [None]:
# Resume from the weights at ./tmp_model.h5 (the default path of load_last).
# NOTE(review): on a first run no checkpoint exists yet, so this call presumably
# fails -- confirm, and skip it when training from scratch.
unet.load_last()
# Train on the generators created by unet.prepare(); `results.history` is what
# the learning-curve cell plots.
results = unet.fit()

In [None]:
# Learning curve: training vs. validation loss per epoch, with the epoch of the
# lowest validation loss marked by a red cross.
train_loss = results.history["loss"]
val_loss = results.history["val_loss"]
best_epoch = np.argmin(val_loss)

plt.figure(figsize=(8, 8))
plt.title("Learning curve")
plt.plot(train_loss, label="loss")
plt.plot(val_loss, label="val_loss")
plt.plot(best_epoch, val_loss[best_epoch], marker="x", color="r", label="best model")
plt.xlabel("Epochs")
plt.ylabel("log_loss")
plt.legend();

### Inference

In [None]:
unet.load_last()

In [None]:
X, y = unet.load_all_data(mode="valid")

In [None]:
# Evaluate on the validation set (this should equal the best log_loss)
unet.model.evaluate(X[:20], y[:20], verbose=1)

In [None]:
# Predict val
preds = unet.model.predict(X[:50], verbose=1)

In [None]:
# Threshold predictions
preds_t = (preds > 0.5).astype(np.uint8)

In [None]:
def plot_sample(X, y, preds, binary_preds, ix=None):
    """Plot one sample in four panels: input, mask, prediction, thresholded prediction.

    The ground-truth mask outline (level 0.5) is overlaid on the input and on
    both prediction panels when the mask is not empty.

    Args:
        X: array of input images, indexed by sample along the first axis.
        y: ground-truth masks aligned with ``X``.
        preds: raw model predictions aligned with ``X``.
        binary_preds: thresholded predictions aligned with ``X``.
        ix: index of the sample to show; a random valid index when ``None``.

    Raises:
        ValueError: if ``ix`` is ``None`` and ``X`` is empty.
    """
    if ix is None:
        # randrange excludes len(X); randint(0, len(X)) could return len(X)
        # and fail with an IndexError.
        ix = random.randrange(len(X))

    has_mask = y[ix].max() > 0
    truth = y[ix].squeeze()

    fig, ax = plt.subplots(1, 4, figsize=(20, 10))
    ax[0].imshow(X[ix, ...])
    if has_mask:
        ax[0].contour(truth, colors='k', levels=[0.5])
    ax[0].set_title('Orig')

    ax[1].imshow(truth)
    ax[1].set_title('Np')

    ax[2].imshow(preds[ix].squeeze(), vmin=0, vmax=1)
    if has_mask:
        ax[2].contour(truth, colors='k', levels=[0.5])
    ax[2].set_title('NP Predicted')

    ax[3].imshow(binary_preds[ix].squeeze(), vmin=0, vmax=1)
    if has_mask:
        ax[3].contour(truth, colors='k', levels=[0.5])
    ax[3].set_title('NP Predicted binary');

### Predictions on the validation set

In [None]:
%%time
# Check that the validation data looks all right
plot_sample(X, y, preds, preds_t, ix=14)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
%%time
plot_sample(X, y, preds, preds_t)

In [None]:
class ShuffledSimplePipeline(Pipeline):
    """DALI pipeline that reads files in shuffled order and decodes them to RGB.

    NOTE(review): ``Pipeline``, ``ops`` and ``types`` are imported in a later
    cell of this notebook; that cell has to run before this one.
    """

    def __init__(self, batch_size, num_threads, device_id, image_dir=None):
        """Create the reader and decoder operators.

        Args:
            batch_size, num_threads, device_id: forwarded to ``Pipeline``.
            image_dir: root folder handed to the file reader. When omitted, a
                notebook-level variable named ``image_dir`` is used, which is
                what this class relied on before the parameter existed.

        Raises:
            ValueError: if no image directory is given and no notebook-level
                ``image_dir`` is defined.
        """
        if image_dir is None:
            image_dir = globals().get("image_dir")
        if image_dir is None:
            raise ValueError("image_dir must be passed or defined at notebook level")

        super(ShuffledSimplePipeline, self).__init__(batch_size, num_threads, device_id, seed = 12)
        self.input = ops.FileReader(file_root = image_dir, random_shuffle = True, initial_fill = 21)
        self.decode = ops.HostDecoder(output_type = types.RGB)

    def define_graph(self):
        """Wire reader -> decoder and return ``(images, labels)``."""
        jpegs, labels = self.input()
        images = self.decode(jpegs)
        return (images, labels)

In [None]:
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

class GPUPipeline(Pipeline):
    """DALI pipeline: shuffled file reader -> RGB decode -> random rotation on the GPU."""

    def __init__(self, image_dir, batch_size, num_threads, device_id):
        """Create the operators.

        Args:
            image_dir: root folder handed to the file reader.
            batch_size, num_threads, device_id: forwarded to ``Pipeline``.
        """
        super().__init__(batch_size, num_threads, device_id, seed=12)
        self.input = ops.FileReader(
            file_root=image_dir,
            random_shuffle=True,
            initial_fill=21,
        )
        self.decode = ops.HostDecoder(output_type=types.RGB)
        self.rotate = ops.Rotate(device="gpu")
        # Source of the rotation angle: uniform over the range -10.0 .. 10.0
        self.rng = ops.Uniform(range=(-10.0, 10.0))

    def define_graph(self):
        """Wire the operators together and return ``(rotated_images, labels)``."""
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        angle = self.rng()
        # Decoded images are moved to the GPU before being rotated.
        rotated = self.rotate(decoded.gpu(), angle=angle)
        return rotated, labels

In [None]:
pipe = GPUPipeline("dataset", 32, 1, 0)
pipe.build()

In [None]:
images, labels = pipe.run()

In [None]:
plt.imshow(images.as_cpu().at(1))

In [None]:
a = np.array([[1]])

In [None]:
a = a[:]

In [None]:
a

In [None]:
import keras
keras.utils.to_categorical([[[1]]], num_classes=None)

In [None]:
[[[1]]]