In [4]:
import numpy as np
import pandas as pd

import io
import tifffile as tiff
import zipfile

import skimage.transform
import shapely.wkt # shapely.wkt.loads()
import shapely.geometry 
import cv2

from matplotlib import pyplot as plt

# Specify that this notebook should only use the CPU
# This avoids memory problems if the model is running on the GPU elsewhere
# Note: do not try to train / predict as this will likely be very slow
import os
os.environ["THEANO_FLAGS"] = "device=cpu"

from keras.models import Model, load_model
from keras.layers import Input, merge, Activation, Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D, Cropping2D, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras import backend as K
from keras.regularizers import l2
from keras.constraints import maxnorm
from keras.layers.advanced_activations import LeakyReLU
K.set_image_dim_ordering('th')  # Theano dimension ordering in this code
# "tf" assumes (rows, cols, channels) while "th" assumes (channels, rows, cols)
# Possibly change this around natively in the data so the backend doesn't have to switch them
# Only necessary if I use TF!

from matplotlib import pyplot as plt
from pushbullet import Pushbullet

from collections import defaultdict

Using TensorFlow backend.


In [2]:
# Both CSVs are read with their first row skipped and explicit column names
# supplied in its place.
sample = pd.read_csv(
    './data/sample_submission.csv',
    skiprows=1,
    names=['imageID', 'feature', 'wkt'],
)
grid_sizes = pd.read_csv(
    './data/grid_sizes.csv',
    skiprows=1,
    names=['imageID', 'xmax', 'ymin'],
)

# Key under which predictor() stores each image's WKT string.
# NOTE(review): the model file loaded further down is named "...class_2.h5" --
# confirm that 0 is really the class this notebook is meant to predict.
class_ = 0

In [17]:
# The file is expected to hold a single pickled Python object (unwrapped from
# its 0-d array by .item()); predictor() indexes the result by imageID.
# NumPy >= 1.16.3 refuses to unpickle object arrays unless allow_pickle=True
# is passed explicitly, so without it this cell raises ValueError.
# SECURITY: unpickling can execute arbitrary code -- only load files that were
# produced locally by a trusted preprocessing step.
test_imgs = np.load('./data/submission_images_processed.npy', allow_pickle=True).item()

In [None]:
def jaccard(y_true, y_pred, smooth=1.):
    """Smoothed Jaccard index (intersection over union) of two tensors.

    Both inputs are flattened with the Keras backend, so a single score is
    computed over every element at once.

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted tensor.
        smooth: constant added to numerator and denominator; with the default
            of 1 the ratio stays defined when both tensors sum to zero.

    Returns:
        (intersection + smooth) / (sum(y_true) + sum(y_pred) - intersection + smooth)
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)

    # Element-wise product summed up acts as the (soft) intersection size.
    overlap = K.sum(truth * pred)
    union = K.sum(truth) + K.sum(pred) - overlap

    return (overlap + smooth) / (union + smooth)
    
# Load the saved model from disk. custom_objects maps the name 'jaccard' to the
# function defined in this notebook so Keras can resolve it while rebuilding
# the model.
model = load_model(
    'u-net-complete-model-run_88-class_2.h5',
    custom_objects={'jaccard': jaccard},
)

In [None]:
def make_polygons(msk, epsilon=5, min_area=1.):
    """Convert a binary mask into a shapely MultiPolygon.

    Contours are traced with OpenCV, simplified with cv2.approxPolyDP and then
    assembled into polygons whose holes are the inner contours reported by
    OpenCV's two-level (RETR_CCOMP) hierarchy.

    Args:
        msk: 2-D array; pixels equal to 1 are treated as foreground.
        epsilon: approximation accuracy passed to cv2.approxPolyDP. Larger
            values give coarser polygons with fewer vertices.
        min_area: shells and holes whose cv2.contourArea is below this value
            are dropped as artifacts.

    Returns:
        shapely.geometry.MultiPolygon in pixel coordinates. It is empty when
        no contour is found. If the assembled geometry is invalid it is
        repaired with buffer(0) before being returned.
    """
    binary = ((msk == 1) * 255).astype(np.uint8)

    # First, find contours with cv2: it's much faster than shapely.
    # cv2.RETR_CCOMP organises contours in two levels: outer boundaries and holes.
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x return
    # (contours, hierarchy); taking the last two items works for all of them.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]

    # Nothing traced (hierarchy is None in that case): return an empty geometry.
    if len(contours) == 0 or hierarchy is None:
        return shapely.geometry.MultiPolygon()

    # Simplify the contours so the resulting submission has a reasonable size.
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours]

    # The hierarchy is expected to have shape (1, n_contours, 4).
    assert hierarchy.shape[0] == 1

    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    # Each hierarchy row is [Next, Previous, First_Child, Parent]; a Parent of
    # -1 marks an outer contour, anything else marks a hole of that parent.
    cnt_children = defaultdict(list)   # parent index -> list of hole contours
    child_contours = set()             # indices of contours that are holes
    for idx, parent_idx in enumerate(hierarchy[0][:, 3]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[int(parent_idx)].append(approx_contours[idx])

    def _usable(ring):
        # A ring needs at least three vertices to form a polygon; without this
        # check a degenerate contour crashes shapely when min_area <= 0.
        return len(ring) >= 3 and cv2.contourArea(ring) >= min_area

    # Build the actual polygons, filtering by area to remove artifacts.
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx in child_contours or not _usable(cnt):
            continue

        # OpenCV contours have shape (n_points, 1, 2).
        assert cnt.shape[1] == 1

        holes = [c[:, 0, :] for c in cnt_children.get(idx, []) if _usable(c)]
        all_polygons.append(
            shapely.geometry.Polygon(shell=cnt[:, 0, :], holes=holes))

    all_polygons = shapely.geometry.MultiPolygon(all_polygons)

    # Approximating the contours might have created invalid polygons; a
    # zero-distance buffer "tidies" them.
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)

        # buffer() may collapse the result to a single Polygon; keep the return
        # type a MultiPolygon throughout.
        if all_polygons.geom_type == 'Polygon':
            if all_polygons.is_empty:
                all_polygons = shapely.geometry.MultiPolygon()
            else:
                all_polygons = shapely.geometry.MultiPolygon([all_polygons])
    return all_polygons

In [None]:
def predictor(threshold=None):
    """Predict, polygonise and save a mask for every image in the sample submission.

    For each unique imageID in the notebook-level `sample` frame this:
      1. fetches the image from `test_imgs` and runs `model.predict` on it,
      2. binarises the prediction at `threshold`,
      3. converts the binary mask to polygons with make_polygons(),
      4. rescales the polygons using that image's xmax / ymin from `grid_sizes`,
      5. stores the WKT string under wkts[imageID][class_].

    Side effects:
        Writes the binary masks to './data/submisison_msks.npy' and the WKT
        dictionary to './data/submisison_wkts.npy'. If the WKT file already
        exists it is loaded first and extended.

    Args:
        threshold: probability cut-off for the mask. When None, a notebook-level
            variable named `threshold` is used if one exists, otherwise 0.5.

    Returns:
        None.

    Raises:
        ValueError: if the prediction for an image cannot be reduced to a
            single 2-D mask.
    """
    # The import cell only pulls in shapely.wkt and shapely.geometry, so
    # shapely.affinity is imported here rather than relied on implicitly.
    from shapely.affinity import scale as scale_geometry

    if threshold is None:
        # Keeps working with notebooks that define `threshold` in another cell.
        threshold = globals().get('threshold', 0.5)

    wkts_path = './data/submisison_wkts.npy'
    if os.path.exists(wkts_path):
        # allow_pickle is required by NumPy >= 1.16.3 for pickled dicts.
        # SECURITY: only safe because this file is written by this function.
        wkts = np.load(wkts_path, allow_pickle=True).item()
    else:
        wkts = {}

    msks = {}
    for imageID in sorted(np.unique(sample.imageID)):
        img = test_imgs[imageID]

        # Keras expects a leading batch axis.
        # NOTE(review): assumes channels-first images, i.e. (channels, rows, cols)
        # as configured by K.set_image_dim_ordering('th') -- confirm.
        batch = img[np.newaxis] if img.ndim == 3 else img
        prediction = model.predict(batch)

        # Threshold the mask; it is stored in the shape returned by the model.
        msk = (prediction >= threshold).astype(prediction.dtype)
        msks[imageID] = msk

        # cv2.findContours needs a single 2-D image, so drop length-1 axes.
        msk_2d = np.squeeze(msk)
        if msk_2d.ndim != 2:
            raise ValueError(
                'Expected a single 2-D mask for image %s, got shape %s'
                % (imageID, msk.shape))

        # Make polygons
        polygons = make_polygons(msk_2d)

        # Scale polygons from pixel coordinates to the image's grid coordinates.
        # Floats avoid integer division (w / (w + 1) == 0 under Python 2).
        # NOTE(review): the scale uses the input image size; confirm the model
        # output has the same height and width as its input.
        img_h, img_w = (float(dim) for dim in img.shape[-2:])
        xmax, ymin = grid_sizes[grid_sizes.imageID == imageID].iloc[0, 1:].astype(float)
        xfact = xmax / (img_w * (img_w / (img_w + 1)))
        yfact = ymin / (img_h * (img_h / (img_h + 1)))

        polygons = scale_geometry(polygons, xfact=xfact, yfact=yfact, origin=(0, 0, 0))

        # Make WKTs
        wkt = shapely.wkt.dumps(polygons)

        # Update only this class so entries for other classes that were loaded
        # from the existing file are preserved.
        wkts.setdefault(imageID, {})[class_] = wkt

    np.save('./data/submisison_msks.npy', msks)
    np.save(wkts_path, wkts)