In [1]:
import errno
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from shapely.wkt import loads as wkt_loads
import tifffile as tiff
import os
import random
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as K
from keras.models import load_model
from sklearn.metrics import jaccard_similarity_score
from shapely.geometry import MultiPolygon, Polygon
import shapely.wkt
import shapely.affinity
from collections import defaultdict

import csv
import glob
import gdal

from datetime import datetime

from keras import __version__
print(__version__)

Using TensorFlow backend.


2.0.5


In [2]:
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

K.set_image_data_format('channels_first')

N_Cls = 1
data_base_dir = "/home/ubuntu/data/"
inDir = './'
mkdir_p(inDir + '/data')
out_dir = 'output/'
mkdir_p(out_dir)

print(os.getcwd())
DF = pd.read_csv(data_base_dir+'train_wkt_v4_TREES.csv')
val_DF = pd.read_csv(data_base_dir+'val_wkt_v4_TREES.csv')
GS = pd.read_csv(data_base_dir+'grid_sizes.csv', names=['ImageId', 'Xmax', 'Ymin'], skiprows=1)
SB = pd.read_csv(data_base_dir+'sample_submission.csv')

ISZ = 160
smooth = 1e-12

EPSILON = 2  # polygon edge smoothing factor (higher=less nodes, i.e. less detail)
MIN_AREA = 5.  # smallest area a polygon can be

val_img_names = ["6010_4_4", "6070_2_3", "6100_2_3", "6140_1_2", "6110_4_0"]

/home/ubuntu/git/kaggle-dstl/keras2-baseline-improvements


# Image preprocessing

## Create indexes (all test/train/val TIFFs)

In [3]:
def get_bands(src):
    red = src.GetRasterBand(5)
    nir = src.GetRasterBand(7)
    blue = src.GetRasterBand(2)

    r_arr = red.ReadAsArray()
    nir_arr = nir.ReadAsArray()
    b_arr = blue.ReadAsArray()

    r_arr = r_arr.astype(np.float32)
    nir_arr = nir_arr.astype(np.float32)
    b_arr = b_arr.astype(np.float32)

    return r_arr, nir_arr, b_arr


def ndvi_calc(red, nir):
    # https://en.wikipedia.org/wiki/Normalized_Difference_Vegetation_Index
    return ((nir - red) / (nir + red))


def evi_calc(r_arr, nir_arr, b_arr,
             canopy_background_adjustment,
             coefficient_1_of_the_aerosol_resistance_term,
             coefficient_2_of_the_aerosol_resistance_term,
             gain_factor):
    # https://en.wikipedia.org/wiki/Enhanced_vegetation_index
    L = canopy_background_adjustment
    C1 = coefficient_1_of_the_aerosol_resistance_term
    C2 = coefficient_2_of_the_aerosol_resistance_term
    G = gain_factor
    return G * (nir_arr - r_arr) / (nir_arr + C1*r_arr - C2*b_arr + L)


def get_scalers(x_max, y_min, im_size):
    h, w = im_size  # they are flipped so that mask_for_polygons works correctly
    w_ = w * (w / (w + 1))
    h_ = h * (h / (h + 1))
    return w_ / x_max, h_ / y_min


# "With the value for L used, this is essentially the same computation as NDVI - hence commented out.
# (indeed, the visualisation of the index bands indicate the similarity)."
def savi_calc(r_arr, nir_arr, canopy_background_adjustment_factor):
    # https: // en.wikipedia.org / wiki / Soil - Adjusted_Vegetation_Index   
    L = canopy_background_adjustment_factor
    return ((1 + L)*(nir_arr - r_arr)) / (nir_arr + r_arr + L)



def get_bands(src):
    red = src.GetRasterBand(5)
    nir = src.GetRasterBand(7)
    blue = src.GetRasterBand(2)

    r_arr = red.ReadAsArray()
    nir_arr = nir.ReadAsArray()
    b_arr = blue.ReadAsArray()

    r_arr = r_arr.astype(np.float32)
    nir_arr = nir_arr.astype(np.float32)
    b_arr = b_arr.astype(np.float32)

    return r_arr, nir_arr, b_arr


def get_xmax_ymin(imageid):
    for _im_id, _x, _y in csv.reader(open(data_base_dir + "/grid_sizes.csv")):
        if _im_id == imageid:
            x_max, y_min = float(_x), float(_y)
            return x_max, y_min


def create_indexes(image_id):
#     ndvi_all = []
#     evi_all = []
#     savi_all = []
    
    m_path = data_base_dir + "sixteen_band/{}_M.tif".format(image_id)
    imageid = m_path.split("/")[-1].split("_M.tif")[0]

    src = gdal.Open(m_path)

    r_arr, nir_arr, b_arr = get_bands(src)

    im_size = r_arr.shape

    x_max, y_min = get_xmax_ymin(imageid)

    x_scaler, y_scaler = get_scalers(x_max, y_min, im_size)

    ndvi = ndvi_calc(r_arr, nir_arr)
    evi = evi_calc(r_arr, nir_arr, b_arr,
         canopy_background_adjustment=1,
         coefficient_1_of_the_aerosol_resistance_term=6,
         coefficient_2_of_the_aerosol_resistance_term=7.5,
         gain_factor=2.5)
    savi = savi_calc(r_arr, nir_arr, canopy_background_adjustment_factor=0.5)

    return ndvi, savi, evi
#     ndvi_all.append(ndvi)
#     evi_all.append(evi)
#     savi_all.append(savi)

#     np.save(inDir + '/data/ndvi_all', np.array(ndvi_all))
#     np.save(inDir + '/data/evi_all', np.array(evi_all))
#     np.save(inDir + '/data/savi_all', np.array(savi_all))

## tweaked code for including indexes

In [4]:
def _convert_coordinates_to_raster(coords, img_size, xymax):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    Xmax, Ymax = xymax
    H, W = img_size
    W1 = 1.0 * W * W / (W + 1)
    H1 = 1.0 * H * H / (H + 1)
    xf = W1 / Xmax
    yf = H1 / Ymax
    coords[:, 1] *= yf
    coords[:, 0] *= xf
    coords_int = np.round(coords).astype(np.int32)
    return coords_int


def _get_xmax_ymin(grid_sizes_panda, imageId):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    xmax, ymin = grid_sizes_panda[grid_sizes_panda.ImageId == imageId].iloc[0, 1:].astype(float)
    return (xmax, ymin)


def _get_polygon_list(wkt_list_pandas, imageId, cType):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    df_image = wkt_list_pandas[wkt_list_pandas.ImageId == imageId]
    multipoly_def = df_image[df_image.ClassType == cType].MultipolygonWKT
    polygonList = None
    if len(multipoly_def) > 0:
        assert len(multipoly_def) == 1
        polygonList = wkt_loads(multipoly_def.values[0])
    return polygonList


def _get_and_convert_contours(polygonList, raster_img_size, xymax):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    perim_list = []
    interior_list = []
    if polygonList is None:
        return None
    for k in range(len(polygonList)):
        poly = polygonList[k]
        perim = np.array(list(poly.exterior.coords))
        perim_c = _convert_coordinates_to_raster(perim, raster_img_size, xymax)
        perim_list.append(perim_c)
        for pi in poly.interiors:
            interior = np.array(list(pi.coords))
            interior_c = _convert_coordinates_to_raster(interior, raster_img_size, xymax)
            interior_list.append(interior_c)
    return perim_list, interior_list


def _plot_mask_from_contours(raster_img_size, contours, class_value=1):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    img_mask = np.zeros(raster_img_size, np.uint8)
    if contours is None:
        return img_mask
    perim_list, interior_list = contours
    cv2.fillPoly(img_mask, perim_list, class_value)
    cv2.fillPoly(img_mask, interior_list, 0)
    return img_mask


def generate_mask_for_image_and_class(raster_size, imageId, class_type, grid_sizes_panda=GS, wkt_list_pandas=DF):
    # __author__ = visoft
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    xymax = _get_xmax_ymin(grid_sizes_panda, imageId)  # for scaling according to competition
    polygon_list = _get_polygon_list(wkt_list_pandas, imageId, class_type)  # read (training) polygon data
    contours = _get_and_convert_contours(polygon_list, raster_size, xymax)  # creating outline from vector nodes
    mask = _plot_mask_from_contours(raster_size, contours, 1)  # filling in polygon outlines (i.e. creating the mask)
    return mask


def M(image_id):
    # __author__ = amaia
    # https://www.kaggle.com/aamaia/dstl-satellite-imagery-feature-detection/rgb-using-m-bands-example
    filename = os.path.join(data_base_dir, 'sixteen_band', '{}_M.tif'.format(image_id))
    img = tiff.imread(filename)
    img = np.rollaxis(img, 0, 3)
    return img


def stretch_n(bands, lower_percent=0, higher_percent=100): # <- "Claims to improve" (was lower_percent=5, higher_percent=95)
    # "Contrast enhancement", see https://www.kaggle.com/aamaia/rgb-using-m-bands-example
    out = np.zeros_like(bands).astype(np.float32)
    n = bands.shape[2]
    for i in range(n):
        a = 0  # np.min(band)
        b = 1  # np.max(band)
        c = np.percentile(bands[:, :, i], lower_percent)
        d = np.percentile(bands[:, :, i], higher_percent)
        t = a + (bands[:, :, i] - c) * (b - a) / (d - c)
        t[t < a] = a
        t[t > b] = b
        out[:, :, i] = t

    return out.astype(np.float32)


def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 8+3))     # +3 for the 3 indexes (ndvi, savi, evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 9] = savi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 10] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)

    
def get_patches(img, msk, amt=10000, aug=True):
    is2 = int(1.0 * ISZ)  # ISZ=is2=patch size
    xm, ym = img.shape[0] - is2, img.shape[1] - is2  # starting coordinate for getting patches (taking into consideration padding)

    x, y = [], []

    # threshold determining when to add an image - if the image contains a greater percentage of pixels in the training
    # set than specified by the thresholds below:
    tr = [0.3]
    
    count = 0

    for i in range(amt):
        do_we_append = False
        
        # get random point in inner image (i.e exluding the outer image padding)
        xc = random.randint(0, xm)
        yc = random.randint(0, ym)

        # get patch
        im = img[xc:xc + is2, yc:yc + is2]
        ms = msk[xc:xc + is2, yc:yc + is2]

        for j in range(N_Cls):
            # sum pixels containing class j
            sm = np.sum(ms[:, :, j])
            if aug:
                if random.uniform(0, 1) > 0.5:
                    im = im[::-1]
                    ms = ms[::-1]
                if random.uniform(0, 1) > 0.5:
                    im = im[:, ::-1]
                    ms = ms[:, ::-1]

            # calculate the percent of covered pixels (for one class) - check if greater than threshold.
            if 1.0 * sm / is2 ** 2 > tr[j]:
                do_we_append = True
            
            # Add "blank ones" with probability 0.1
            elif sm < 500:
                do_we_append = True
                count += 1
                print("Add blank, idx={}, count={}".format(i, count))
            
        if do_we_append:
            x.append(im)
            y.append(ms)

    x, y = 2 * np.transpose(x, (0, 3, 1, 2)) - 1, np.transpose(y, (0, 3, 1, 2))  # Maybe some sort of standardisation/normalisation
    print(x.shape, y.shape, np.amax(x), np.amin(x), np.amax(y), np.amin(y))
    return x, y

In [None]:
 stick_all_train()  # sticking all training images in 5x5x10 grid (i.e. 5*M_pixels x 5*M_pixels x 10 classes)
                    # plus indexes

## Create custom validation set

In [5]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=3000, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 8+3))     # +3 for the 3 indexes (ndvi, savi, evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 9] = savi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 10] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [34]:
# DONE IN BASELINE
stick_all_val()
make_val()

let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0




(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 11)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for validation
Add blank, idx=13, count=1
Add blank, idx=34, count=2
Add blank, idx=46, count=3
Add blank, idx=61, count=4
Add blank, idx=64, count=5
Add blank, idx=65, count=6
Add blank, idx=67, count=7
Add blank, idx=77, count=8
Add blank, idx=82, count=9
Add blank, idx=94, count=10
Add blank, idx=96, count=11
Add blank, idx=98, count=12
Add blank, idx=102, count=13
Add blank, idx=104, count=14
Add blank, idx=109, count=15
Add blank, idx=121, count=16
Add blank, idx=122, count=17
Add blank, idx=125, count=18
Add blank, idx=139, count=19
Add blank, idx=147, count=20
Add blank, idx=150, count=21
Add blank, idx=158, count=22
Add blank, idx=159, count=23
Add blank, idx=174, count=24
Add blank, idx=177, count=25
Add blank, idx=178, count=26
Add blank, idx=181, count=27
Add blank, i

(726, 11, 160, 160) (726, 1, 160, 160) inf -inf 1.0 0.0
Validation data shapes:
(726, 11, 160, 160)
(726, 1, 160, 160)


## Visualise indexes

In [35]:
x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)


In [41]:
import scipy.misc

def save_visual_msk_patch(np_arr, folder, sample_idx=0):
    scipy.misc.imsave("visualise/{}/{}msk.bmp".format(folder, sample_idx), np_arr[sample_idx][0])

def save_visual_img_patch(np_arr, folder, sample_idx=0, band=0):
    scipy.misc.imsave("visualise/{}/{}img_{}.bmp".format(folder, sample_idx, band), np_arr[sample_idx][band])

In [42]:
mkdir_p("visualise/trn")
mkdir_p("visualise/trn/ndvi")
mkdir_p("visualise/trn/savi")
mkdir_p("visualise/trn/evi")

x_trn, y_trn = get_patches(img, msk, amt=6000, aug=False)
for i in range(x_trn.shape[0]):
    save_visual_img_patch(x_trn, "trn", sample_idx=i)
    save_visual_img_patch(x_trn, "trn/ndvi", sample_idx=i, band=8)
    save_visual_img_patch(x_trn, "trn/savi", sample_idx=i, band=9)
    save_visual_img_patch(x_trn, "trn/evi", sample_idx=i, band=10)
    save_visual_msk_patch(y_trn, "trn", sample_idx=i)

print("Training data shapes:")
print(x_trn.shape)
print(y_trn.shape)

Add blank, idx=1, count=1
Add blank, idx=3, count=2
Add blank, idx=20, count=3
Add blank, idx=78, count=4
Add blank, idx=83, count=5
Add blank, idx=96, count=6
Add blank, idx=103, count=7
Add blank, idx=115, count=8
Add blank, idx=179, count=9
Add blank, idx=196, count=10
Add blank, idx=267, count=11
Add blank, idx=303, count=12
Add blank, idx=332, count=13
Add blank, idx=340, count=14
Add blank, idx=362, count=15
Add blank, idx=366, count=16
Add blank, idx=374, count=17
Add blank, idx=419, count=18
Add blank, idx=428, count=19
Add blank, idx=452, count=20
Add blank, idx=518, count=21
Add blank, idx=522, count=22
Add blank, idx=641, count=23
Add blank, idx=698, count=24
Add blank, idx=797, count=25
Add blank, idx=826, count=26
Add blank, idx=881, count=27
Add blank, idx=919, count=28
Add blank, idx=925, count=29
Add blank, idx=932, count=30
Add blank, idx=944, count=31
Add blank, idx=962, count=32
Add blank, idx=1000, count=33
Add blank, idx=1031, count=34
Add blank, idx=1060, count=35

  bytedata = (data - cmin) * scale + low


Training data shapes:
(965, 11, 160, 160)
(965, 1, 160, 160)


In [43]:
mkdir_p("visualise/val")
mkdir_p("visualise/val/ndvi")
mkdir_p("visualise/val/savi")
mkdir_p("visualise/val/evi")

for i in range(x_val.shape[0]):
    save_visual_img_patch(x_val, "val", sample_idx=i)
    save_visual_img_patch(x_val, "val/ndvi", sample_idx=i, band=8)
    save_visual_img_patch(x_val, "val/savi", sample_idx=i, band=9)
    save_visual_img_patch(x_val, "val/evi", sample_idx=i, band=10)
    save_visual_msk_patch(y_val, "val", sample_idx=i)

print("Validation data shapes:")
print(x_val.shape)
print(y_val.shape)

  bytedata = (data - cmin) * scale + low
  bytedata = (data - cmin) * scale + low


Validation data shapes:
(726, 11, 160, 160)
(726, 1, 160, 160)


SAVI and NDVI yield very similar images - indeed the calucation is almost the same. Remove (the more complication/involved) SAVI computation.

Lets review all bands to see if any in particular highlight the trees well.

In [49]:
folder_names = ["trn/0/",
                "trn/1/",
                "trn/2/",
                "trn/3/",
                "trn/4/",
                "trn/5/",
                "trn/6/",
                "trn/7/",
                "trn/ndvi/",
                "trn/savi/",
                "trn/evi/"]

x_trn, y_trn = get_patches(img, msk, amt=6000, aug=False)
for i in range(x_trn.shape[0]):
    for band_idx, folder_name in enumerate(folder_names):
        mkdir_p("visualise/"+folder_name)        
        
        save_visual_img_patch(x_trn, folder_name, sample_idx=i, band=band_idx)
    
print("Training data shapes:")
print(x_trn.shape)
folder_name = "trn/msk/"
mkdir_p("visualise/"+folder_name)        print(y_trn.shape)

Add blank, idx=41, count=1
Add blank, idx=89, count=2
Add blank, idx=115, count=3
Add blank, idx=118, count=4
Add blank, idx=124, count=5
Add blank, idx=138, count=6
Add blank, idx=178, count=7
Add blank, idx=182, count=8
Add blank, idx=223, count=9
Add blank, idx=231, count=10
Add blank, idx=276, count=11
Add blank, idx=296, count=12
Add blank, idx=321, count=13
Add blank, idx=329, count=14
Add blank, idx=342, count=15
Add blank, idx=370, count=16
Add blank, idx=389, count=17
Add blank, idx=475, count=18
Add blank, idx=481, count=19
Add blank, idx=498, count=20
Add blank, idx=535, count=21
Add blank, idx=553, count=22
Add blank, idx=566, count=23
Add blank, idx=590, count=24
Add blank, idx=594, count=25
Add blank, idx=595, count=26
Add blank, idx=616, count=27
Add blank, idx=629, count=28
Add blank, idx=662, count=29
Add blank, idx=734, count=30
Add blank, idx=741, count=31
Add blank, idx=749, count=32
Add blank, idx=771, count=33
Add blank, idx=796, count=34
Add blank, idx=820, count

(933, 11, 160, 160) (933, 1, 160, 160) inf -3901.0 1.0 0.0


  bytedata = (data - cmin) * scale + low


Training data shapes:
(933, 11, 160, 160)
(933, 1, 160, 160)


In [58]:
folder_name = "trn/msk/"
mkdir_p("visualise/"+folder_name)

for i in range(y_trn.shape[0]):
    save_visual_msk_patch(y_trn, folder_name, sample_idx=i)

In [61]:
folder_names = ["val/0/",
                "val/1/",
                "val/2/",
                "val/3/",
                "val/4/",
                "val/5/",
                "val/6/",
                "val/7/",
                "val/ndvi/",
                "val/savi/",
                "val/evi/"]

for i in range(x_val.shape[0]):
    for band_idx, folder_name in enumerate(folder_names):
        mkdir_p("visualise/"+folder_name)        
        
        save_visual_img_patch(x_val, folder_name, sample_idx=i, band=band_idx)

  bytedata = (data - cmin) * scale + low
  bytedata = (data - cmin) * scale + low


In [65]:
mkdir_p("visualise/val/msk/")

for i in range(y_val.shape[0]):        
    save_visual_msk_patch(y_val, "val/msk/", sample_idx=i)

From a random set of images, for distinguishing trees the bands **3,4,5,ndvi,evi** appeared visually best.

Looking at the kensu blog on the competition, RGB and P bands look quite useful for detecting trees also - https://viewer.kensu.io/notebooks/Geotrellis/DSTL-LEARNING.snb?read_only=1. (NOTE: It could be that the RGB was created from the P with pan-sharpening - hence we'll also train a model with just the P.)

## Recreating data for bands of interest

In [6]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=3000, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 5))      # (3,4,5,ndvi,evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))  # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            # Add indexes found visually to be useful
            x[s * i:s * i + s, s * j:s * j + s, 0] = img[:s, :s, 3]
            x[s * i:s * i + s, s * j:s * j + s, 1] = img[:s, :s, 4]
            x[s * i:s * i + s, s * j:s * j + s, 2] = img[:s, :s, 5]
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 3] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 4] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 5))      # (3,4,5,ndvi,evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            # Add indexes found visually to be useful
            x[s * i:s * i + s, s * j:s * j + s, 0] = img[:s, :s, 3]
            x[s * i:s * i + s, s * j:s * j + s, 1] = img[:s, :s, 4]
            x[s * i:s * i + s, s * j:s * j + s, 2] = img[:s, :s, 5]
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 3] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 4] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [87]:
stick_all_train()

stick_all_val()
make_val()

let's stick all imgs together
20
(837, 849, 8) 6010_1_2 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0




(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6090_2_0 1.0 0.0
x shape is:
(4175, 3340, 5)
y shape is:
(4175, 3340, 1)
1.0 0.0
let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0
(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 5)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for validation
Add blank, idx=37, count=1
Add blank, idx=38, count=2
Add blank, idx=39, count=3
Add blank

Add blank, idx=2328, count=269
Add blank, idx=2331, count=270
Add blank, idx=2345, count=271
Add blank, idx=2347, count=272
Add blank, idx=2360, count=273
Add blank, idx=2363, count=274
Add blank, idx=2369, count=275
Add blank, idx=2371, count=276
Add blank, idx=2384, count=277
Add blank, idx=2386, count=278
Add blank, idx=2398, count=279
Add blank, idx=2406, count=280
Add blank, idx=2414, count=281
Add blank, idx=2435, count=282
Add blank, idx=2443, count=283
Add blank, idx=2445, count=284
Add blank, idx=2470, count=285
Add blank, idx=2471, count=286
Add blank, idx=2475, count=287
Add blank, idx=2486, count=288
Add blank, idx=2505, count=289
Add blank, idx=2516, count=290
Add blank, idx=2518, count=291
Add blank, idx=2521, count=292
Add blank, idx=2528, count=293
Add blank, idx=2534, count=294
Add blank, idx=2535, count=295
Add blank, idx=2536, count=296
Add blank, idx=2555, count=297
Add blank, idx=2561, count=298
Add blank, idx=2573, count=299
Add blank, idx=2588, count=300
Add blan

# Training

In [15]:
def jaccard_coef(y_true, y_pred):
    # __author__ = Vladimir Iglovikov
    intersection = K.sum(y_true * y_pred, axis=[0, -1, -2])
    sum_ = K.sum(y_true + y_pred, axis=[0, -1, -2])

    jac = (intersection + smooth) / (sum_ - intersection + smooth)

    return K.mean(jac)


def jaccard_coef_int(y_true, y_pred):
    # __author__ = Vladimir Iglovikov
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))

    intersection = K.sum(y_true * y_pred_pos, axis=[0, -1, -2])
    sum_ = K.sum(y_true + y_pred, axis=[0, -1, -2])
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return K.mean(jac)


def calc_jacc(model):
    img = np.load(inDir + '/data/x_val_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
    
    print("Validation data shapes:")
    print(img.shape)
    print(msk.shape)


    prd = model.predict(img, batch_size=4)
    print(prd.shape, msk.shape)
    avg, trs = [], []

    for i in range(N_Cls):
        t_msk = msk[:, i, :, :]
        t_prd = prd[:, i, :, :]
        t_msk = t_msk.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])
        t_prd = t_prd.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])

        # grid search the threshold
        m, b_tr = 0, 0
        for j in range(10):
            tr = j / 10.0  # threshold
            pred_binary_mask = t_prd > tr

            jk = jaccard_similarity_score(t_msk, pred_binary_mask)
            if jk > m:
                m = jk
                b_tr = tr

        print(i, m, b_tr)
        avg.append(m)
        trs.append(b_tr)

    score = sum(avg) / 10.0
    return prd, score, trs  # trs is the best threshold value (a list, for each of the 10 classes)

In [70]:
def get_unet_with_indexing():
    inputs = Input((5, ISZ, ISZ))
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=1)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=1)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=1)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=1)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(N_Cls, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', 
                  metrics=[jaccard_coef, jaccard_coef_int, 'accuracy'])
    return model

In [88]:
del x_val
del y_val

x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)

In [89]:
del x_trn
del y_trn

x_trn, y_trn = get_patches(img, msk, amt=6000, aug=False)

model = get_unet_with_indexing()

Add blank, idx=0, count=1
Add blank, idx=30, count=2
Add blank, idx=38, count=3
Add blank, idx=80, count=4
Add blank, idx=107, count=5
Add blank, idx=152, count=6
Add blank, idx=215, count=7
Add blank, idx=240, count=8
Add blank, idx=264, count=9
Add blank, idx=342, count=10
Add blank, idx=370, count=11
Add blank, idx=405, count=12
Add blank, idx=438, count=13
Add blank, idx=453, count=14
Add blank, idx=476, count=15
Add blank, idx=497, count=16
Add blank, idx=505, count=17
Add blank, idx=509, count=18
Add blank, idx=519, count=19
Add blank, idx=520, count=20
Add blank, idx=553, count=21
Add blank, idx=590, count=22
Add blank, idx=611, count=23
Add blank, idx=645, count=24
Add blank, idx=662, count=25
Add blank, idx=710, count=26
Add blank, idx=712, count=27
Add blank, idx=743, count=28
Add blank, idx=752, count=29
Add blank, idx=779, count=30
Add blank, idx=781, count=31
Add blank, idx=793, count=32
Add blank, idx=818, count=33
Add blank, idx=832, count=34
Add blank, idx=840, count=35

(955, 5, 160, 160) (955, 1, 160, 160) inf -3901.0 1.0 0.0


In [90]:
x_trn.shape

(955, 5, 160, 160)

In [92]:
y_trn.shape

(955, 1, 160, 160)

In [91]:
model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Train on 955 samples, validate on 671 samples
Epoch 1/1


<keras.callbacks.History at 0x7eff823d05f8>

In [93]:
prd, score, trs = calc_jacc(model)
print('val jk', score)
print('trs', trs)

Validation data shapes:
(671, 5, 160, 160)
(671, 1, 160, 160)
(671, 1, 160, 160) (671, 1, 160, 160)




0 0.452524217586 0.0
val jk 0.0452524217586
trs [0.0]


In [95]:
now = datetime.now().strftime("%Y%m%d_%H%M%S.h5")
model.save_weights(out_dir+'model_'+now+'.h5')

In [96]:
del x_trn
del y_trn
x_trn, y_trn = get_patches(img, msk, amt=6000, aug=False)

model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Add blank, idx=8, count=1
Add blank, idx=13, count=2
Add blank, idx=28, count=3
Add blank, idx=59, count=4
Add blank, idx=61, count=5
Add blank, idx=78, count=6
Add blank, idx=99, count=7
Add blank, idx=105, count=8
Add blank, idx=127, count=9
Add blank, idx=192, count=10
Add blank, idx=213, count=11
Add blank, idx=231, count=12
Add blank, idx=256, count=13
Add blank, idx=278, count=14
Add blank, idx=283, count=15
Add blank, idx=291, count=16
Add blank, idx=299, count=17
Add blank, idx=301, count=18
Add blank, idx=310, count=19
Add blank, idx=409, count=20
Add blank, idx=414, count=21
Add blank, idx=415, count=22
Add blank, idx=436, count=23
Add blank, idx=535, count=24
Add blank, idx=562, count=25
Add blank, idx=580, count=26
Add blank, idx=599, count=27
Add blank, idx=637, count=28
Add blank, idx=641, count=29
Add blank, idx=675, count=30
Add blank, idx=697, count=31
Add blank, idx=723, count=32
Add blank, idx=728, count=33
Add blank, idx=780, count=34
Add blank, idx=789, count=35
Ad

(933, 5, 160, 160) (933, 1, 160, 160) inf -3901.0 1.0 0.0
Train on 933 samples, validate on 671 samples
Epoch 1/1


<keras.callbacks.History at 0x7eff82390c18>

Hmmmm... not learning anything. Lets visualise our training/val data

In [None]:
print(x_val.shape)
print(y_val.shape)

In [98]:
folder_names = ["val/3/",
                "val/4/",
                "val/5/",
                "val/ndvi/",
                "val/evi/"]

for i in range(x_val.shape[0]):
    for band_idx, folder_name in enumerate(folder_names):
        mkdir_p("visualise/"+folder_name)        
        
        save_visual_img_patch(x_val, folder_name, sample_idx=i, band=band_idx)

  bytedata = (data - cmin) * scale + low
  bytedata = (data - cmin) * scale + low


In [100]:
mkdir_p("visualise/val/msk/")

for i in range(y_val.shape[0]):
    save_visual_img_patch(y_val, "val/msk/", sample_idx=i)

In [101]:
folder_names = ["trn/3/",
                "trn/4/",
                "trn/5/",
                "trn/ndvi/",
                "trn/evi/"]

for i in range(x_trn.shape[0]):
    for band_idx, folder_name in enumerate(folder_names):
        mkdir_p("visualise/"+folder_name)        
        
        save_visual_img_patch(x_trn, folder_name, sample_idx=i, band=band_idx)

  bytedata = (data - cmin) * scale + low


In [102]:
mkdir_p("visualise/trn/msk/")

for i in range(y_trn.shape[0]):
    save_visual_img_patch(y_trn, "trn/msk/", sample_idx=i)

Nothing out of the ordinary - let's add all bands back + indexes

# All bands + indexes

In [103]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=3000, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 10))      # (8 + ndvi + evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))  # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            # Add indexes found visually to be useful          
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s]

            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 9] = evi[:s, :s]
            
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 10))      # (6+ ndvi + evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            # Add indexes found visually to be useful
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s]

            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]
            x[s * i:s * i + s, s * j:s * j + s, 9] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [104]:
stick_all_train()

stick_all_val()
make_val()

let's stick all imgs together
20
(837, 849, 8) 6010_1_2 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0




(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6090_2_0 1.0 0.0
x shape is:
(4175, 3340, 10)
y shape is:
(4175, 3340, 1)
1.0 0.0
let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0
(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 10)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for validation
Add blank, idx=4, count=1
Add blank, idx=6, count=2
Add blank, idx=7, count=3
Add blank,

(731, 10, 160, 160) (731, 1, 160, 160) inf -inf 1.0 0.0
Validation data shapes:
(731, 10, 160, 160)
(731, 1, 160, 160)


In [105]:
def get_unet_with_indexing_v2():
    inputs = Input((10, ISZ, ISZ))
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=1)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=1)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=1)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=1)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(N_Cls, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', 
                  metrics=[jaccard_coef, jaccard_coef_int, 'accuracy'])
    return model

In [108]:
del x_val
del y_val
del x_trn
del y_trn
del img
del msk

x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)

model = get_unet_with_indexing_v2()

In [109]:
print(x_val.shape)
print(y_val.shape)

(731, 10, 160, 160)
(731, 1, 160, 160)


In [112]:
x_trn, y_trn = get_patches(img, msk, amt=6000, aug=False)

print(x_trn.shape)
print(y_trn.shape)

model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Add blank, idx=7, count=1
Add blank, idx=62, count=2
Add blank, idx=72, count=3
Add blank, idx=141, count=4
Add blank, idx=151, count=5
Add blank, idx=158, count=6
Add blank, idx=173, count=7
Add blank, idx=199, count=8
Add blank, idx=204, count=9
Add blank, idx=220, count=10
Add blank, idx=252, count=11
Add blank, idx=254, count=12
Add blank, idx=255, count=13
Add blank, idx=264, count=14
Add blank, idx=307, count=15
Add blank, idx=317, count=16
Add blank, idx=367, count=17
Add blank, idx=385, count=18
Add blank, idx=396, count=19
Add blank, idx=437, count=20
Add blank, idx=453, count=21
Add blank, idx=460, count=22
Add blank, idx=486, count=23
Add blank, idx=517, count=24
Add blank, idx=564, count=25
Add blank, idx=578, count=26
Add blank, idx=587, count=27
Add blank, idx=590, count=28
Add blank, idx=614, count=29
Add blank, idx=637, count=30
Add blank, idx=743, count=31
Add blank, idx=779, count=32
Add blank, idx=820, count=33
Add blank, idx=840, count=34
Add blank, idx=847, count=3

<keras.callbacks.History at 0x7eff80439588>

In [110]:
now = datetime.now().strftime("%Y%m%d_%H%M%S.h5")
model.save_weights(out_dir+'model_'+now+'.h5')

Hmmm... is there a bug - let's check by going back to square 1.

# Back to square 1

Let's also reduce the number of patches per epoch to reduce runtime (i.e. `amt=300` for val and `600` for trn) - also don't regenerate new training data each epoch ("let's overfit this small sample" - JH).

In [7]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=300, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 8))      # (8 + ndvi + evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))  # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :] = img[:s, :s, :] # DOES img[:s, :s] WORK?           
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 8))      # (6+ ndvi + evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :] = img[:s, :s, :] # DOES img[:s, :s] WORK?           
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [114]:
stick_all_train()

stick_all_val()
make_val()

let's stick all imgs together
20
(837, 849, 8) 6010_1_2 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6090_2_0 1.0 0.0
x shape is:
(4175, 3340, 8)
y shape is:
(4175, 3340, 1)
1.0 0.0
let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0
(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 8)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for valida

In [115]:
def get_unet():
    inputs = Input((8, ISZ, ISZ))
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=1)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=1)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=1)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=1)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(N_Cls, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', 
                  metrics=[jaccard_coef, jaccard_coef_int, 'accuracy'])
    return model

In [116]:
x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)

model = get_unet()

DON'T regenerate new training data each epoch ("let's overfit this small sample" - JH). 
AND smaller num_samples

In [117]:
del x_trn
del y_trn
x_trn, y_trn = get_patches(img, msk, amt=600, aug=False)

Add blank, idx=1, count=1
Add blank, idx=95, count=2
Add blank, idx=101, count=3
Add blank, idx=104, count=4
Add blank, idx=110, count=5
Add blank, idx=113, count=6
Add blank, idx=173, count=7
Add blank, idx=176, count=8
Add blank, idx=204, count=9
Add blank, idx=270, count=10
Add blank, idx=283, count=11
Add blank, idx=375, count=12
Add blank, idx=445, count=13
Add blank, idx=467, count=14
Add blank, idx=538, count=15
Add blank, idx=560, count=16
Add blank, idx=562, count=17
Add blank, idx=573, count=18
Add blank, idx=593, count=19
(98, 8, 160, 160) (98, 1, 160, 160) 1.0 -1.0 1.0 0.0


In [118]:
model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Train on 98 samples, validate on 69 samples
Epoch 1/1


<keras.callbacks.History at 0x7eff6b508748>

# M + NDVI

Same again (i.e. no new training data each epoch AND smaller num_samples)...

In [18]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=300, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 8+1))      # (8 + ndvi + evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))  # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]   
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 8+1))      # (6+ ndvi + evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = ndvi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [19]:
stick_all_train()

stick_all_val()
make_val()

let's stick all imgs together
20
(837, 849, 8) 6010_1_2 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0




(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6090_2_0 1.0 0.0
x shape is:
(4175, 3340, 9)
y shape is:
(4175, 3340, 1)
1.0 0.0
let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0
(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 9)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for validation
Add blank, idx=2, count=1
Add blank, idx=10, count=2
Add blank, idx=14, count=3
Add blank,

In [20]:
def get_unet_M_NDVI():
    inputs = Input((8+1, ISZ, ISZ))
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=1)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=1)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=1)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=1)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(N_Cls, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', 
                  metrics=[jaccard_coef, jaccard_coef_int, 'accuracy'])
    return model

In [21]:
x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)

model = get_unet_M_NDVI()

In [23]:
del x_trn
del y_trn
x_trn, y_trn = get_patches(img, msk, amt=600, aug=False)

Add blank, idx=5, count=1
Add blank, idx=11, count=2
Add blank, idx=12, count=3
Add blank, idx=33, count=4
Add blank, idx=42, count=5
Add blank, idx=66, count=6
Add blank, idx=74, count=7
Add blank, idx=121, count=8
Add blank, idx=235, count=9
Add blank, idx=238, count=10
Add blank, idx=310, count=11
Add blank, idx=327, count=12
Add blank, idx=346, count=13
Add blank, idx=406, count=14
Add blank, idx=436, count=15
Add blank, idx=455, count=16
Add blank, idx=472, count=17
Add blank, idx=474, count=18
Add blank, idx=478, count=19
Add blank, idx=483, count=20
Add blank, idx=519, count=21
Add blank, idx=544, count=22
Add blank, idx=561, count=23
(84, 9, 160, 160) (84, 1, 160, 160) 1.0 -1.0 1.0 0.0


In [24]:
model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Train on 84 samples, validate on 71 samples
Epoch 1/1


<keras.callbacks.History at 0x7fa322048198>

Hmmm... Maybe EVI is breaking it:

# M + EVI

Same again (i.e. no new training data each epoch AND smaller num_samples) + EVI...

In [26]:
def make_val(aug=False):
    print("let's pick some samples for validation")
    img = np.load(inDir + '/data/x_valALL_%d.npy' % N_Cls)
    msk = np.load(inDir + '/data/y_valALL_%d.npy' % N_Cls)
    x, y = get_patches(img, msk, amt=300, aug=aug)  # amt := attempt (maybe - i.e. attempting to create 3000 patches, without
                                            # percentage area cover threshold.

    print("Validation data shapes:")
    print(x.shape)
    print(y.shape)

    np.save(inDir + '/data/x_val_%d' % N_Cls, x)
    np.save(inDir + '/data/y_val_%d' % N_Cls, y)
    
    
def stick_all_train():
    print("let's stick all imgs together")
    s = 835  # size of the M images (roughly)

    # 25 training images grid
    x = np.zeros((5 * s, 4 * s, 8+1))      # (8 + ndvi + evi)
    y = np.zeros((5 * s, 4 * s, N_Cls))  # axis=2 denote the class label

    ids = sorted(DF.ImageId.unique())
    print(len(ids))
    
    
    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(4):
            id = ids[1 * i + j]

            img = M(id)
        
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = evi[:s, :s]   
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1)[:s, :s]
    
    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))
    
    np.save(inDir + '/data/x_trn_%d' % N_Cls, x)
    np.save(inDir + '/data/y_trn_%d' % N_Cls, y)
    
    
def stick_all_val():
    print("let's stick all imgs together")
    s = 835  # size of the M images

    # 25 training images grid
    x = np.zeros((5 * s, 1 * s, 8+1))      # (6+ ndvi + evi)
    y = np.zeros((5 * s, 1 * s, N_Cls))   # axis=2 denote the class label

    ids = sorted(val_DF.ImageId.unique())
    print(len(ids))

    # x start grid position (based on image size)
    for i in range(5):
        # y start grid position (based on image size)
        for j in range(1):
            id = ids[1 * i + j]

            img = M(id)
            
            img = stretch_n(img)
            print(img.shape, id, np.amax(img), np.amin(img))
            
            x[s * i:s * i + s, s * j:s * j + s, :8] = img[:s, :s, :]
            
            # Add indexes from above
            ndvi, savi, evi = create_indexes(id)
            x[s * i:s * i + s, s * j:s * j + s, 8] = evi[:s, :s]
            
            # generate training masks by class
            for z in range(N_Cls):
                y[s * i:s * i + s, s * j:s * j + s, z] = generate_mask_for_image_and_class(
                    (img.shape[0], img.shape[1]), id, z + 1, wkt_list_pandas=val_DF)[:s, :s]

    print("x shape is:")
    print(x.shape)
    print("y shape is:")
    print(y.shape)

    print(np.amax(y), np.amin(y))

    np.save(inDir + '/data/x_valALL_%d' % N_Cls, x)
    np.save(inDir + '/data/y_valALL_%d' % N_Cls, y)

In [30]:
stick_all_train()

stick_all_val()
make_val()

let's stick all imgs together
20
(837, 849, 8) 6010_1_2 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0




(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 849, 8) 6010_4_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 848, 8) 6040_1_0 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 848, 8) 6040_1_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6040_2_2 1.0 0.0
(837, 846, 8) 6040_4_4 1.0 0.0
(837, 851, 8) 6060_2_3 1.0 0.0
(837, 848, 8) 6090_2_0 1.0 0.0
x shape is:
(4175, 3340, 9)
y shape is:
(4175, 3340, 1)
1.0 0.0
let's stick all imgs together
5
(837, 848, 8) 6010_4_4 1.0 0.0
(838, 835, 8) 6070_2_3 1.0 0.0
(837, 848, 8) 6100_2_3 1.0 0.0
(837, 849, 8) 6110_4_0 1.0 0.0
(837, 849, 8) 6140_1_2 1.0 0.0
x shape is:
(4175, 835, 9)
y shape is:
(4175, 835, 1)
1.0 0.0
let's pick some samples for validation
Add blank, idx=1, count=1
Add blank, idx=36, count=2
Add blank, idx=47, count=3
Add blank,

In [31]:
def get_unet_M_EVI():
    inputs = Input((8+1, ISZ, ISZ))
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=1)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=1)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=1)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=1)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(N_Cls, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=[inputs], outputs=[conv10])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', 
                  metrics=[jaccard_coef, jaccard_coef_int, 'accuracy'])
    return model

In [32]:
x_val, y_val = np.load(inDir + '/data/x_val_%d.npy' % N_Cls), np.load(inDir + '/data/y_val_%d.npy' % N_Cls)
img = np.load(inDir + '/data/x_trn_%d.npy' % N_Cls)
msk = np.load(inDir + '/data/y_trn_%d.npy' % N_Cls)

model = get_unet_M_EVI()

In [None]:
del x_trn
del y_trn
x_trn, y_trn = get_patches(img, msk, amt=600, aug=False)

In [34]:
model.fit(x_trn, y_trn, batch_size=64, epochs=1, verbose=1, shuffle=True,
          validation_data=(x_val, y_val))

Train on 106 samples, validate on 76 samples
Epoch 1/1


<keras.callbacks.History at 0x7fa2ec1c0b00>

# BINGO - EVI is the coporate

Lets start a clean notebook.