In [1]:
import pandas as pd
import numpy as np
import tifffile as tif
import cv2
import matplotlib.pyplot as plt

from importlib import reload
import os, gc, glob

from shapely import wkt
from skimage import measure, morphology

import utils
import global_vars

### Outline
* load prediction masks
* threshold the masks 
    * find decent thresholds for each class, using train data and labels
* generate wkt
    * turn masks into polygons
    * turn pollygons into wkt
* write each wkt to a file

In [2]:
im_names = glob.glob(os.path.join(global_vars.DATA_DIR, 'bin_masks', 'train_16x16','*.tif'))
label_list = list()
train_pred_list = list()

for name in im_names:
    prediction = tif.imread(name)
    train_pred_list.append(prediction[:,:,1:])
    shape = prediction.shape[:2]
    label_list.append(utils.load_all_lab(name.split('/')[-1].split('.')[0], 835))

In [3]:
len(train_pred_list)

25

In [4]:
thresholds = list()

for i in range(8):
    best_threshold = utils.find_thresh(train_pred_list, label_list,i)
    thresholds.append(best_threshold)

In [5]:
#this method is based on solution posted in forum for differentiating river from standing water. 
def get_standing_river(im_mask, water_ind = -2):
    empty = np.zeros(im_mask.shape[:2])
    if im_mask[:,:,water_ind].sum() > 50000:
        return im_mask[:,:,-2], empty
    else:
        return empty, im_mask[:,:,-2]

def remove_large(train_mask, limit):
    tl = measure.regionprops(measure.label(train_mask))
    for i in tl:
        if i.area > limit:
            c = i.coords
            for k in range(c.shape[0]):
                train_mask[c[k,0], c[k,1]] = 0
    return train_mask

def make_car_truck(vehicles):
    car = remove_large(vehicles.copy(),200)
    car = morphology.remove_small_objects(car, 12)
    
    truck = remove_large(vehicles.copy(),800)
    truck = morphology.remove_small_objects(truck, 30)
    return car, truck

In [19]:
thresholds

[0.5, 0.25, 0.6, 0.25, 0.4, 0.7, 0.7, 0.1]

In [20]:
files = glob.glob(os.path.join(global_vars.DATA_DIR, 'bin_masks', 'test_16x16','*.tif'))

for file in files:
    preds = tif.imread(file)
    new_preds = np.zeros(list(preds.shape[:2])+[8])
    for i in range(8):
        new_preds[:,:,i] = preds[:,:,i+1] > thresholds[i]
        
    new_preds = new_preds.astype(np.uint8)
    preds = new_preds.copy()
    
    new_preds = np.zeros((list(preds.shape[:2])+[10]), dtype=bool)
    new_preds[:,:,:6] = preds[:,:,:6]
    river, water = get_standing_river(preds.copy())
    new_preds[:,:,6] = river
    new_preds[:,:,7] = water
    
    car, truck = make_car_truck(preds[:,:,-1])
    
    new_preds[:,:,8] = car
    new_preds[:,:,9] = truck
    
    pred_df = utils.simple_to_wkt(new_preds, file[-12:-4], list(range(1,11)))
    tmp_path = os.path.join(global_vars.DATA_DIR,'submissions','subm16x16',file[-12:-4]+ '_wkt.csv') 
    pred_df.to_csv(tmp_path)
    print(file)

/media/d/ssd2/dstl/bin_masks/test_16x16/6100_0_0.tif


  warn("Only one label was provided to `remove_small_objects`. "


/media/d/ssd2/dstl/bin_masks/test_16x16/6110_2_3.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6010_1_3.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6180_1_2.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6150_4_2.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6010_0_2.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6080_1_3.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6080_4_2.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6040_2_0.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6130_2_3.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6010_3_1.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6160_4_0.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6070_3_0.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6020_0_4.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6100_1_1.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6170_0_0.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6050_1_0.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6110_4_1.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6150_1_4.tif
/media/d/ssd2/dstl/bin_masks/test_16x16/6070_1