In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from shapely.wkt import loads as wkt_loads
import tifffile as tiff
import os
import random
from shapely.wkt import loads           
from matplotlib.patches import Polygon, Patch
from shapely.geometry import MultiPolygon, Polygon
import shapely.wkt
import shapely.affinity
from sklearn.metrics import jaccard_score
from collections import defaultdict
import tensorflow as tf
from tifffile import imwrite

import warnings
import warnings
# Install a global "ignore" filter so no Python warning is shown from here on.
# NOTE(review): this also hides numeric RuntimeWarnings (e.g. divide-by-zero)
# raised by later cells - consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# --- Constants ---------------------------------------------------------------
num_class = 10      # number of label classes (ClassType values 1..10 are rasterised later)
size = 160          # presumably the patch side length in pixels - TODO confirm against later cells
smooth = 1e-12      # presumably a small epsilon for metric/loss computations - TODO confirm

# --- Input tables (paths are relative to the working directory) --------------
# Polygon labels: one WKT multipolygon per (ImageId, ClassType) pair.
DF = pd.read_csv('train_wkt_v4/train_wkt_v4.csv')
# Grid extents per image; the header row is skipped and the columns renamed.
GS = pd.read_csv('grid_sizes/grid_sizes.csv', names=['ImageId', 'Xmax', 'Ymin'], skiprows=1)
# Sample submission table.
SB = pd.read_csv('sample_submission/sample_submission.csv')

In [3]:
# DF is already loaded from 'train_wkt_v4/train_wkt_v4.csv' in the configuration
# cell, so it is only previewed here instead of being read from disk a second time.
DF.head()

Unnamed: 0,ImageId,ClassType,MultipolygonWKT
0,6040_2_2,1,MULTIPOLYGON EMPTY
1,6040_2_2,2,MULTIPOLYGON EMPTY
2,6040_2_2,3,MULTIPOLYGON EMPTY
3,6040_2_2,4,MULTIPOLYGON (((0.003025 -0.007879000000000001...
4,6040_2_2,5,MULTIPOLYGON (((0.005311 -0.009044999999999999...


In [4]:
def convert_coordinate(coords, img_size, xy_max):
    '''
    Scale polygon vertex coordinates to integer pixel positions.

    Parameters
    ----------
    coords : array-like, shape (N, 2) or wider
        Vertex coordinates; column 0 is x and column 1 is y. The input is
        never modified (a float copy is scaled instead), so lists and
        integer arrays are accepted as well.
    img_size : tuple
        (height, width) of the target image in pixels.
    xy_max : tuple
        (Xmax, Ymin) of the image, as stored in the grid-size table.

    Returns
    -------
    numpy.ndarray
        int32 array with the same shape as `coords`, holding the scaled
        coordinates rounded to the nearest integer.
    '''
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    Xmax, Ymin = xy_max
    height, width = img_size

    # Effective extents, formula taken from the Kaggle tutorial linked above.
    scaled_width = 1.0 * width * width / (width + 1)
    scaled_height = 1.0 * height * height / (height + 1)

    # Work on a float copy so the caller's array is left untouched.
    pixels = np.array(coords, dtype=np.float64)
    pixels[:, 0] *= scaled_width / Xmax
    pixels[:, 1] *= scaled_height / Ymin
    return np.round(pixels).astype(np.int32)

In [5]:
def get_xmax_ymin(GS, imageId):
    '''
    Look up the (Xmax, Ymin) grid extents of one image.

    The first row of `GS` whose ImageId equals `imageId` is used. Every
    column after the first is cast to float and unpacked as (xmax, ymin),
    so `GS` must have exactly two columns following the id column.
    '''
    rows_for_image = GS[GS.ImageId == imageId]
    first_row = rows_for_image.iloc[0]
    xmax, ymin = first_row.iloc[1:].astype(float)
    return xmax, ymin

In [6]:
def get_listof_polygons(wkt_list, imageId, cType):
    '''
    Parse the WKT geometry stored for one image and one class.

    Selects the rows of `wkt_list` whose ImageId equals `imageId` and whose
    ClassType equals `cType`. Returns None when no row matches; otherwise
    exactly one row must match (asserted) and its MultipolygonWKT string is
    parsed with `loads` and returned.
    '''
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    selected = (wkt_list.ImageId == imageId) & (wkt_list.ClassType == cType)
    wkt_strings = wkt_list[selected].MultipolygonWKT
    if len(wkt_strings) == 0:
        return None
    assert len(wkt_strings) == 1
    return loads(wkt_strings.values[0])

In [7]:
def get_and_convert_contours(polygonList, img_size, xy_coods):
    '''
    Convert the rings of a (multi)polygon into pixel-space contours.

    Parameters
    ----------
    polygonList : MultiPolygon, Polygon, iterable of polygons, or None
        Geometry to convert. Multi-part geometries are walked through their
        `.geoms` attribute, because Shapely 2.x no longer supports `len()`
        or integer indexing on them. A single polygon (anything exposing
        `.exterior`) is treated as a one-element collection.
    img_size : tuple
        (height, width) of the target image, forwarded to convert_coordinate.
    xy_coods : tuple
        (Xmax, Ymin) of the image, forwarded to convert_coordinate.

    Returns
    -------
    None if `polygonList` is None, otherwise a tuple
    (perim_list, interior_list): the converted exterior rings and the
    converted interior rings (holes) of all polygons, in iteration order.
    '''
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    if polygonList is None:
        return None

    if hasattr(polygonList, 'geoms'):
        # Multi-part geometry: works on both Shapely 1.x and 2.x.
        polygons = polygonList.geoms
    elif hasattr(polygonList, 'exterior'):
        # A lone polygon rather than a collection.
        polygons = [polygonList]
    else:
        # Already a plain iterable of polygons.
        polygons = polygonList

    perim_list = []
    interior_list = []
    for poly in polygons:
        perim = np.array(list(poly.exterior.coords))
        perim_list.append(convert_coordinate(perim, img_size, xy_coods))
        for ring in poly.interiors:
            interior = np.array(list(ring.coords))
            interior_list.append(convert_coordinate(interior, img_size, xy_coods))
    return perim_list, interior_list

In [8]:
import cv2
def plot_mask_from_contours(img_size, contours):
    '''
    Rasterise polygon contours into a uint8 mask of shape `img_size`.

    `contours` is either None (an all-zero mask is returned) or a pair
    (exterior rings, interior rings). Exteriors are filled with 1 first,
    then interiors are filled with 0 to cut the holes back out.
    '''
    # https://www.geeksforgeeks.org/draw-a-filled-polygon-using-the-opencv-function-fillpoly/
    # https://www.kaggle.com/visoft/dstl-satellite-imagery-feature-detection/export-pixel-wise-mask
    mask = np.zeros(img_size, np.uint8)
    if contours is not None:
        exteriors, holes = contours
        cv2.fillPoly(mask, exteriors, 1)
        cv2.fillPoly(mask, holes, 0)
    return mask

In [9]:
def generate_mask_for_image_and_class(img_size, imageId, class_type, GS=GS, wkt_list=DF):
    '''
    Build the filled-polygon mask of one class for one image.

    Chains the helpers defined above: look up the image's grid extents in
    `GS`, fetch the class geometry from `wkt_list`, convert its rings to
    pixel contours, and fill them into a mask of shape `img_size`.
    `GS` and `wkt_list` default to the module-level tables.
    '''
    grid_extents = get_xmax_ymin(GS, imageId)
    class_polygons = get_listof_polygons(wkt_list, imageId, class_type)
    pixel_contours = get_and_convert_contours(class_polygons, img_size, grid_extents)
    return plot_mask_from_contours(img_size, pixel_contours)

In [10]:
def band_8(image_id, size=800):
    '''
    Load the 8-band (M band) image for `image_id`.

    Reads sixteen_band/<image_id>_M.tif and moves the array's first axis to
    the third position (presumably bands-first -> bands-last; confirm against
    the TIFF layout). `size` is accepted but not used by this function.
    '''
    path = "sixteen_band/{}_M.tif".format(image_id)
    raw = tiff.imread(path)
    # Same axis order as np.rollaxis(raw, 0, 3).
    return np.moveaxis(raw, 0, 2)

In [11]:
def stretch_n(img, lower_percent=5, higher_percent=95):
    '''
    Contrast-stretch each band of an image into the range [0, 1].

    For every band (index along axis 2) the `lower_percent` and
    `higher_percent` percentiles are mapped linearly to 0 and 1, and values
    falling outside that window are clipped.

    A band whose two percentiles coincide (e.g. a constant band) has no
    contrast to stretch; it is set to 0 instead of dividing by zero, which
    would otherwise fill the band with NaN/inf.

    Parameters
    ----------
    img : numpy.ndarray, shape (H, W, bands)
    lower_percent, higher_percent : float
        Percentiles defining the stretch window.

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as `img`.
    '''
    # https://www.kaggle.com/aamaia/rgb-using-m-bands-example
    low_out, high_out = 0.0, 1.0
    out = np.zeros_like(img, dtype=np.float32)
    for i in range(img.shape[2]):
        band = img[:, :, i]
        c = np.percentile(band, lower_percent)
        d = np.percentile(band, higher_percent)
        spread = d - c
        if spread == 0:
            # Flat band: nothing to stretch, keep it at the lower bound.
            out[:, :, i] = low_out
            continue
        stretched = low_out + (band - c) * (high_out - low_out) / spread
        out[:, :, i] = np.clip(stretched, low_out, high_out)
    return out

In [13]:
from tqdm import tqdm

# Rasterise every labelled image into an (H, W, num_class) mask stack and
# cache it as masks/<image_id>.tif for the patch-extraction step.
os.makedirs('masks', exist_ok=True)  # make sure the output folder exists before writing

image_ids = sorted(DF.ImageId.unique())
for img_id in tqdm(image_ids):
    # Only the spatial size is needed here, so the (costly) contrast stretch
    # is skipped; it does not change the image shape.
    height, width = band_8(img_id).shape[:2]
    y = np.zeros((height, width, num_class))
    for class_idx in range(num_class):
        # ClassType values in the label table are 1-based.
        y[:, :, class_idx] = generate_mask_for_image_and_class((height, width), img_id, class_idx + 1)
    imwrite('masks/'+img_id+'.tif', y)

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:37<00:00,  1.51s/it]


In [14]:
from patchify import patchify

# Cut each stretched image and its cached mask stack into overlapping
# 160x160 tiles (stride 60) and collect them as training samples.
image_ids = sorted(DF.ImageId.unique())
x, y = [], []
for img_id in tqdm(image_ids):
    img = stretch_n(band_8(img_id))
    mask = tiff.imread('masks/'+img_id+'.tif')
    img_tiles = patchify(img, (160, 160, 8), step=60)     # image tiles of shape (160, 160, 8)
    mask_tiles = patchify(mask, (160, 160, 10), step=60)  # mask tiles of shape (160, 160, 10)

    # Walk the tile grid row by row; index 0 drops the singleton channel-window axis.
    for i, j in np.ndindex(img_tiles.shape[0], img_tiles.shape[1]):
        x.append(img_tiles[i, j, 0, :, :].astype(np.float16))
        y.append(mask_tiles[i, j, 0, :, :].astype(np.float16))

# Reorder to (sample, channel, row, col); images are rescaled from [0, 1] to [-1, 1].
x = 2 * np.transpose(x, (0, 3, 1, 2)) - 1
y = np.transpose(y, (0, 3, 1, 2))
print(x.shape, y.shape, np.amax(x), np.amin(x), np.amax(y), np.amin(y))

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:12<00:00,  1.95it/s]


(3600, 8, 160, 160) (3600, 10, 160, 160) 1.0 -1.0 1.0 0.0


In [15]:
# Persist the patch arrays as data/all_images.npy and data/all_masks.npy
# (np.save appends the .npy extension).
os.makedirs('data', exist_ok=True)  # make sure the output folder exists before writing
# x and y are already ndarrays; wrapping them in np.array() would only make a
# second full in-memory copy before saving.
np.save('data/all_images', x)
np.save('data/all_masks', y)