In [None]:
# env setup
from tqdm import tqdm
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [26]:
# seed random

import numpy as np
import random

# Keep a seed that was already defined (e.g. by a notebook that set RND before
# running this cell); otherwise fall back to the default of 123.
RND = globals().get('RND', 123)

# Seed both NumPy's global generator and the stdlib generator with the same value.
np.random.seed(RND)
random.seed(RND)

In [14]:
# create output dir
# `os` is imported here because this cell runs before the cell that imports it;
# previously a missing `os` raised NameError, which the handler below swallowed,
# so the directory was silently never created on a fresh kernel.
import os

try:
    OUT_DIR
except NameError:
    # OUT_DIR is optional: nothing to create when the notebook does not define it.
    pass
else:
    # exist_ok avoids a failure if the directory appears between check and create.
    os.makedirs(OUT_DIR, exist_ok=True)

In [None]:
# create tensorboard logs dir
# `os` is imported here because this cell runs before the cell that imports it;
# previously a missing `os` raised NameError, which the handler below swallowed,
# so the directory was silently never created on a fresh kernel.
import os

try:
    TENSORBOARD_DIR
except NameError:
    # TENSORBOARD_DIR is optional: skip when the notebook does not define it.
    pass
else:
    if not os.path.isdir(TENSORBOARD_DIR):
        # exist_ok avoids a failure if the directory appears between check and create.
        os.makedirs(TENSORBOARD_DIR, exist_ok=True)
        print('Created ', TENSORBOARD_DIR)

In [1]:
# create models checkpoint dir
# `os` is imported here because this cell runs before the cell that imports it;
# previously a missing `os` raised NameError and was misreported as
# "MODEL_CHECKPOINT_DIR not defined".
import os

try:
    MODEL_CHECKPOINT_DIR
except NameError:
    # Only the lookup of the optional name is guarded, so the message is accurate.
    print('MODEL_CHECKPOINT_DIR not defined ')
else:
    if not os.path.isdir(MODEL_CHECKPOINT_DIR):
        # exist_ok avoids a failure if the directory appears between check and create.
        os.makedirs(MODEL_CHECKPOINT_DIR, exist_ok=True)
        print('Created ', MODEL_CHECKPOINT_DIR)

In [2]:
# reading geotiff

import rasterio, warnings, numpy as np

def read_geotiff(path):
    """Read a 4-band raster and return it as a (rows, cols, 4) array.

    The four bands returned by ``src.read()`` are unpacked as
    (b, g, r, nir) and re-stacked along the last axis in the order
    r, g, b, nir.  (Assumes the file stores its bands in B, G, R, NIR
    order -- TODO confirm against the data source.)

    All warnings raised while opening/reading are suppressed.  Unpacking
    fails with ValueError if the file does not contain exactly four bands.
    """
    with warnings.catch_warnings():
        # Silence every warning emitted while the file is open.
        warnings.simplefilter("ignore")

        with rasterio.open(path) as dataset:
            blue, green, red, near_ir = dataset.read()
            channels = (red, green, blue, near_ir)
            return np.dstack(channels)

In [None]:
# list files in directory

import os

def list_files(src_dir):
    """Return the names of the non-directory entries directly inside ``src_dir``.

    Only the top level is inspected (sub-directories are not descended into).
    An empty list is returned when ``os.walk`` yields nothing for ``src_dir``.
    """
    # Take just the first (top-level) triple from the walk; fall back to an
    # empty listing when the walk produces no entries at all.
    _, _, top_level_names = next(os.walk(src_dir), (src_dir, [], []))
    return list(top_level_names)

In [None]:
# display model as svg
def model_as_svg(model):
    """Render ``model`` as an ``IPython.display.SVG`` object.

    The model is converted with Keras' ``model_to_dot`` (layer shapes shown)
    and the resulting graph is rendered to SVG with the ``dot`` program.
    """
    # Imported lazily so the notebook does not need these packages unless
    # a model is actually drawn.
    from IPython.display import SVG
    from keras.utils.vis_utils import model_to_dot

    graph = model_to_dot(model, show_shapes=True)
    svg_data = graph.create(prog='dot', format='svg')
    return SVG(svg_data)

In [9]:
# data generation
def generate_batch(n_samples,
                   batch_index,
                   X_files,
                   Y_tags_misc,
                   Y_tags_weather,
                   images_dir,
                   image_shape):
    """Load one batch of images (and optionally its labels) from disk.

    Parameters
    ----------
    n_samples : int
        Number of images per batch; must be positive.
    batch_index : int
        Index of the requested batch.  It is wrapped modulo the number of
        *full* batches available, so any trailing files that do not fill a
        whole batch are never served.
    X_files : sequence of str
        File names, relative to ``images_dir``.
    Y_tags_misc, Y_tags_weather : sliceable or None
        Label containers sliced with the same row range as ``X_files``.
        If either is None, only the images are returned.
    images_dir : str
        Directory containing the files.
    image_shape : tuple of int
        Shape each file is reshaped to after being read as raw float32.

    Returns
    -------
    Xs : np.ndarray of shape (n_samples, *image_shape), dtype float32
        Returned alone when labels are not supplied.
    (Xs, {'tags_misc': ..., 'tags_weather': ...})
        Returned when both label containers are supplied.

    Raises
    ------
    ValueError
        If ``n_samples`` is not positive, or if there are fewer than
        ``n_samples`` files (previously this surfaced as an opaque
        ZeroDivisionError from the modulo below).
    """
    if n_samples <= 0:
        raise ValueError(
            'n_samples must be a positive integer, got %r' % (n_samples,))

    batches_in_X = len(X_files) // n_samples
    if batches_in_X == 0:
        raise ValueError(
            'need at least n_samples=%d files to form a batch, got %d'
            % (n_samples, len(X_files)))

    # Wrap around so a generator can keep counting past the end of the data.
    batch_index %= batches_in_X
    start = batch_index * n_samples
    stop = start + n_samples

    Xs = np.zeros((n_samples, *image_shape), dtype=np.float32)
    for i, f in enumerate(X_files[start:stop]):
        # Files are raw float32 dumps; reshape restores the image layout.
        Xs[i] = np.fromfile(
            images_dir + '/' + f, dtype=np.float32).reshape(image_shape)

    if Y_tags_misc is None or Y_tags_weather is None:
        return Xs

    return (Xs, {
        'tags_misc': Y_tags_misc[start:stop],
        'tags_weather': Y_tags_weather[start:stop]
    })

In [None]:
# data generation

# 0 agriculture
# 1 artisinal_mine
# 2 bare_ground
# 3 blooming
# 4 blow_down
# 5 conventional_mine
# 6 cultivation
# 7 habitation
# 8 primary
# 9 road
# 10 selective_logging
# 11 slash_burn
# 12 water


def generate_batch_2(n_samples, batch_index, X_files, Y_tags_agriculture=None,
                     Y_tags_artisinal_mine=None, Y_tags_bare_ground=None,
                     Y_tags_blooming=None, Y_tags_blow_down=None,
                     Y_tags_conventional_mine=None, Y_tags_cultivation=None,
                     Y_tags_habitation=None, Y_tags_primary=None, Y_tags_road=None,
                     Y_tags_selective_logging=None, Y_tags_slash_burn=None, Y_tags_water=None,
                     Y_tags_weather=None, images_dir=None, image_shape=None):
    """Load one batch of images with one label output per tag.

    Parameters
    ----------
    n_samples : int
        Number of images per batch; must be positive.
    batch_index : int
        Index of the requested batch, wrapped modulo the number of *full*
        batches in ``X_files`` (trailing files that do not fill a batch are
        never served).
    X_files : sequence of str
        File names, relative to ``images_dir``.
    Y_tags_* : sliceable or None
        Per-tag label containers, sliced with the same row range as
        ``X_files``.  ``Y_tags_agriculture`` alone decides whether labels are
        returned: when it is None only the images are returned; when it is
        given, every other ``Y_tags_*`` argument must be given as well.
    images_dir : str
        Directory containing the files.
    image_shape : tuple of int
        Shape each file is reshaped to after being read as raw float32.

    Returns
    -------
    Xs : np.ndarray of shape (n_samples, *image_shape), dtype float32
        Returned alone when ``Y_tags_agriculture`` is None.
    (Xs, dict)
        Otherwise; the dict maps each tag name ('agriculture', ...,
        'water', 'weather') to its label slice.

    Raises
    ------
    ValueError
        If ``n_samples`` is not positive, or if there are fewer than
        ``n_samples`` files (previously this surfaced as an opaque
        ZeroDivisionError from the modulo below).
    """
    if n_samples <= 0:
        raise ValueError(
            'n_samples must be a positive integer, got %r' % (n_samples,))

    batches_in_X = len(X_files) // n_samples
    if batches_in_X == 0:
        raise ValueError(
            'need at least n_samples=%d files to form a batch, got %d'
            % (n_samples, len(X_files)))

    # Wrap around so a generator can keep counting past the end of the data.
    batch_index %= batches_in_X
    i1 = batch_index * n_samples
    i2 = i1 + n_samples

    Xs = np.zeros((n_samples, *image_shape), dtype=np.float32)
    for i, f in enumerate(X_files[i1:i2]):
        # Files are raw float32 dumps; reshape restores the image layout.
        Xs[i] = np.fromfile(
            images_dir + '/' + f, dtype=np.float32).reshape(image_shape)

    if Y_tags_agriculture is None:
        return Xs

    # (output name, label container) pairs, in the order of the returned dict.
    tag_sources = (
        ('agriculture', Y_tags_agriculture),
        ('artisinal_mine', Y_tags_artisinal_mine),
        ('bare_ground', Y_tags_bare_ground),
        ('blooming', Y_tags_blooming),
        ('blow_down', Y_tags_blow_down),
        ('conventional_mine', Y_tags_conventional_mine),
        ('cultivation', Y_tags_cultivation),
        ('habitation', Y_tags_habitation),
        ('primary', Y_tags_primary),
        ('road', Y_tags_road),
        ('selective_logging', Y_tags_selective_logging),
        ('slash_burn', Y_tags_slash_burn),
        ('water', Y_tags_water),
        ('weather', Y_tags_weather),
    )

    return (Xs, {name: tags[i1:i2] for name, tags in tag_sources})

In [2]:
# map actual files to csv provided

import re
import pandas as pd

# Lookup table consulted by map_file_v2: its 'old' column is matched against
# 'file_<id>.tif' names and the digits of the matching 'new' value give the
# replacement id.  Loaded once, when this cell runs.
# NOTE(review): the absolute path is machine-specific -- consider deriving it
# from a configurable data directory.
test_mapping = pd.read_csv('/datasets/kaggle/planet/test_v2_file_mapping.csv')

def map_file_v2(orig):
    """Map a file name to its ``(tif_name, jpg_name)`` pair.

    * Names starting with ``test_``: the leading ``test_<digits>`` stem is
      kept and returned with ``.tif`` and ``.jpg`` extensions.
    * Any other name: the first run of digits is taken as the file id and
      ``file_<id>.tif`` is looked up in the ``old`` column of the
      module-level ``test_mapping`` table.  If a row matches, the id is
      replaced by the first run of digits in that row's ``new`` value;
      otherwise the id is kept.  The result is
      ``('file_<id>.tif', 'test_<id>.jpg')``.

    Raises IndexError when the expected digits are absent from the name.
    """
    # Guard clause: "test_" files keep their own stem.
    if re.match(r'^test_', orig) is not None:
        stem = re.findall(r'^test_\d+', orig)[0]
        return (stem + '.tif', stem + '.jpg')

    file_id = re.findall(r'\d+', orig)[0]
    old_name = 'file_' + file_id + '.tif'
    matching_rows = test_mapping[test_mapping['old'] == old_name]

    # Fall back to the original id when the table has no entry for this file.
    new_id = (file_id if len(matching_rows) == 0
              else re.findall(r'\d+', matching_rows['new'].values[0])[0])

    return ('file_' + new_id + '.tif', 'test_' + new_id + '.jpg')

In [7]:
import numpy as np
from keras.preprocessing.image import random_rotation, random_zoom, random_shear, random_shift, flip_axis

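# NOTE: vertical / horizontal flips are controlled by the flip_v_prob and
# flip_h_prob arguments of random_transform.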


def random_transform(
        img,
        debug=True,
        zoom_prob=0.33,
        zoom_range=(0.75, 1.25),
        rotation_prob=0.33,
        rotation_degs=30,
        shear_prob=0.33,
        shear_factor=np.pi / 816,  # of w
        shift_prob=0.33,
        shift_factors=(0.25, 0.25),
        flip_v_prob=0.33,
        flip_h_prob=0.33,
        fill_mode='reflect'):
    """Apply a random chain of augmentations to a single image.

    ``img`` is treated as (rows, cols, channels).  Six steps are considered
    in a fixed order -- zoom, rotation, shear, shift, vertical flip,
    horizontal flip -- and each one is applied independently when a fresh
    ``np.random.random()`` draw falls below its ``*_prob``.

    Parameters
    ----------
    img : array
        Image to transform.
    debug : bool
        When true, print a line for every step that is applied.
    zoom_range, rotation_degs, shear_factor : passed straight through to
        Keras' ``random_zoom``, ``random_rotation`` and ``random_shear``.
        (Units of ``shear_factor`` depend on the installed Keras version --
        TODO confirm.)
    shift_factors : pair passed as the two range arguments of Keras'
        ``random_shift``, in that order.
    fill_mode : str
        Fill mode forwarded to the four Keras transforms.

    Returns
    -------
    The transformed image (the input object itself if no step fired).
    """

    def triggered(probability):
        # One draw from NumPy's global generator per step, fired or not.
        return np.random.random() < probability

    # Keyword arguments shared by every Keras geometric transform.
    layout = dict(row_axis=0, col_axis=1, channel_axis=2, fill_mode=fill_mode)

    if triggered(zoom_prob):
        img = random_zoom(img, zoom_range, **layout)
        if debug:
            print('Random zoom applied', zoom_range)

    if triggered(rotation_prob):
        img = random_rotation(img, rotation_degs, **layout)
        if debug:
            print('Random rotation applied', rotation_degs)

    if triggered(shear_prob):
        img = random_shear(img, shear_factor, **layout)
        if debug:
            print('Random shear applied', shear_factor)

    if triggered(shift_prob):
        first_range, second_range = shift_factors[0], shift_factors[1]
        img = random_shift(img, first_range, second_range, **layout)
        if debug:
            print('Random shift applied', shift_factors)

    if triggered(flip_v_prob):
        # Flip along the row axis.
        img = flip_axis(img, axis=0)
        if debug:
            print('V axis flipped')

    if triggered(flip_h_prob):
        # Flip along the column axis.
        img = flip_axis(img, axis=1)
        if debug:
            print('H axis flipped')

    return img