<a href="https://colab.research.google.com/github/heromiya/ocha_ai/blob/master/ocha_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Start from a clean checkout: delete any previous clone, then fetch the repository again.
!rm -rf ocha_ai
!git clone https://github.com/heromiya/ocha_ai.git

In [None]:
# Install the Python packages used by this notebook; Keras and TensorFlow-GPU are pinned to 2.3.1 / 1.15.
!pip install pillow keras==2.3.1 tensorflow-gpu==1.15 numpy pandas segmentation-models

In [None]:
import os, shutil, tempfile, random, glob, subprocess, errno, datetime, numpy as np, matplotlib.pyplot as plt
import tensorflow as tf, keras

from PIL import Image
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Input, Conv2DTranspose, Concatenate, BatchNormalization, UpSampling2D
from keras.layers import  Dropout, Activation
from keras.optimizers import Adam, SGD
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras import backend as K
from keras.utils import plot_model
from random import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import multi_gpu_model
from tensorflow.python.client import device_lib
import sys
import pandas

#pretrained_weights = sys.argv[1]
#if pretrained_weights == 'None':
#    pretrained_weights = None

# Number of GPU devices TensorFlow reports on this machine.
n_gpus = sum(1 for device in device_lib.list_local_devices() if device.device_type == 'GPU')


In [None]:
# Directory/folder preparation
root_folder = 'ocha_ai/'

# Input locations inside the cloned repository.
training_ann_ras = root_folder + 'ann_ras/'
training_img = root_folder + 'img/'
test_img = root_folder + 'test_img/'

# Working area for the generated patches.
#patch_root = tempfile.mkdtemp()
patch_root = root_folder + '/patch/'
patch_ann, patch_img, patch_pred = (
    patch_root + sub for sub in ('/patch_ann/', '/patch_img/', '/patch_pred/')
)

model_folder = root_folder + '/model/'
test_results = root_folder + '/test_results/'

# Every run writes its logs into a folder named after the start time.
timestamp = str(datetime.datetime.now())
log_d = root_folder + "logs/" + timestamp + '/'

# Create the folders needed up front (test_img, patch_root itself and patch_pred
# are not created explicitly here).
for _folder in (log_d, training_ann_ras, patch_ann, patch_img, model_folder, test_results):
    os.makedirs(_folder, exist_ok=True)

In [None]:
# Edge length in pixels of the square patches; exported to the patch-generation
# cell as PATCH_SIZE and used as the generators' target_size.
patch_size = 256 

batch_size = 512 # 256
learning_rate = 0.001 # 0.00003 # 0.001 is a default of Adam in Keras.

# Parameters for patch generation by gen_training_patch.sh
n_patch = 100 # Patches cut per source image. Too large a number sometimes fails in training. More than 100 is preferred.
n_epoch = 100  # preferably >= 100 for operational model.


In [None]:
%%script env MASKDIR="$training_ann_ras" RASDIR="$training_img" PATMASKDIR="$patch_ann" PATRASDIR="$patch_img" PATCH_SIZE="$patch_size" N_PATCH="$n_patch" bash
# NOTE: the %%script cell magic must stay on the very first line of the cell;
# IPython does not recognise a cell magic that follows other lines. Everything
# below that line is fed to bash, so the notes here are ordinary shell comments.
#
# Earlier ways of launching the patch generation, kept for reference:
#os.system("bash -x "+root_folder+"/gen_training_patch.sh " + training_ann_ras + " " + training_img + " " + patch_ann + " " + patch_img + " " + str(patch_size) + " " + str(n_patch))
#!bash -x ocha_ai/gen_training_patch.sh " + training_ann_ras + " " + training_img + " " + patch_ann + " " + patch_img + " " + str(patch_size) + " " + str(n_patch))

echo "### BEGIN $0 $(date +'%F_%T')"

# gen_patch TIF
#
# Cut N_PATCH randomly placed windows of PATCH_SIZE x PATCH_SIZE units out of the
# raster TIF and out of its mask raster, writing each pair as PNG files that
# share one random file name under $PATRASDIR/img and $PATMASKDIR/img.
#
# Reads from the environment: MASKDIR, PATRASDIR, PATMASKDIR, PATCH_SIZE, N_PATCH.
# Requires gdalinfo, gdal_translate, python3, perl and shuf on PATH.
function gen_patch() {
    TIF=$1
    # Split the array assignments below on spaces only.
    # NOTE: IFS is not declared local, so it stays changed for the shell running this function.
    IFS=' '
    
    # Raster metadata as JSON; python3 prints the requested list and tr strips
    # "[", "]" and "," so the values become space-separated array elements.
    JSON=$(gdalinfo -json "$TIF")
    SIZE=($(echo $JSON | python3 -c "import sys, json; print(json.load(sys.stdin)['size'])" | tr -d [],))
    X_SIZE=${SIZE[0]}
    Y_SIZE=${SIZE[1]}
    # Mask file name: the image base name with "-a" inserted before a trailing
    # three-letter extension (e.g. foo.tif -> foo-a.tif), looked up in MASKDIR.
    MASK_TIF=$MASKDIR/$(basename "$TIF" | sed 's/\(\.[a-zA-Z]\{3\}\)$/-a\1/g')
    
    upperRight=($(echo $JSON | python3 -c "import sys, json; print(json.load(sys.stdin)['cornerCoordinates']['upperRight'])" | tr -d [],))
    lowerLeft=($(echo $JSON | python3 -c "import sys, json; print(json.load(sys.stdin)['cornerCoordinates']['lowerLeft'])" | tr -d [],))
    #geoTransform=($(echo $JSON | python3 -c "import sys, json; print(json.load(sys.stdin)['geoTransform'])" | tr -d [],))
    
    # The pixel size is hard-coded to 1 / -1 instead of being read from the
    # geotransform (disabled lines below).
    # NOTE(review): this looks like it assumes rasters whose corner coordinates
    # are plain pixel coordinates -- confirm before using georeferenced input.
    #PIXEL_SIZE_X=${geoTransform[1]}
    #PIXEL_SIZE_Y=$(echo ${geoTransform[5]} | tr -d "-")
    PIXEL_SIZE_X=1
    PIXEL_SIZE_Y=-1
    # Patch extent along each axis; with the values above GY is negative.
    PATCH_SIZE_GX=$(perl -e "print $PATCH_SIZE * $PIXEL_SIZE_X")
    PATCH_SIZE_GY=$(perl -e "print $PATCH_SIZE * $PIXEL_SIZE_Y")
    # Full image extent; assigned but not used anywhere in this function.
    IMG_EXT="${lowerLeft[0]} ${lowerLeft[1]} ${upperRight[0]} ${upperRight[1]}"
       
    # Patches for non-buildings
    j=1
    while [ $j -le $N_PATCH ]; do
        # Random window origin measured from the lower-left corner; the rand()
        # range is the image extent minus one patch extent on each axis.
        PATCH_XMIN=$(perl -e "print ${lowerLeft[0]} + rand($X_SIZE * $PIXEL_SIZE_X - $PATCH_SIZE_GX)")
        PATCH_YMIN=$(perl -e "print ${lowerLeft[1]} + rand($Y_SIZE * $PIXEL_SIZE_Y - $PATCH_SIZE_GY)")
        PATCH_XMAX=$(perl -e "print $PATCH_XMIN + $PATCH_SIZE_GX")
        PATCH_YMAX=$(perl -e "print $PATCH_YMIN + $PATCH_SIZE_GY")
	
	
        # Random zero-padded file name shared by the image patch and its mask patch.
        FNAME=$(printf %09d $(shuf -i 0-1000000000 -n 1 )).png
        PATCH_IMG=${PATRASDIR}/img/$FNAME
        PATCH_MASK=${PATMASKDIR}/img/$FNAME
        #gdalwarp -q -r lanczos -tr $RES $RES -te $PATCH_XMIN $PATCH_YMIN $PATCH_XMAX $PATCH_YMAX "$TIF" "$PATCH_IMG"
        #gdalwarp -q -r lanczos -tr $RES $RES -te $PATCH_XMIN $PATCH_YMIN $PATCH_XMAX $PATCH_YMAX "$MASK_TIF" "$PATCH_MASK"
        # Extract the same window from the image and from the mask as PNG.
        gdal_translate -q -of PNG -projwin $PATCH_XMIN $PATCH_YMAX $PATCH_XMAX $PATCH_YMIN "$TIF" "$PATCH_IMG"
        gdal_translate -q -of PNG -projwin $PATCH_XMIN $PATCH_YMAX $PATCH_XMAX $PATCH_YMIN "$MASK_TIF" "$PATCH_MASK"
	
        j=$(expr $j + 1)
    done
#done
}
# Export the function so the shells started by GNU parallel can call it.
export -f gen_patch

# Refuse to continue with empty output locations: the rm/mkdir below would
# otherwise operate on "/img".
: "${PATMASKDIR:?PATMASKDIR must be set}" "${PATRASDIR:?PATRASDIR must be set}"

echo "$PATMASKDIR" "$PATRASDIR"
# Regenerate the patch folders from scratch on every run.
rm -rf "$PATMASKDIR" "$PATRASDIR" && mkdir -p "$PATMASKDIR/img" "$PATRASDIR/img"
#for TIF in $(find "$RASDIR" -type f | grep -e ".*\.png$" -e ".*\.tif$"); do gen_patch $TIF; done
# Run gen_patch once per .tif/.png raster below RASDIR. File names are passed
# NUL-delimited so that paths containing whitespace are not split into several
# arguments.
find "$RASDIR" -type f \( -name '*.tif' -o -name '*.png' \) -print0 | parallel -0 gen_patch {}

echo "### END $0 $(date +'%F_%T')" 



In [None]:
# Random seed handed to every flow_from_directory call below; any integer is okay.
seed = 777

# Keyword arguments shared by the image and the mask ImageDataGenerator:
# random flips, rotation, shear and zoom, a train/validation split and
# rescaling of pixel values by 1/255.
data_gen_args = {
    'horizontal_flip': True,
    'vertical_flip': True,
    'validation_split': 0.6329,
    'rotation_range': 180,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'rescale': 1. / 255,
}

def mask_preprocessing(img):
    """Binarise a mask in place: every value above zero becomes 255.

    The array passed in is modified and the same object is returned.
    """
    foreground = img > 0
    img[foreground] = 255
    return img

# Using ImageDataGenerator for images with more than 3 channels #4664
# https://github.com/keras-team/keras/issues/4664
image_datagen = ImageDataGenerator(**data_gen_args)
# The mask generator additionally binarises each mask via mask_preprocessing.
mask_datagen = ImageDataGenerator(preprocessing_function=mask_preprocessing, **data_gen_args)

# Settings common to all four directory iterators; the identical seed is what
# is meant to keep each image batch aligned with its mask batch.
_flow_kwargs = dict(
    class_mode=None,
    seed=seed,
    batch_size=batch_size,
    target_size=(patch_size, patch_size),
)

# Training subset: image patches and the matching grayscale masks.
train_image_generator = image_datagen.flow_from_directory(
    patch_img, subset='training', **_flow_kwargs)
train_mask_generator = mask_datagen.flow_from_directory(
    patch_ann, color_mode='grayscale', subset='training', **_flow_kwargs)

# Validation subset, drawn from the same folders.
test_image_generator = image_datagen.flow_from_directory(
    patch_img, subset='validation', **_flow_kwargs)
test_mask_generator = mask_datagen.flow_from_directory(
    patch_ann, color_mode='grayscale', subset='validation', **_flow_kwargs)

# Pair every image batch with its mask batch.
train_generator = zip(train_image_generator, train_mask_generator)
test_generator = zip(test_image_generator, test_mask_generator)

#x, y= next(train_generator)

from keras.callbacks import LearningRateScheduler

def time_decay(epoch, initial_lrate):
    """Return the learning rate for ``epoch``: ``learning_rate / (epoch + 1)``.

    ``learning_rate`` is the module-level base rate. ``initial_lrate`` is part
    of the signature but is not used by the current schedule.
    """
    # Disabled alternative: initial_lrate / (1 + 0.01 * epoch)
    return learning_rate / (epoch + 1)


# Scheduler callback applying time_decay, with verbose output enabled.
lrate = LearningRateScheduler(time_decay, verbose=1)

class PlotLearning(keras.callbacks.Callback):
    """Keras callback that displays a sample prediction after every epoch.

    After each epoch one patch image is picked at random from the module-level
    ``patch_files`` list, run through the model being trained, and shown side
    by side as: input image | thresholded mask | input image * mask.
    """

    def on_train_begin(self, logs=None):
        """Reset the bookkeeping attributes at the start of training.

        ``logs`` is accepted for the Keras callback interface and is not used.
        """
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []
        #self.fig = plt.figure()
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Predict on one randomly chosen patch and plot the result.

        ``epoch`` and ``logs`` are accepted for the Keras callback interface
        and are not used.
        """
        # Choose a random patch image and preprocess it: scale to [0, 1] and
        # keep the first three channels only.
        path = np.random.choice(patch_files)
        with Image.open(path) as img:  # close the file handle once the pixels are read
            raw = np.array(img) / 255.
        raw = raw[:, :, 0:3]

        # Predict the mask with the model this callback is attached to
        # (Keras sets ``self.model`` on every callback), not a global.
        pred = self.model.predict(np.expand_dims(raw, 0))

        # Replicate the single-channel prediction to three channels and
        # threshold it at 0.5.
        msk = pred.squeeze()
        msk = np.stack((msk,) * 3, axis=-1)
        msk[msk >= 0.5] = 1
        msk[msk < 0.5] = 0

        # Show the input, the mask and the masked input next to each other.
        combined = np.concatenate([raw, msk, raw * msk], axis=1)
        plt.axis('off')
        plt.imshow(combined)
        plt.show()
        

def build_callbacks():
    """Return the callbacks used for training.

    The list holds a ModelCheckpoint that saves the full model (best only)
    into the run's log folder, the learning-rate scheduler ``lrate`` and a
    fresh ``PlotLearning`` instance, in that order.
    """
    weights_path = log_d + '/weights.' + timestamp + '.{epoch:04d}-{val_loss:.4f}.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=weights_path,
        verbose=0,
        save_weights_only=False,
        save_best_only=True,
    )
    return [checkpointer, lrate, PlotLearning()]


In [None]:
## Training
# Configuration for multiple-band image data.
# https://segmentation-models.readthedocs.io/en/latest/tutorial.html#training-with-non-rgb-data
import segmentation_models as sm # https://github.com/qubvel/segmentation_models

# U-Net with a ResNet-34 backbone and a single sigmoid output class.
model = sm.Unet('resnet34', classes=1, activation='sigmoid')
model.summary()

# Wrap the model for multi-GPU training only when more than one GPU was detected.
if n_gpus > 1:
    model = multi_gpu_model(model, gpus=n_gpus)

#if pretrained_weights is not None:
#    model.load_weights(pretrained_weights) # Loading pretrained model.

# Adam at the configured base rate, combined BCE + Jaccard loss, and
# accuracy / IoU as reported metrics.
model.compile(
    optimizer=Adam(lr=learning_rate),
    loss=sm.losses.bce_jaccard_loss,
    metrics=['accuracy', sm.metrics.iou_score],
)



In [None]:
import math
from fnmatch import fnmatch

root = patch_img
pattern = "*.png"

# Collect every PNG patch below the image patch folder. The list is also read
# by the PlotLearning callback to pick preview images.
patch_files = [
    os.path.join(path, name)
    for path, subdirs, files in os.walk(root)
    for name in files
    if fnmatch(name, pattern)
]

if not patch_files:
    raise FileNotFoundError("No '%s' patch files found under %s" % (pattern, root))
if train_image_generator.samples == 0 or test_image_generator.samples == 0:
    raise ValueError("The training or validation subset contains no patches")

# One epoch is one pass over the respective subset. Steps are derived from the
# number of samples each generator actually holds (after the validation split)
# and rounded up to a whole number of batches; the previous total-count based
# value ignored the split and was a non-integer float.
train_steps = math.ceil(train_image_generator.samples / batch_size)
test_steps = math.ceil(test_image_generator.samples / batch_size)

model_history = model.fit_generator(train_generator,
                                    epochs=n_epoch,
                                    steps_per_epoch=train_steps,
                                    validation_data=test_generator,
                                    validation_steps=test_steps,
                                    callbacks=build_callbacks(),
                                    verbose=1)

loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

# Index of every epoch that was actually recorded (equals range(n_epoch) when
# training runs to completion).
epochs = range(len(loss))