In [1]:
# Load all the dependencies
import os
import sys
import random
import warnings
import numpy as np
from itertools import chain
from numpy import genfromtxt
from tensorflow import random
from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Layer, UpSampling2D, GlobalAveragePooling2D, Multiply, Dense, Reshape, Permute, multiply, dot, add, Input
from keras.layers.core import Dropout, Lambda, SpatialDropout2D, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model, load_model, model_from_yaml, Sequential
import tensorflow as tf

# Seed the random number generators used below so that runs are repeatable.
SEED = 1337
np.random.seed(SEED)
# Go through `tf` explicitly: the bare name `random` is imported twice above
# (standard library first, then tensorflow), so `random.set_seed` only works
# because the later import shadows the earlier one.
tf.random.set_seed(SEED)
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.3.0


In [2]:
# Dice coefficient; its negation is used as the loss function
def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened first, so the score is computed over all
    elements at once. The constant 1.0 added to the numerator and the
    denominator keeps the ratio defined when both sums are zero.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    return (2.0 * overlap + 1.0) / (total + 1.0)

# Jaccard coefficient (intersection over union)
def jacard_coef(y_true, y_pred):
    """Return the smoothed Jaccard coefficient of two tensors.

    Both tensors are flattened first. The union is computed as
    sum(y_true) + sum(y_pred) - intersection, and 1.0 is added to the
    numerator and the denominator as a smoothing term.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    union = K.sum(truth) + K.sum(pred) - overlap
    return (overlap + 1.0) / (union + 1.0)

# Loss derived from the Jaccard coefficient
def jacard_coef_loss(y_true, y_pred):
    """Return the negated Jaccard coefficient, so a higher overlap gives a lower loss."""
    score = jacard_coef(y_true, y_pred)
    return -score

# Loss derived from the Dice coefficient
def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient, so a higher overlap gives a lower loss."""
    score = dice_coef(y_true, y_pred)
    return -score

def Residual_CNN_block(x, size, dropout=0.0, batch_norm=True):
    """Apply three 3x3 'same'-padded convolutions, each followed by optional BN and ReLU.

    Note that, despite the name, this block adds no skip connection.

    Args:
        x: Input tensor.
        size: Number of filters used by each of the three convolutions.
        dropout: Accepted for interface compatibility; not used by this block.
        batch_norm: When truthy, a BatchNormalization layer (over the channel
            axis) is inserted after every convolution.

    Returns:
        The output tensor of the last ReLU activation.
    """
    # `K.image_dim_ordering()` is the Keras 1 spelling and is not available in
    # newer backends; prefer `image_data_format()` and fall back only when the
    # backend does not provide it.
    if hasattr(K, 'image_data_format'):
        channels_first = K.image_data_format() == 'channels_first'
    else:
        channels_first = K.image_dim_ordering() == 'th'
    axis = 1 if channels_first else 3

    conv = x
    for _ in range(3):
        conv = Conv2D(size, (3, 3), padding='same')(conv)
        if batch_norm:
            conv = BatchNormalization(axis=axis)(conv)
        conv = Activation('relu')(conv)
    return conv

class multiplication(Layer):
    """Attention merge layer with a trainable scalar gate.

    `call` expects a list of five tensors ``[g, x, x_query, phi_g, x_value]``.
    It builds an attention map from ``phi_g`` and ``x_query``, uses it to
    re-weight ``x_value``, adds the result (scaled by the trainable scalar
    ``k``) to ``x`` and concatenates that sum with ``g`` along axis 3.
    """

    def __init__(self,inter_channel = None,**kwargs):
        super(multiplication, self).__init__(**kwargs)
        # `call` reshapes the query/key tensors to `inter_channel // 4`
        # channels, so this must be an integer by the time the layer is called.
        self.inter_channel = inter_channel

    def build(self,input_shape=None):
        # Zero-initialised scalar: the attention branch contributes nothing
        # to the output until `k` has been trained away from zero.
        self.k = self.add_weight(name='k',shape=(1,),initializer='zeros',dtype='float32',trainable=True)

    def get_config(self):
        """Return the base layer config extended with `inter_channel`."""
        base_config = super(multiplication, self).get_config()
        config = {'inter_channel':self.inter_channel}
        return dict(list(base_config.items()) + list(config.items()))

    def call(self,inputs):
        """Compute ``concat([k * attention(x_value) + x, g], axis=3)``."""
        g,x,x_query,phi_g,x_value = inputs[0],inputs[1],inputs[2],inputs[3],inputs[4]
        h,w,c = int(x.shape[1]),int(x.shape[2]),int(x.shape[3])
        query_channels = self.inter_channel//4
        # Collapse the two spatial axes into one axis of h*w positions.
        x_query = K.reshape(x_query, shape=(-1,h*w,query_channels))
        phi_g = K.reshape(phi_g, shape=(-1,h*w,query_channels))
        x_value = K.reshape(x_value, shape=(-1,h*w,c))
        # scale[b, i, j] = sum_ch phi_g[b, i, ch] * x_query[b, j, ch]
        scale = K.batch_dot(phi_g, K.permute_dimensions(x_query,(0,2,1)))
        # Normalise every row i over the positions j.
        soft_scale = K.softmax(scale, axis=-1)
        # scaled_value[b, i, ch] = sum_j soft_scale[b, i, j] * x_value[b, j, ch]
        scaled_value = K.batch_dot(soft_scale, x_value)
        scaled_value = K.reshape(scaled_value, shape=(-1,h,w,c))
        # Backend ops are used directly so that no new Keras layer objects are
        # created every time the layer is called.
        attended = self.k * scaled_value + x
        return K.concatenate([attended, g], axis=3)

    def compute_output_shape(self,input_shape):
        """Return the shape of `call`'s result.

        The output keeps the batch and spatial dimensions of ``x`` (second
        input); its channel count is the sum of the channels of ``x`` and ``g``
        (first input), because those two tensors are concatenated on axis 3.
        """
        g_shape = list(input_shape[0])
        x_shape = list(input_shape[1])
        if g_shape[3] is None or x_shape[3] is None:
            channels = None
        else:
            channels = g_shape[3] + x_shape[3]
        return (x_shape[0], x_shape[1], x_shape[2], channels)

    @staticmethod
    def get_custom_objects():
        """Return a name-to-class mapping for this layer (usable as `custom_objects`)."""
        return {'multiplication': multiplication}

def attention_up_and_concatenate(inputs):
    """Upsample the first input, build the attention projections and merge them.

    `inputs[0]` (g) is upsampled with a stride-2 transposed convolution that
    keeps its channel count. Three 1x1 convolutions then produce the query
    (from x = `inputs[1]`), the key (from the upsampled g) and the value
    (from x). All five tensors are handed to a `multiplication` layer, whose
    output is returned.
    """
    gate = inputs[0]
    skip = inputs[1]
    inter_channel = gate.get_shape().as_list()[3]

    def pointwise(filters):
        # 1x1, stride-1 convolution in channels-last layout.
        return Conv2D(filters, [1, 1], strides=[1, 1], data_format='channels_last')

    gate = Conv2DTranspose(inter_channel, (2,2), strides=[2, 2],padding='same')(gate)
    x_query = pointwise(inter_channel//4)(skip)
    phi_g = pointwise(inter_channel//4)(gate)
    x_value = pointwise(inter_channel//2)(skip)
    return multiplication(inter_channel)([gate, skip, x_query, phi_g, x_value])

class multiplication2(Layer):
    """Gated attention merge layer with a trainable scalar.

    `call` expects a list of three tensors ``[g, x, rate]``. It multiplies
    ``x`` by ``rate``, scales the product by the trainable scalar ``k``, adds
    ``x`` back and concatenates that sum with ``g`` along axis 3.
    """

    def __init__(self,inter_channel = None,**kwargs):
        super(multiplication2, self).__init__(**kwargs)
        # Stored and serialised via `get_config`; `call` does not read it.
        self.inter_channel = inter_channel

    def build(self,input_shape=None):
        # Zero-initialised scalar: the gated branch contributes nothing to the
        # output until `k` has been trained away from zero.
        self.k = self.add_weight(name='k',shape=(1,),initializer='zeros',dtype='float32',trainable=True)

    def get_config(self):
        """Return the base layer config extended with `inter_channel`."""
        base_config = super(multiplication2, self).get_config()
        config = {'inter_channel':self.inter_channel}
        return dict(list(base_config.items()) + list(config.items()))

    def call(self,inputs):
        """Compute ``concat([k * (x * rate) + x, g], axis=3)``."""
        g,x,rate = inputs[0],inputs[1],inputs[2]
        # Plain tensor arithmetic is used so that no new Keras layer objects
        # are created every time the layer is called.
        att_x = self.k * (x * rate) + x
        return K.concatenate([att_x, g], axis=3)

    def compute_output_shape(self,input_shape):
        """Return the shape of `call`'s result.

        The output keeps the batch and spatial dimensions of ``x`` (second
        input); its channel count is the sum of the channels of ``x`` and ``g``
        (first input), because those two tensors are concatenated on axis 3.
        """
        g_shape = list(input_shape[0])
        x_shape = list(input_shape[1])
        if g_shape[3] is None or x_shape[3] is None:
            channels = None
        else:
            channels = g_shape[3] + x_shape[3]
        return (x_shape[0], x_shape[1], x_shape[2], channels)

    @staticmethod
    def get_custom_objects():
        """Return a name-to-class mapping for this layer (usable as `custom_objects`)."""
        return {'multiplication2': multiplication2}

def attention_up_and_concatenate2(inputs):
    """Upsample the first input, derive a sigmoid attention rate and merge.

    `inputs[0]` (g) is upsampled with a stride-2 transposed convolution to half
    its channel count and passed through a 1x1 convolution. 1x1 projections of
    x (`inputs[1]`) and of the upsampled g are summed, passed through ReLU,
    reduced to one channel and squashed with a sigmoid. The upsampled g, x and
    that rate are handed to a `multiplication2` layer, whose output is returned.
    """
    gate = inputs[0]
    skip = inputs[1]
    inter_channel = gate.get_shape().as_list()[3]

    def pointwise(filters):
        # 1x1, stride-1 convolution in channels-last layout.
        return Conv2D(filters, [1, 1], strides=[1, 1], data_format='channels_last')

    gate = Conv2DTranspose(inter_channel//2, (3,3), strides=[2, 2],padding='same')(gate)
    gate = pointwise(inter_channel//2)(gate)
    theta_x = pointwise(inter_channel//4)(skip)
    phi_g = pointwise(inter_channel//4)(gate)
    f = Activation('relu')(add([theta_x, phi_g]))
    rate = Activation('sigmoid')(pointwise(1)(f))
    return multiplication2()([gate, skip, rate])

In [3]:
# Restore the saved attention model. The two custom layers and the Dice
# loss/metric are passed by name so they can be resolved while loading.
loaded_model = load_model(
    'June21/model/model_augv_attention2.h5',
    custom_objects=dict(
        multiplication=multiplication,
        multiplication2=multiplication2,
        dice_coef_loss=dice_coef_loss,
        dice_coef=dice_coef,
    ),
)

# To train on the data **without** the NAIP imagery, run the cell below.

In [4]:
# The dataset has 8 channels:
# 0. Curvature
# 1. Slope
# 2. Openness
# 3. DEM
# 4. TPI 21
# 5. Reflectance (LiDAR intensity)
# 6. Geomorphon
# 7. TPI 9
# All 8 channels are loaded exactly as stored (see the printed shapes);
# nothing is sliced off in this cell.
# NOTE(review): the previous comment said the TPI 9 channel is excluded to
# match the model input, but no exclusion happens here - confirm which is intended.

data_path = 'Covington_data/without_NAIP/'

# Feature patches for training and validation
X_train_new = np.load(data_path + 'train_data.npy')
X_Validation_new = np.load(data_path + 'vali_data.npy')
print(X_train_new.shape, X_Validation_new.shape, sep='\n')

# Label patches for validation and training, cast to float32
Y_Validation = np.load(data_path + 'vali_label.npy').astype(np.float32)
Y_train = np.load(data_path + 'train_label.npy').astype(np.float32)


(600, 224, 224, 8)
(600, 224, 224, 8)


In [5]:
# Patch geometry: square patches, `patch_size` pixels on each side
patch_size = 224
IMG_WIDTH = IMG_HEIGHT = patch_size
# Number of feature channels
INPUT_CHANNELS = 8
# Number of output masks (1 when only one type of object is predicted)
OUTPUT_MASK_CHANNELS = 1

# Training hyperparameters
maxepoch = 1000
learning_rate = 0.0001  # alternative value left commented out: 0.0000359
patience = 20
aug = 'v'

# Re-compile the loaded model with the Dice loss; Dice and accuracy are tracked as metrics
loaded_model.compile(
    optimizer=Adam(lr=learning_rate),
    loss=dice_coef_loss,
    metrics=[dice_coef, 'accuracy'],
)

callbacks = [
    # Multiply the learning rate by 0.7 when val_loss has not improved for `patience` epochs
    ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=patience,
                      min_lr=1e-9, verbose=1, mode='min'),
    # Stop training when val_loss has not improved for `patience + 10` epochs
    EarlyStopping(monitor='val_loss', patience=patience + 10, verbose=0),
    # Keep only the model with the best val_loss seen so far
    ModelCheckpoint('model' + aug + '_attention2.h5', monitor='val_loss',
                    save_best_only=True, verbose=0),
]

In [6]:
# Train on the loaded patches and keep the returned History object for later saving
no_transfer_learning_results = loaded_model.fit(
    x=X_train_new,
    y=Y_train,
    validation_data=(X_Validation_new, Y_Validation),
    batch_size=12,
    epochs=maxepoch,
    callbacks=callbacks,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 00055: ReduceLROnPlateau reducing learning rate to 6.999999823165126e-05.
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000


In [7]:
import pickle
import time

# One timestamp shared by all three output files so they can be matched up
timestr = time.strftime("%Y%m%d-%H%M%S")
root_path = ''

# Save the model architecture as YAML
model_yaml = loaded_model.to_yaml()
with open("".join([root_path, "model_attention2_Notransfered_NoNAIP_", timestr, ".yaml"]), "w") as yaml_file:
    yaml_file.write(model_yaml)

# Save the model itself via `Model.save` (not only its weights)
loaded_model.save("".join([root_path, "model_attention2_Notransfered_NoNAIP_", timestr, ".h5"]))

# Save the per-epoch training history returned by `fit`
with open("".join([root_path, "history_attention2_Notransfered_NoNAIP_", timestr, ".pickle"]), 'wb') as file_pi:
    pickle.dump(no_transfer_learning_results.history, file_pi, protocol=2)

### Will wait for the whole area data to do prediction

In [13]:
# Predict on the prediction patches, threshold at 0.5 and save the binary labels.
X_test = np.load('Covington_data/without_NAIP/prediction_data_07092020.npy')
preds_test = loaded_model.predict(X_test)
# Values strictly above 0.5 become 1, everything else 0
preds_test_t = np.greater(preds_test, 0.5).astype(np.uint8)
np.save('Covington_data/without_NAIP/pred_Rowan_mdl_Covington_data.npy', preds_test_t)

# Progress 31/08/2020
# Progress 31/08/2020
1. Corrected the data (removing None class (-9999) from test dataset)
    - will generate the new test results  
  
  
2. Preparing for CEGIS presentation
    - Added prelim results  
    - Will add the baseline scenario, in which the U-Net model predicts on the Covington River dataset without NAIP  
      
        
    
3. Preparing the script for the presentation  
    
# Plan for this week
1. Finish the presentation for CEGIS
2. Read and summarize more papers
3. Try training the model with a higher weight on the stream class.

----

# Progress 24/08/2020

**Comments:** Try to understand why the approach works, when it holds true, and how to adapt or apply it to other places.  

1. Generate the whole area and do testing
    - Generated the dataset
    - Evaluated the testing data and generated the prelim results
**Problem:** The data has more than two classes, as shown in the evaluation.   
      
    
2. Created the outline of the presentation for CEGIS 
    - Still need more details:   
    https://docs.google.com/presentation/d/1PWrlgGEMCCJLXsAHeiTe40xdA22RtISE6gUpRbbplRs/edit?usp=sharing

# Plan for this week
1. Finish the presentation for CEGIS
2. Read and summarize more papers
3. Try training the model with a higher weight on the stream class.
4. Correct the data (remove the None class)

---

# Progress 17/08/2020
1. Finished generating the new dataset
    - Cleaned the NAIP data and all raw data of Covington River
    - Included NAIP imagery into the dataset
    - Edited the data preprocessing script to make it easier to add or remove data 
    - Added script documents and comments  
      
2. Generating the whole-area dataset that includes the NAIP imagery
    - Using High memory node on Keeling   
    - **Problem:** The VPN disconnected after 2 hours in!!! T_T I have to start over.  
  
3. Trained the model with new dataset  
    - The performance is significantly higher than with the dataset without NAIP  
  
4. Read more papers and added summary of the read paper
    - https://docs.google.com/document/d/1BApPn0aWTwstEpbnKC9g0p5KSOhi74_rF7nzRYM9CtE/edit  
  
# Plan for this week
1. Generate the whole area and do testing
2. Start preparing the presentation for CEGIS 
3. Read and summarize more papers
    - Focus more on machine learning in hydro, remote sensing classification.



---



# Progress 10/08/2020

1. Successfully trained the model on my own PC.  
    - Fixed cuDNN and CUDA version problems 
    - Trained with 4 trainable layers  
      **Problem:** The model just disregards the stream class.  
      **Root cause:** Unbalanced sample of stream and non-stream classes   

2. In progress: Adding NAIP image to the dataset. 
    - Extracted the NAIP for Covington and put it on Keeling 
    - modifying the preprocessing code
    
3. Outline the Introduction of the paper and reviewed some papers
    - https://docs.google.com/document/d/1BApPn0aWTwstEpbnKC9g0p5KSOhi74_rF7nzRYM9CtE/edit
    
# Plan for this week
1. Finish adding the NAIP imagery and train the model again
2. Start the first draft of the introduction 

