# Testing CNNs on multiple cloud gaps at varying cloud cover percentages

In [26]:
from IPython import display
from matplotlib import pyplot as plt
import rasterio
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, accuracy_score

### Functions 

In [1]:
def preprocessing_gaps(path, img, pctl, n_features=14):
    """Load an image stack and return the normalized pixels that fall inside the cloud gaps.

    Parameters
    ----------
    path : str
        Data root; the stack is read from ``path + 'images/' + img + '/stack/stack.tif'``
        and the cloud field from ``path + 'clouds/' + img + '_clouds.npy'``.
    img : str
        Image identifier used to build both file names.
    pctl : float
        Percentile of the cloud field used as threshold. Pixels whose cloud value is
        ``>=`` this percentile are masked out (the inverse of the training mask).
    n_features : int, optional
        Number of leading bands that are treated as features and normalized.
        Defaults to 14, leaving the remaining band(s) untouched.

    Returns
    -------
    data_vector : ndarray, shape (n_valid_pixels, n_bands)
        One row per retained pixel; the first ``n_features`` columns are standardized.
    data_mean, data_std : ndarray, shape (n_features,)
        Per-band mean and standard deviation computed before normalization.
    data_ind : tuple of ndarray
        ``(rows, cols)`` positions of the retained pixels in the original image,
        in the same order as the rows of ``data_vector``.
    """
    # Get local image
    with rasterio.open(path + 'images/'+ img + '/stack/stack.tif', 'r') as ds:
        data = ds.read()

    # rasterio returns (bands, rows, cols); move the band axis last -> (rows, cols, bands)
    data = data.transpose((1, -1, 0))

    # NaN can only be stored in a floating point array
    if not np.issubdtype(data.dtype, np.floating):
        data = data.astype(np.float64)

    # load cloudmasks
    cloudMaskDir = path+'clouds'
    cloudMask = np.load(cloudMaskDir+'/'+img+'_clouds.npy')
    # Note how the sign is >=, not <, we want inverse of training data
    cloudMask = cloudMask >= np.percentile(cloudMask, pctl)

    # Blank out the masked pixels and any pixel already carrying the -999999 sentinel
    data[cloudMask] = np.nan
    data[data == -999999] = np.nan

    # A pixel is kept only if *every* band is valid. The same mask produces both the
    # index arrays and the pixel vector so the two always have matching length/order.
    valid = ~np.isnan(data).any(axis=2)
    data_ind = np.where(valid)
    data_vector = data[valid]

    # Compute per-band means and standard deviations of the input bands.
    data_mean = data_vector[:,0:n_features].mean(0)
    data_std = data_vector[:,0:n_features].std(0)

    # Normalize features; a constant band (std == 0) is only centered, not divided by zero
    safe_std = np.where(data_std == 0, 1, data_std)
    data_vector[:,0:n_features] = (data_vector[:,0:n_features] - data_mean) / safe_std

    return data_vector, data_mean, data_std, data_ind

# =============================================================
# =============================================================
# =============================================================

def gapFill(data_vector, data_mean, data_std, img, pctl):
    """Restore the trained model for ``img``/``pctl`` and predict on the gap pixels.

    Parameters
    ----------
    data_vector : ndarray
        Pixel table; columns ``0:14`` are fed to the network as features.
    data_mean, data_std : ndarray
        Accepted for call compatibility; not used by this function.
    img : str
        Image identifier, used to locate the model directory.
    pctl : int
        Cloud-cover percentile the model was trained with, used in the model name.

    Returns
    -------
    y_pred : ndarray
        Value of the graph's ``outputs`` tensor for the supplied features.

    Raises
    ------
    ValueError
        If no checkpoint can be found in the model directory.

    Notes
    -----
    Reads the notebook-level variable ``path`` to locate the ``models`` directory.
    """
    tf.reset_default_graph()

    model_name = img+'_clouds_'+str(pctl)
    model_path = path+'models/cnn_vary_clouds/'+img+'/'+model_name

    # Had to alter some config and runoptions because kept running into OOM at last step during eval
    config = tf.ConfigProto(
        device_count = {'GPU': 0}
    )
    config.gpu_options.allow_growth = True
    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)

    with tf.Session(config=config) as sess:
        # Rebuild the saved graph structure, then load the trained weights into it
        mySaver = tf.train.import_meta_graph(model_path+'/'+model_name+'.ckpt-1000.meta')

        checkpoint = tf.train.latest_checkpoint(checkpoint_dir=model_path)
        if checkpoint is None:
            raise ValueError('No checkpoint found in ' + model_path)
        mySaver.restore(sess, checkpoint)

        graph = tf.get_default_graph()
        # NOTE(review): assumes the training graph named its feature placeholder 'input'
        # and its prediction tensor 'outputs' - confirm against the training notebook.
        inputs = graph.get_tensor_by_name('input:0')
        outputs = graph.get_tensor_by_name('outputs:0')

        # Feed the placeholder tensor itself (not the Python builtin `input`)
        y_pred = sess.run(outputs,
                          feed_dict={inputs: data_vector[:,0:14]},
                          options=run_options)

    return y_pred


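Problem: a TensorFlow checkpoint only stores variable values; it does not give us Python handles to tensors such as `outputs` or `input`. After restoring, those tensors have to be looked up by name in the imported meta graph (or saved explicitly, e.g. in a collection, at training time).
https://stackoverflow.com/questions/43887425/how-to-import-a-model-in-tensorflow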

### Predicting flooding in cloud gaps using pre-trained models

In [11]:
# Root directory that holds the image stacks, cloud masks and trained models
path = 'C:/Users/ipdavies/CPR/data/'

# Cloud-cover percentiles to evaluate: 10, 20, ..., 90
pctls = list(range(10, 100, 10))
import math

# Identifiers of the images to evaluate
img_list = [
    '4101_LC08_027038_20131103_1',
    '4101_LC08_027038_20131103_2',
    '4101_LC08_027039_20131103_1',
    '4115_LC08_021033_20131227_1',
    '4337_LC08_026038_20160325_1',
]

import time
def timer(start, end, formatted = True):
    """Return the elapsed time between two timestamps as a string.

    Parameters
    ----------
    start, end : float
        Timestamps in seconds (e.g. values returned by ``time.time()``).
    formatted : bool, optional
        If true (default) return ``'HH:MM:SS.ss'``; otherwise return the elapsed
        time in minutes (including the fractional part) as a string.

    Returns
    -------
    str
        The elapsed time in the requested representation.
    """
    elapsed = end - start
    if formatted: # Returns full formatted time in hours, minutes, seconds
        hours, rem = divmod(elapsed, 3600)
        minutes, seconds = divmod(rem, 60)
        return str("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
    # Returns minutes + fraction of minute, computed from the arguments rather than
    # from whatever `start_time` happens to exist in the notebook namespace
    return str(elapsed / 60)
    
# One metrics table per image, in img_list order. Created once, before the image
# loop, so that the tables of earlier images are not thrown away.
gapMetricsList = []

for j, img in enumerate(img_list):

    print(img)

    precision = []
    recall = []
    f1 = []
    accuracy = []
    times = []
    # Predictions of the image currently being processed, one entry per percentile.
    # After the loop this holds the predictions of the last image in img_list.
    predictions = []

    for i, pctl in enumerate(pctls):

        data_vector, data_mean, data_std, data_ind = preprocessing_gaps(path, img, pctl)

        start_time = time.time()

        y_pred = gapFill(data_vector, data_mean, data_std, img, pctl)

        # Elapsed prediction time in minutes; timer() returns a string, so convert
        # to float to keep the metrics table numeric
        times.append(float(timer(start_time, time.time(), False)))

        # Column 14 holds the observed flood label
        y_true = data_vector[:,14]

        accuracy.append(sklearn.metrics.accuracy_score(y_true, y_pred))
        precision.append(sklearn.metrics.precision_score(y_true, y_pred))
        recall.append(sklearn.metrics.recall_score(y_true, y_pred))
        f1.append(sklearn.metrics.f1_score(y_true, y_pred))

        predictions.append(y_pred)

    gapMetrics = pd.DataFrame(np.column_stack([pctls, accuracy, precision, recall, f1, times]),
                          columns=['cloud_cover','accuracy','precision','recall','f1', 'time'])

    # `path` already ends with a slash, so no extra separator is needed
    gapMetrics.to_csv(path+'models/cnn_vary_clouds/'+img+'/gapMetrics.csv', index=False)

    gapMetricsList.append(gapMetrics)
        

4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parameters from C:/Users/ipdavies/CPR/data/models/cnn_vary_clouds/4101_LC08_027038_20131103_1/4101_LC08_027038_20131103_1_clouds_10./4101_LC08_027038_20131103_1_clouds_10.ckpt-1000


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parameters from C:/Users/ipdavies/CPR/data/models/cnn_vary_clouds/4101_LC08_027038_20131103_1/4101_LC08_027038_20131103_1_clouds_20./4101_LC08_027038_20131103_1_clouds_20.ckpt-1000
4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parameters from C:/Users/ipdavies/CPR/data/models/cnn_vary_clouds/4101_LC08_027038_20131103_1/4101_LC08_027038_20131103_1_clouds_30./4101_LC08_027038_20131103_1_clouds_30.ckpt-1000
4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parameters from C:/Users/ipdavies/CPR/data/models/cnn_vary_clouds/4101_LC08_027038_20131103_1/4101_LC08_027038_20131103_1_clouds_40./4101_LC08_027038_20131103_1_clouds_40.ckpt-1000
4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parameters from C:/Users/ipdavies/CPR/data/models/cnn_vary_clouds/4101_LC08_027038_20131103_1/4101_LC08_027038_20131103_1_clouds_50./4101_LC08_027038_20131103_1_clouds_50.ckpt-1000
4101_LC08_027038_20131103_1
INFO:tensorflow:Restoring parame

ValueError: all the input array dimensions except for the concatenation axis must match exactly

### Plot gap metrics

In [None]:
columns = 2

# Rows needed for one panel per metrics table (ceiling division, at least one row)
n_panels = len(gapMetricsList)
n_rows = max(1, -(-n_panels // columns))

# Create list of axes
fig, axes = plt.subplots(nrows = n_rows, ncols = columns,
                         sharex = True, figsize = (10,5), squeeze = False)

axes_list = list(axes.flat)

# One panel per metrics table
for ax, metrics in zip(axes_list, gapMetricsList):
    metrics.plot(ax = ax, x='cloud_cover', y=['recall', 'precision','f1','accuracy'])
    # tick_params expects booleans; the string 'off' is truthy and would leave ticks on
    ax.tick_params(
        which='both',
        bottom=False,
        left=False,
        right=False,
        top=False
    )
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

# Hide any panel of the grid that received no data
for ax in axes_list[n_panels:]:
    ax.set_visible(False)

Why do the metrics not decline linearly as cloud cover increases?
 - Not enough flooded pixels in the training set

Test the metrics against ...
- The ratio of flooded pixels in the training set to flooded pixels in the whole image
 

### Plot filled gaps

First create binary correct/incorrect prediction image

In [None]:
model_path = path+'models/cnn_vary_clouds/'
img = '4337_LC08_026038_20160325_1'
pctls = [10,20,30,40,50,60,70,80,90]

flooded_imgs = []
prediction_imgs = []

# Reshape predicted values back into image band
with rasterio.open(path + 'images/'+ img + '/stack/stack.tif', 'r') as ds:
    shape = ds.read(1).shape # Shape of full original image

# All-NaN template with the shape of the original image
arr_empty = np.full(shape, np.nan)

for i, pctl in enumerate(pctls):
    data_vector, data_mean, data_std, data_ind = preprocessing_gaps(path, img, pctl)

    # data_ind is already a (rows, cols) pair of index arrays
    rows, cols = data_ind

    # Add predicted values to the retained pixel positions. Each image gets its own
    # copy of the template; otherwise every list entry would be the same array.
    prediction_img = arr_empty.copy()
    prediction_img[rows, cols] = np.ravel(predictions[i])
    prediction_imgs.append(prediction_img)

    # Add actual flood values to the same pixel positions
    flooded_img = arr_empty.copy()
    flooded_img[rows, cols] = data_vector[:,14]
    flooded_imgs.append(flooded_img)

In [None]:
# Per-percentile check: does the predicted raster match the observed flooding?
# Comparing the two lists with `==` is ambiguous for NumPy arrays, so compare
# image by image, treating NaN (pixels outside the gaps) as equal to NaN.
[np.allclose(flooded, predicted, equal_nan=True)
 for flooded, predicted in zip(flooded_imgs, prediction_imgs)]

In [None]:
plt.figure(figsize=(20,100))
columns = 2

# Interleave the rasters so each row shows actual flooding next to predicted flooding
images = []
for flooded_img, prediction_img in zip(flooded_imgs, prediction_imgs):
    images.append(flooded_img)
    images.append(prediction_img)

# Integer number of rows (ceiling division); plt.subplot does not accept floats
n_rows = max(1, -(-len(images) // columns))

for i, image in enumerate(images):
    plt.subplot(n_rows, columns, i + 1)
    plt.imshow(image)
    plt.colorbar() # must be called to actually draw the colorbar

### Visualizing correct/incorrect predictions

In [None]:
model_path = path+'models/cnn_vary_clouds/'
img = '4337_LC08_026038_20160325_1'
pctls = [10,20,30,40,50,60,70,80,90]

comparison_imgs = []
prediction_imgs = []

# Reshape predicted values back into image band
with rasterio.open(path + 'images/'+ img + '/stack/stack.tif', 'r') as ds:
    actual_flooded = ds.read(15) # Band 15 (1-based) = column 14 of data_vector
    shape = actual_flooded.shape # Shape of full original image

# All-NaN template with the shape of the original image
arr_empty = np.full(shape, np.nan)

for i, pctl in enumerate(pctls):
    data_vector, data_mean, data_std, data_ind = preprocessing_gaps(path, img, pctl)

    # data_ind is already a (rows, cols) pair of index arrays
    rows, cols = data_ind

    # Add predicted values to the retained pixel positions; copy the template so
    # every percentile gets its own raster instead of sharing one array
    prediction_img = arr_empty.copy()
    prediction_img[rows, cols] = np.ravel(predictions[i])
    prediction_imgs.append(prediction_img)

    # NOTE(review): the original condition was an all-ones array, which made the 0.5
    # fallback unreachable. Assumed intent: show 0.5 (gray) wherever there is no
    # prediction, and compare prediction vs. observation only inside the gaps.
    in_gap = ~np.isnan(prediction_img)
    red = np.where(in_gap, prediction_img, 0.5)
    blue = np.where(in_gap, actual_flooded, 0.5)
    green = np.minimum(red, blue)

    comparison_img = np.dstack((red, green, blue))
    comparison_imgs.append(comparison_img)

In [None]:
# Show the reconstructed prediction raster at index 8 of prediction_imgs
# (the 90th percentile entry of pctls)
last_prediction_img = prediction_imgs[8]
plt.imshow(last_prediction_img)

In [None]:
# Look at correlation between predicted values and features?
# Table with one row per retained pixel at the 90th percentile: the 14 feature
# bands, the observed flood label, and the model prediction stored at index 8.
column_names = [
    'aspect', 'curve', 'developed', 'distExtent', 'elevation',
    'forest', 'GSW_maxExtent', 'hand', 'other_landcover', 'planted',
    'slope', 'spi', 'twi', 'wetlands', 'flooded',
]
data_vector, data_mean, data_std, data_ind = preprocessing_gaps(path, img, 90)
df = pd.DataFrame(data=data_vector, columns=column_names)
df['preds'] = predictions[8]

In [None]:
# Pairwise correlation of all columns, rendered with a diverging color scale
corr = df.corr()
styled_corr = corr.style.background_gradient(cmap='coolwarm')
styled_corr

## Plot cloud masks 

In [None]:
pctls = [10,20,30,40,50,60,70,80,90]
path = 'C:/Users/ipdavies/CPR/data/'
model_path = path+'models/cnn_vary_clouds/'
img = '4337_LC08_026038_20160325_1'

# load the cloud field once; it does not depend on the percentile
cloudMaskDir = path+'clouds'
cloud_values = np.load(cloudMaskDir+'/'+img+'_clouds.npy')

# One boolean mask per percentile, collected for plotting
images = []

for i, pctl in enumerate(pctls):
    # True where the cloud value lies below the percentile threshold
    cloudMask = cloud_values < np.percentile(cloud_values, pctl)
    images.append(cloudMask)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# settings
nrows, ncols = 3, 3  # array of sub-plots
figsize = [12, 12]     # figure size, inches

# One title per cloud-cover percentile, in plotting order
titles = ['10%', '20%', '30%','40%','50%',
         '60%','70%','80%','90%']

# create figure (fig), and array of axes (ax)
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)

# Hide the axis decorations on every panel, including any that stay empty
for axi in ax.flat:
    axi.axis('off')

# plot one mask per sub-plot; zip stops at the shortest sequence, so fewer than
# nrows*ncols masks leaves the remaining panels blank instead of raising.
# The loop variable is not called `img` so the notebook-level image id is untouched.
for axi, mask_img, title in zip(ax.flat, images, titles):
    axi.imshow(mask_img)
    axi.set_title(title, fontdict = {'fontsize' : 18})

plt.subplots_adjust(wspace=0.1, hspace=0)
plt.show()