In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import rasterio
import sys
sys.path.append('../scripts')
from training import training3
from prediction import prediction
from evaluation import evaluation
from CPR.utils import preprocessing, timer
import time
from pathlib import Path

import os

# Root of the CPR project. Defaults to the original author's checkout; set the
# CPR_PROJECT_PATH environment variable to run this notebook on another machine
# without editing the cell.
project_path = Path(os.environ.get('CPR_PROJECT_PATH', 'C:/Users/ipdavies/CPR'))

# Input data (image stacks, cloud masks) is read from <project>/data
data_path = project_path / 'data'

# Version numbers
print('Python Version:', sys.version)

Python Version: 3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]


In [2]:
# ----------------------------------------------------------------------------------------------------------------------
# Experiment configuration: performance metrics vs. image metadata

# Run identity
batch = 'v2'
uncertainty = False
remove_perm = True

# Images to process and the cloud-cover percentiles to evaluate
img_list = ['4444_LC08_044033_20170222_3']
pctls = list(range(10, 100, 10))

# Feature names; the order matters because features are looked up by position
feat_list_new = [
    'GSW_maxExtent', 'GSW_distExtent', 'GSW_perm', 'aspect',
    'curve', 'developed', 'elevation', 'forest',
    'hand', 'other_landcover', 'planted', 'slope',
    'spi', 'twi', 'wetlands', 'flooded',
]

# Model hyper-parameters
BATCH_SIZE = 8192
EPOCHS = 100
DROPOUT_RATE = 0.3  # Dropout rate for MCD
HOLDOUT = 0.3  # Validation data size

# Keyword bundles built from the settings above
model_params = dict(
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    use_multiprocessing=True,
)

viz_params = dict(
    img_list=img_list,
    pctls=pctls,
    data_path=data_path,
    uncertainty=uncertainty,
    batch=batch,
    feat_list_new=feat_list_new,
)

In [3]:
# Build the test matrix for one image at one cloud-cover percentile
img = img_list[0]
pctl = 60
print('Preprocessing', img, pctl, '% cloud cover')
data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, gaps=True)
feat_list_keep = [feat_list_new[i] for i in feat_keep]  # Removed if feat was deleted in preprocessing

# Look the column up before the `if`: the GSW_perm column is dropped below regardless of
# remove_perm, so the index must exist even when the flag is False (previously a NameError,
# or a stale index left over from an earlier run).
# NOTE(review): if the column drop was meant to happen only when remove_perm is True,
# move the np.delete call inside the `if` instead - confirm against the training code.
perm_index = feat_list_keep.index('GSW_perm')
if remove_perm:
    flood_index = feat_list_keep.index('flooded')
    data_vector_test[data_vector_test[:, perm_index] == 1, flood_index] = 0  # Remove flood water that is perm water
data_vector_test = np.delete(data_vector_test, perm_index, axis=1)  # Remove GSW_perm column

# Every column except the last is a predictor; the last column is the target
data_shape = data_vector_test.shape
X_test, y_test = data_vector_test[:, 0:data_shape[1]-1], data_vector_test[:, data_shape[1]-1]

print('Predicting for {} at {}% cloud cover'.format(img, pctl))

#         # There is a problem loading keras models: https://github.com/keras-team/keras/issues/10417
#         # Workaround is to use load_model: https://github.com/keras-team/keras-tuner/issues/75
#         start_time = time.time()
#         model_path = data_path / batch / 'models' / img / '{}'.format(img + '_clouds_' + str(pctl) + '.h5')
#         trained_model = tf.keras.models.load_model(model_path)
#         preds = trained_model.predict(X_test, batch_size=model_params['batch_size'], use_multiprocessing=True)
#         preds = np.argmax(preds, axis=1)  # Display most probable value



Preprocessing 4444_LC08_044033_20170222_3 60 % cloud cover
Predicting for 4444_LC08_044033_20170222_3 at 60% cloud cover


  data_vector[:, 0:shape[1]-1] = (data_vector[:, 0:shape[1]-1] - data_mean) / data_std


In [4]:
# Manual walk-through of preprocessing, part 1: per-feature standard deviation of the masked image
normalize = True
from CPR.utils import tif_stacker, cloud_generator

# Where the cloud field and the stacked feature raster for `img` live
cloudmask_dir = data_path / 'clouds'
img_path = data_path / 'images' / img
stack_path = img_path / 'stack' / 'stack.tif'

# Threshold the cloud field at its pctl-th percentile; True marks pixels that get blanked out below
cloud_field = np.load(cloudmask_dir / '{0}'.format(img+'_clouds.npy'))
cloudmask = cloud_field < np.percentile(cloud_field, pctl)

# Only the read needs the file handle
with rasterio.open(str(stack_path), 'r') as ds:
    data = ds.read()

# rasterio returns (bands, rows, cols); move bands last so each pixel carries its feature vector
data = data.transpose((1, -1, 0))

# Turn masked pixels, the -999999 sentinel and -inf into NaN
data[cloudmask] = -999999
data[data == -999999] = np.nan
data[np.isneginf(data)] = np.nan

# Flatten to one row per pixel and keep only rows without any NaN
n_rows, n_cols, n_bands = data.shape
data_vector = data.reshape([n_rows * n_cols, n_bands])
complete_rows = ~np.isnan(data_vector).any(axis=1)
data_vector = data_vector[complete_rows]

# Std of every column except the last; a zero here flags a constant feature (consumed by the next cell)
data_std = data_vector[:, :-1].std(0)

In [5]:
# Manual walk-through of preprocessing, part 2: drop constant features, mask, flatten, normalize.
# Relies on stack_path, cloudmask, data_std and normalize from the previous cell.
# NOTE: data_std is overwritten at the end of this cell, so re-running this cell alone reuses the
# post-removal std; re-run the previous cell first.

# Reload an unmasked copy of the image (the previous cell modified `data` in place)
with rasterio.open(str(stack_path), 'r') as ds:
    data = ds.read()
data = data.transpose((1, -1, 0))  # rasterio returns (bands, rows, cols); move bands to the last axis

# Just adding this next line in to correctly remove the deleted feat from feat_list_new during training
# Should remove once I've decided whether to train with or without perm water
feat_keep = list(range(data.shape[2]))

# Drop EVERY feature whose std was zero (previously only the first one was removed, leaving
# any further constant feature in place to divide by zero during normalization).
zero_feats = np.flatnonzero(data_std == 0)
if zero_feats.size:
    dropped = set(zero_feats.tolist())
    data = np.delete(data, zero_feats, axis=2)
    feat_keep = [f for f in feat_keep if f not in dropped]

# Convert -999999 and -Inf to Nans
data[cloudmask] = -999999
data[data == -999999] = np.nan
data[np.isneginf(data)] = np.nan

# Get indices of non-nan pixels. These are the indices of the original image array.
# A pixel counts as valid only if NO band is NaN, which is the same rule used to drop rows from
# data_vector below, so data_ind and data_vector always have matching lengths and order
# (previously only band 1 was checked).
data_ind = np.where(~np.isnan(data).any(axis=2))

# Reshape into a 2D array, where rows = pixels and cols = features
data_vector = data.reshape([data.shape[0] * data.shape[1], data.shape[2]])
shape = data_vector.shape

# Remove NaNs
data_vector = data_vector[~np.isnan(data_vector).any(axis=1)]

# Statistics of every column except the last
data_mean = data_vector[:, 0:shape[1] - 1].mean(0)
data_std = data_vector[:, 0:shape[1] - 1].std(0)

# Normalize data: every column except the last is standardized.
# NOTE(review): the original comment said "only the non-binary variables", but no column other
# than the last is excluded here - confirm which is intended.
if normalize:
    data_vector[:, 0:shape[1]-1] = (data_vector[:, 0:shape[1]-1] - data_mean) / data_std

In [7]:
# Re-run the library preprocessing to inspect its output.
# NOTE: this rebinds data_vector_test and feat_keep, replacing the values produced by earlier cells.
data_test, data_vector_test, data_ind_test, feat_keep = preprocessing(data_path, img, pctl, gaps=True)
# The original line ended in a dangling attribute access (`data_vector_test.`), a SyntaxError.
# NOTE(review): showing the shape is a guess at the intended inspection - adjust if something else was meant.
data_vector_test.shape

In [None]:
# Manual walk-through of the full preprocessing sequence, including the `gaps` switch.
normalize = True
# `gaps` was read below but never defined in this cell (NameError on a fresh kernel).
# NOTE(review): True matches the gaps=True passed to preprocessing() elsewhere in this notebook - confirm.
gaps = True
from CPR.utils import tif_stacker, cloud_generator

img_path = data_path / 'images' / img
stack_path = img_path / 'stack' / 'stack.tif'

# load cloudmasks
cloudmask_dir = data_path / 'clouds'

# Keep the raw cloud field under its own name. Previously `cloudmask` was overwritten with a
# boolean mask and then thresholded a second time further down, so the second mask was computed
# from the percentile of a boolean array rather than from the cloud field.
cloud_values = np.load(cloudmask_dir / '{0}'.format(img+'_clouds.npy'))
cloud_threshold = np.percentile(cloud_values, pctl)

# --- Pass 1: find features that are constant once masked pixels are removed ---
# Check for any features that have all zeros/same value and remove. This only matters with the training data
cloudmask = cloud_values < cloud_threshold
# Get local image
with rasterio.open(str(stack_path), 'r') as ds:
    data = ds.read()
data = data.transpose((1, -1, 0))  # rasterio returns (bands, rows, cols); move bands to the last axis
data[cloudmask] = -999999
data[data == -999999] = np.nan
data[np.isneginf(data)] = np.nan
data_vector = data.reshape([data.shape[0] * data.shape[1], data.shape[2]])
data_vector = data_vector[~np.isnan(data_vector).any(axis=1)]
data_std = data_vector[:, 0:data_vector.shape[1] - 1].std(0)

# --- Pass 2: reload an unmasked copy and build the final arrays ---
with rasterio.open(str(stack_path), 'r') as ds:
    data = ds.read()
data = data.transpose((1, -1, 0))

# Just adding this next line in to correctly remove the deleted feat from feat_list_new during training
# Should remove once I've decided whether to train with or without perm water
feat_keep = list(range(data.shape[2]))

# Drop EVERY zero-variance feature (previously only the first one was removed)
zero_feats = np.flatnonzero(data_std == 0)
if zero_feats.size:
    dropped = set(zero_feats.tolist())
    data = np.delete(data, zero_feats, axis=2)
    feat_keep = [f for f in feat_keep if f not in dropped]

# Choose which side of the threshold gets masked out (set to NaN and removed below)
if gaps:
    cloudmask = cloud_values < cloud_threshold
else:
    cloudmask = cloud_values > cloud_threshold

# Convert -999999 and -Inf to Nans
data[cloudmask] = -999999
data[data == -999999] = np.nan
data[np.isneginf(data)] = np.nan

# Get indices of non-nan pixels. These are the indices of the original image array.
# A pixel counts as valid only if NO band is NaN, which is the same rule used to drop rows from
# data_vector below, so data_ind and data_vector stay aligned (previously only band 1 was checked).
data_ind = np.where(~np.isnan(data).any(axis=2))

# Reshape into a 2D array, where rows = pixels and cols = features
data_vector = data.reshape([data.shape[0] * data.shape[1], data.shape[2]])
shape = data_vector.shape

# Remove NaNs
data_vector = data_vector[~np.isnan(data_vector).any(axis=1)]

# Statistics of every column except the last
data_mean = data_vector[:, 0:shape[1] - 1].mean(0)
data_std = data_vector[:, 0:shape[1] - 1].std(0)

# Normalize data: every column except the last is standardized.
# NOTE(review): the original comment said "only the non-binary variables", but no column other
# than the last is excluded here - confirm which is intended.
if normalize:
    data_vector[:, 0:shape[1]-1] = (data_vector[:, 0:shape[1]-1] - data_mean) / data_std