In [None]:
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize as sklearn_normalize
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm

np.random.seed(1)

In [None]:
def normalize(x):
    """Scale raw band values by a fixed divisor of 3000.

    Args:
        x: Array-like of numeric values (scalar, list, or numpy array).

    Returns:
        A numpy array (or numpy scalar) holding `x` divided element-wise by 3000.
    """
    scale = 3000
    values = np.array(x)
    return values / scale

## Load data and make predictions

In [None]:
train_data_dir = '../data/training_sites'

# Each `with` block closes its file on exit, so no explicit close() call is needed.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted data files.
with open(os.path.join(train_data_dir, "negative_data_toa.pkl"), 'rb') as file:
    x_negative = np.array(pickle.load(file))
y_negative = np.zeros(len(x_negative))  # label 0 for every negative sample

with open(os.path.join(train_data_dir, "positive_data_toa.pkl"), 'rb') as file:
    x_positive = np.array(pickle.load(file))

with open(os.path.join(train_data_dir, "positive_data_test_toa.pkl"), 'rb') as file:
    x_positive_test = np.array(pickle.load(file))

In [None]:
# Keep only positive samples whose NDVI lies strictly inside (lower_bound, upper_bound).
lower_bound = -0.1
upper_bound = 0.5

# Normalized difference of columns 7 and 3
# (presumably B8/NIR and B4/red, following the band order used elsewhere in this notebook -- confirm).
ndvi_train = (x_positive[:, 7] - x_positive[:, 3]) / (x_positive[:, 7] + x_positive[:, 3])
ndvi_test = (x_positive_test[:, 7] - x_positive_test[:, 3]) / (x_positive_test[:, 7] + x_positive_test[:, 3])

# Boolean masks over the rows of the (still unfiltered) positive arrays.
index_train = (ndvi_train > lower_bound) & (ndvi_train < upper_bound)
index_test = (ndvi_test > lower_bound) & (ndvi_test < upper_bound)

# Apply the masks; every remaining positive sample gets label 1.
x_positive, x_positive_test = x_positive[index_train], x_positive_test[index_test]
y_positive = np.ones(x_positive.shape[0])
y_positive_test = np.ones(x_positive_test.shape[0])

In [None]:
# Stack positives ahead of negatives, then shuffle samples and labels together
# with a fixed seed so the ordering is reproducible.
x, y = shuffle(
    np.concatenate([x_positive, x_negative]),
    np.concatenate([y_positive, y_negative]),
    random_state=42,
)

# Scale both sets with the notebook's normalize().
# Re-running this cell on its own rescales x_positive_test a second time.
x, x_positive_test = normalize(x), normalize(x_positive_test)

In [None]:
# 80/20 split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

print("Num Train:\t\t", len(x_train))
print("Num Test:\t\t", len(x_test))
print(f"Percent Negative Train:\t {100 * sum(y_train == 0.0) / len(y_train):.1f}")
print(f"Percent Negative Test:\t {100 * sum(y_test == 0.0) / len(y_test):.1f}")

# Append a trailing length-1 axis to every feature array.
x_train, x_test, x_positive_test = (
    np.expand_dims(samples, -1) for samples in (x_train, x_test, x_positive_test)
)

# One-hot encode the 0/1 labels; column 1 is the positive class.
num_classes = 2
y_train, y_test, y_positive_test = (
    keras.utils.to_categorical(targets, num_classes)
    for targets in (y_train, y_test, y_positive_test)
)

In [None]:
# Load a previously saved Keras model from disk; later cells call model.predict on it.
model = keras.models.load_model('../models/model_65_month_filtered_toa-12-09-2020.h5')

In [None]:
num_test_set_samples = 5000
# Row indices into x_test; np.random.choice draws with replacement by default.
random_test_indices = np.random.choice(len(x_test), size=num_test_set_samples)

# The positive hold-out samples come first, followed by the random x_test draw;
# labels are assembled in the same order so rows stay aligned with `inputs`.
labels = np.concatenate([y_positive_test, y_test[random_test_indices]])
inputs = np.concatenate([x_positive_test, x_test[random_test_indices]])
preds = model.predict(inputs)

In [None]:
# Build one 1x1 RGB "image" per sample from feature indices 3, 2 and 1
# (presumably B4/red, B3/green, B2/blue -- confirm against the pixel-vector band order).
rgb = []
for sample in inputs:
    sample_rgb = np.zeros((1,1,3))
    sample_rgb[0,0,:] = [sample[3], sample[2], sample[1]]
    rgb.append(sample_rgb)

# Convert to an array once, then min-max scale every swatch with the global extrema.
# Same values as rescaling element by element, without rebuilding the array three times.
rgb = np.array(rgb)
rgb_min = rgb.min()
rgb_max = rgb.max()
rgb = (rgb - rgb_min) / (rgb_max - rgb_min)

## Compile TP, FP, TN, FN classes

In [None]:
# Probability cutoff read by isolate_category: a sample counts as predicted-positive when pred[1] > THRESHOLD.
THRESHOLD = 0.90

In [None]:
def isolate_category(labels, preds, label_class, pred_class, threshold=None):
    """Build a boolean mask selecting one cell of the confusion matrix.

    Args:
        labels: Sequence of two-element label rows; element 1 is compared to `label_class`.
        preds: Sequence of two-element prediction rows; element 1 is compared to the cutoff.
        label_class: Required value of `label[1]` (e.g. 1 for positives).
        pred_class: Required thresholded prediction (1/True for predicted positive,
            0/False for predicted negative).
        threshold: Cutoff applied to `pred[1]` with a strict `>`. Defaults to the
            module-level THRESHOLD when omitted, which is the original behavior.

    Returns:
        Boolean numpy array with one entry per (label, pred) pair; True where both
        the label and the thresholded prediction match the requested classes.
    """
    if threshold is None:
        threshold = THRESHOLD
    class_index = [
        bool(label[1] == label_class and (pred[1] > threshold) == pred_class)
        for label, pred in zip(labels, preds)
    ]
    return np.array(class_index).astype('bool')

In [None]:
# One boolean mask per confusion-matrix cell, in the order TP, TN, FP, FN.
# Each pair is (label_class, pred_class) as expected by isolate_category.
tp_index, tn_index, fp_index, fn_index = (
    isolate_category(labels, preds, label_class, pred_class)
    for label_class, pred_class in ((1, 1), (0, 0), (0, 1), (1, 0))
)

# Rates are relative to the number of actual positives / actual negatives.
print(f"True Positive Rate: {100 * sum(tp_index) / sum(labels[:,1] == 1):.1f}%")
print(f"True Negative Rate: {100 * sum(tn_index) / sum(labels[:,1] == 0):.1f}%")

## Compute NDVI

In [None]:
# Normalized difference of feature indices 7 and 3 for every input row, with length-1 axes squeezed away.
ndvi = np.squeeze((inputs[:, 7] - inputs[:, 3]) / (inputs[:, 7] + inputs[:, 3]))

In [None]:
# Histogram of NDVI over every sample in `inputs`.
plt.figure(figsize=(5,3), dpi=100, facecolor=(1,1,1))
plt.hist(ndvi, bins=100)
plt.xlabel('NDVI Value')
plt.title('NDVI of All Test Samples')
plt.show()

# Empirical cumulative distribution of the same values.
plt.figure(figsize=(5,3), dpi=100, facecolor=(1,1,1))
plt.hist(ndvi, bins=300, cumulative=True, density=True)
plt.xlabel('NDVI Value')
plt.ylabel('Proportion below x-Value')  # fixed typo: was 'Proporion'
plt.title('Cumulative NDVI Dist - All Test Samples')
plt.show()

In [None]:
plt.figure(figsize=(5,3), dpi=100, facecolor=(1,1,1))

# plt.hist returns (counts, bin_edges, patches); the bin edges from the true-negative
# histogram are reused below so both histograms share identical bins.
edges, bins, patches = plt.hist(
    ndvi[tn_index],
    bins=100,
    color='r',
    alpha=0.5,
    label='True Negative',
)
plt.hist(ndvi[tp_index], bins=bins, alpha=0.5, label='True Positive')

plt.title('NDVI of all Test Samples')
plt.xlabel('NDVI Value')
plt.grid(alpha=0.1)
plt.legend()
plt.show()

In [None]:
def plot_pixel_grid(preds, colors, title):
    """Show up to 100 randomly chosen colour swatches in a 10x10 grid.

    Args:
        preds: Indexable of two-element rows; element 1 of each row is printed
            (two decimals) as the title of the corresponding swatch.
        colors: Indexable of image arrays aligned with `preds`; each is raised to the
            module-level `gamma` exponent before display.
        title: Figure-level title.

    Swatches are drawn without replacement, so no sample appears twice in a grid
    (the previous np.random.choice call sampled with replacement).
    Reads the global `gamma`, which must be defined before this function is called.
    """
    plt.figure(figsize=(20,20), dpi=50, facecolor=(1,1,1))
    num_samples = min(len(preds), 100)
    # A random permutation truncated to num_samples is a draw without replacement,
    # and also works when `preds` is empty.
    indices = np.random.permutation(len(preds))[:num_samples]
    for position, index in enumerate(indices, start=1):
        plt.subplot(10, 10, position)
        plt.imshow(colors[index] ** gamma)
        plt.axis('off')
        plt.title(f"{preds[index][1]:.2f}")
    plt.suptitle(title, size=40, y=1.01)
    plt.tight_layout()
    plt.show()

In [None]:
# Exponent applied to RGB values before display; read as a global by plot_pixel_grid and the site-visualization cells.
gamma = .85

In [None]:
ndvi_thresh = -0.1
index = ndvi < ndvi_thresh  # mask of samples below the threshold
print(f"{100 * sum(index) / len(index):.1f}% of test samples have an NDVI < {ndvi_thresh}")

# plot_pixel_grid titles each swatch with element 1 of its `preds` rows, so NDVI is
# duplicated into two columns to show up there.
plot_pixel_grid(
    preds=np.stack([ndvi, ndvi], axis=-1)[index],
    colors=rgb[index],
    title='Colors of Randomly Selected Pixels with NDVI < ' + str(ndvi_thresh),
)

In [None]:
ndvi_thresh = 0.5
index = ndvi > ndvi_thresh  # mask of samples above the threshold
print(f"{100 * sum(index) / len(index):.1f}% of test samples have an NDVI > {ndvi_thresh}")

# plot_pixel_grid titles each swatch with element 1 of its `preds` rows, so NDVI is
# duplicated into two columns to show up there.
plot_pixel_grid(
    preds=np.stack([ndvi, ndvi], axis=-1)[index],
    colors=rgb[index],
    title='Colors of Randomly Selected Pixels with NDVI > ' + str(ndvi_thresh),
)

In [None]:
lower_bound = -0.1
upper_bound = 0.5
# Mask of samples strictly inside (lower_bound, upper_bound).
index = np.logical_and(ndvi > lower_bound, ndvi < upper_bound)
# The message now matches the mask: the original text said "less than lower and greater upper",
# which describes the complement of what is selected.
print(f"{100 * sum(index) / len(index):.1f}% of test samples have an NDVI between {lower_bound:.1f} and {upper_bound:.1f}")
# NDVI is duplicated into two columns because plot_pixel_grid titles each swatch with element 1 of a row.
plot_pixel_grid(np.stack((ndvi, ndvi), axis=-1)[index], 
                rgb[index], 
                f'Colors of Randomly Selected Pixels with NDVI between {lower_bound:.1f} and {upper_bound:.1f}')

## Load test images and observe impact of filtering

In [None]:
DATA_DIR = '../data/training_sites'

# Each `with` block closes its file on exit, so no explicit close() call is needed.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted data files.
with open(os.path.join(DATA_DIR, "tpa_patch_histories_toa.pkl"), 'rb') as file:
    positive_histories = pickle.load(file)

with open(os.path.join(DATA_DIR, "negative_patch_histories_toa.pkl"), 'rb') as file:
    negative_histories = pickle.load(file)

In [None]:
# Visualize TPA Sites

lower_bound = 0
upper_bound = 0.4

# Sites are enumerated from the same month the pixels are read from. The original
# listed sites from '2019-07-01' but indexed '2019-06-01', which raises KeyError
# for any site present only in July.
display_date = '2019-06-01'

for site, data in positive_histories[display_date].items():
    # Gamma-adjusted RGB composite from bands B4/B3/B2.
    img = normalize(np.stack((data['B4'], data['B3'], data['B2']), axis=-1)) ** gamma

    # Black out every pixel whose NDVI falls outside [lower_bound, upper_bound].
    img_ndvi = (data['B8'] - data['B4']) / (data['B8'] + data['B4'])
    index = np.logical_or(img_ndvi < lower_bound, img_ndvi > upper_bound)
    filtered_img = img.copy()  # same values as recomputing the composite, without the extra work
    filtered_img[index] = 0

    plt.figure(figsize=(6,3), dpi=100, facecolor=(1,1,1))
    plt.subplot(1,2,1)
    plt.imshow(img)
    plt.axis('off')
    plt.title('Data with no NDVI Filtering')

    plt.subplot(1,2,2)
    plt.imshow(filtered_img)
    plt.axis('off')
    plt.title(f'Data with NDVI between {lower_bound} and {upper_bound}')
    plt.suptitle(site)
    plt.tight_layout()
    plt.show()

In [None]:
# Visualize non-TPA Sites
# Uses the lower_bound / upper_bound globals currently in scope.

# Sites are enumerated from the same month the pixels are read from. The original
# listed sites from '2019-07-01' but indexed '2019-06-01', which raises KeyError
# for any site present only in July.
display_date = '2019-06-01'

for site, data in negative_histories[2][display_date].items():
    # Gamma-adjusted RGB composite from bands B4/B3/B2.
    img = normalize(np.stack((data['B4'], data['B3'], data['B2']), axis=-1)) ** gamma

    # Black out every pixel whose NDVI falls outside [lower_bound, upper_bound].
    img_ndvi = (data['B8'] - data['B4']) / (data['B8'] + data['B4'])
    index = np.logical_or(img_ndvi < lower_bound, img_ndvi > upper_bound)
    filtered_img = img.copy()  # same values as recomputing the composite, without the extra work
    filtered_img[index] = 0

    plt.figure(figsize=(6,3), dpi=100, facecolor=(1,1,1))
    plt.subplot(1,2,1)
    plt.imshow(img)
    plt.axis('off')
    plt.title('Data with no NDVI Filtering')

    plt.subplot(1,2,2)
    plt.imshow(filtered_img)
    plt.axis('off')
    plt.title(f'Data with NDVI between {lower_bound} and {upper_bound}')
    plt.suptitle(site)
    plt.tight_layout()
    plt.show()

## Create a training dataset where NDVI is within a range

In [None]:
train_data_dir = '../data/training_sites'


def _load_pickled_array(filename):
    """Unpickle `filename` from train_data_dir and wrap the result in a numpy array."""
    with open(os.path.join(train_data_dir, filename), 'rb') as file:
        return np.array(pickle.load(file))


# Negatives = base negative set followed by the bootstrap set; all labelled 0.
x_negative = _load_pickled_array("negative_data_toa.pkl")
x_negative_bootstrap = _load_pickled_array("bootstrap_data_toa.pkl")
x_negative = np.concatenate([x_negative, x_negative_bootstrap])
y_negative = np.zeros(len(x_negative))

# Positive training samples and the separate positive test samples.
x_positive = _load_pickled_array("positive_data_toa.pkl")
x_positive_test = _load_pickled_array("positive_data_test_toa.pkl")

In [None]:
# Keep only positive samples whose NDVI lies strictly inside (lower_bound, upper_bound).
lower_bound = 0
upper_bound = 0.4

# Normalized difference of columns 7 and 3
# (presumably B8/NIR and B4/red, following the band order used elsewhere in this notebook -- confirm).
ndvi_train = (x_positive[:, 7] - x_positive[:, 3]) / (x_positive[:, 7] + x_positive[:, 3])
ndvi_test = (x_positive_test[:, 7] - x_positive_test[:, 3]) / (x_positive_test[:, 7] + x_positive_test[:, 3])

# Boolean masks over the rows of the (still unfiltered) positive arrays.
index_train = (ndvi_train > lower_bound) & (ndvi_train < upper_bound)
index_test = (ndvi_test > lower_bound) & (ndvi_test < upper_bound)

# Apply the masks; every remaining positive sample gets label 1.
x_positive, x_positive_test = x_positive[index_train], x_positive_test[index_test]
y_positive = np.ones(x_positive.shape[0])
y_positive_test = np.ones(x_positive_test.shape[0])

In [None]:
# Stack positives ahead of negatives, then shuffle samples and labels together
# with a fixed seed so the ordering is reproducible.
x, y = shuffle(
    np.concatenate([x_positive, x_negative]),
    np.concatenate([y_positive, y_negative]),
    random_state=42,
)

# Scale both sets with the notebook's normalize().
# Re-running this cell on its own rescales x_positive_test a second time.
x, x_positive_test = normalize(x), normalize(x_positive_test)

### Filter training data by NDVI

In [None]:
plt.figure(figsize=(5,3), dpi=100, facecolor=(1,1,1))

# plt.hist returns (counts, bin_edges, patches); the bin edges of the full distribution
# are reused so the clipped subset is drawn on identical bins.
edges, bins, patches = plt.hist(
    ndvi_train,
    bins=300,
    label='Original NDVI',
)
plt.hist(
    ndvi_train[index_train],
    bins=bins,
    color='r',
    label='Clipped NDVI',
)

plt.title('Distribution of NDVI in the Original Train Set')
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(5,3), dpi=100, facecolor=(1,1,1))
# `x_filtered` was never defined in this notebook (NameError on a fresh kernel).
# The NDVI of the positive samples that survived the filter is ndvi_train[index_train];
# NDVI is a ratio, so it is unaffected by the later normalize() scaling.
# NOTE(review): if the intent was to include the negative samples too, plot NDVI of `x` instead.
plt.hist(ndvi_train[index_train], bins=300)
plt.title('Distribution of NDVI in Filtered Train Set')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

print("Num Train:\t\t", len(x_train))
print("Num Test:\t\t", len(x_test))
print(f"Percent Negative Train:\t {100 * sum(y_train == 0.0) / len(y_train):.1f}")
print(f"Percent Negative Test:\t {100 * sum(y_test == 0.0) / len(y_test):.1f}")

# Append a trailing length-1 axis to every feature array.
x_train, x_test, x_positive_test = (
    np.expand_dims(samples, -1) for samples in (x_train, x_test, x_positive_test)
)

# One-hot encode the 0/1 labels; column 1 is the positive class.
num_classes = 2
y_train, y_test, y_positive_test = (
    keras.utils.to_categorical(targets, num_classes)
    for targets in (y_train, y_test, y_positive_test)
)

## Train a model on the filtered data

In [None]:
# Per-sample input shape (everything after the batch axis).
input_shape = x_train.shape[1:]

# Two 1-D convolutions over the feature axis, then three 32-unit dense layers,
# dropout, and a softmax over `num_classes` outputs.
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv1D(16, kernel_size=3, activation="relu"),
        layers.Conv1D(32, kernel_size=3, activation="relu"),
        layers.Flatten(),
        *[layers.Dense(32, activation="relu") for _ in range(3)],
        layers.Dropout(0.2),
        layers.Dense(num_classes, activation="softmax"),
    ]
)
model.summary()

In [None]:
# Metric names now match the metric classes. The original registered Recall under the
# name 'precision' and Precision under 'recall', so every logged value and every
# history['precision'] / history['recall'] series was swapped.
model.compile(loss="binary_crossentropy", 
              optimizer="adam", 
              metrics=[keras.metrics.Precision(thresholds=0.9, name='precision'), 
                       keras.metrics.Recall(thresholds=0.9, name='recall'),
                       keras.metrics.AUC(curve='PR', name='auc'),
                       "accuracy"],
              #loss_weights = sum(y_train) / len(y_train),
              #weighted_metrics = ['accuracy']
             )

In [None]:
from sklearn.utils import class_weight

# 'balanced' class weights computed from the positive-class column of the one-hot labels.
# The result is ordered like `classes`, i.e. negative (0) first, positive (1) second.
balanced_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(y_train),
    y=y_train[:, 1],
)
negative_weight, positive_weight = balanced_weights
#positive_weight /= 3

print(f"Negative Weight: {negative_weight:.2f}")
print(f"Positive Weight: {positive_weight:.2f}")

In [None]:
batch_size = 256
epochs = 30

# Dataset summary; column 1 of the one-hot labels is the positive class.
print("Num Train:\t\t", len(x_train))
print("Num Test:\t\t", len(x_test))
print(f"Percent Negative Train:\t {100 * sum(y_train[:,1] == 0.0) / len(y_train):.1f}")
print(f"Percent Negative Test:\t {100 * sum(y_test[:,1] == 0.0) / len(y_test):.1f}")

# The held-out split doubles as validation data; class weighting is left disabled.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    #validation_split=0.1,
    #class_weight = {0: negative_weight, 1: positive_weight}
)

In [None]:
plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))

# Per-epoch training and validation accuracy recorded by the last model.fit call.
for metric_key, line_kwargs in (
    ('accuracy', {'label': 'Train Acc'}),
    ('val_accuracy', {'c': 'r', 'label': 'Val Acc'}),
):
    plt.plot(model.history.history[metric_key], **line_kwargs)

plt.title('Network Train and Val Accuracy - Weighted')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
# Both series are training-set metrics; no validation or AUC curve is drawn here.
plt.plot(model.history.history['precision'], label='precision')
plt.plot(model.history.history['recall'], c='r', label='recall')
plt.xlabel('Epoch')
plt.ylabel('Value')
plt.legend()
# Title corrected: the original said 'Train and Val AUC'.
plt.title('Train Precision and Recall')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Binarize the positive-class probability at 0.6 and compare against the true positive column.
predicted_positive = model.predict(x_test)[:, 1] > 0.6
print(classification_report(y_test[:, 1], predicted_positive, target_names=['No TPA', 'TPA']))

In [None]:
from sklearn.metrics import classification_report

# Binarize the positive-class probability at 0.9 and compare against the true positive column.
predicted_positive = model.predict(x_test)[:, 1] > 0.9
print(classification_report(y_test[:, 1], predicted_positive, target_names=['No Dump', 'Dump']))

In [None]:
# Persist the model currently bound to `model` in HDF5 (.h5) format.
model.save('../models/65_mo_tpa_bootstrap_toa-12-20-2020.h5')

## Train Neural Network Ensemble

In [None]:
num_models = 3
ensemble = []

batch_size = 128
epochs = 30

input_shape = np.shape(x_train[0])

print("Num Train:\t\t", len(x_train))
print("Num Test:\t\t", len(x_test))
print(f"Percent Negative Train:\t {100 * sum(y_train[:,1] == 0.0) / len(y_train):.1f}")
print(f"Percent Negative Test:\t {100 * sum(y_test[:,1] == 0.0) / len(y_test):.1f}")

# Train `num_models` identically configured networks on the same data and keep each in `ensemble`.
for i in range(num_models):
    model = keras.Sequential([
                keras.Input(shape=input_shape),
                layers.Conv1D(16, kernel_size=(3), activation="relu"),
                #layers.MaxPooling2D(pool_size=(2)),
                layers.Conv1D(32, kernel_size=(3), activation="relu"),
                #layers.MaxPooling2D(pool_size=(2)),
                layers.Flatten(),
                layers.Dense(32, activation="relu"),
                layers.Dense(32, activation="relu"),
                layers.Dense(32, activation="relu"),
                layers.Dropout(0.2),
                layers.Dense(num_classes, activation="softmax")])
    
    # Metric names now match the metric classes; the original registered Recall as
    # 'precision' and Precision as 'recall', which swapped the logged values.
    model.compile(loss="binary_crossentropy", 
                  optimizer="adam", 
                  metrics=[keras.metrics.Precision(thresholds=0.9, name='precision'), 
                           keras.metrics.Recall(thresholds=0.9, name='recall'),
                           keras.metrics.AUC(curve='PR', name='auc'),
                           "accuracy"])

    model.fit(x_train, 
              y_train, 
              batch_size=batch_size, 
              epochs=epochs, 
              validation_data = (x_test, y_test),
              verbose = 2
             )
    
    # Accuracy curves for this ensemble member.
    plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
    plt.plot(model.history.history['accuracy'], label='Train Acc')
    plt.plot(model.history.history['val_accuracy'], c='r', label='Val Acc')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Network Train and Val Accuracy')
    plt.show()
    
    ensemble.append(model)

In [None]:
output_dir = '../models/01-09-2020_ensemble'
# makedirs creates any missing parent directory and is a no-op when the directory exists.
# os.mkdir raised FileNotFoundError when '../models' was absent.
os.makedirs(output_dir, exist_ok=True)

# One .h5 file per ensemble member, numbered by position in `ensemble`.
for index, model in enumerate(ensemble):
    model.save(os.path.join(output_dir, 'model_' + str(index) + '.h5'))

## Train Decision Tree

In [None]:
# Sentinel 2 band descriptions
# Maps band key -> human-readable name and centre wavelength. The key order matters:
# other cells iterate this dict to label feature importances and to assemble pixel vectors.
band_descriptions = {
    'B1': 'Aerosols, 442nm',
    'B2': 'Blue, 492nm',
    'B3': 'Green, 559nm',
    'B4': 'Red, 665nm',
    'B5': 'Red Edge 1, 704nm',
    'B6': 'Red Edge 2, 739nm',
    'B7': 'Red Edge 3, 779nm',
    'B8': 'NIR, 833nm',
    'B8A': 'Red Edge 4, 864nm',
    'B9': 'Water Vapor, 943nm',
    'B11': 'SWIR 1, 1610nm',
    'B12': 'SWIR 2, 2186nm'
}

# Centre wavelengths in nm, listed in the same order as the keys of band_descriptions.
band_wavelengths = [442, 492, 559, 665, 704, 739, 779, 833, 864, 943, 1610, 2186]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import tree


# Fit a single size-limited decision tree on the flattened feature vectors;
# the target is the positive-class column of the one-hot labels.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=512, max_depth=64)
                                  #, class_weight='balanced')
clf = clf.fit(np.squeeze(x_train), y_train[:, 1])

print("Accuracy:", clf.score(np.squeeze(x_test), y_test[:, 1]))
print("Feature Importances:")
# Importances are paired with band keys by position.
for band, importance in zip(band_descriptions.keys(), clf.feature_importances_):
    print(f"{band}: {importance:.3f}")

tick_positions = range(len(band_descriptions))
plt.bar(tick_positions, clf.feature_importances_)
plt.xticks(tick_positions, band_descriptions.keys())
plt.show()

In [None]:
# Feature importances from 20 trees that differ only in random_state.
# `clf` is left bound to the last tree fitted (random_state=19).
mean_importances = []
train_features = np.squeeze(x_train)
train_targets = y_train[:, 1]

for i in tqdm(range(20)):
    clf = tree.DecisionTreeClassifier(
        max_leaf_nodes=512,
        max_depth=64,
        random_state=i,
        #class_weight='balanced',
    )
    clf = clf.fit(train_features, train_targets)
    mean_importances.append(clf.feature_importances_)

In [None]:
plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))

# Average each band's importance over all fitted trees.
tick_positions = range(len(band_descriptions))
averaged_importances = np.mean(mean_importances, axis=0)
plt.bar(tick_positions, averaged_importances)
plt.xticks(tick_positions, band_descriptions.keys())

plt.title('Decision Tree Feature Importances')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Evaluate the tree currently bound to `clf` on the held-out split.
tree_predictions = clf.predict(np.squeeze(x_test))
print(classification_report(y_test[:, 1], tree_predictions, target_names=['No Dump', 'Dump']))
#plt.imshow(confusion_matrix(y_test[:,1], clf.predict(np.squeeze(x_test))))


In [None]:
def make_predictions_tree(site_name, threshold):
    """Classify every month of a site patch with the decision tree and plot medians.

    Loads `<DATA_DIR>/<site_name>_0.03_patch.pkl`, runs the module-level `clf` on each
    month's pixel vectors, then shows and saves a three-panel figure (median RGB,
    median prediction, median thresholded prediction) and saves a threshold-sweep
    animation as an .mp4 file.

    Relies on these module-level names being defined before the call:
    DATA_DIR, clf, gamma, normalize, get_pixel_vectors, animation, tqdm.

    Args:
        site_name: Prefix of the pickled patch file.
        threshold: Cutoff applied to each prediction image with a strict `>`.

    Returns:
        (rgb_median, preds_median, threshold_median): per-pixel medians across months.
    """
    with open(os.path.join(DATA_DIR, site_name + "_0.03_patch.pkl"), 'rb') as file:
        test_image = pickle.load(file)

    rgb_stack = []
    preds_stack = []
    threshold_stack = []

    for month in tqdm(list(test_image.keys())):
        test_pixel_vectors, width, height = get_pixel_vectors(test_image, month)
        # Months for which get_pixel_vectors reports zero width are skipped.
        if width > 0:
            test_pixel_vectors = normalize(test_pixel_vectors)

            # Feature indices 3, 2, 1 are used as the R, G, B channels.
            r = np.reshape(np.array(test_pixel_vectors)[:,3], (width, height))
            g = np.reshape(np.array(test_pixel_vectors)[:,2], (width, height))
            b = np.reshape(np.array(test_pixel_vectors)[:,1], (width, height))
            rgb = np.moveaxis(np.stack((r,g,b)), 0, -1)
            rgb_stack.append(rgb)

            preds = clf.predict(test_pixel_vectors)
            preds_img = np.reshape(preds, (width, height))
            preds_stack.append(preds_img)

            thresh_img = preds_img > threshold
            threshold_stack.append(thresh_img)
    
    output_dir = './figures/tree_classification/12-08-2020'
    # makedirs creates the missing parent folders of this nested path; os.mkdir raised
    # FileNotFoundError whenever './figures/tree_classification' did not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    rgb_median = np.median(rgb_stack, axis=0)
    preds_median = np.median(preds_stack, axis=0)
    threshold_median = np.median(threshold_stack, axis=0)
    
    plt.figure(dpi=150, facecolor=(1,1,1), figsize=(15,5))

    plt.subplot(1,3,1)
    plt.imshow(rgb_median ** gamma)
    plt.title(f'{site_name} Median', size=8)
    plt.axis('off')

    plt.subplot(1,3,2)
    plt.imshow(preds_median, vmin=0, vmax=1, cmap='seismic')
    plt.title('Classification Median', size=8)
    plt.axis('off')

    plt.subplot(1,3,3)
    plt.imshow(threshold_median, vmin=threshold, vmax=1, cmap='gray')
    plt.title(f"Positive Pixels Median: Threshold {threshold}", size=8)
    plt.axis('off')

    title = f"{site_name} Test Set - Median Values - Tree Classification - Threshold {threshold}"
    plt.suptitle(title, y=1.01)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
    plt.show()
    
    
    # Threshold-sweep animation: pixels whose median prediction is not above the
    # cutoff are blacked out, and the remainder is rescaled by its maximum.
    fig, ax = plt.subplots(dpi=200, facecolor=(1,1,1), figsize=(4,4))
    ax.set_axis_off()
    clipped_img = np.moveaxis([channel * (preds_median > 0) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
    img = plt.imshow(clipped_img / (clipped_img.max()))
    ax.set_title('Threshold 0')
    plt.tight_layout()

    def animate(i):
        """Redraw the masked RGB image for frame i (cutoff i / 100)."""
        i /= 100
        clipped_img = np.moveaxis([channel * (preds_median > i) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
        img.set_data(clipped_img / (clipped_img.max()))
        #img.set_data((preds_stack > i) * 1)
        ax.set_title(site_name + ' Threshold ' + str(i))
        return img,

    ani = animation.FuncAnimation(fig, animate, frames=100, interval=60, blit=True, repeat_delay=500)
    ani.save(os.path.join(output_dir, site_name + 'test_set_threshold_visualization' + '.mp4'))
    plt.close()
    
    return rgb_median, preds_median, threshold_median
    

In [None]:
# make_predictions_tree needs `animation` and `get_pixel_vectors`, which the notebook
# only imports in a later cell. Importing them here lets this cell run on a fresh
# kernel (Restart & Run All); repeating the imports later is harmless.
import sys

from matplotlib import animation

sys.path.append('../')
from scripts.get_s2_data_ee import get_pixel_vectors

DATA_DIR = '../data'
site_names = ['bare_earth_4', 'city_7', 'tpa_babandem', 'tpa_bangli', 'tpa_biaung', 'tpa_mandung', 'tpa_jimbaran']
threshold = 0.90

# After the loop the three names hold the results for the last site only.
for site_name in site_names:
    rgb_median, preds_median, threshold_median = make_predictions_tree(site_name, threshold)

## Visualize Network Predictions

In [None]:
import sys

from matplotlib import animation

sys.path.append('../')
from scripts.get_s2_data_ee import get_pixel_vectors

In [None]:
def make_predictions(site_name, threshold):
    """Classify every month of a site patch with the neural network and plot medians.

    Loads `<DATA_DIR>/<site_name>_0.03_patch.pkl`, runs the module-level `model` on each
    month's pixel vectors, and shows a three-panel figure (median RGB, median
    positive-class probability, median thresholded prediction). A threshold-sweep
    animation is built but not saved (the save calls are commented out).

    Relies on these module-level names being defined before the call:
    DATA_DIR, model, normalize, get_pixel_vectors, animation, tqdm.

    Args:
        site_name: Prefix of the pickled patch file.
        threshold: Cutoff applied to the positive-class probability with a strict `>`.

    Returns:
        (rgb_stack, preds_stack, threshold_stack): lists with one image per kept month.
    """
    with open(os.path.join(DATA_DIR, site_name + "_0.03_patch.pkl"), 'rb') as file:
        test_image = pickle.load(file)

    rgb_stack = []
    preds_stack = []
    threshold_stack = []

    for month in tqdm(list(test_image.keys())):
        test_pixel_vectors, width, height = get_pixel_vectors(test_image, month)
        # Months for which get_pixel_vectors reports zero width are skipped.
        if width > 0:
            test_pixel_vectors = normalize(test_pixel_vectors)

            # Feature indices 3, 2, 1 are used as the R, G, B channels.
            r = np.reshape(np.array(test_pixel_vectors)[:,3], (width, height))
            g = np.reshape(np.array(test_pixel_vectors)[:,2], (width, height))
            b = np.reshape(np.array(test_pixel_vectors)[:,1], (width, height))
            rgb = np.moveaxis(np.stack((r,g,b)), 0, -1)
            rgb_stack.append(rgb)

            # Keep only column 1 of the model output (positive-class probability).
            preds = model.predict(np.expand_dims(test_pixel_vectors, axis=-1))
            preds_img = np.reshape(preds, (width, height, 2))[:,:,1]
            preds_stack.append(preds_img)

            thresh_img = preds_img > threshold
            threshold_stack.append(thresh_img)
    
    output_dir = './figures/neural_network/12-09-2020'
    # makedirs creates the missing parent folders of this nested path; os.mkdir raised
    # FileNotFoundError whenever './figures/neural_network' did not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    rgb_median = np.median(rgb_stack, axis=0)
    # Median across months, as the variable name, the 'Classification Median' title,
    # the 0..1 colour scale and the 0..1 threshold sweep below all assume. The original
    # used np.sum here, which produces values far outside [0, 1].
    preds_median = np.median(preds_stack, axis=0)
    threshold_median = np.median(threshold_stack, axis=0)
    
    plt.figure(dpi=150, facecolor=(1,1,1), figsize=(15,5))
    gamma = .85
    plt.subplot(1,3,1)
    plt.imshow(rgb_median ** gamma)
    plt.title(f'{site_name} Median', size=8)
    plt.axis('off')

    plt.subplot(1,3,2)
    plt.imshow(preds_median, vmin=0, vmax=1, cmap='seismic')
    plt.title('Classification Median', size=8)
    plt.axis('off')

    plt.subplot(1,3,3)
    plt.imshow(threshold_median, vmin=threshold, vmax=1, cmap='gray')
    plt.title(f"Positive Pixels Median: Threshold {threshold}", size=8)
    plt.axis('off')

    title = f"{site_name} Test Set - Median Values - Neural Network Classification - Threshold {threshold}"
    plt.suptitle(title, y=1.01)
    plt.tight_layout()
    #plt.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
    plt.show()
    
    
    # Threshold-sweep animation: pixels whose median prediction is below the cutoff
    # are blacked out, and the remainder is rescaled by its maximum.
    fig, ax = plt.subplots(dpi=200, facecolor=(1,1,1), figsize=(4,4))
    ax.set_axis_off()
    clipped_img = np.moveaxis([channel * (preds_median >= 0) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
    img = plt.imshow(clipped_img / (clipped_img.max()))
    ax.set_title('Threshold 0')
    plt.tight_layout()

    def animate(i):
        """Redraw the masked RGB image for frame i (cutoff i / 100)."""
        i /= 100
        clipped_img = np.moveaxis([channel * (preds_median >= i) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
        img.set_data(clipped_img / (clipped_img.max()))
        #img.set_data((preds_stack > i) * 1)
        ax.set_title(site_name + ' Threshold ' + str(i))
        return img,

    ani = animation.FuncAnimation(fig, animate, frames=100, interval=60, blit=True, repeat_delay=500)
    #ani.save(os.path.join(output_dir, site_name + 'test_set_threshold_visualization' + '.mp4'))
    plt.close()
    
    return rgb_stack, preds_stack, threshold_stack
    

In [None]:
def make_predictions_comparison(site_name, threshold):
    """Classify every month of a site patch and compare mean vs. median predictions.

    Loads `<DATA_DIR>/<site_name>_0.03_patch.pkl`, runs the module-level `model` on each
    month's pixel vectors, and shows a three-panel figure: median RGB, per-pixel mean
    of the positive-class probability, and per-pixel median of it. A threshold-sweep
    animation is built but not saved (the save call is commented out).

    Relies on these module-level names being defined before the call:
    DATA_DIR, model, normalize, get_pixel_vectors, animation, tqdm.

    Args:
        site_name: Prefix of the pickled patch file.
        threshold: Cutoff applied to the positive-class probability with a strict `>`
            when building the returned threshold_stack.

    Returns:
        (rgb_stack, preds_stack, threshold_stack): lists with one image per kept month
        (per-month stacks, not medians).
    """
    with open(os.path.join(DATA_DIR, site_name + "_0.03_patch.pkl"), 'rb') as file:
        test_image = pickle.load(file)

    rgb_stack = []
    preds_stack = []
    threshold_stack = []

    for month in tqdm(list(test_image.keys())):
        test_pixel_vectors, width, height = get_pixel_vectors(test_image, month)
        # Months for which get_pixel_vectors reports zero width are skipped.
        if width > 0:
            test_pixel_vectors = normalize(test_pixel_vectors)

            # Feature indices 3, 2, 1 are used as the R, G, B channels.
            r = np.reshape(np.array(test_pixel_vectors)[:,3], (width, height))
            g = np.reshape(np.array(test_pixel_vectors)[:,2], (width, height))
            b = np.reshape(np.array(test_pixel_vectors)[:,1], (width, height))
            rgb = np.moveaxis(np.stack((r,g,b)), 0, -1)
            rgb_stack.append(rgb)

            # Keep only column 1 of the model output (positive-class probability).
            preds = model.predict(np.expand_dims(test_pixel_vectors, axis=-1))
            preds_img = np.reshape(preds, (width, height, 2))[:,:,1]
            preds_stack.append(preds_img)

            thresh_img = preds_img > threshold
            threshold_stack.append(thresh_img)
    
    output_dir = './figures/neural_network/12-09-2020'
    # makedirs creates the missing parent folders of this nested path; os.mkdir raised
    # FileNotFoundError whenever './figures/neural_network' did not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    rgb_median = np.median(rgb_stack, axis=0)
    preds_median = np.median(preds_stack, axis=0)
    
    plt.figure(dpi=150, facecolor=(1,1,1), figsize=(15,5))
    gamma = .85
    plt.subplot(1,3,1)
    plt.imshow(rgb_median ** gamma)
    plt.title(f'{site_name} Median', size=8)
    plt.axis('off')

    plt.subplot(1,3,2)
    plt.imshow(np.mean(preds_stack, axis=0), vmin=.6, vmax=1, cmap='seismic')
    plt.axis('off')
    #plt.colorbar()
    plt.title('mean')
    plt.subplot(1,3,3)
    plt.imshow(preds_median, vmin=.6, vmax=1, cmap='seismic')
    plt.axis('off')
    #plt.colorbar()
    plt.title('median')
    plt.tight_layout()
    plt.show()
    
    
    # Threshold-sweep animation: pixels whose median prediction is below the cutoff
    # are blacked out, and the remainder is rescaled by its maximum.
    fig, ax = plt.subplots(dpi=200, facecolor=(1,1,1), figsize=(4,4))
    ax.set_axis_off()
    clipped_img = np.moveaxis([channel * (preds_median >= 0) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
    img = plt.imshow(clipped_img / (clipped_img.max()))
    ax.set_title('Threshold 0')
    plt.tight_layout()

    def animate(i):
        """Redraw the masked RGB image for frame i (cutoff i / 100)."""
        i /= 100
        clipped_img = np.moveaxis([channel * (preds_median >= i) for channel in np.moveaxis(rgb_median, -1, 0)], 0, -1)
        img.set_data(clipped_img / (clipped_img.max()))
        #img.set_data((preds_stack > i) * 1)
        ax.set_title(site_name + ' Threshold ' + str(i))
        return img,

    ani = animation.FuncAnimation(fig, animate, frames=100, interval=60, blit=True, repeat_delay=500)
    #ani.save(os.path.join(output_dir, site_name + 'test_set_threshold_visualization' + '.mp4'))
    plt.close()
    
    return rgb_stack, preds_stack, threshold_stack
    

In [None]:
DATA_DIR = '../data'
threshold = 0.90
model = keras.models.load_model('../models/model_filtered-12-07-2020.h5')
site_names = [
    'bare_earth_4',
    'tpa_babandem',
    'city_7',
    'tpa_bangli',
    'tpa_biaung',
    'tpa_mandung',
    'tpa_jimbaran',
]

# make_predictions_comparison returns per-month stacks (lists of images), so despite
# their names these three variables hold stacks -- for the last site only once the loop ends.
for site_name in site_names:
    rgb_median, preds_median, threshold_median = make_predictions_comparison(
        site_name=site_name,
        threshold=threshold,
    )

In [None]:
plt.figure(figsize=(12,6), dpi=150)

# Panel 1: per-pixel median of the RGB stack across months.
plt.subplot(1,3,1)
plt.imshow(np.median(rgb_median, axis=0))
plt.axis('off')

# Panels 2 and 3: the prediction stack reduced across months with mean, then median.
for panel, (reduce_fn, panel_title) in enumerate(((np.mean, 'mean'), (np.median, 'median')), start=2):
    plt.subplot(1, 3, panel)
    plt.imshow(reduce_fn(preds_median, axis=0), vmin=.6, vmax=1, cmap='seismic')
    plt.axis('off')
    #plt.colorbar()
    plt.title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# rgb_median / preds_median hold per-month stacks at this point (they come from
# make_predictions_comparison), which imshow cannot draw directly. Reduce over the
# month axis first.
# NOTE(review): the 6..9 colour range only makes sense for a sum over months, so the
# predictions are summed here -- confirm this matches the intended figure.
plt.figure(figsize=(12,6), dpi=150)
plt.subplot(1,3,1)
plt.imshow(np.median(rgb_median, axis=0))
plt.title('RGB Median', size=8)
plt.axis('off')
plt.subplot(1,3,2)
plt.imshow(np.sum(preds_median, axis=0), vmin=6, vmax=9, cmap='seismic')
plt.title('Classification Sum', size=8)
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# np.max works whether preds_median is an ndarray or the list of per-month images
# returned by make_predictions_comparison (a list has no .max() method).
np.max(preds_median)

In [None]:
def predict_time_series(patch_histories, site_name, threshold, model):
    """Run `model` on every date of one site's patch history.

    Args:
        patch_histories: Mapping date -> site name -> band key -> 2-D band array.
        site_name: Site whose patches are classified.
        threshold: Unused; kept so existing calls remain valid.
        model: Object whose `predict` returns two values per pixel vector; element 1
            is kept as the prediction.

    Returns:
        (rgb_stack, preds_stack): numpy arrays with one normalized RGB image and one
        prediction image per non-empty date, in the iteration order of `patch_histories`.

    The function now reads from its `patch_histories` argument. The original ignored
    it and used the globals `tpa_histories` and `dates`, so it only worked when those
    happened to match the argument. Reads the module-level `band_descriptions`
    (feature order) and `normalize`.
    """
    rgb_stack = []
    preds_stack = []

    for date in patch_histories:
        site_bands = patch_histories[date][site_name]
        rgb = np.stack((site_bands['B4'], site_bands['B3'], site_bands['B2']), axis=-1)
        # Empty patches yield neither an RGB frame nor a prediction, keeping both stacks aligned.
        if rgb.size == 0:
            continue
        rgb_stack.append(normalize(rgb))

        width, height = rgb.shape[:2]
        # One feature vector per pixel in row-major order, with features ordered like
        # band_descriptions -- equivalent to the nested per-pixel loops it replaces.
        pixel_vectors = np.stack([site_bands[band] for band in band_descriptions], axis=-1)
        pixel_vectors = normalize(pixel_vectors.reshape(width * height, -1))

        preds = model.predict(np.expand_dims(pixel_vectors, axis=-1))
        preds_img = np.reshape(preds, (width, height, 2))[:,:,1]
        preds_stack.append(preds_img)

    return np.array(rgb_stack), np.array(preds_stack)


## TPA Time Series Predictions - TOA

In [None]:
# Load dataset
# Relative project root, consistent with the '../data' and '../models' paths used in the
# rest of the notebook (the original hard-coded one user's absolute home directory).
base_path = '..'
# The `with` block closes the file on exit, so no explicit close() call is needed.
with open(os.path.join(base_path, 'data', 'training_sites', 'tpa_patch_histories_toa.pkl'), 'rb') as file:
    tpa_histories = pickle.load(file)
dates = list(tpa_histories.keys())
sites = list(tpa_histories[dates[0]].keys())
bands = list(tpa_histories[dates[0]][sites[0]].keys())
print(len(dates))

In [None]:
# Rebind the global `model` to the network stored at this path; the cells below pass it to predict_time_series.
model = keras.models.load_model('../models/model_65_month_filtered_toa-12-09-2020.h5')

In [None]:
(rgb_stack, preds_stack) = predict_time_series(tpa_histories, sites[6], 0.9, model)
for rgb, pred in zip(rgb_stack, preds_stack):
    # Frames whose median RGB value is not positive are skipped.
    if np.median(rgb) > 0:
        plt.figure(dpi=150)
        plt.subplot(1,3,1)
        plt.imshow(rgb)
        plt.axis('off')
        plt.title('RGB')

        plt.subplot(1,3,2)
        plt.imshow(pred, vmin=0, vmax=1, cmap='seismic')
        plt.axis('off')
        plt.title('Prediction')

        plt.subplot(1,3,3)
        # Add the prediction to the red channel of a copy, so rgb_stack is left untouched
        # and re-running the cell does not accumulate the overlay. Clipping to [0, 1]
        # matches what imshow displays for float RGB, without the out-of-range warning.
        composite = np.copy(rgb)
        composite[:,:,0] += pred
        plt.imshow(np.clip(composite, 0, 1))
        plt.axis('off')
        plt.title('Composite')

        plt.show()

## TPA Time Series Prediction - SR

In [None]:
# Load dataset
# Relative project root, consistent with the '../data' and '../models' paths used in the
# rest of the notebook (the original hard-coded one user's absolute home directory).
base_path = '..'
# The `with` block closes the file on exit, so no explicit close() call is needed.
with open(os.path.join(base_path, 'data', 'training_sites', 'tpa_patch_histories.pkl'), 'rb') as file:
    tpa_histories = pickle.load(file)
dates = list(tpa_histories.keys())
sites = list(tpa_histories[dates[0]].keys())
bands = list(tpa_histories[dates[0]][sites[0]].keys())

In [None]:
# Rebind the global `model` to the network stored at this path; the cells below pass it to predict_time_series.
model = keras.models.load_model('../models/model_filtered-12-07-2020.h5')

In [None]:
(rgb_stack, preds_stack) = predict_time_series(tpa_histories, sites[6], 0.9, model)
for rgb, pred in zip(rgb_stack, preds_stack):
    # Frames whose median RGB value is not positive are skipped.
    if np.median(rgb) > 0:
        plt.figure(dpi=150)
        plt.subplot(1,3,1)
        plt.imshow(rgb)
        plt.axis('off')
        plt.title('RGB')

        plt.subplot(1,3,2)
        plt.imshow(pred, vmin=0, vmax=1, cmap='seismic')
        plt.axis('off')
        plt.title('Prediction')

        plt.subplot(1,3,3)
        # Add the prediction to the red channel of a copy, so rgb_stack is left untouched
        # and re-running the cell does not accumulate the overlay. Clipping to [0, 1]
        # matches what imshow displays for float RGB, without the out-of-range warning.
        composite = np.copy(rgb)
        composite[:,:,0] += pred
        plt.imshow(np.clip(composite, 0, 1))
        plt.axis('off')
        plt.title('Composite')

        plt.show()

In [None]:
# Frames are written under figures/videos; create the folder up front so ani.save
# does not fail on a fresh checkout.
video_dir = os.path.join('figures', 'videos')
os.makedirs(video_dir, exist_ok=True)

for site in sites:
    # NOTE(review): 0.75 is passed as the threshold here, but the binary panel below
    # cuts at 0.9 -- confirm which value is intended.
    (rgb_stack, preds_stack) = predict_time_series(tpa_histories, site, 0.75, model)
    fig, ax = plt.subplots(dpi=100, facecolor=(1,1,1))
    ax.set_axis_off()
    ax.set_title('TOA ' + site)
    images = []
    for rgb, pred in zip(rgb_stack, preds_stack):
        # Frames whose median RGB value is not positive are skipped.
        if np.median(rgb) > 0:
            # Red-channel overlay of the raw prediction on a copy of the RGB frame.
            overlay = np.copy(rgb)
            overlay[:,:,0] += pred
            divider = np.ones((pred.shape[0], 1, 3))
            # Binary mask at 0.9, built as a new array so preds_stack is not modified.
            binary_pred = (pred >= 0.9).astype(pred.dtype)
            binary_pred = np.stack((binary_pred, binary_pred, binary_pred), axis=-1)
            # Layout per frame: RGB | overlay | binary mask, separated by white columns.
            combination = np.concatenate((rgb, divider, overlay, divider, binary_pred), axis=1)
            im = ax.imshow(combination, animated=True)
            
            images.append([im])

    # No usable frames: nothing to animate for this site.
    if not images:
        plt.close(fig)
        continue

    fig.tight_layout()
    ani = animation.ArtistAnimation(fig, images, interval=200, blit=True, repeat_delay=500)
    ani.save(os.path.join(video_dir, site + ' TOA 12-09 preds.mp4'))
    # Release the figure so one open figure per site does not accumulate in memory.
    plt.close(fig)