In [None]:
# Reload imported modules before each cell executes, so edits to local helpers
# such as scripts/dl_utils.py are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from tensorflow import keras
from tqdm.notebook import tqdm

from scripts import dl_utils

# Load Data

In [None]:
def compute_ndvi(pixel_arrays, nir_index=7, red_index=3):
    """Compute the normalized difference vegetation index (NDVI).

    NDVI is ``(NIR - Red) / (NIR + Red)``. By default index 7 along axis 1 is
    used as the NIR term and index 3 as the red term (presumably Sentinel-2
    B8 and B4 -- TODO confirm against the data export).

    Args:
        pixel_arrays: Array-like whose axis 1 indexes spectral bands, e.g.
            ``(samples, bands)`` or ``(samples, bands, steps)``.
        nir_index: Position of the near-infrared band along axis 1.
        red_index: Position of the red band along axis 1.

    Returns:
        Floating-point array of NDVI values with the band axis removed.
        Where ``NIR + Red`` is zero the result is NaN or +/-inf (NumPy emits
        a RuntimeWarning), exactly as a plain division would.
    """
    bands = np.asarray(pixel_arrays)
    nir = bands[:, nir_index]
    red = bands[:, red_index]
    # Integer reflectance (e.g. uint16) would wrap around in ``nir - red``
    # whenever red > nir, and could overflow in ``nir + red``; promote
    # non-float input to float64 before doing any arithmetic.
    if not np.issubdtype(nir.dtype, np.floating):
        nir = nir.astype(np.float64)
    if not np.issubdtype(red.dtype, np.floating):
        red = red.astype(np.float64)
    return (nir - red) / (nir + red)

def filter_ndvi(data, lower_bound=0, upper_bound=0.4):
    """Keep the samples whose NDVI stays strictly inside a range.

    Args:
        data: Array of samples with spectral bands on axis 1 and one further
            axis after it (presumably time steps -- TODO confirm).
        lower_bound: Exclusive lower NDVI limit.
        upper_bound: Exclusive upper NDVI limit.

    Returns:
        The subset of ``data`` for which every NDVI value along axis 1 of the
        NDVI array lies within ``(lower_bound, upper_bound)``. NaN NDVI values
        fail both comparisons, so such samples are dropped.

    Side effects:
        Prints the fraction of samples retained.
    """
    ndvi = compute_ndvi(data)
    in_range = (ndvi > lower_bound) & (ndvi < upper_bound)
    # A sample survives only if all of its NDVI values are in range.
    filtered_data = data[in_range.all(axis=1)]
    # Guard the progress message against an empty input (0 / 0).
    kept_fraction = len(filtered_data) / len(data) if len(data) else 0.0
    print(f"{kept_fraction:.1%} of samples within NDVI range")
    return filtered_data

def filter_bright(data, brightness_threshold=2500):
    """Drop samples whose mean value is at or above a brightness limit.

    Args:
        data: 3-D array of samples; the mean is taken over axes 1 and 2 of
            each sample.
        brightness_threshold: Exclusive upper limit on the per-sample mean.

    Returns:
        The subset of ``data`` whose per-sample mean is strictly below
        ``brightness_threshold``.

    Side effects:
        Prints the fraction of samples retained.
    """
    sample_brightness = np.mean(data, axis=(1, 2))
    filtered_data = data[sample_brightness < brightness_threshold]
    # Guard the progress message against an empty input (0 / 0).
    kept_fraction = len(filtered_data) / len(data) if len(data) else 0.0
    print(f"{kept_fraction:.1%} of data below brightness limit")
    return filtered_data

In [None]:
# Directory holding the pickled pixel arrays and their label files.
train_data_dir = '../../data/training_data/pixel_arrays_3mo-mosaics_2x-int/'

# The positive polygons come first, followed by one negative-validation file
# per land-cover class.
data_files = ['pixel_positive_polygons_2019-06-01_2021-06-01_pixel_arrays.pkl'] + [
    f'negative_validation_points_{land_cover}_2019-06-01_2021-06-01_pixel_arrays.pkl'
    for land_cover in ('farm', 'beach', 'river', 'city', 'forest', 'bare')
]

# Label files share the data-file stem:
# "..._pixel_arrays.pkl" -> "..._pixel_array_labels.pkl".
label_files = [f"{name.split('s.pkl')[0]}_labels.pkl" for name in data_files]

In [None]:
# Pool the samples and labels of every file into two parallel lists, then
# convert them to arrays and split by label.
# NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
pixel_arrays = []
labels = []
for array_name, label_name in tqdm(zip(data_files, label_files), total=len(data_files)):
    with open(os.path.join(train_data_dir, array_name), 'rb') as array_file:
        pixel_arrays.extend(pickle.load(array_file))
    with open(os.path.join(train_data_dir, label_name), 'rb') as label_file:
        labels.extend(pickle.load(label_file))

pixel_arrays = np.array(pixel_arrays)
labels = np.array(labels)
# Label 1 marks positive samples, label 0 negative ones.
negative_arrays = pixel_arrays[labels == 0]
positive_arrays = pixel_arrays[labels == 1]

print(f"Loaded {len(positive_arrays):,} positive pixel arrays and {len(negative_arrays):,} negative pixel arrays")

In [None]:
# Keep the negative samples grouped by land-cover class so each class can be
# scored separately later on.
neg_data = {}
for file_name in tqdm(data_files):
    if 'negative' not in file_name:
        continue
    # Names look like negative_validation_points_<class>_..., so the fourth
    # underscore-separated token is the land-cover class.
    land_class = file_name.split('_')[3]
    with open(os.path.join(train_data_dir, file_name), 'rb') as handle:
        neg_data[land_class] = np.array(pickle.load(handle))

In [None]:
# Clean the positive class: apply the NDVI range filter first, then drop the
# samples that exceed the brightness limit.
filtered_positive_arrays = filter_bright(filter_ndvi(positive_arrays))

In [None]:
# Evaluation set: filtered positives followed by all negatives.
pixel_vectors = np.concatenate([filtered_positive_arrays, negative_arrays])
# Matching float labels: 1 for the leading positive rows, 0 for the rest.
pixel_labels = np.zeros(len(filtered_positive_arrays) + len(negative_arrays))
pixel_labels[:len(filtered_positive_arrays)] = 1

In [None]:
# Report the size of each class in the evaluation set.
for class_name, class_arrays in (("Positive", filtered_positive_arrays), ("Negative", negative_arrays)):
    print(f"{len(class_arrays):,} {class_name} Samples")

# Test Single Network

In [None]:
# Single pixel classifier under test, loaded from the shared models directory.
model_name = 'spectrogram_v0.0.7_2021-05-20'
model = keras.models.load_model(f'../../models/{model_name}.h5')

In [None]:
# Scores strictly above this value are counted as positive ("Waste") predictions.
threshold = 0.6

In [None]:
# Normalize the inputs, append a trailing axis for the network, and keep
# column 1 of the output: the score that is compared against `threshold`.
preds = model.predict(np.expand_dims(dl_utils.normalize(pixel_vectors), axis=-1))[:,1]

In [None]:
# Per-class precision / recall / F1 after binarizing the scores at `threshold`.
print(metrics.classification_report(pixel_labels, preds > threshold, target_names=['Not Waste', 'Waste'], digits=4))

In [None]:
# Fraction of each ground-truth class that lands on the correct side of the
# threshold (the mean of a boolean array is hits / count).
print(f'Positive-Class Accuracy: {np.mean(preds[pixel_labels == 1] > threshold):.2%}')
print(f'Negative-Class Accuracy: {np.mean(preds[pixel_labels == 0] <= threshold):.2%}')

# Same negative-class accuracy, broken down by land-cover class.
for land_class, class_samples in neg_data.items():
    class_inputs = np.expand_dims(dl_utils.normalize(class_samples), axis=-1)
    neg_preds = model.predict(class_inputs)[:, 1]
    print(f'{land_class} Accuracy: {np.mean(neg_preds <= threshold):.2%}')


## Test RGB Network

In [None]:
# Evaluate the RGB-only classifier. Its inputs are the normalized pixel arrays
# restricted to band indices 3, 2, 1 along axis 1.
rgb_model = keras.models.load_model('../../models/spectrogram_vrgb_2021-10-14.h5')
rgb_preds = rgb_model.predict(np.expand_dims(dl_utils.normalize(pixel_vectors)[:,[3,2,1]], axis=-1))[:,1]
print(f'RGB Positive-Class Accuracy: {np.sum(rgb_preds[pixel_labels == 1] > threshold) / sum(pixel_labels == 1):.2%}')
print(f'RGB Negative-Class Accuracy: {np.sum(rgb_preds[pixel_labels == 0] <= threshold) / sum(pixel_labels == 0):.2%}')
print(metrics.classification_report(pixel_labels, rgb_preds > threshold, target_names=['Not Waste', 'Waste'], digits=4))
for land_class in neg_data.keys():
    # Normalize the full band stack first and select the bands afterwards,
    # exactly as is done for `pixel_vectors` above, so the per-class inputs
    # go through the same preprocessing as the overall evaluation.
    neg_preds = rgb_model.predict(np.expand_dims(dl_utils.normalize(neg_data[land_class])[:,[3,2,1]], axis=-1))[:,1]
    print(f'{land_class} Accuracy: {np.sum(neg_preds <= threshold) / len(neg_data[land_class]):.2%}')


## Test RGB IR Pixel Classifier

In [None]:
# Evaluate the RGB + IR classifier. Inputs are the normalized pixel arrays
# restricted to band indices 7, 3, 2, 1 (index 7 is the band compute_ndvi
# treats as near-infrared).
# NOTE(review): `rgb_model` / `rgb_preds` are rebound here, replacing the
# RGB-only model and scores from the previous section.
rgb_model = keras.models.load_model('../../models/spectrogram_vrgb_ir_2021-10-14.h5')
rgb_preds = rgb_model.predict(np.expand_dims(dl_utils.normalize(pixel_vectors)[:,[7,3,2,1]], axis=-1))[:,1]
# Labels say "RGB-IR" so this output can be told apart from the RGB-only section.
print(f'RGB-IR Positive-Class Accuracy: {np.sum(rgb_preds[pixel_labels == 1] > threshold) / sum(pixel_labels == 1):.2%}')
print(f'RGB-IR Negative-Class Accuracy: {np.sum(rgb_preds[pixel_labels == 0] <= threshold) / sum(pixel_labels == 0):.2%}')
print(metrics.classification_report(pixel_labels, rgb_preds > threshold, target_names=['Not Waste', 'Waste'], digits=4))
for land_class in neg_data.keys():
    neg_preds = rgb_model.predict(np.expand_dims(dl_utils.normalize(neg_data[land_class])[:,[7,3,2,1]], axis=-1))[:,1]
    print(f'{land_class} Accuracy: {np.sum(neg_preds <= threshold) / len(neg_data[land_class]):.2%}')


## Test Atemporal Pixel Classifier

In [None]:
# Atemporal classifier: each sample is split into its first two slices along
# axis 2 and every slice is scored as an independent input.
atemporal_model = keras.models.load_model('../../models/spectrogram_vno-spectrogram-3_2021-10-19.h5')
normed = dl_utils.normalize(pixel_vectors)
# Inputs are ordered as all slice-0 rows followed by all slice-1 rows, so the
# labels are repeated twice in that same order.
pixel_labels_atemporal = np.concatenate([pixel_labels] * 2)
slice_stack = np.concatenate((normed[:, :, 0], normed[:, :, 1]))
# NOTE(review): this rebinds `preds`, the name the single-network cells use.
preds = atemporal_model.predict(np.expand_dims(slice_stack, axis=-1))[:, 1]
positive_rows = pixel_labels_atemporal == 1
negative_rows = pixel_labels_atemporal == 0
print(f'Atemporal Positive-Class Accuracy: {np.mean(preds[positive_rows] > threshold):.2%}')
print(f'Atemporal Negative-Class Accuracy: {np.mean(preds[negative_rows] <= threshold):.2%}')
print(metrics.classification_report(pixel_labels_atemporal, preds > threshold, target_names=['Not Waste', 'Waste'], digits=4))
for land_class, class_samples in neg_data.items():
    normed = dl_utils.normalize(class_samples)
    slice_stack = np.concatenate((normed[:, :, 0], normed[:, :, 1]))
    neg_preds = atemporal_model.predict(np.expand_dims(slice_stack, axis=-1))[:, 1]
    # neg_preds holds two scores per sample, so its mean is hits / (2 * samples).
    print(f'{land_class} Accuracy: {np.mean(neg_preds <= threshold):.2%}')


## Test Ensemble of Pixel Classifiers

In [None]:
# Load the pixel-classifier ensemble. The result is iterated below and each
# element is called with .predict, so it is presumably a list of Keras models.
ensemble_name = 'v0.0.11_ensemble-8-25-21'
model_list = dl_utils.load_ensemble(f'../../models/{ensemble_name}')

In [None]:
# Score the evaluation set with every ensemble member; row i of
# `ensemble_preds` holds the column-1 scores of model i.
normed_input = dl_utils.unit_norm_pixel(pixel_vectors)
ensemble_preds = np.array([
    member.predict(np.expand_dims(normed_input, axis=-1))[:, 1]
    for member in model_list
])

In [None]:
# Ensemble decision: median of the member scores, binarized at `threshold`.
print(metrics.classification_report(pixel_labels, np.median(ensemble_preds, axis=0) > threshold, target_names=['Not Waste', 'Waste'], digits=4))

In [None]:
# Collapse the member scores with the median -- the same statistic used for
# the ensemble classification report and for the per-land-class loop below --
# so every number reported for the ensemble describes the same classifier.
median_preds = np.median(ensemble_preds, axis=0)
print(f'Positive-Class Accuracy: {np.sum(median_preds[pixel_labels == 1] > threshold) / np.sum(pixel_labels == 1):.2%}')
print(f'Negative-Class Accuracy: {np.sum(median_preds[pixel_labels == 0] <= threshold) / np.sum(pixel_labels == 0):.2%}')

for land_class in neg_data.keys():
    # Normalize once per land class instead of once per ensemble member.
    class_input = np.expand_dims(dl_utils.unit_norm_pixel(neg_data[land_class]), axis=-1)
    neg_preds = np.array([model.predict(class_input)[:,1] for model in model_list])
    neg_preds = np.median(neg_preds, axis=0)
    print(f'{land_class} Accuracy: {np.sum(neg_preds <= threshold) / len(neg_data[land_class]):.2%}')


## Test Random Forest

In [None]:
# Load the pickled random-forest model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../../models/random_forest.pkl', 'rb') as f:
    forest = pickle.load(f)

In [None]:
# Sanity check: shape of the flattened (samples, features) matrix, the same
# reshape that is applied to the forest's input below.
pixel_vectors.reshape(pixel_vectors.shape[0], -1).shape

In [None]:
# Evaluate the random forest on flattened, normalized pixel arrays (one flat
# feature vector per sample).
normed_input = dl_utils.normalize(pixel_vectors)
forest_preds = forest.predict(normed_input.reshape(normed_input.shape[0], -1))

# forest_preds is passed to classification_report without thresholding, so it
# presumably holds hard 0/1 labels; the threshold comparisons below then just
# test for label 1 vs. label 0 -- TODO confirm.
print(f'Random Forest Positive-Class Accuracy: {np.sum(forest_preds[pixel_labels == 1] > threshold) / sum(pixel_labels == 1):.2%}')
print(f'Random Forest Negative-Class Accuracy: {np.sum(forest_preds[pixel_labels == 0] <= threshold) / sum(pixel_labels == 0):.2%}')
print(metrics.classification_report(pixel_labels, forest_preds, target_names=['Not Waste', 'Waste'], digits=4))
for land_class in neg_data.keys():
    normed_input = dl_utils.normalize(neg_data[land_class])
    neg_preds = forest.predict(normed_input.reshape(normed_input.shape[0], -1))
    print(f'{land_class} Accuracy: {np.sum(neg_preds <= threshold) / len(neg_data[land_class]):.2%}')

# Test Patch Network

In [None]:
# Directory holding the pickled patch arrays and their label files.
# NOTE: this rebinds `train_data_dir`, which the pixel-array loading cells
# earlier in the notebook also read.
train_data_dir = '../../data/training_data/spectrogram_patches_3mo-mosaics_2x-int'

# One negative-validation file per land-cover class, then the positive polygons.
patch_data_files = [
    f'negative_validation_points_{land_cover}_2019-06-01_2021-06-01_period_3_interval_2_method_min_patch_arrays.pkl'
    for land_cover in ('farm', 'beach', 'river', 'city', 'forest', 'bare')
] + ['pixel_positive_polygons_2019-06-01_2021-06-01_period_3_interval_2_method_min_patch_arrays.pkl']

# Label files share the data-file stem:
# "..._patch_arrays.pkl" -> "..._patch_array_labels.pkl".
patch_label_files = [f"{name.split('s.pkl')[0]}_labels.pkl" for name in patch_data_files]

In [None]:
# Load every patch file together with its labels. Each record supplies two
# patches (patch[0] and patch[1]); both go through pad_patch(..., 28, 28) and
# unit_norm and are then concatenated along the last axis to form one input.
# NOTE(review): pickle.load can execute arbitrary code; only read trusted files.
patch_arrays = []
patch_labels = []
for data_name, label_name in tqdm(zip(patch_data_files, patch_label_files), total=len(patch_data_files)):
    with open(os.path.join(train_data_dir, data_name), 'rb') as f:
        # Loaded records get their own name so the file-name loop variable is
        # not overwritten with a different kind of object.
        patch_records = pickle.load(f)
    patch_arrays += [np.concatenate((dl_utils.unit_norm(dl_utils.pad_patch(patch[0], 28, 28)),
                                     dl_utils.unit_norm(dl_utils.pad_patch(patch[1], 28, 28))), axis=-1) for patch in patch_records]
    with open(os.path.join(train_data_dir, label_name), 'rb') as f:
        patch_labels += pickle.load(f)

patch_arrays = np.array(patch_arrays)
patch_labels = np.array(patch_labels)
positive_patch_arrays = patch_arrays[patch_labels == 1]
negative_patch_arrays = patch_arrays[patch_labels == 0]

# These are patches, not single-pixel arrays, so the summary says so.
print(f"Loaded {len(positive_patch_arrays):,} positive patch arrays and {len(negative_patch_arrays):,} negative patch arrays")

In [None]:
# Negative patches grouped by land-cover class, preprocessed the same way as
# the pooled patch arrays (pad, unit-normalize, concatenate the two patches).
neg_patch_data = {}
for file_name in tqdm(patch_data_files):
    if 'negative' not in file_name:
        continue
    # Fourth underscore-separated token of the file name is the land-cover class.
    land_class = file_name.split('_')[3]
    with open(os.path.join(train_data_dir, file_name), 'rb') as handle:
        records = pickle.load(handle)
    stacked = [
        np.concatenate(
            (dl_utils.unit_norm(dl_utils.pad_patch(record[0], 28, 28)),
             dl_utils.unit_norm(dl_utils.pad_patch(record[1], 28, 28))),
            axis=-1,
        )
        for record in records
    ]
    neg_patch_data[land_class] = np.array(stacked)

## Single Model

In [None]:
# Load the single patch classifier; the ELU layer class is supplied through
# custom_objects for deserializing the saved model.
patch_model = keras.models.load_model('../../models/model_10.h5', custom_objects={'ELU': keras.layers.ELU})

In [None]:
# Column 1 of the model output is the score compared against `threshold` below.
patch_preds = patch_model.predict(patch_arrays)[:,1]

In [None]:
# Decision threshold for the patch models (same value as the pixel section).
threshold = 0.6
# NOTE(review): the banner says "Model 3" but the model loaded above is
# model_10.h5 -- confirm which one is intended.
print("Single Model, Model 3")
print(metrics.classification_report(patch_labels, patch_preds > threshold, target_names=['Not Waste', 'Waste'], digits=4))

In [None]:
# Fraction of each ground-truth class on the correct side of the threshold
# (the mean of a boolean array is hits / count).
print(f'Positive-Class Accuracy: {np.mean(patch_preds[patch_labels == 1] > threshold):.2%}')
print(f'Negative-Class Accuracy: {np.mean(patch_preds[patch_labels == 0] <= threshold):.2%}')

# Negative-class accuracy per land-cover class.
for land_class, class_patches in neg_patch_data.items():
    neg_preds = patch_model.predict(class_patches)[:, 1]
    print(f'{land_class} Accuracy: {np.mean(neg_preds <= threshold):.2%}')


## Ensemble Model

In [None]:
# Load every model stored in the ensemble folder.
model_folder = 'patch_ensemble_v2.0'
model_dir = f'../../models/{model_folder}'
# os.listdir returns entries in arbitrary order, so sort them to make the
# model order reproducible, and skip hidden entries (e.g. .DS_Store,
# .ipynb_checkpoints) that load_model would otherwise be asked to read.
patch_model_list = [
    keras.models.load_model(os.path.join(model_dir, entry), custom_objects={'ELU': keras.layers.ELU})
    for entry in sorted(os.listdir(model_dir))
    if not entry.startswith('.')
]

In [None]:
def _one_hot_argmax(scores):
    """One-hot encode the argmax along the last axis, keeping ``scores``' dtype."""
    n_classes = scores.shape[-1]
    # np.argmax returns the first maximum, so ties resolve to the lowest class.
    return np.eye(n_classes, dtype=scores.dtype)[np.argmax(scores, axis=-1)]


def predict_patch_ensemble(X, models, return_disagreement=False):
    """Majority-vote prediction of an ensemble of classifiers.

    Every model's soft prediction is converted to a hard one-hot vote, the
    votes are summed across models, and the class with the most votes wins.

    Args:
        X: Input batch handed unchanged to each model's ``predict``.
        models: Iterable of objects exposing ``predict(X)`` that returns an
            array of shape ``(len(X), n_classes)``.
        return_disagreement: When True, also return the per-sample standard
            deviation of the models' class-0 votes as a disagreement measure.

    Returns:
        One-hot array of shape ``(len(X), n_classes)`` holding the winning
        class per sample, or a tuple ``(one_hot, disagreement)`` when
        ``return_disagreement`` is True.

    Raises:
        ValueError: If ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("predict_patch_ensemble requires at least one model")

    # Soft predictions, shape (n_models, n_samples, n_classes).
    soft_preds = np.stack([np.asarray(model.predict(X)) for model in models])

    # Hard one-hot vote of every model for every sample. The votes live in
    # their own array so that tallying them below never overwrites the
    # individual votes, which the disagreement measure still needs.
    votes = _one_hot_argmax(soft_preds)

    # Tally the votes per class and pick the class with the most votes.
    vote_totals = votes.sum(axis=0)
    winners = _one_hot_argmax(vote_totals)

    if return_disagreement:
        # Spread of the class-0 votes across models (0 means unanimous).
        return winners, np.std(votes[:, :, 0], axis=0)

    return winners

In [None]:
# Majority-vote predictions for every patch; each row is a one-hot class vote.
ensemble_patch_preds = predict_patch_ensemble(patch_arrays, patch_model_list)

In [None]:
# Sanity check of the prediction array's shape; one row per patch is expected.
ensemble_patch_preds.shape

In [None]:
# Report the real ensemble size instead of a hard-coded count.
print(f"Ensemble of {len(patch_model_list)} Models")
# Column 1 of the one-hot vote is 1 when the ensemble predicts "Waste".
print(metrics.classification_report(patch_labels, ensemble_patch_preds[:,1], target_names=['Not Waste', 'Waste'], digits=4))

# The votes are hard 0/1 values, so comparing against `threshold` simply
# separates "Waste" votes (1) from "Not Waste" votes (0).
print(f'Positive-Class Accuracy: {np.sum(ensemble_patch_preds[:,1][patch_labels == 1] > threshold) / np.sum(patch_labels == 1):.2%}')
print(f'Negative-Class Accuracy: {np.sum(ensemble_patch_preds[:,1][patch_labels == 0] <= threshold) / np.sum(patch_labels == 0):.2%}')

for land_class in neg_patch_data.keys():
    neg_preds = predict_patch_ensemble(neg_patch_data[land_class], patch_model_list)[:,1]
    print(f'{land_class} Accuracy: {np.sum(neg_preds <= threshold) / len(neg_patch_data[land_class]):.2%}')
