In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

In [2]:
# Sentinel 2 band descriptions: band id -> "name, central wavelength".
# Insertion order matters: create_img_stack_mean iterates this mapping, so the
# order of the keys is the channel order of every stacked image.
band_descriptions = dict((
    ('B1', 'Aerosols, 442nm'),
    ('B2', 'Blue, 492nm'),
    ('B3', 'Green, 559nm'),
    ('B4', 'Red, 665nm'),
    ('B5', 'Red Edge 1, 704nm'),
    ('B6', 'Red Edge 2, 739nm'),
    ('B7', 'Red Edge 3, 779nm'),
    ('B8', 'NIR, 833nm'),
    ('B8A', 'Red Edge 4, 864nm'),
    ('B9', 'Water Vapor, 943nm'),
    ('B11', 'SWIR 1, 1610nm'),
    ('B12', 'SWIR 2, 2186nm'),
))

def create_img_stack_mean(patch_history, bands=None, max_cloud_fraction=0.2):
    """Average each site's multispectral patches over time, ignoring masked pixels.

    For every site, the requested bands of each date are stacked into a
    ``(rows, cols, bands)`` image. Negative values are masked (they are what
    the printed "Cloud Fraction" counts) and the images are averaged across
    dates with the masked values left out.

    Parameters
    ----------
    patch_history : dict
        Nested mapping ``patch_history[date][site][band]`` -> 2-D pixel array.
        Sites are taken from the first date; every other date is indexed with
        the same site keys.
    bands : iterable of str, optional
        Band keys to stack, in channel order. Defaults to the keys of the
        module-level ``band_descriptions``.
    max_cloud_fraction : float, optional
        A site is kept only when the masked fraction of its mean image is
        strictly below this value.

    Returns
    -------
    numpy.ndarray
        The retained mean images stacked along axis 0; an empty array when
        no site is retained.
    """
    band_keys = list(band_descriptions if bands is None else bands)
    dates = list(patch_history)
    if not dates:
        # No acquisitions at all: nothing to average.
        return np.array([])

    mean_stack = []
    for site in patch_history[dates[0]]:
        img_stack = []
        for date in dates:
            patch = patch_history[date][site]
            # Skip dates on which any band came back empty for this site.
            if all(np.shape(patch[band])[0] > 0 for band in band_keys):
                cube = np.array([patch[band] for band in band_keys])
                # (bands, rows, cols) -> (rows, cols, bands)
                img_stack.append(np.moveaxis(cube, 0, -1))

        if not img_stack:
            # Every date was unusable for this site; there is nothing to average.
            continue

        masked_img = [np.ma.masked_where(img < 0, img) for img in img_stack]
        masked_mean = np.ma.mean(masked_img, axis=0)

        # getmaskarray always yields a full boolean array, even when no value
        # was masked and the mask collapsed to a scalar.
        num_cloudy_pixels = np.ma.getmaskarray(masked_mean).sum()
        cloud_fraction = num_cloudy_pixels / np.size(masked_mean)

        print("Cloud Fraction", cloud_fraction)
        if cloud_fraction < max_cloud_fraction:
            mean_stack.append(np.ma.getdata(masked_mean))

    return np.array(mean_stack)

In [4]:
data_dir = '../data/caroni_train'


def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(review): unpickling can execute arbitrary code, so only load files
    that come from a trusted source.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Raw patch histories for the Caroni sites. They keep their own names so this
# cell can be re-run: the processed arrays below no longer overwrite the input
# that create_img_stack_mean expects.
base_positive_raw = load_pickle(os.path.join(data_dir, '2d_mining_sites_2018-01-01_12.pkl'))
base_negative_raw = load_pickle(os.path.join(data_dir, '2d_negative_sites_2018-01-01_12.pkl'))

# The remaining pickles are used as image stacks as loaded
# (presumably already arrays of 84 px patches -- verify against the files).
bolivar_bootstrap_negatives = load_pickle('../data/84_px_bolivar_bootstrap_v1_negative.pkl')
bolivar_bootstrap_negatives_v2 = load_pickle('../data/84_px_bolivar_bootstrap_v2_negative.pkl')
bolivar_bootstrap_positives_v2 = load_pickle('../data/84_px_bolivar_bootstrap_v2_positive.pkl')
caroni_positive_sampling = load_pickle('../data/caroni_positive_84px_5x_polygon_sampling.pkl')
inception_positive = load_pickle('../data/84_px_bolivar_inception_bootstrap_v3_positive.pkl')
inception_negative = load_pickle('../data/84_px_bolivar_inception_bootstrap_v3_negative.pkl')

base_positive_mean = create_img_stack_mean(base_positive_raw)
base_negative_mean = create_img_stack_mean(base_negative_raw)

# Crop every image to the smallest side length found in EITHER set. Measuring
# only the positive images would leave the negative stack ragged whenever a
# negative image is smaller than all positive ones.
min_dimension = min(
    min(np.shape(img)[:2]) for img in (*base_positive_mean, *base_negative_mean)
)
base_positive = np.array([img[:min_dimension, :min_dimension, :] for img in base_positive_mean])
base_negative = np.array([img[:min_dimension, :min_dimension, :] for img in base_negative_mean])

FileNotFoundError: [Errno 2] No such file or directory: '../data/caroni_train/2d_mining_sites_2018-01-01_12.pkl'

In [None]:
def create_img_vectors(img_array):
    """Flatten every image in ``img_array`` into a 1-D vector.

    Returns an array holding one flattened vector per input image, in the
    same order as the input.
    """
    return np.array([img.flatten() for img in img_array])

def normalize(array, scale=3000.0):
    """Divide ``array`` by ``scale``.

    Parameters
    ----------
    array : numpy.ndarray
        Values to rescale.
    scale : float, optional
        Divisor. The default of 3000.0 is the value this notebook has always
        used (presumably a typical upper bound of the raw pixel values --
        TODO confirm against the data).

    Returns
    -------
    numpy.ndarray
        ``array / scale``; values are not clipped.
    """
    return array / scale

def stretch_histogram(array, min_val=0.1, max_val=0.75, gamma=1.2):
    """Contrast-stretch ``array`` to the range [0, 1] with a gamma curve.

    Values are clipped to ``[min_val, max_val]``, rescaled linearly so that
    ``min_val`` maps to 0 and ``max_val`` maps to 1, and then raised to the
    power ``gamma``.

    Parameters
    ----------
    array : array_like
        Input values.
    min_val, max_val : float, optional
        Lower and upper clipping bounds; ``max_val`` must exceed ``min_val``.
    gamma : float, optional
        Exponent applied to the rescaled values.

    Returns
    -------
    numpy.ndarray
        Stretched values in [0, 1].

    Raises
    ------
    ValueError
        If ``max_val`` is not greater than ``min_val``.
    """
    if max_val <= min_val:
        raise ValueError("max_val must be greater than min_val")
    clipped = np.clip(array, min_val, max_val)
    # Rescale first, then apply gamma. ``**`` binds tighter than ``/``, so
    # without this grouping the exponent would only touch the denominator and
    # the stretch would stay linear.
    scaled = (clipped - min_val) / (max_val - min_val)
    # The final clip only guards against floating-point overshoot.
    return np.clip(scaled ** gamma, 0, 1)

def create_rgb(img_array):
    """Build a display-ready RGB image for every multispectral image.

    Channels 3, 2 and 1 of each image are used as red, green and blue
    (B4, B3 and B2 when the channels follow the order of
    ``band_descriptions``). Each composite is normalized and then
    contrast-stretched between 0.1 and 1.

    Returns a list with one ``(rows, cols, 3)`` array per input image.
    """
    def to_rgb(img):
        channels = [img[:, :, index] for index in (3, 2, 1)]
        composite = np.stack(channels, axis=-1)
        return stretch_histogram(normalize(composite), 0.1, 1)

    return [to_rgb(img) for img in img_array]

In [None]:

def plot_similar_images(img_stack, title, random_state=0, output_dir='../figures'):
    """Plot all images of a stack in a square grid, ordered by similarity.

    The flattened, normalized images are embedded into one dimension with
    t-SNE and drawn in the order of that coordinate, so images that land
    close together in the embedding are neighbours in the grid. The figure
    is saved as ``<output_dir>/<title>.png`` and then shown.

    Parameters
    ----------
    img_stack : sequence of numpy.ndarray
        Multispectral images; each needs at least four channels on its last
        axis because ``create_rgb`` reads channels 1 to 3.
    title : str
        Figure title, also used as the output file name.
    random_state : int or None, optional
        Seed for t-SNE so that the ordering is reproducible between runs.
        Pass ``None`` for an unseeded run.
    output_dir : str, optional
        Directory for the saved figure; created when it does not exist.

    Raises
    ------
    ValueError
        If ``img_stack`` contains no images.
    """
    vectors = normalize(create_img_vectors(img_stack))
    n_img = len(vectors)
    if n_img == 0:
        raise ValueError("img_stack contains no images to plot")

    if n_img < 3:
        # An ordering of one or two images carries no information; keep the
        # input order instead of fitting an embedding.
        order = np.arange(n_img)
    else:
        # scikit-learn requires perplexity < n_samples, so the default of 30
        # is lowered for small stacks.
        perplexity = min(30.0, n_img - 1)
        reducer = TSNE(n_components=1, perplexity=perplexity, random_state=random_state)
        order = reducer.fit_transform(vectors)[:, 0].argsort()

    input_img = create_rgb(img_stack)
    num_img = int(np.ceil(np.sqrt(n_img)))

    fig = plt.figure(figsize=(num_img, num_img), dpi=100)
    for position, sort_index in enumerate(order, start=1):
        ax = fig.add_subplot(num_img, num_img, position)
        ax.imshow(input_img[sort_index])
        ax.axis('off')
    fig.tight_layout()
    fig.suptitle(title, size=num_img * 12 / 7, y=1.02)

    os.makedirs(output_dir, exist_ok=True)
    fig.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
    plt.show()

In [None]:
# Each entry pairs a figure title with its image stack. Both lists below are
# derived from this single table, so a title always sits at the same index as
# its stack.
labelled_stacks = [
    ('Caroni River Point-Sampled Positive Images', base_positive),
    ('Caroni River Point-Sampled Negative Images', base_negative),
    ('V1 Bootstrapped Negative Bolivar Samples', bolivar_bootstrap_negatives),
    ('V2 Bootstrapped Negative Bolivar Samples', bolivar_bootstrap_negatives_v2),
    ('V2 Bootstrapped Positive Bolivar Samples', bolivar_bootstrap_positives_v2),
    ('Caroni Positive Site Polygon Sampling', caroni_positive_sampling),
    ('Inception Net Bootstrapped Positive Bolivar Samples', inception_positive),
    ('Inception Net Bootstrapped Negative Bolivar Samples', inception_negative),
]

img_stacks = [stack for label, stack in labelled_stacks]

stack_names = [label for label, stack in labelled_stacks]

In [None]:
# Render and save one similarity-ordered grid per stack.
for name, stack in zip(stack_names, img_stacks):
    plot_similar_images(stack, name)

In [None]:
# Pool every stack along the first (sample) axis so a single embedding orders
# all images together. The stacks must agree on all remaining dimensions.
all_img = np.concatenate(img_stacks)  # axis defaults to 0
plot_similar_images(all_img, "All Images")

In [None]:
# Display the stack titles; a title's position in this list is the index of
# the matching stack in img_stacks.
stack_names

In [None]:
# Positions of the positive stacks in img_stacks (same order as stack_names).
positive_indices = (0, 4, 5, 6)
all_positive = np.concatenate([img_stacks[i] for i in positive_indices], axis=0)
plot_similar_images(all_positive, "All Positive Images")

In [None]:
# Positions of the negative stacks in img_stacks (same order as stack_names).
negative_indices = (1, 2, 3, 7)
all_negative = np.concatenate([img_stacks[i] for i in negative_indices], axis=0)
plot_similar_images(all_negative, "All Negative Images")