In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import descarteslabs as dl
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import pandas as pd
import seaborn as sns
import sys
from tqdm.notebook import tqdm

sys.path.append('../')
from scripts.viz_tools import stretch_histogram, normalize, band_descriptions
from scripts.dl_utils import download_patch, rect_from_point

In [None]:
# Define Parameters for data extraction
# Date range (inclusive start/end strings) passed to download_patch for every polygon
START_DATE = '2020-01-01'
END_DATE = '2020-03-31'

# Directory receiving the pickled pixel vectors, labels and preview grids
OUTPUT_DIR = '../data/training_data/pixel_vectors'
# makedirs also creates missing parent directories and, with exist_ok=True,
# is a no-op when the directory is already there (no exists()/mkdir() race).
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
def plot_grid(img_stack, title=None, save=False):
    """Plot every image of a stack in a square grid of subplots.

    Parameters
    ----------
    img_stack : sequence of arrays
        Images indexed as img[row, col, band]. Bands 3, 2, 1 (in that order)
        are shown as the three colour channels -- presumably R, G, B; confirm
        against the band order returned by download_patch.
    title : str, optional
        Figure title. Also used as the output file name when `save` is True.
    save : bool
        If True, write the figure to OUTPUT_DIR/<title>.png and close it;
        otherwise display it.

    Raises
    ------
    ValueError
        If `save` is True but no `title` was given, since the file name is
        built from the title.
    """
    # Fail early (before any figure is created) instead of crashing on
    # `title + '.png'` after all the plotting work has been done.
    if save and title is None:
        raise ValueError("plot_grid(save=True) requires a title to build the output file name")

    # Side length of the smallest square grid that fits all images
    num_img = int(np.ceil(np.sqrt(len(img_stack))))
    fig = plt.figure(figsize=(12, 12), dpi=150, facecolor=(0,0,0))
    for index, img in enumerate(tqdm(img_stack)):
        plt.subplot(num_img, num_img, index + 1)
        # Reverse-slice bands 3..1, normalize, then clamp to [0, 1] for imshow
        plt.imshow(np.clip(normalize(img[:,:,3:0:-1]), 0, 1))
        plt.axis('off')
    if title is not None:
        plt.suptitle(title, color='w', size=14)
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(OUTPUT_DIR, title + '.png'), bbox_inches='tight')
        # Close this specific figure so saved grids do not accumulate in memory
        plt.close(fig)
    else:
        plt.show()

def save_pixel_vectors(data, name, label_class, images=None):
    """Pickle pixel vectors and their labels, and save a preview grid of the source images.

    Two files are written to OUTPUT_DIR, both prefixed with
    "<name>_<START_DATE>_<END_DATE>":
      * "..._pixel_vectors.pkl"        -- `data` as given
      * "..._pixel_vector_labels.pkl"  -- a list holding `label_class` once per element of `data`
    A grid image titled "<name> - Class <label_class>" is saved via plot_grid.

    Parameters
    ----------
    data : sequence
        Pixel vectors to store.
    name : str
        Descriptive dataset name used in the output file names and the grid title.
    label_class : any picklable value
        Label assigned to every vector in `data`.
    images : sequence of arrays, optional
        Image patches to draw in the preview grid. When omitted, the
        notebook-level `img_stack` is used, which keeps existing calls working
        but relies on that global being defined.
    """
    file_name = f"{name}_{START_DATE}_{END_DATE}"
    with open(os.path.join(OUTPUT_DIR, f"{file_name}_pixel_vectors.pkl"),"wb") as f:
        pickle.dump(data, f)
    with open(os.path.join(OUTPUT_DIR, f"{file_name}_pixel_vector_labels.pkl"),"wb") as f:
        pickle.dump([label_class] * len(data), f)
    if images is None:
        # Backward-compatible fallback to the notebook global (hidden state);
        # prefer passing `images` explicitly.
        images = img_stack
    plot_grid(images, title=f"{name} - Class {label_class}", save=True)

In [None]:
# Create polygons from point samples
# NOTE: the "Load polygons from geojson" cell further down rebinds `polygons`;
# run only the cell that matches the desired input.

with open('../data/sampling_locations/v_1.1.5_negatives.geojson', 'r') as f:
    candidate_sites = json.load(f)['features']
# Keep only the coordinates of each sampled feature
coords = [site['geometry']['coordinates'] for site in candidate_sites]

# Set rect width in pixels
num_pixels = 48
# Convert pixels to degrees (not geographically sound):
# pixels / 100 -> presumably kilometres (i.e. 10 m pixels -- TODO confirm),
# then / 111.32 -> degrees, rounded to 4 decimals.
width_km = num_pixels / 100
rect_width = np.round(width_km / 111.32, 4)

# One rectangle of side rect_width per sampled point
polygons = []
for coord in coords:
    polygons.append(rect_from_point(coord, rect_width))

In [None]:
# Load polygons from geojson
# NOTE: this rebinds `polygons`, replacing any list built from point samples above.
# The original `filename = ` line had no right-hand side (SyntaxError); the file
# name is now defined here and the path is derived from it.
filename = 'sri_lanka_validated.geojson'
geometry_data = f'../data/sampling_locations/{filename}'
with open(geometry_data, 'r') as f:
    data = json.load(f)['features']
# Each feature's geometry is used directly as the polygon to download
polygons = [feature['geometry'] for feature in data]

In [None]:
# Download Sentinel Data
# Collect every patch download_patch yields for each polygon into one flat list.
img_stack = []
for polygon in tqdm(polygons):
    img_stack.extend(download_patch(polygon, START_DATE, END_DATE))
print(len(img_stack), "cloud masked patches extracted")

In [None]:
# Draw all downloaded patches in a grid and save it as "Negative Test.png" in OUTPUT_DIR
plot_grid(img_stack, title="Negative Test", save=True)

In [None]:
# Create pixel vectors
# Flatten every patch into one vector per pixel (length = number of channels)
# and keep only the vectors whose mean is > 0.
pixel_vectors = []
for img in img_stack:
    height, width, channels = img.shape
    vectors = img.reshape(height * width, channels)
    # extend() with a generator instead of a list comprehension used only for
    # its side effects (which built and discarded a list of None).
    pixel_vectors.extend(vector for vector in vectors if np.mean(vector) > 0)
# len() counts the list directly; np.shape() would first convert the whole
# list into an array just to read its first dimension.
print(f"{len(pixel_vectors):,} pixel vectors extracted")

In [None]:
# Plot the mean pixel spectra of the extracted dataset.
# Optional process that can take time with many samples

# Wide table: one row per pixel vector, one column per band name
band_names = band_descriptions.keys()
spectra_wide = pd.DataFrame(pixel_vectors, columns=band_names)
# Long table (band, value) as expected by seaborn's lineplot
data = spectra_wide.melt(var_name='band', value_name='value')

plt.figure(figsize=(6,4), dpi=150, facecolor=(1,1,1))
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favour of errorbar="sd";
# switch once the seaborn version used by this project is confirmed.
sns.lineplot(x='band', y='value', data=data, ci="sd")
plt.title('Mean Value +/- SD')
plt.show()

In [None]:
# Save pixel vectors
# `name` is a placeholder: replace it with a descriptive dataset name before
# running, since it becomes the prefix of the output file names and the grid title.
name = 'enter_descriptive_name'
# Label stored once per pixel vector in the labels pickle
label_class = 0
save_pixel_vectors(data=pixel_vectors, name=name, label_class=label_class)