# Sentinel 2A Plastic Waste Exploration
A starting point for exploring multispectral data from Sentinel 2A for TPA sites in Indonesia.

## Explorations:
### [1. Patch Visualization](#Exploration-1)
For each known plastic waste site, define a rect centered on the known coordinates. Additionally, define an adjacent rect as a control reference. For every rect, extract an image patch from every Sentinel image band and visualize. 

### [2. Patch Comparison](#Exploration-2)
Using the extracted patches from Exploration #1, compare mean/median reflectance across bands between patches from waste sites and their corresponding control sites. In addition to site-by-site comparisons, aggregate these statistics across all sites to assess the trends.

### [3. Temporal Monitoring](#Exploration-3)
At each site, visualize how mean/median reflectance changes over time for each Sentinel imaging band.

### [4. Spectral Signal Clustering](#Exploration-4)
Compile the mean/median values computed at each site and time point in Exploration #3 into a multi-dimensional vector. Compress the dimensionality of the vectors using PCA or t-SNE, and visualize whether waste and control sites form separable clusters.

## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
from tqdm.notebook import tqdm
from tqdm.contrib.concurrent import thread_map
from functools import partial
import json
import os
from datetime import datetime
from sklearn.decomposition import PCA

import ee
import geemap
import geemap.eefolium

import folium

%load_ext autoreload
%autoreload 2

In [None]:
# Authenticate this environment with Earth Engine before initializing the client.
ee.Authenticate()

In [None]:
# Initialize the Earth Engine client; every `ee.*` call below depends on it.
ee.Initialize()

### Sentinel 2 Cloud Filtering
Uses the new S2 Cloud Probability dataset. [Details on the algorithm](https://medium.com/google-earth/more-accurate-and-flexible-cloud-masking-for-sentinel-2-images-766897a9ba5f)

In [None]:
def get_s2_sr_cld_col(aoi, start_date, end_date):
    """
    Build a Sentinel-2 surface reflectance ImageCollection paired with s2cloudless.

    The 'COPERNICUS/S2_SR' collection is restricted to `aoi` and to the
    `start_date` / `end_date` window, and only images whose
    `CLOUDY_PIXEL_PERCENTAGE` property is at most the global `CLOUD_FILTER`
    are kept. Each kept image gets the matching
    'COPERNICUS/S2_CLOUD_PROBABILITY' image (same 'system:index') stored
    under its 's2cloudless' property.
    """
    # Surface reflectance images, prefiltered on scene-level cloudiness.
    not_too_cloudy = ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', CLOUD_FILTER)
    surface_reflectance = ee.ImageCollection('COPERNICUS/S2_SR')
    surface_reflectance = surface_reflectance.filterBounds(aoi)
    surface_reflectance = surface_reflectance.filterDate(start_date, end_date)
    surface_reflectance = surface_reflectance.filter(not_too_cloudy)

    # s2cloudless probability images for the same region and period.
    cloud_probability = ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')
    cloud_probability = cloud_probability.filterBounds(aoi)
    cloud_probability = cloud_probability.filterDate(start_date, end_date)

    # Pair the two collections on their shared 'system:index' property.
    same_index = ee.Filter.equals(leftField='system:index',
                                  rightField='system:index')
    joined = ee.Join.saveFirst('s2cloudless').apply(primary=surface_reflectance,
                                                    secondary=cloud_probability,
                                                    condition=same_index)
    return ee.ImageCollection(joined)

def add_cloud_bands(img):
    """
    Append the s2cloudless 'probability' band and a binary 'clouds' band to `img`.

    A pixel is flagged in 'clouds' when its cloud probability is greater than
    the global `CLD_PRB_THRESH`. `img` must carry the joined s2cloudless image
    under its 's2cloudless' property.
    """
    # Probability band of the joined s2cloudless image.
    probability = ee.Image(img.get('s2cloudless')).select('probability')

    # Threshold the probability into a cloud / not-cloud band.
    cloud_flag = probability.gt(CLD_PRB_THRESH).rename('clouds')

    # Attach both bands to the input image.
    extra_bands = ee.Image([probability, cloud_flag])
    return img.addBands(extra_bands)

def add_shadow_bands(img):
    """
    Append cloud-shadow component bands to an image that already has a 'clouds' band.

    Adds 'dark_pixels' (non-water pixels whose B8 value is below the global
    `NIR_DRK_THRESH`), 'cloud_transform' (the 'clouds' band projected along
    the shadow direction, over a distance set by the global `CLD_PRJ_DIST`)
    and 'shadows' (the product of the two).
    CK Note: I don't think this algorithm works over water
    """
    # SCL value 6 marks water; everything else can be a shadow candidate.
    land = img.select('SCL').neq(6)

    # Dark NIR pixels that are not water (potential cloud shadow pixels).
    SR_BAND_SCALE = 1e4
    nir_limit = NIR_DRK_THRESH*SR_BAND_SCALE
    dark_pixels = img.select('B8').lt(nir_limit).multiply(land).rename('dark_pixels')

    # Direction in which clouds cast their shadow (assumes UTM projection).
    solar_azimuth = ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE'))
    shadow_azimuth = ee.Number(90).subtract(solar_azimuth)

    # Project the cloud band along that direction.
    projected = img.select('clouds').directionalDistanceTransform(shadow_azimuth, CLD_PRJ_DIST*10)
    projected = projected.reproject(crs=img.select(0).projection(), scale=100)
    cld_proj = projected.select('distance').mask().rename('cloud_transform')

    # Shadows are dark pixels that fall inside the projected cloud area.
    shadows = cld_proj.multiply(dark_pixels).rename('shadows')

    # Attach dark pixels, cloud projection, and identified shadows as bands.
    shadow_bands = ee.Image([dark_pixels, cld_proj, shadows])
    return img.addBands(shadow_bands)

def add_cld_shdw_mask(img):
    """
    Append a combined cloud/cloud-shadow band named 'cloudmask' to `img`.

    The returned image also carries the intermediate bands added by
    `add_cloud_bands` and `add_shadow_bands`.
    """
    # Cloud bands first, because the shadow step reads the 'clouds' band.
    with_shadows = add_shadow_bands(add_cloud_bands(img))

    # 1 where a pixel is cloud or shadow, 0 elsewhere.
    clouds = with_shadows.select('clouds')
    shadows = with_shadows.select('shadows')
    cloud_or_shadow = clouds.add(shadows).gt(0)

    # Remove small cloud-shadow patches, then dilate what remains using BUFFER.
    # 20 m scale is for speed, and assumes clouds don't require 10 m precision.
    cleaned = cloud_or_shadow.focal_min(2).focal_max(BUFFER*2/20)
    cleaned = cleaned.reproject(crs=img.select([0]).projection(), scale=20)
    cloudmask = cleaned.rename('cloudmask')

    return with_shadows.addBands(cloudmask)

def apply_cld_shdw_mask(img):
    """
    Mask cloud and shadow pixels out of every band whose name starts with `B`.

    Expects `img` to contain the 'cloudmask' band; only the `B*` bands are
    returned.
    """
    # Invert the mask so clear pixels are 1 and cloud/shadow pixels are 0.
    clear_sky = img.select('cloudmask').Not()

    # Keep the reflectance bands only and mask them with the clear-sky band.
    reflectance = img.select('B.*')
    return reflectance.updateMask(clear_sky)

## Setup Parameters

In [None]:
# Directory holding the site JSON files and the tpa_polygons folder, relative to the notebook
DATA_DIR = '../data'

In [None]:
# Highest CLOUDY_PIXEL_PERCENTAGE an image may have to be kept by get_s2_sr_cld_col
CLOUD_FILTER = 30
# s2cloudless probability above which add_cloud_bands flags a pixel as cloud
CLD_PRB_THRESH = 60
# add_shadow_bands treats B8 values below NIR_DRK_THRESH * 1e4 as dark (possible shadow) pixels
NIR_DRK_THRESH = 0.15
# Cloud projection distance; add_shadow_bands passes CLD_PRJ_DIST * 10 to the distance transform
CLD_PRJ_DIST = 1
# Dilation of the cloud/shadow mask; add_cld_shdw_mask uses focal_max(BUFFER * 2 / 20)
BUFFER = 50
# Earth Engine dataset id (get_s2_sr_cld_col hard-codes the same id rather than reading this)
DATASET = 'COPERNICUS/S2_SR'

In [None]:
# Load tpa_points dataset and create a list of coordinates for the known sites

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(os.path.join(DATA_DIR, 'tpa_points.json')) as f:
    tpa_points = json.load(f)

# One row per GeoJSON feature: name, position, surface area and daily volume.
tpa_features = tpa_points['features']
tpa_sites = pd.DataFrame({
    'name': [site['properties']['Name'] for site in tpa_features],
    'lon': [site['geometry']['coordinates'][0] for site in tpa_features],
    'lat': [site['geometry']['coordinates'][1] for site in tpa_features],
    'area': [site['properties']['Surface_Ha'] for site in tpa_features],
    'daily_volume': [site['properties']['TOT_Kg/Day'] for site in tpa_features],
    'coords': [site['geometry']['coordinates'] for site in tpa_features],
})


display(tpa_sites)

In [None]:
# Load bare_earth_points dataset and create a list of coordinates for the known sites

# The raw GeoJSON gets its own name so `bare_earth_sites` only ever refers to
# the DataFrame; the `with` block closes the file on exit.
with open(os.path.join(DATA_DIR, 'bare_earth_points.json')) as f:
    bare_earth_points = json.load(f)

# Sites have no names in the file, so they are numbered in file order.
bare_earth_features = bare_earth_points['features']
bare_earth_sites = pd.DataFrame({
    'name': ['bare_earth_' + str(index) for index in range(len(bare_earth_features))],
    'lon': [site['geometry']['coordinates'][0] for site in bare_earth_features],
    'lat': [site['geometry']['coordinates'][1] for site in bare_earth_features],
    # Keep only the first two coordinate values (lon, lat).
    'coords': [site['geometry']['coordinates'][0:2] for site in bare_earth_features],
})
bare_earth_sites.head()

In [None]:
# Load city_points dataset and create a list of coordinates for the known sites

# The raw GeoJSON gets its own name so `city_sites` only ever refers to the
# DataFrame; the `with` block closes the file on exit.
with open(os.path.join(DATA_DIR, 'city_points.json')) as f:
    city_points = json.load(f)

# Sites have no names in the file, so they are numbered in file order.
city_features = city_points['features']
city_sites = pd.DataFrame({
    'name': ['city_' + str(index) for index in range(len(city_features))],
    'lon': [site['geometry']['coordinates'][0] for site in city_features],
    'lat': [site['geometry']['coordinates'][1] for site in city_features],
    # Keep only the first two coordinate values (lon, lat).
    'coords': [site['geometry']['coordinates'][0:2] for site in city_features],
})
city_sites.head()

In [None]:
# Sentinel 2 bands in spectral order: (band name, description, wavelength in nm)
_band_table = [
    ('B1', 'Aerosols, 442nm', 442),
    ('B2', 'Blue, 492nm', 492),
    ('B3', 'Green, 559nm', 559),
    ('B4', 'Red, 665nm', 665),
    ('B5', 'Red Edge 1, 704nm', 704),
    ('B6', 'Red Edge 2, 739nm', 739),
    ('B7', 'Red Edge 3, 779nm', 779),
    ('B8', 'NIR, 833nm', 833),
    ('B8A', 'Red Edge 4, 864nm', 864),
    ('B9', 'Water Vapor, 943nm', 943),
    ('B11', 'SWIR 1, 1610nm', 1610),
    ('B12', 'SWIR 2, 2186nm', 2186),
]

# Band name -> description; the key order is the band order used throughout the notebook
band_descriptions = {name: description for name, description, _ in _band_table}

# Wavelengths in nm, in the same order as band_descriptions
band_wavelengths = [wavelength for _, _, wavelength in _band_table]

## Visualize Cloud-Filtered Imagery

In [None]:
# Rectangle spanning longitudes 114 to 116 and latitudes -9 to -8, as [lon, lat] corners
bali_corners = [[116, -8], [116, -9], [114, -9], [114, -8]]
bali_rect = ee.Geometry.Polygon(bali_corners, None, False)

In [None]:
# Median of the cloud-masked images over the Bali rectangle for June 2019
s2_data = get_s2_sr_cld_col(bali_rect, '2019-06-01', '2019-07-01')
s2_sr_median = (
    s2_data.filterBounds(bali_rect)
    .map(add_cld_shdw_mask)
    .map(apply_cld_shdw_mask)
    .median()
    .clip(bali_rect)
)

In [None]:
# Define visualization parameters
vizParams = {'bands': ['B4', 'B3', 'B2'],
             'min': 0, 'max': 3000}

# NOTE: this cell uses `create_rect` and `tpa_sites['polygons']`, which are
# only defined in later cells (Exploration 1); run those cells first.
Map = geemap.eefolium.Map(center=[-8.4, 115.1], zoom=10)
tpa_poly_path = os.path.join(DATA_DIR, 'tpa_polygons', 'tpa_polygons.shp')
tpa_polygons = geemap.shp_to_ee(tpa_poly_path)
# NOTE(review): only the first site's polygon is used to clip the image layer,
# and the `tpa_polygons` loaded above is not used here - confirm this is intended.
Map.addLayer(s2_sr_median.clipToCollection(tpa_sites['polygons'].iloc[0]), vizParams, 'Sentinel 2 Image')

# Add the sites of interest as yellow dots
for _, site in tpa_sites.iterrows():
    roi = create_rect(site['lon'], site['lat'], 0.02)
    Map.addLayer(roi)
    description = f"{site['name']}<br>Size: {site['area']:.1f} Ha<br>Volume: {site['daily_volume'] / 1000:.0f} Tonnes/day"
    # `fill_opacity` was misspelled `fll_opacity`, so the requested solid
    # fill was never applied to the markers.
    folium.CircleMarker([site['lat'], site['lon']],
                        fill=True,
                        radius=3,
                        color='#FFCE00',
                        fill_opacity=1,
                        tooltip=description).add_to(Map)

display(Map)

In [None]:
# Write per-polygon MEAN statistics of the median composite to test.csv at scale=30
geemap.zonal_statistics(s2_sr_median, tpa_polygons, 'test.csv', statistics_type='MEAN', scale=30)

## Exploration 1
### Patch Extraction and Visualization

In [None]:
def create_rect(lon, lat, width):
    """
    Build a square Earth Engine polygon of side `width` centered on (lon, lat).

    `width` is in the same units as `lon` and `lat`.
    """
    half = width / 2
    east, west = lon + half, lon - half
    north, south = lat + half, lat - half
    corners = [[east, north],
               [east, south],
               [west, south],
               [west, north]]
    return ee.Geometry.Polygon(corners, None, False)

In [None]:
# Side length, in degrees, of the square patch extracted around each site
# NOTE: I realize that degrees -> meters differs for lat/lon
# This shouldn't matter, but it's good to keep in mind
RECT_WIDTH = 0.002

In [None]:
# Show the TPA site table again for reference
display(tpa_sites)

In [None]:
# Build control sites that sit next to each dump patch.
# Keeping them adjacent should keep the distribution the same while isolating
# dump-specific factors. Multiple offset directions and distances are possible;
# for now each control is shifted in longitude by two rect widths.

offset = 2 * RECT_WIDTH

control_names = [name + " Control" for name in tpa_sites['name']]
control_lons = [lon + offset for lon in tpa_sites['lon']]
control_lats = list(tpa_sites['lat'])

control_sites = pd.DataFrame({
    'name': control_names,
    'lon': control_lons,
    'lat': control_lats,
    'coords': [[lon, lat] for lon, lat in zip(control_lons, control_lats)],
})

display(control_sites)

In [None]:
# Use the bare earth sites as controls; this replaces the adjacent-patch controls built above
control_sites = bare_earth_sites

In [None]:
# Inspect the 'geometry' of the first site's polygon collection
# (the 'polygons' column is only added to tpa_sites in a later cell)
tpa_sites['polygons'].iloc[0].get('geometry')

In [None]:
from multiprocessing.dummy import Pool as ThreadPool

def get_sentinel_band(site_name, roi, output_dict, image, band):
    """
    Download one band of `image` over `roi` and store it in `output_dict[band]`.

    Args:
        site_name: Name of the site being processed. Unused here; kept so the
            existing call signature stays the same.
        roi: Earth Engine region the band is clipped to and exported over.
        output_dict: Dictionary updated in place with the result for `band`.
        image: Earth Engine image to read the band from.
        band: Name of the band to export.

    Returns:
        The value stored in `output_dict[band]`: the squeezed numpy patch, or
        an empty list when no array was returned.
    """
    band_img = image.select(band).clipToBoundsAndScale(roi, scale=10)
    # NOTE(review): default_value=-999 presumably fills masked pixels, so those
    # values end up in any statistic computed on the patch - confirm.
    image_array = geemap.ee_to_numpy(band_img, region=roi, default_value=-999)
    # presumably ee_to_numpy returns None when the request fails; the former
    # `patch.all() != None` test could only be False in that case, so the
    # check is now explicit.
    if image_array is None:
        patch = []
    else:
        patch = np.squeeze(image_array)
    output_dict[band] = patch
    return patch

def get_patches(site_names, site_coords, rect_width, image):
    """
    Multithreaded export of square Sentinel 2 patches as numpy arrays.

    Args:
        site_names: Iterable of site names, used as keys of the result.
        site_coords: Iterable of [lon, lat] pairs, one per site name.
        rect_width: Side length of the square patch built around each site.
        image: Earth Engine image whose bands are exported.

    Returns:
        Dictionary organized as [site name][band] -> patch, with one entry per
        band in `band_descriptions`.
    """
    bands = list(band_descriptions.keys())
    patch_dict = {}
    for name, site in zip(site_names, site_coords):
        print("Processing", name)
        roi = create_rect(site[0], site[1], rect_width)
        images = {}
        get_sentinel_partial = partial(get_sentinel_band,
                                       name,
                                       roi,
                                       images,
                                       image)
        # The context manager releases the worker threads even when a band
        # download raises; map() only returns once every band is done.
        with ThreadPool(12) as pool:
            pool.map(get_sentinel_partial, bands)
        patch_dict[name] = images
    return patch_dict

def get_tpa_patches(site_names, polygons, image):
    """
    Multithreaded export of Sentinel 2 patches clipped to site polygons.

    Args:
        site_names: Iterable of site names, used as keys of the result.
        polygons: Iterable of Earth Engine regions, one per site name; each is
            passed to `get_sentinel_band` as the region of interest.
        image: Earth Engine image whose bands are exported.

    Returns:
        Dictionary organized as [site name][band] -> patch, with one entry per
        band in `band_descriptions`.
    """
    bands = list(band_descriptions.keys())
    patch_dict = {}
    for name, roi in zip(site_names, polygons):
        print("Processing", name)
        images = {}
        get_sentinel_partial = partial(get_sentinel_band,
                                       name,
                                       roi,
                                       images,
                                       image)
        # The context manager releases the worker threads even when a band
        # download raises; map() only returns once every band is done.
        with ThreadPool(12) as pool:
            pool.map(get_sentinel_partial, bands)
        patch_dict[name] = images
    return patch_dict

In [None]:
# Export every band of the median composite, clipped to each TPA polygon.
# Requires tpa_sites['polygons'], which is assigned in a later cell.
clipped_tpa = get_tpa_patches(tpa_sites['name'], 
                              tpa_sites['polygons'],
                              s2_sr_median)

### Extract image patches to numpy arrays

In [None]:
# Period covered by the composite; `start` and `end` also label the figures below
start = '2019-01-01'
end = '2019-11-01'
s2_data = get_s2_sr_cld_col(bali_rect, start, end)
s2_sr_median = (
    s2_data.filterBounds(bali_rect)
    .map(add_cld_shdw_mask)
    .map(apply_cld_shdw_mask)
    .median()
)

In [None]:
# Load the TPA polygon GeoJSON; the `with` block closes the file on exit.
with open(os.path.join(DATA_DIR, 'tpa_polygons', 'tpa_polygons.json'), 'r') as f:
    json_tpa = json.load(f)

In [None]:
# Wrap each GeoJSON feature in its own single-feature FeatureCollection,
# then attach them to the site table (one per row, in file order).
tpa_polygons = [ee.FeatureCollection([feature]) for feature in json_tpa['features']]
tpa_sites['polygons'] = tpa_polygons

In [None]:
# Export every band of the median composite, clipped to each TPA polygon
clipped_tpa = get_tpa_patches(tpa_sites['name'], 
                          tpa_sites['polygons'], 
                          s2_sr_median)

In [None]:
# Quick look at the green band (B3) of every clipped site, min-max scaled to [0, 1]
for name, site_bands in clipped_tpa.items():
    image = site_bands['B3']
    lowest = np.min(image)
    highest = np.max(image)
    plt.imshow((image - lowest) / (highest - lowest), cmap='gray')
    plt.title(name)
    plt.show()

In [None]:
# Export square patches of the median composite around every control site
control_patches = get_patches(control_sites['name'], control_sites['coords'], RECT_WIDTH, s2_sr_median)

### Visualize patches

In [None]:
def _normalized_difference(band_a, band_b):
    """Return (band_a - band_b) / (band_a + band_b), computed in floating point."""
    band_a = np.asarray(band_a, dtype=float)
    band_b = np.asarray(band_b, dtype=float)
    return (band_a - band_b) / (band_a + band_b)


def _scaled_composite(channels):
    """Stack three 2-D bands into a float image scaled by its maximum value."""
    composite = np.moveaxis(np.asarray(channels, dtype=float), 0, -1)
    return composite / np.max(composite)


def plot_patches(image_dict, output_dir):
    """
    Save one figure per site showing every band plus four derived images.

    Each figure has one grayscale panel per band in `band_descriptions`,
    followed by an RGB composite, NDVI, NDWI and a (B11, B8, B4) composite.
    Figure titles use the global `start` and `end` dates, and the file name
    includes the global `RECT_WIDTH`.

    Args:
        image_dict: Dictionary organized as [site name][band] -> 2-D patch.
        output_dir: Existing directory the PNG files are written to.
    """
    for site in image_dict:
        patches = image_dict[site]
        plt.figure(figsize=(10,8), dpi=100, facecolor='white')
        plt.suptitle(f"{site}: {start} - {end}", y=0.95, size=16)
        for index, band in enumerate(band_descriptions):
            plt.subplot(4, 4, index + 1)
            plt.title(f"{band} - {band_descriptions[band]}")
            plt.imshow(patches[band], cmap='gray')
            plt.axis('off')

        # Derived panels are computed in floating point: an in-place divide
        # raises on integer patches, and differences of unsigned bands can
        # wrap around.
        panels = [
            (13, "RGB",
             _scaled_composite([patches['B4'], patches['B3'], patches['B2']]), None),
            (14, "NDVI",
             _normalized_difference(patches['B8'], patches['B4']), 'gray'),
            (15, "NDWI",
             _normalized_difference(patches['B8'], patches['B11']), 'gray'),
            (16, "Vegetation (B11, B8, B4)",
             _scaled_composite([patches['B11'], patches['B8'], patches['B4']]), None),
        ]
        for position, title, panel, cmap in panels:
            plt.subplot(4, 4, position)
            plt.title(title)
            plt.imshow(panel, cmap=cmap)
            plt.axis('off')

        plt.tight_layout(rect=[0, 0, 1, 0.95])
        plt.savefig(os.path.join(output_dir, f"{site} Patches - Rect Width {RECT_WIDTH}.png"))
        plt.close()

In [None]:
# Plot Patches
output_dir = './figures/patches'
# makedirs also creates the parent './figures' folder, which os.mkdir cannot do.
os.makedirs(output_dir, exist_ok=True)

plot_patches(clipped_tpa, output_dir)
#plot_patches(control_patches, output_dir)


## Exploration 2
### Compare patch reflectance between TPA and control sites

In [None]:
def compute_stats(image_dict, bands=None):
    """
    Summarize per-location mean reflectance for each band.

    Args:
        image_dict: Dictionary organized as [location][band] -> patch. A patch
            with no values (such as an empty list) is ignored for that band.
        bands: Band names to summarize. Defaults to every band in the global
            `band_descriptions`.

    Returns:
        Dictionary organized as [band] -> {'mean', 'median', 'std'}, computed
        over the per-location patch means. All three values are NaN for a
        band that has no data at any location.
    """
    if bands is None:
        bands = list(band_descriptions)

    stats = {}
    for band in bands:
        # Skip empty patches so one missing site does not turn the whole
        # aggregate into NaN.
        mean_values = [
            np.mean(patches[band])
            for patches in image_dict.values()
            if np.size(patches[band]) > 0
        ]
        if mean_values:
            stats[band] = {
                'mean': np.mean(mean_values),
                'median': np.median(mean_values),
                'std': np.std(mean_values)
            }
        else:
            stats[band] = {'mean': np.nan, 'median': np.nan, 'std': np.nan}
    return stats

In [None]:
# Per-band statistics across all TPA polygons and across all control patches
tpa_stats = compute_stats(clipped_tpa)
control_stats = compute_stats(control_patches)

In [None]:
output_dir = './figures/reflectance'
# makedirs also creates the parent './figures' folder, which os.mkdir cannot do.
os.makedirs(output_dir, exist_ok=True)

# Figure 1: mean reflectance per band, aggregated over all sites of each kind.
plt.figure(figsize=(14,4), dpi=100, facecolor=(1,1,1))
plt.errorbar(band_wavelengths,
             [tpa_stats[band]['mean'] for band in tpa_stats],
             [tpa_stats[band]['std'] for band in tpa_stats],
             fmt='o-', capsize=3, label='TPA Mean')

plt.errorbar(band_wavelengths,
             [control_stats[band]['mean'] for band in control_stats],
             [control_stats[band]['std'] for band in control_stats],
             fmt='o-', capsize=3, c='r', label='Bare Earth Mean')

#plt.plot([tpa_stats[band]['median'] for band in tpa_stats], label='TPA Median')
#plt.plot([control_stats[band]['median'] for band in control_stats], c='r', label='Control Median')
plt.xticks(band_wavelengths, list(tpa_stats), rotation=45, ha='right')
plt.grid()
plt.xlabel('Band Wavelength')
plt.ylabel('Mean Patch Reflectance')
plt.ylim([200, 3750])
plt.legend()
plt.title(f"TPA Sites vs. Bare Earth Sites - Mean Reflectance - {start} - {end}\nErrorbars: 1 SD")
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f"TPA Sites vs. Bare Earth Sites - Mean Reflectance - {start} - {end}.png"))
plt.show()

# Figure 2: one row per site pair.
# NOTE: `tpa_patches` is created by the Exploration 3 cell, so that cell must
# have been run before this one.
# A bare figure is created here because the GridSpec below adds every axes
# itself; pre-building a grid with plt.subplots can leave empty axes behind
# the plots.
fig = plt.figure(figsize=(10,3 * len(tpa_patches)), dpi=100, facecolor=(1,1,1))
grid = gridspec.GridSpec(len(tpa_patches), 2, width_ratios=[5, 1])
for index, (tpa_site, control_site) in enumerate(zip(tpa_patches, control_patches)):
    tpa_mean = []
    tpa_std = []
    control_mean = []
    control_std = []
    for band in list(band_descriptions):
        tpa_mean.append(np.mean(tpa_patches[tpa_site][band]))
        tpa_std.append(np.std(tpa_patches[tpa_site][band]))
        control_mean.append(np.mean(control_patches[control_site][band]))
        control_std.append(np.std(control_patches[control_site][band]))
    ax0 = plt.subplot(grid[index, 0])
    ax0.errorbar(band_wavelengths, tpa_mean, tpa_std, fmt='o-', capsize=3, label='TPA')
    ax0.errorbar(band_wavelengths, control_mean, control_std, fmt='o-', c='r', capsize=3, label='Bare Earth')
    ax0.legend()
    ax0.set_xticks(band_wavelengths)
    ax0.set_xticklabels(list(tpa_stats), rotation=45, ha='right')
    ax0.set_title(f"Mean Value by Site\n{tpa_site} vs. {control_site}")
    ax0.grid()

    # Difference between the mean of list positions 6-9 (B7, B8, B8A, B9)
    # and the mean of positions 0-3 (B1-B4).
    ax1 = plt.subplot(grid[index, 1])
    ax1.bar([0], [np.mean(tpa_mean[6:10]) - np.mean(tpa_mean[:4])])
    ax1.bar([1], [np.mean(control_mean[6:10]) - np.mean(control_mean[:4])], color='r')
    ax1.set_title("∆ of Bands 7-9 and Bands 1-4")
    ax1.set_xticks([0, 1])
    ax1.set_xticklabels(['TPA Site', 'Control Site'], rotation=45, ha='right')
plt.tight_layout()
plt.suptitle(f"Reflectance by Site - {start} - {end}", size=18, y=1.01)
plt.savefig(os.path.join(output_dir, f"Reflectance by Site - Bare Earth - {start} - {end}.png"))
plt.show()

## Exploration 3
### Temporal Monitoring

### Compile a dictionary of patches across a time period
History dictionary structure: `{dates: {sites: {bands: patch}}}`

Note: I can't access Sentinel data older than 2019 using earth engine. I'm not sure why.

In [None]:
from datetime import timezone

# History dictionaries organized as [date][site][band] -> patch
tpa_history = {}
control_history = {}

start = '2019-01-01'
num_months = 22

date = ee.Date(start)
for month in tqdm(range(num_months)):
    next_date = date.advance(1, 'month')

    # Median cloud-masked composite for this one-month window.
    s2_data = get_s2_sr_cld_col(bali_rect, date, next_date)
    s2_sr_median = (
        s2_data.filterBounds(bali_rect)
        .map(add_cld_shdw_mask)
        .map(apply_cld_shdw_mask)
        .median()
        .clip(bali_rect)
    )

    tpa_patches = get_patches(tpa_sites['name'], tpa_sites['coords'], RECT_WIDTH, s2_sr_median)
    control_patches = get_patches(control_sites['name'], control_sites['coords'], RECT_WIDTH, s2_sr_median)

    # The Earth Engine timestamp is in milliseconds. Reading it in UTC makes
    # the label independent of the local timezone; the former `+ 86400`
    # workaround produced a label one day late east of UTC.
    timestamp_seconds = date.getInfo()['value'] // 1000
    date_text = str(datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc).date())
    tpa_history[date_text] = tpa_patches
    control_history[date_text] = control_patches

    date = next_date


### Visualize time series by site

In [None]:
output_dir = './figures/time_series'
# makedirs also creates the parent './figures' folder, which os.mkdir cannot do.
os.makedirs(output_dir, exist_ok=True)

# One figure per site pair, one panel per band, one point per month.
for tpa_site, control_site in zip(tpa_sites['name'], control_sites['name']):
    plt.figure(figsize=(12,12), dpi=100, facecolor=(1,1,1))
    for index, band in enumerate(band_descriptions):
        tpa_means = []
        control_means = []
        for month in tpa_history.keys():
            tpa_patch = tpa_history[month][tpa_site][band]
            control_patch = control_history[month][control_site][band]
            # np.size also handles 0-d patches, on which len() raises; months
            # without data become NaN so they show up as gaps in the line.
            tpa_means.append(np.mean(tpa_patch) if np.size(tpa_patch) > 0 else np.nan)
            control_means.append(np.mean(control_patch) if np.size(control_patch) > 0 else np.nan)

        plt.subplot(4, 3, index + 1)
        plt.plot(list(tpa_history.keys()), tpa_means, '-o', label='TPA Site')
        plt.plot(list(control_history.keys()), control_means, '-o', c='r', label='Control Site')
        plt.xticks(rotation=45, ha='right')
        plt.ylim([0, 4500])
        plt.legend()
        plt.title(f'Band {band}')
    plt.tight_layout()
    plt.suptitle(tpa_site, y=1.02, size=16)
    plt.savefig(os.path.join(output_dir, f'{tpa_site} time series.png'), bbox_inches='tight')
    plt.close()

## Exploration 4
### Spectral Clustering

For each site, compile a vector of mean band reflectance values for each month in the record from Exploration #3. Reduce dimensionality of each point from the number of bands to 2 using PCA and plot the 2D points.

In [None]:
output_dir = './figures/clustering'
# makedirs also creates the parent './figures' folder, which os.mkdir cannot do.
os.makedirs(output_dir, exist_ok=True)


def _band_means(band_patches):
    """Return the mean of every band, or None unless all bands have data."""
    if any(np.size(band_patches[band]) == 0 for band in band_descriptions):
        return None
    return [np.mean(band_patches[band]) for band in band_descriptions]


all_tpa_vectors = []
all_control_vectors = []
for tpa_site, control_site in zip(tpa_sites['name'], control_sites['name']):
    tpa_site_vectors = []
    control_site_vectors = []
    for month in tpa_history.keys():
        # Only complete vectors are kept: a vector missing some bands would
        # have a different length from the others and break the PCA input.
        tpa_means = _band_means(tpa_history[month][tpa_site])
        control_means = _band_means(control_history[month][control_site])

        if tpa_means is not None:
            tpa_site_vectors.append(tpa_means)
            all_tpa_vectors.append(tpa_means)
        if control_means is not None:
            control_site_vectors.append(control_means)
            all_control_vectors.append(control_means)

    # A 2-component PCA needs at least two samples to fit on.
    if len(tpa_site_vectors) < 2 or not control_site_vectors:
        print("Skipping", tpa_site, "- not enough data for PCA")
        continue

    # The projection is fitted on the TPA vectors only; controls are mapped into it.
    pca = PCA(n_components=2)
    pca.fit(tpa_site_vectors)
    tpa_pca = pca.transform(tpa_site_vectors)
    control_pca = pca.transform(control_site_vectors)
    plt.figure(figsize=(8,5), dpi=100)
    plt.scatter(tpa_pca[:,0], tpa_pca[:,1], label='TPA')
    plt.scatter(control_pca[:,0], control_pca[:,1], c='r', label='Control')
    plt.title(tpa_site)
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{tpa_site} reflectance scatter PCA.png"))
    plt.close()

# Same projection over the vectors of all sites together.
pca = PCA(n_components=2)
pca.fit(all_tpa_vectors)
tpa_pca = pca.transform(all_tpa_vectors)
control_pca = pca.transform(all_control_vectors)

plt.figure(figsize=(8,5), dpi=100)
plt.scatter(tpa_pca[:,0], tpa_pca[:,1], label='TPA')
plt.scatter(control_pca[:,0], control_pca[:,1], c='r', label='Control')
plt.legend()
plt.title('All Sites')
plt.savefig(os.path.join(output_dir, f"All sites reflectance scatter PCA.png"))
plt.close()

In [None]:
import matplotlib.animation as animation

# The GIFs are written to figures/videos, which no other cell creates.
video_dir = os.path.join('figures', 'videos')
os.makedirs(video_dir, exist_ok=True)

for tpa_site, control_site in zip(tpa_sites['name'], control_sites['name']):
    fig, ax = plt.subplots(dpi=100, facecolor=(1,1,1))
    ax.set_axis_off()
    ax.set_title(f"{tpa_site}\nLeft: TPA, Right: Control")
    fig.tight_layout()
    images = []
    for date in control_history:
        data_control = control_history[date][control_site]
        rgb_control = np.array([data_control['B4'], data_control['B3'], data_control['B2']])
        rgb_control = np.moveaxis(rgb_control, 0, -1)

        data_tpa = tpa_history[date][tpa_site]
        rgb_tpa = np.array([data_tpa['B4'], data_tpa['B3'], data_tpa['B2']])
        rgb_tpa = np.moveaxis(rgb_tpa, 0, -1)
        if len(rgb_tpa) > 0 and len(rgb_control) > 0:
            # TPA patch, a one-pixel-wide separator column, then the control patch.
            separator = np.ones((len(rgb_tpa), 1, 3)) * 3000
            combined = np.concatenate((rgb_tpa, separator, rgb_control), axis=1)

            im = ax.imshow(combined / 3000, animated=True)
            images.append([im])

    # Nothing to animate when no month had data for both patches.
    if not images:
        print("Skipping", tpa_site, "- no frames to animate")
        plt.close(fig)
        continue

    ani = animation.ArtistAnimation(fig, images, interval=120, blit=True, repeat_delay=500)
    ani.save(os.path.join(video_dir, '22-months_both_' + tpa_site + '.gif'))
    # Close the figure so one open figure per site does not accumulate.
    plt.close(fig)

In [None]:
output_dir = './figures/clustering'
# makedirs also creates the parent './figures' folder, which os.mkdir cannot do.
os.makedirs(output_dir, exist_ok=True)

# Per-pixel band vectors pooled over all sites; reused by the classifier cells below.
tpa_pixel_vectors = []
control_pixel_vectors = []
for tpa_site, control_site in zip(tpa_sites['name'], control_sites['name']):
    tpa_site_vectors = []
    control_site_vectors = []
    for month in tpa_history.keys():
        site_bands = tpa_history[month][tpa_site]
        if np.shape(site_bands['B2']) != (0,):
            # Stack the bands along a last axis and flatten the pixels in
            # row-major order: one vector of band values per pixel.
            cube = np.stack([site_bands[band] for band in band_descriptions], axis=-1)
            vectors = cube.reshape(-1, cube.shape[-1]).tolist()
            tpa_site_vectors.extend(vectors)
            tpa_pixel_vectors.extend(vectors)

    for month in control_history.keys():
        site_bands = control_history[month][control_site]
        if np.shape(site_bands['B2']) != (0,):
            cube = np.stack([site_bands[band] for band in band_descriptions], axis=-1)
            vectors = cube.reshape(-1, cube.shape[-1]).tolist()
            control_site_vectors.extend(vectors)
            control_pixel_vectors.extend(vectors)

    # Concatenating with an empty list fails, so skip sites without data.
    if not tpa_site_vectors or not control_site_vectors:
        print("Skipping", tpa_site, "- no pixel data for PCA")
        continue

    pca = PCA(n_components=2)
    pca.fit(np.concatenate((tpa_site_vectors, control_site_vectors)))
    tpa_pca = pca.transform(tpa_site_vectors)
    control_pca = pca.transform(control_site_vectors)
    plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
    plt.scatter(tpa_pca[:,0], tpa_pca[:,1], s=0.5, alpha=0.5, label='TPA')
    plt.scatter(control_pca[:,0], control_pca[:,1], c='r', s=0.5, alpha=0.5, label='Control')
    plt.title(tpa_site)
    plt.axis('off')
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{tpa_site} per-pixel reflectance scatter PCA.png"))
    plt.show()

In [None]:
from sklearn.manifold import TSNE

# Per-site t-SNE embedding of the per-pixel band vectors.
# NOTE: `output_dir` is whatever an earlier cell last set it to.
for tpa_site, control_site in zip(tpa_sites['name'], control_sites['name']):
    tpa_site_vectors = []
    control_site_vectors = []
    for month in tpa_history.keys():
        site_bands = tpa_history[month][tpa_site]
        if np.shape(site_bands['B2']) != (0,):
            # Stack the bands along a last axis and flatten the pixels in
            # row-major order: one vector of band values per pixel.
            cube = np.stack([site_bands[band] for band in band_descriptions], axis=-1)
            tpa_site_vectors.extend(cube.reshape(-1, cube.shape[-1]).tolist())

    for month in control_history.keys():
        site_bands = control_history[month][control_site]
        if np.shape(site_bands['B2']) != (0,):
            cube = np.stack([site_bands[band] for band in band_descriptions], axis=-1)
            control_site_vectors.extend(cube.reshape(-1, cube.shape[-1]).tolist())

    # Concatenating with an empty list fails, so skip sites without data.
    if not tpa_site_vectors or not control_site_vectors:
        print("Skipping", tpa_site, "- no pixel data for t-SNE")
        continue

    # TPA and control pixels are embedded together and split again by
    # position: the first len(tpa_site_vectors) rows are the TPA pixels.
    tsne = TSNE(n_components=2)
    embedded = tsne.fit_transform(np.concatenate((tpa_site_vectors, control_site_vectors)))
    n_tpa = len(tpa_site_vectors)
    plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
    plt.scatter(embedded[:n_tpa,0], embedded[:n_tpa,1], s=1, alpha=0.5, label='TPA')
    plt.scatter(embedded[n_tpa:,0], embedded[n_tpa:,1], s=1, alpha=0.5, c='r', label='Control')
    plt.title(tpa_site)
    plt.legend()
    plt.axis('off')
    plt.savefig(os.path.join(output_dir, f"{tpa_site} per-pixel reflectance scatter tSNE.png"))
    plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Per-pixel dataset: control pixels first (label 0), then TPA pixels (label 1).
n_control = len(control_pixel_vectors)
n_tpa = len(tpa_pixel_vectors)
X = np.concatenate((control_pixel_vectors, tpa_pixel_vectors))
y = np.concatenate((np.zeros(n_control), np.ones(n_tpa)))

# Hold out a third of the pixels for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
from sklearn import tree

# Small decision tree trained on the per-pixel training split.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=32, max_depth=8).fit(X_train, y_train)

test_accuracy = clf.score(X_test, y_test)
print("Accuracy:", test_accuracy)

# feature_importances_ follows the feature order, which is the band order.
print("Feature Importances:")
band_importances = zip(band_descriptions, clf.feature_importances_)
for band, importance in band_importances:
    print(f"{band}: {importance:.3f}")

In [None]:
# Classify every pixel of one TPA site for every month, next to its RGB image.
site = tpa_sites['name'][1]
data_source = tpa_history
for month in data_source.keys():
    data = data_source[month][site]
    if np.shape(data['B2']) != (0,):
        width, height = np.shape(data['B2'])
        rgb = np.array([data['B4'], data['B3'], data['B2']])
        rgb = np.moveaxis(rgb, 0, -1)
        # One row of band values per pixel, in row-major order. Predicting
        # all pixels in one call avoids a classifier call per pixel and the
        # assignment of a 1-element array into a scalar slot.
        pixels = np.stack([data[band] for band in band_descriptions], axis=-1)
        pixels = pixels.reshape(-1, len(band_descriptions))
        classification = clf.predict(pixels).reshape(width, height)
        plt.figure(dpi=150, facecolor=(1,1,1))
        plt.subplot(1,2,1)
        plt.imshow(rgb / 3000)
        plt.title(site)
        plt.axis('off')
        plt.subplot(1,2,2)
        plt.imshow(classification, cmap='seismic')
        plt.axis('off')
        plt.title(month)

    else:
        print("No data found for", month)

In [None]:
# Train the same size-limited decision tree on the per-site monthly mean
# vectors (control = 0, TPA = 1) and report accuracy and band importances.
from sklearn.model_selection import train_test_split

X = np.concatenate((all_control_vectors, all_tpa_vectors))
y = np.concatenate((
    np.zeros(len(all_control_vectors)),
    np.ones(len(all_tpa_vectors)),
))
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.20
)

clf = tree.DecisionTreeClassifier(max_depth=8, max_leaf_nodes=32).fit(X_train, y_train)

print("Accuracy:", clf.score(X_test, y_test))
print("Feature Importances:")
for band, importance in zip(band_descriptions, clf.feature_importances_):
    print(f"{band}: {importance:.3f}")

In [None]:
# For every TPA site, classify each monthly patch pixel by pixel and plot the
# median RGB composite beside the mean predicted class over all months.
output_dir = './figures/tree_classification'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

data_source = tpa_history
band_names = list(band_descriptions)  # feature order must match the training vectors
plt.figure(dpi=100, facecolor=(1,1,1), figsize=(20,4))
for index, site in enumerate(tpa_sites['name']):
    classification_images = []
    rgb_images = []
    for month in data_source:
        data = data_source[month][site]
        # Months without imagery are stored as an empty 1-D array.
        if np.shape(data['B2']) == (0,):
            continue
        width, height = np.shape(data['B2'])
        rgb = np.stack([data['B4'], data['B3'], data['B2']], axis=-1)
        # One predict() call over a (width*height, n_bands) matrix replaces the
        # per-pixel calls, which were slow and relied on NumPy's deprecated
        # array-to-scalar conversion.
        pixels = np.stack(
            [np.asarray(data[band])[:width, :height] for band in band_names], axis=-1
        )
        classification = (
            clf.predict(pixels.reshape(-1, len(band_names)))
            .reshape(width, height)
            .astype(float)
        )
        classification_images.append(classification)
        rgb_images.append(rgb)
    # Two panels per site: RGB on odd slots, classification on even slots.
    plt.subplot(2,10,2*index + 1)
    plt.imshow(np.median(rgb_images, axis=0) / 3000)
    plt.axis('off')
    plt.title(site, size=8)
    plt.subplot(2,10,2*index + 2)
    im = plt.imshow(np.mean(classification_images, axis=0), cmap='seismic', vmin=0, vmax=1)
    plt.axis('off')
    plt.title('Classification', size=8)
    plt.colorbar(im,fraction=0.045, pad=0.02, ticks=[0, 1])

plt.suptitle('TPA Patches')
plt.tight_layout()
#plt.savefig(os.path.join(output_dir, f"Control Site Decision Tree Classification.png"), bbox_inches='tight')
plt.show()

In [None]:
# Overlay the mean classification map, half transparent, on the median RGB
# composite held in rgb_images / classification_images.
median_rgb = np.median(rgb_images, axis=0) / 3000
mean_classification = np.mean(classification_images, axis=0)

plt.subplot(1,2,1)
plt.imshow(median_rgb)
plt.imshow(mean_classification, alpha=0.5)
plt.show()

In [None]:
# Build one mean-reflectance vector (one entry per band) for every site and
# month that has data, separately for TPA sites and control sites.
output_dir = './figures/clustering'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

all_tpa_vectors = []
all_control_vectors = []
for tpa_site in tpa_sites['name']:
    for month in tpa_history:
        patch = tpa_history[month][tpa_site]
        # Bands with no samples are skipped, so a month without imagery
        # produces an empty vector and is dropped below.
        tpa_means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if tpa_means:
            all_tpa_vectors.append(tpa_means)

for control_site in control_sites['name']:
    for month in control_history:
        patch = control_history[month][control_site]
        control_means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if control_means:
            all_control_vectors.append(control_means)


In [None]:
# One figure per band: density histograms of the mean reflectance for TPA
# vectors versus control vectors.
tpa_matrix = np.array(all_tpa_vectors)
control_matrix = np.array(all_control_vectors)
for band in range(np.shape(all_tpa_vectors)[1]):
    plt.hist(tpa_matrix[:,band], 20, histtype='step', density=True, label='TPA')
    plt.hist(control_matrix[:,band], 20, color='r', histtype='step', density=True, label='Bare Earth')
    plt.title(band)
    plt.legend()
    plt.show()

In [None]:
# Min-max scale both classes with ONE shared range. Scaling each class with
# its own min/max (as before) stretches the two groups differently, so their
# positions are no longer comparable in the joint PCA that follows.
_all_vectors = np.concatenate((all_tpa_vectors, all_control_vectors))
_lowest, _highest = _all_vectors.min(), _all_vectors.max()
norm_tpa = (np.asarray(all_tpa_vectors) - _lowest) / (_highest - _lowest)
norm_control = (np.asarray(all_control_vectors) - _lowest) / (_highest - _lowest)

In [None]:
# Fit a 2-component PCA on the normalized vectors of both classes together,
# then scatter each class in the shared projection.
pca = PCA(n_components=2).fit(np.concatenate((norm_tpa, norm_control)))
tpa_pca = pca.transform(norm_tpa)
control_pca = pca.transform(norm_control)

plt.figure(dpi=100, figsize=(8,5))
for points, style in (
    (control_pca, {'label': 'Bare Earth'}),
    (tpa_pca, {'c': 'r', 'label': 'TPA'}),
):
    plt.scatter(points[:,0], points[:,1], **style)
plt.legend()
plt.title('All Sites')
#plt.savefig(os.path.join(output_dir, f"All sites reflectance scatter PCA.png"))
plt.show()

In [None]:
import pickle

# Persist the current control history under the city-history filename.
# The context manager closes the file even if pickling raises.
with open(os.path.join(DATA_DIR, "city_history.pkl"), "wb") as f:
    pickle.dump(control_history, f)

In [None]:
# Build the monthly reflectance history for the TPA sites: for each month,
# take the cloud/shadow-masked median composite and extract one patch per
# site polygon. The result is keyed by the month's start date (YYYY-MM-DD).
from datetime import timezone

history = {}
sites = tpa_sites
start = '2019-01-01'
num_months = 22

date = ee.Date(start)
for month_index in tqdm(range(num_months)):
    next_date = date.advance(1, 'month')
    s2_data = get_s2_sr_cld_col(bali_rect, date, next_date)
    s2_sr_median = (
        s2_data.filterBounds(bali_rect)
        .map(add_cld_shdw_mask)
        .map(apply_cld_shdw_mask)
        .median()
        .clip(bali_rect)
    )

    patches = get_tpa_patches(sites['name'], sites['polygons'], s2_sr_median)
    # ee.Date reports milliseconds since the Unix epoch. Convert in UTC so the
    # key is the same on every machine; the previous local-time conversion
    # with a +86400 s offset gave a different day depending on the timezone.
    epoch_seconds = date.getInfo()['value'] // 1000
    date_text = str(datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).date())
    history[date_text] = patches

    date = next_date


In [None]:
import pickle

# Persist the freshly built history; the context manager closes the file
# even if pickling raises.
with open(os.path.join(DATA_DIR, "tpa_history_clipped.pkl"), "wb") as f:
    pickle.dump(history, f)

In [None]:
def _load_history(filename):
    """Unpickle one reflectance-history file stored under DATA_DIR."""
    with open(os.path.join(DATA_DIR, filename), 'rb') as file:
        return pickle.load(file)


# Load the four saved reflectance histories, one per ground-cover class.
tpa_history = _load_history("tpa_history_clipped.pkl")
bare_earth_history = _load_history("bare_earth_history.pkl")
adjacent_history = _load_history("adjacent_history.pkl")
city_history = _load_history("city_history.pkl")

In [None]:
# Mean reflectance per band for every adjacent site and month that has data.
data_source = adjacent_history
first_month = list(data_source.keys())[0]  # site names are read from the first month
vectors = []
for site in data_source[first_month]:
    for month in data_source:
        patch = data_source[month][site]
        means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if means:
            vectors.append(means)
adjacent_vectors = vectors

In [None]:
# Mean reflectance per band for every city site and month that has data.
data_source = city_history
first_month = list(data_source.keys())[0]  # site names are read from the first month
vectors = []
for site in data_source[first_month]:
    for month in data_source:
        patch = data_source[month][site]
        means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if means:
            vectors.append(means)
city_vectors = vectors

In [None]:
# Mean reflectance per band for every TPA site and month that has data.
# NOTE(review): a later cell filters -999 null pixels out of the TPA pixel
# vectors; if the clipped patches contain -999 they also enter these means.
# Confirm whether they should be masked here.
data_source = tpa_history
first_month = list(data_source.keys())[0]  # site names are read from the first month
vectors = []
for site in data_source[first_month]:
    for month in data_source:
        patch = data_source[month][site]
        means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if means:
            vectors.append(means)
tpa_vectors = vectors

In [None]:
# Mean reflectance per band for every bare-earth site and month that has data.
data_source = bare_earth_history
first_month = list(data_source.keys())[0]  # site names are read from the first month
vectors = []
for site in data_source[first_month]:
    for month in data_source:
        patch = data_source[month][site]
        means = [np.mean(patch[band]) for band in band_descriptions if len(patch[band]) > 0]
        if means:
            vectors.append(means)
bare_earth_vectors = vectors

In [None]:
def get_pixel_vectors(data_source):
    """
    Flatten a reflectance history into a list of per-pixel band vectors.

    `data_source` is indexed as data_source[month][site][band]. Site names are
    taken from the first month. A month/site whose 'B2' entry has shape (0,)
    is skipped. For every remaining patch, each (row, column) position yields
    one vector holding the value of every band in `band_descriptions`, in
    that order.

    Returns a flat list of vectors, ordered by site, then month, then row,
    then column.
    """
    first_month = list(data_source.keys())[0]
    pixel_vectors = []
    for site in data_source[first_month]:
        for month in data_source:
            patch = data_source[month][site]
            if np.shape(patch['B2']) == (0,):
                continue
            n_rows, n_cols = np.shape(patch['B2'])
            pixel_vectors.extend(
                [patch[band][row][col] for band in band_descriptions]
                for row in range(n_rows)
                for col in range(n_cols)
            )
    return pixel_vectors

In [None]:
# Flatten each ground-cover history into per-pixel band vectors.
(
    bare_earth_pixel_vectors,
    city_pixel_vectors,
    adjacent_pixel_vectors,
    tpa_pixel_vectors,
) = map(
    get_pixel_vectors,
    (bare_earth_history, city_history, adjacent_history, tpa_history),
)

In [None]:
# Drop every TPA pixel vector that contains the -999 null marker, printing
# the vector count before and after.
print(len(tpa_pixel_vectors))
tpa_pixel_vectors = [vector for vector in tpa_pixel_vectors if -999 not in vector]
print(len(tpa_pixel_vectors))

In [None]:
# Project the per-site monthly mean vectors of all four ground-cover classes
# into a shared 2-component PCA space and plot bare earth against TPA.
output_dir = './figures/clustering'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

pca = PCA(n_components=2)
pca.fit(np.concatenate((tpa_vectors, city_vectors, bare_earth_vectors, adjacent_vectors)))
tpa_pca = pca.transform(tpa_vectors)
adjacent_pca = pca.transform(adjacent_vectors)
city_pca = pca.transform(city_vectors)
bare_earth_pca = pca.transform(bare_earth_vectors)

plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
plt.scatter(bare_earth_pca[:,0], bare_earth_pca[:,1], s=12, alpha=0.65, label='Bare Earth')
# City and adjacent classes are fitted but hidden; uncomment to show them.
#plt.scatter(city_pca[:,0], city_pca[:,1], s=12, c='gray', alpha=0.65, label='City')
#plt.scatter(adjacent_pca[:,0], adjacent_pca[:,1], s=12, c='green', alpha=0.65, label='Adjacent')
plt.scatter(tpa_pca[:,0], tpa_pca[:,1], s=12, alpha=0.65,  c='r', label='TPA')
plt.legend()
plt.xticks([])
plt.yticks([])
title = 'Multiclass Reflectance PCA'
plt.title(title)
#plt.savefig(os.path.join(output_dir, f"{title}.png"), bbox_inches='tight')
plt.show()

In [None]:
# Embed the mean vectors of three groups with t-SNE and scatter each group.
# Rows of `embedded` follow the concatenation order below, so the slice
# bounds are cumulative group sizes. (The previous bounds used
# len(bare_earth_vectors) where the second group actually has
# len(all_control_vectors) rows, so points were mislabelled or dropped.)
tsne = TSNE(n_components=2)
embedded = tsne.fit_transform(np.concatenate((all_tpa_vectors, all_control_vectors, bare_earth_vectors)))

tpa_end = len(all_tpa_vectors)
control_end = tpa_end + len(all_control_vectors)

plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
plt.scatter(embedded[:tpa_end,0], embedded[:tpa_end,1], s=10, alpha=0.5, label='TPA')
plt.scatter(embedded[tpa_end:control_end,0],
            embedded[tpa_end:control_end,1], s=10, alpha=0.5, c='gray', label='City')
plt.scatter(embedded[control_end:,0],
            embedded[control_end:,1], s=10, alpha=0.5, c='r', label='Bare Earth')
plt.legend()
plt.xticks([])
plt.yticks([])
title = 'City vs. Bare Earth vs. TPA - Reflectance scatter tSNE'
plt.title(title)
plt.savefig(os.path.join(output_dir, f"{title}.png"), bbox_inches='tight')
plt.show()

In [None]:
# Train a TPA-vs-everything-else decision tree on the per-site monthly mean
# vectors and report held-out accuracy and band importances.
from sklearn.model_selection import train_test_split
from sklearn import tree
X = np.concatenate((city_vectors,
                    bare_earth_vectors,
                    adjacent_vectors,
                    tpa_vectors))

# Labels follow the concatenation order above: only TPA is the positive class.
# Bare earth was previously labelled 1 here, unlike the per-pixel classifier,
# which labels it 0.
# NOTE(review): confirm bare earth was not meant to be a positive class.
y = np.concatenate((np.zeros(len(city_vectors)),
                    np.zeros(len(bare_earth_vectors)),
                    np.zeros(len(adjacent_vectors)),
                    np.ones(len(tpa_vectors))))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# class_weight makes errors on the negative class 500x more costly.
clf = tree.DecisionTreeClassifier(max_leaf_nodes=16, max_depth=64, class_weight={0: 500, 1:1})
clf = clf.fit(X_train, y_train)
print("Accuracy:", clf.score(X_test, y_test))
print("Feature Importances:")
for band, importance in zip(band_descriptions, clf.feature_importances_):
    print(f"{band}: {importance:.3f}")

In [None]:
# For the first ten adjacent sites, classify each monthly patch pixel by
# pixel and plot the median RGB composite beside the mean predicted class.
output_dir = './figures/tree_classification'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

data_source = adjacent_history
band_names = list(band_descriptions)  # feature order must match the training vectors
site_names = list(data_source[list(data_source.keys())[0]].keys())[:10]
plt.figure(dpi=100, facecolor=(1,1,1), figsize=(20,4))
for index, site in enumerate(site_names):
    classification_images = []
    rgb_images = []
    for month in data_source:
        data = data_source[month][site]
        # Months without imagery are stored as an empty 1-D array.
        if np.shape(data['B2']) == (0,):
            continue
        width, height = np.shape(data['B2'])
        rgb = np.stack([data['B4'], data['B3'], data['B2']], axis=-1)
        # One predict() call over a (width*height, n_bands) matrix replaces the
        # per-pixel calls, which were slow and relied on NumPy's deprecated
        # array-to-scalar conversion.
        pixels = np.stack(
            [np.asarray(data[band])[:width, :height] for band in band_names], axis=-1
        )
        classification = (
            clf.predict(pixels.reshape(-1, len(band_names)))
            .reshape(width, height)
            .astype(float)
        )
        classification_images.append(classification)
        rgb_images.append(rgb)
    # Two panels per site: RGB on odd slots, classification on even slots.
    plt.subplot(2,10,2*index + 1)
    plt.imshow(np.median(rgb_images, axis=0) / 3000)
    plt.axis('off')
    plt.title(site, size=8)
    plt.subplot(2,10,2*index + 2)
    im = plt.imshow(np.mean(classification_images, axis=0), cmap='seismic', vmin=0, vmax=1)
    plt.axis('off')
    plt.title('Classification', size=8)
    plt.colorbar(im,fraction=0.045, pad=0.02, ticks=[0, 1])

# The figure shows adjacent-site patches, not TPA patches.
plt.suptitle('Adjacent Patches')
plt.tight_layout()
#plt.savefig(os.path.join(output_dir, f"Control Site Decision Tree Classification.png"), bbox_inches='tight')
plt.show()

In [None]:
# Train a per-pixel TPA-vs-rest decision tree: city, bare-earth and adjacent
# pixels are labelled 0 and TPA pixels are labelled 1.
from sklearn.model_selection import train_test_split
from sklearn import tree

X = np.concatenate((
    city_pixel_vectors,
    bare_earth_pixel_vectors,
    adjacent_pixel_vectors,
    tpa_pixel_vectors,
))
y = np.concatenate((
    np.zeros(len(city_pixel_vectors)),
    np.zeros(len(bare_earth_pixel_vectors)),
    np.zeros(len(adjacent_pixel_vectors)),
    np.ones(len(tpa_pixel_vectors)),
))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.20
)

# Errors on the negative class are weighted twice as heavily as on TPA.
clf = tree.DecisionTreeClassifier(
    max_depth=8, max_leaf_nodes=256, class_weight={0: 2, 1:1}
).fit(X_train, y_train)

print("Accuracy:", clf.score(X_test, y_test))
print("Feature Importances:")
for band, importance in zip(band_descriptions, clf.feature_importances_):
    print(f"{band}: {importance:.3f}")

In [None]:
# For the first ten city sites, classify each monthly patch pixel by pixel
# and plot the median RGB composite beside the mean predicted class.
output_dir = './figures/tree_classification'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

data_source = city_history
band_names = list(band_descriptions)  # feature order must match the training vectors
site_names = list(data_source[list(data_source.keys())[0]].keys())[:10]
plt.figure(dpi=100, facecolor=(1,1,1), figsize=(20,4))
for index, site in enumerate(site_names):
    classification_images = []
    rgb_images = []
    for month in data_source:
        data = data_source[month][site]
        # Months without imagery are stored as an empty 1-D array.
        if np.shape(data['B2']) == (0,):
            continue
        width, height = np.shape(data['B2'])
        rgb = np.stack([data['B4'], data['B3'], data['B2']], axis=-1)
        # One predict() call over a (width*height, n_bands) matrix replaces the
        # per-pixel calls, which were slow and relied on NumPy's deprecated
        # array-to-scalar conversion.
        pixels = np.stack(
            [np.asarray(data[band])[:width, :height] for band in band_names], axis=-1
        )
        classification = (
            clf.predict(pixels.reshape(-1, len(band_names)))
            .reshape(width, height)
            .astype(float)
        )
        classification_images.append(classification)
        rgb_images.append(rgb)
    # Two panels per site: RGB on odd slots, classification on even slots.
    plt.subplot(2,10,2*index + 1)
    plt.imshow(np.median(rgb_images, axis=0) / 3000)
    plt.axis('off')
    plt.title(site, size=8)
    plt.subplot(2,10,2*index + 2)
    im = plt.imshow(np.mean(classification_images, axis=0), cmap='seismic', vmin=0, vmax=1)
    plt.axis('off')
    plt.title('Classification', size=8)
    plt.colorbar(im,fraction=0.045, pad=0.02, ticks=[0, 1])

# The figure shows city-site patches, not TPA patches.
plt.suptitle('City Patches')
plt.tight_layout()
#plt.savefig(os.path.join(output_dir, f"Control Site Decision Tree Classification.png"), bbox_inches='tight')
plt.show()

In [None]:
# Project the per-pixel vectors of all four ground-cover classes into a
# shared 2-component PCA space and save the scatter plot.
output_dir = './figures/clustering'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

pca = PCA(n_components=2)
pca.fit(np.concatenate((tpa_pixel_vectors, city_pixel_vectors, bare_earth_pixel_vectors, adjacent_pixel_vectors)))
tpa_pca = pca.transform(tpa_pixel_vectors)
adjacent_pca = pca.transform(adjacent_pixel_vectors)
city_pca = pca.transform(city_pixel_vectors)
bare_earth_pca = pca.transform(bare_earth_pixel_vectors)

plt.figure(figsize=(8,5), dpi=200, facecolor=(1,1,1))
plt.scatter(bare_earth_pca[:,0], bare_earth_pca[:,1], s=.1, alpha=0.05, label='Bare Earth')
plt.scatter(city_pca[:,0], city_pca[:,1], s=0.1, c='gray', alpha=0.05, label='City')
plt.scatter(adjacent_pca[:,0], adjacent_pca[:,1], s=.1, c='green', alpha=0.05, label='Adjacent')
plt.scatter(tpa_pca[:,0], tpa_pca[:,1], s=.1, alpha=0.05,  c='r', label='TPA')
legend = plt.legend(markerscale=10)
# The points are nearly transparent, so make the legend markers opaque.
# Matplotlib 3.7 renamed `legendHandles` to `legend_handles` and 3.9 removed
# the old name; support both.
legend_handles = getattr(legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = legend.legendHandles
for element in legend_handles:
    element.set_alpha(1)
plt.xticks([])
plt.yticks([])
title = 'Per Pixel Multiclass Reflectance PCA'
plt.title(title)
plt.savefig(os.path.join(output_dir, f"{title}.png"), bbox_inches='tight')
plt.show()

In [None]:
# Plot the mean per-pixel reflectance of each ground-cover class against the
# band wavelengths, one line per class, and save the figure.
output_dir = './figures/reflectance'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

vectors = [tpa_pixel_vectors, city_pixel_vectors, bare_earth_pixel_vectors, adjacent_pixel_vectors]
names = ['TPA', 'City', 'Bare Earth', 'Adjacent']
color = ['C0', 'gray', 'red', 'green']

plt.figure(figsize=(12, 5), facecolor=(1,1,1), dpi=150)
for vector, name, line_color in zip(vectors, names, color):
    # Convert the pixel list to an array once and average over pixels; the
    # earlier loop rebuilt the full array for every band.
    band_means = np.mean(np.asarray(vector), axis=0)
    plt.plot(band_wavelengths, band_means, color=line_color, label=name)
plt.xticks(band_wavelengths, list(band_descriptions), rotation=45, ha='right')
plt.grid()
plt.legend()
title = "Mean Pixel Spectral Profile by Ground Cover Type - 2019-01-01 - 2020-11-01"
plt.title(title)
plt.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
plt.show()

In [None]:
# Build one cloud/shadow-masked median composite for the whole date range
# and extract a single 0.02-sized patch around control site row 5.
start = '2019-01-01'
end = '2019-11-01'
s2_data = get_s2_sr_cld_col(bali_rect, start, end)
s2_sr_median = (
    s2_data.filterBounds(bali_rect)
    .map(add_cld_shdw_mask)
    .map(apply_cld_shdw_mask)
    .median()
)
test_patch = get_patches(
    [control_sites['name'].iloc[5]],
    [control_sites['coords'].iloc[5]],
    0.02,
    s2_sr_median,
)

In [None]:
# Build the monthly reflectance history for a single bare-earth site (row 4)
# using a 0.03-sized patch. The result is keyed by the month's start date
# (YYYY-MM-DD) and stored as `bare_earth_site`.
from datetime import timezone

history = {}
sites = bare_earth_sites.iloc[4:5]
start = '2019-01-01'
num_months = 22

date = ee.Date(start)
for month_index in tqdm(range(num_months)):
    next_date = date.advance(1, 'month')
    s2_data = get_s2_sr_cld_col(bali_rect, date, next_date)
    s2_sr_median = (
        s2_data.filterBounds(bali_rect)
        .map(add_cld_shdw_mask)
        .map(apply_cld_shdw_mask)
        .median()
        .clip(bali_rect)
    )

    patches = get_patches(sites['name'], sites['coords'], 0.03, s2_sr_median)
    # ee.Date reports milliseconds since the Unix epoch. Convert in UTC so the
    # key is the same on every machine; the previous local-time conversion
    # with a +86400 s offset gave a different day depending on the timezone.
    epoch_seconds = date.getInfo()['value'] // 1000
    date_text = str(datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).date())
    history[date_text] = patches

    date = next_date
bare_earth_site = history

In [None]:
# Classify every monthly patch of the test site(s) pixel by pixel. The
# per-month RGB and classification images are collected for plotting; after
# the loop, `site`, `rgb_images` and `classification_images` hold the last
# site processed.
output_dir = './figures/tree_classification'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

data_source = tpa_test_site
band_names = list(band_descriptions)  # feature order must match the training vectors
# No figure is opened here: this cell draws nothing, so the figure it used
# to create was left empty.
for site in list(data_source[list(data_source.keys())[0]].keys())[:10]:
    classification_images = []
    rgb_images = []
    for month in tqdm(data_source.keys()):
        data = data_source[month][site]
        # Months without imagery are stored as an empty 1-D array.
        if np.shape(data['B2']) == (0,):
            continue
        width, height = np.shape(data['B2'])
        rgb = np.stack([data['B4'], data['B3'], data['B2']], axis=-1)
        # One predict() call over a (width*height, n_bands) matrix replaces the
        # per-pixel calls, which were slow and relied on NumPy's deprecated
        # array-to-scalar conversion.
        pixels = np.stack(
            [np.asarray(data[band])[:width, :height] for band in band_names], axis=-1
        )
        classification = (
            clf.predict(pixels.reshape(-1, len(band_names)))
            .reshape(width, height)
            .astype(float)
        )
        classification_images.append(classification)
        rgb_images.append(rgb)

In [None]:
# Plot the median RGB composite next to the mean classification map for the
# images currently held in rgb_images / classification_images, then save.
median_rgb = np.median(rgb_images, axis=0) / 3000
mean_classification = np.mean(classification_images, axis=0)
title = "Bare Earth Site Decision Tree Classification - 0.03 Patch"

plt.figure(dpi=200, facecolor=(1,1,1), figsize=(10,5))

# Left panel: RGB composite.
plt.subplot(1,2,1)
plt.imshow(median_rgb)
plt.axis('off')
plt.title(site, size=8)

# Right panel: fraction of months each pixel was classified as class 1.
plt.subplot(1,2,2)
im = plt.imshow(mean_classification, cmap='seismic', vmin=0, vmax=1)
plt.axis('off')
plt.title('Classification', size=8)
plt.colorbar(im,fraction=0.045, pad=0.02, ticks=[0, 1])

plt.suptitle(title)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
plt.show()

In [None]:
# Build the monthly reflectance history for a single city site (row 7) using
# a 0.03-sized patch. The result is keyed by the month's start date
# (YYYY-MM-DD) and stored as `tpa_test_site`.
# NOTE(review): the variable name suggests a TPA site while the data comes
# from `city_sites`; confirm which is intended.
from datetime import timezone

history = {}
sites = city_sites.iloc[7:8]
start = '2019-01-01'
num_months = 22

date = ee.Date(start)
for month_index in tqdm(range(num_months)):
    next_date = date.advance(1, 'month')
    s2_data = get_s2_sr_cld_col(bali_rect, date, next_date)
    s2_sr_median = (
        s2_data.filterBounds(bali_rect)
        .map(add_cld_shdw_mask)
        .map(apply_cld_shdw_mask)
        .median()
        .clip(bali_rect)
    )

    patches = get_patches(sites['name'], sites['coords'], 0.03, s2_sr_median)
    # ee.Date reports milliseconds since the Unix epoch. Convert in UTC so the
    # key is the same on every machine; the previous local-time conversion
    # with a +86400 s offset gave a different day depending on the timezone.
    epoch_seconds = date.getInfo()['value'] // 1000
    date_text = str(datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).date())
    history[date_text] = patches

    date = next_date
tpa_test_site = history


In [None]:
# Build the monthly reflectance history for TPA Jimbaran from hard-coded
# coordinates using a 0.03-sized patch. The composite is neither
# bounds-filtered nor clipped here. The result is keyed by the month's start
# date (YYYY-MM-DD) and stored as `tpa_site`.
from datetime import timezone

history = {}
start = '2019-01-01'
num_months = 22

date = ee.Date(start)
for month_index in tqdm(range(num_months)):
    next_date = date.advance(1, 'month')
    s2_data = get_s2_sr_cld_col(bali_rect, date, next_date)
    s2_sr_median = (
        s2_data.map(add_cld_shdw_mask)
        .map(apply_cld_shdw_mask)
        .median()
    )

    patches = get_patches(['TPA Jimbaran'], [[115.1658103, -8.7944717]], 0.03, s2_sr_median)
    # ee.Date reports milliseconds since the Unix epoch. Convert in UTC so the
    # key is the same on every machine; the previous local-time conversion
    # with a +86400 s offset gave a different day depending on the timezone.
    epoch_seconds = date.getInfo()['value'] // 1000
    date_text = str(datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).date())
    history[date_text] = patches

    date = next_date
tpa_site = history

In [None]:
# Persist the TPA Jimbaran history; the context manager closes the file even
# if pickling raises.
with open(os.path.join(DATA_DIR, "tpa_jimbaran_0.03_patch.pkl"), "wb") as f:
    pickle.dump(tpa_site, f)

In [None]:

# Classify every monthly patch of the test site(s) pixel by pixel, then plot
# the median RGB composite beside the mean classification map for the last
# site processed and save the figure.
output_dir = './figures/tree_classification'
# makedirs also creates a missing './figures' parent and tolerates reruns.
os.makedirs(output_dir, exist_ok=True)

data_source = tpa_test_site
band_names = list(band_descriptions)  # feature order must match the training vectors
# Only one figure is opened, just before plotting; the extra figure that used
# to be created before the loop was left empty.
for site in list(data_source[list(data_source.keys())[0]].keys())[:10]:
    classification_images = []
    rgb_images = []
    for month in tqdm(data_source.keys()):
        data = data_source[month][site]
        # Months without imagery are stored as an empty 1-D array.
        if np.shape(data['B2']) == (0,):
            continue
        width, height = np.shape(data['B2'])
        rgb = np.stack([data['B4'], data['B3'], data['B2']], axis=-1)
        # One predict() call over a (width*height, n_bands) matrix replaces the
        # per-pixel calls, which were slow and relied on NumPy's deprecated
        # array-to-scalar conversion.
        pixels = np.stack(
            [np.asarray(data[band])[:width, :height] for band in band_names], axis=-1
        )
        classification = (
            clf.predict(pixels.reshape(-1, len(band_names)))
            .reshape(width, height)
            .astype(float)
        )
        classification_images.append(classification)
        rgb_images.append(rgb)

plt.figure(dpi=200, facecolor=(1,1,1), figsize=(10,5))
plt.subplot(1,2,1)
plt.imshow(np.median(rgb_images, axis=0) / 3000)
plt.axis('off')
plt.title(site, size=8)
plt.subplot(1,2,2)
im = plt.imshow(np.mean(classification_images, axis=0), cmap='seismic', vmin=0, vmax=1)
plt.axis('off')
plt.title('Classification', size=8)
plt.colorbar(im,fraction=0.045, pad=0.02, ticks=[0, 1])
title = "TPA Site Decision Tree Classification - 0.03 Patch"
plt.suptitle(title)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, title + '.png'), bbox_inches='tight')
plt.show()