## Compare Training Site Spectral Signatures between SR and TOA Data

In [None]:
# Import GEE & initialize
import os
import ee
# import geemap
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt

# Initialize GEE; if that fails, run the interactive authentication flow
# once and retry.
try:
    ee.Initialize()
except Exception:
    # Deliberately broad: any initialization failure is treated as
    # "needs authentication" (presumably missing/expired credentials --
    # confirm if other failure modes should surface instead).
    ee.Authenticate()
    ee.Initialize()

### Import Training Data

In [None]:
# Source Assets
assets_locn = "projects/ee-abnatcap/assets/sargassum"
# NOTE: asset IDs are joined with an explicit "/" instead of os.path.join,
# which would insert the OS path separator (a backslash on Windows) and
# produce an invalid Earth Engine asset ID.

# SR samples
sr_samples = "samples_S2sr_20190507_allbands"
samples_sr_ee = ee.FeatureCollection(f"{assets_locn}/{sr_samples}")
print("Column names: ", samples_sr_ee.first().propertyNames().getInfo())
print('Count: ', samples_sr_ee.size().getInfo())

# TOA samples
toa_samples = "samples_S2toa_20190507_allbands"
samples_toa_ee = ee.FeatureCollection(f"{assets_locn}/{toa_samples}")
print("Column names: ", samples_toa_ee.first().propertyNames().getInfo())
print('Count: ', samples_toa_ee.size().getInfo())

In [None]:
## Export Training Samples to Google Drive  -- One Time only
# Local directory that is joined with output_folder below to locate the
# exported GeoJSON files (machine-specific absolute path -- presumably the
# local Google Drive sync folder; adjust when running elsewhere).
google_drive_local = '/Users/arbailey/Google Drive/My Drive'
# Folder name used both as the Drive export target (in the disabled export
# tasks below) and as the sub-directory of google_drive_local read later.
output_folder = 'geeout'

# Only needed to do this once or when the training sites change.
# Uncomment to (re)start the export tasks.
# # SR Output
# task = ee.batch.Export.table.toDrive(collection=samples_sr_ee,
#                                          description=sr_samples,
#                                          folder=output_folder,
#                                          fileFormat='GeoJSON')
# task.start()
# # TOA Output
# task = ee.batch.Export.table.toDrive(collection=samples_toa_ee,
#                                          description=toa_samples,
#                                          folder=output_folder,
#                                          fileFormat='GeoJSON')
# task.start()

In [None]:
# Import Samples into GeoDataFrame
# The GeoJSON files are expected at
# <google_drive_local>/<output_folder>/<sample name>.geojson
samples_sr_geojson = os.path.join(google_drive_local, output_folder, sr_samples + ".geojson")
samples_toa_geojson = os.path.join(google_drive_local, output_folder, toa_samples + ".geojson")

samples_sr_gdf = gpd.read_file(samples_sr_geojson)
samples_toa_gdf = gpd.read_file(samples_toa_geojson)

print(samples_sr_gdf.crs)
print(samples_sr_gdf.columns)
# head must be *called*: printing the bare attribute shows the bound-method
# repr instead of the first rows.
print(samples_sr_gdf.head())
print(samples_toa_gdf.head())

### Reshape and Plot Signatures

In [None]:
# Reshape data frame to a normalized format for bands/indices plotting

# Bands / indices that are unpivoted into the long-format table by default.
model_bands = ['B2','B5','B8','B8A','B12','FAI','SEI', 'NDVI','NDVI_median','NDVI_min','NDVI_dmed']

def normalize_gdf(samples_gdf, bands=None):
    """Melt a wide samples table into long format and add a land cover name.

    Args:
        samples_gdf: Table with an ``lc_code`` column plus one column per
            requested band/index.
        bands: Column names to unpivot. Defaults to ``model_bands``.

    Returns:
        A long-format frame with the columns ``lc_code``, ``band_index``,
        ``value`` and ``lc``. ``lc`` holds the land cover name for codes
        0-4 and ``'unknown'`` for any other code.
    """
    # Single source of truth for the code -> name mapping (insertion order
    # keeps conditions and choices aligned).
    lc_names = {
        0: 'sargassum',
        1: 'other veg',
        2: 'beach',
        3: 'buildings',
        4: 'clouds',
    }
    value_vars = model_bands if bands is None else list(bands)

    normalized_gdf = samples_gdf.melt(id_vars=['lc_code'], value_vars=value_vars,
        var_name='band_index', value_name='value')

    conditions = [normalized_gdf['lc_code'] == code for code in lc_names]
    # An explicit *string* default is required: np.select's implicit default
    # is the integer 0, which cannot be combined with string choices on
    # recent NumPy releases and otherwise yields the misleading label '0'.
    normalized_gdf['lc'] = np.select(conditions, list(lc_names.values()), default='unknown')
    return normalized_gdf

# Long-format (one row per sample x band/index) version of the SR samples.
samples_sr_norm_gdf = normalize_gdf(samples_sr_gdf)
# Bare expression for notebook inspection.
# NOTE(review): with default notebook settings presumably only the last
# expression of a cell is rendered -- confirm if both tables should show.
samples_sr_norm_gdf

# Long-format version of the TOA samples.
samples_toa_norm_gdf = normalize_gdf(samples_toa_gdf)
samples_toa_norm_gdf

#### Grouped Boxplot for All Land Cover -- SR & TOA

In [None]:
# Box colour for each land cover name (keys match the 'lc' column values).
lc_palette = {
    "sargassum": "gold",
    "other veg": "limegreen",
    "beach": "beige",
    'buildings': 'brown',
    'clouds': 'lightgrey',
}

def group_boxplot(samples_norm_gdf):
    """Draw one grouped box plot covering all land cover classes.

    The x axis is the ``band_index`` column, the y axis is ``value``
    (limited to [-1, 1]) and boxes are coloured by the ``lc`` column using
    ``lc_palette``. Calls ``sns.set_style`` and ``sns.set_context`` before
    creating a new 24x12 figure. Returns nothing.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", font_scale=2, rc={"lines.linewidth": 1})
    _, axis = plt.subplots(figsize=(24, 12))
    axis.set_ylim(-1, 1)
    sns.boxplot(
        data=samples_norm_gdf,
        x='band_index',
        y='value',
        hue='lc',
        palette=lc_palette,
        ax=axis,
    )

# Grouped box plot for the SR samples first, then the TOA samples.
for norm_frame in (samples_sr_norm_gdf, samples_toa_norm_gdf):
    group_boxplot(norm_frame)

#### Box Plots for each Land Cover Individually -- SR & TOA

In [None]:
def single_boxplot(samples_norm_gdf, lc_code):
    """Draw a box plot of every band/index for one land cover code.

    Only rows whose ``lc_code`` column equals ``lc_code`` are plotted, on a
    new 10x10 figure with the y axis limited to [-1, 1]. Calls
    ``sns.set_context`` first. Returns nothing.
    """
    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 1})
    _, axis = plt.subplots(figsize=(10, 10))
    axis.set_ylim(-1, 1)
    rows_for_code = samples_norm_gdf[samples_norm_gdf["lc_code"] == lc_code]
    sns.boxplot(
        data=rows_for_code,
        x='band_index',
        y='value',
        hue='lc',
        palette=lc_palette,
        ax=axis,
    )

In [None]:
# Single BoxPlots - SR (one figure per land cover code 0-4)
for code in range(5):
    single_boxplot(samples_sr_norm_gdf, code)


In [None]:
# Single BoxPlots - TOA (one figure per land cover code 0-4)
for code in range(5):
    single_boxplot(samples_toa_norm_gdf, code)

In [None]:
def multi_boxplot(samples_norm_gdf):
    """Draw one figure with a separate box plot panel per land cover code.

    A 2x3 grid with a shared y axis (limited to [-1, 1]) is created. The top
    row shows codes 0, 1 and 4; the bottom row shows codes 2 and 3 in its
    middle and right cells, and the unused bottom-left cell is removed.
    Calls ``sns.set_context`` first. Returns nothing.
    """
    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 1})
    figure, grid = plt.subplots(2, 3, figsize=(24, 12), sharey=True)
    plt.ylim(-1, 1)

    # (grid cell, land cover code) in drawing order.
    panel_layout = (
        ((0, 0), 0),
        ((0, 1), 1),
        ((0, 2), 4),
        ((1, 1), 2),
        ((1, 2), 3),
    )
    for cell, code in panel_layout:
        rows_for_code = samples_norm_gdf[samples_norm_gdf["lc_code"] == code]
        sns.boxplot(
            ax=grid[cell],
            x='band_index',
            y='value',
            hue='lc',
            data=rows_for_code,
            palette=lc_palette,
        )

    figure.delaxes(ax=grid[1, 0])

In [None]:
# Per-land-cover panel figure for the SR samples.
multi_boxplot(samples_sr_norm_gdf)


In [None]:
# Per-land-cover panel figure for the TOA samples.
multi_boxplot(samples_toa_norm_gdf)

In [None]:
def dual_boxplot(sr_norm_gdf,toa_norm_gdf, lc_code):
    """Draw side-by-side SR and TOA box plots for one land cover code.

    The left panel (titled "SR") plots the rows of ``sr_norm_gdf`` whose
    ``lc_code`` column equals ``lc_code``; the right panel (titled "TOA")
    does the same for ``toa_norm_gdf``. Both panels share a y axis limited
    to [-1, 1]. Calls ``sns.set_context`` first. Returns nothing.
    """
    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 1})
    _, panels = plt.subplots(1, 2, figsize=(24, 12), sharey=True)
    plt.ylim(-1, 1)

    sources = (("SR", sr_norm_gdf), ("TOA", toa_norm_gdf))
    for panel, (title, frame) in zip(panels, sources):
        rows_for_code = frame[frame["lc_code"] == lc_code]
        sns.boxplot(
            ax=panel,
            x='band_index',
            y='value',
            hue='lc',
            data=rows_for_code,
            palette=lc_palette,
        )
        panel.set_title(label=title)

In [None]:
# SR vs TOA comparison per land cover code, in the display order 0, 1, 4, 3, 2.
for code in (0, 1, 4, 3, 2):
    dual_boxplot(samples_sr_norm_gdf, samples_toa_norm_gdf, code)
