In [None]:
import hvplot.xarray  # noqa: F401
import numpy as np
from fsspec.implementations.http import HTTPFileSystem
from dea_tools.spatial import xr_vectorize
import geohash
import folium
import odc.geo.xr
import geopandas as gpd
from dea_tools.spatial import xr_rasterize
import holoviews as hv
from holoviews import opts
import xarray as xr
import json
import shapely

from emit_tools import emit_xarray
from utils import get_rgb_dataset, get_earthdata_token

In [None]:
# See README.md for instructions on how to get an Earthdata token
# The token is later sent as a bearer "Authorization" header when the granule
# is fetched over HTTPS.
token = get_earthdata_token()

In [None]:
%%time
# Loading data can take around 3-4 minutes on a 100 Mbps connection

# Refer to the README.md for instructions on how to find granule IDs
granule = "EMIT_L2A_RFL_001_20230316T045211_2307503_006" # Canberra

s3_url = "s3://lp-prod-protected/EMITL2ARFL.001/" + granule + "/" + granule + ".nc"
http_url = s3_url.replace("s3://", "https://data.lpdaac.earthdatacloud.nasa.gov/")

fs = HTTPFileSystem(headers={
    "Authorization": f"bearer {token}"
})
ds = emit_xarray(fs.open(http_url))
ds

In [None]:
# Clean up empty bands.
# Cells whose reflectance equals -0.01 (presumably the fill value; confirm
# against the EMIT product docs) are masked out; everything else is kept.
is_valid = ds.reflectance != -0.01
ds = ds.fillna(np.nan).where(is_valid)

In [None]:
# Create a water layer
# Pick the bands nearest to 450 and 1275 on the `bands` coordinate.
high = ds.reflectance.sel(bands=450, method="nearest")
low = ds.reflectance.sel(bands=1275, method="nearest")

# Normalised difference of the two bands; strictly positive values are
# flagged as water.
normalised_difference = (high - low) / (high + low)
water = normalised_difference > 0

# Keep the flagged pixels and blank out everything else as NaN.
ds["water"] = water.fillna(float("nan")).where(water)

In [None]:
# Quick-look plot of the water mask with equal axis scaling.
ds["water"].hvplot(aspect="equal")

In [None]:
MIN_AREA = 1  # Hectares


def add_geohash(row):
    """Return the precision-9 geohash of the centroid of ``row.geometry``.

    The centroid's ``y`` is passed as the first argument to
    ``geohash.encode`` and its ``x`` as the second.
    """
    centre = row.geometry.centroid
    return geohash.encode(centre.y, centre.x, precision=9)


# Create polygons from the water layer
is_water = ds.water.values == 1
water_polygons = xr_vectorize(ds.water, crs="epsg:4326", mask=is_water)

# Area is measured after reprojecting to epsg:3577 and divided by 10,000
# (square metres to hectares, assuming the projected units are metres).
projected = water_polygons.to_crs("epsg:3577")
water_polygons["area"] = projected.area / 10000

# Drop geopandas rows where the area is less than MIN_AREA
too_small = water_polygons.loc[water_polygons["area"] < MIN_AREA].index
water_polygons = water_polygons.drop(too_small)

# Compute a geohash for each polygon at level 9
water_polygons["geohash"] = [
    add_geohash(row) for _, row in water_polygons.iterrows()
]

# Add a 1-based ID column
water_polygons["id"] = range(1, len(water_polygons) + 1)

# Show us what we've got
print(f"Found {water_polygons.shape[0]} water polygons that are larger than {MIN_AREA} hectare(s)")

In [None]:
# View the water layer on an interactive map
m = folium.Map(control_scale=True, tiles=None)


def water_style(_feature):
    """Return the path style applied to every water polygon.

    Leaflet path option names are case-sensitive: the outline colour key is
    ``color`` (a capitalised ``Color`` is silently ignored).
    """
    return {"fillColor": "blue", "color": "blue"}


for _, row in water_polygons.iterrows():
    geojson = folium.GeoJson(
        data=json.dumps(shapely.geometry.mapping(row.geometry)),
        style_function=water_style,
        tooltip=f"{row.geohash}",
    )
    # The "area" column is already stored in hectares, so it is shown as-is;
    # dividing by 10000 again would under-report it by a factor of 10,000.
    popup_html = (
        f"<p><strong>geohash:</strong> {row.geohash}<br>"
        f"<strong>area:</strong> {row['area']:.3f} Ha</p>"
    )
    folium.Popup(popup_html).add_to(geojson)
    geojson.add_to(m)

# Zoom map
m.fit_bounds(ds.odc.map_bounds())

# The map was created with tiles=None, so add the satellite basemap explicitly.
tile = folium.TileLayer(
    tiles="https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
    attr="Esri",
    name="Esri Satellite",
    control=True,
).add_to(m)

folium.LayerControl().add_to(m)
display(m)

In [None]:
# Rasterise the polygons again, so we can join on the geohash later
water_raster = xr_rasterize(water_polygons, ds, attribute_col="id", crs="epsg:4326")

# Join the rasterised polygons to the dataset
spatial_dims = ("latitude", "longitude")
ds["id"] = xr.DataArray(water_raster, dims=spatial_dims)

# Start from a grid of empty strings, one cell per pixel
empty_labels = np.full((ds.latitude.size, ds.longitude.size), "", dtype="U9")
ds["geohash"] = xr.DataArray(empty_labels, dims=spatial_dims)

# `where(cond, other)` KEEPS the current value where `cond` is True and writes
# `other` everywhere else. Testing `id != polygon_id` therefore leaves all
# other pixels alone and stamps the geohash onto this polygon's pixels.
for polygon_id, polygon_geohash in zip(water_polygons["id"], water_polygons["geohash"]):
    ds["geohash"] = ds.geohash.where(ds.id != polygon_id, polygon_geohash)

# Mask the empty values
ds["geohash"] = ds.geohash.where(ds.geohash != "")

ds.geohash

In [None]:
%%capture --no-stdout

means = ds.groupby("geohash").mean()
std_dev = ds.groupby("geohash").std()
min = ds.groupby("geohash").min()
max = ds.groupby("geohash").max()

# Create a new dataset with the mean, standard deviation, min and max values
# for each geohash
water_summaries = xr.Dataset(
    {
        "mean": means.reflectance,
        "std_dev": std_dev.reflectance,
        "min": min.reflectance,
        "max": max.reflectance,
    }
)

In [None]:
# Violin plots
# One violin per geohash, drawn from the per-geohash mean reflectance values.
violin_options = opts.Violin(width=800)
hv.Violin(water_summaries, kdims="geohash", vdims=["mean"], invert=True).opts(violin_options)

In [None]:
# Mean and min-max plots
color_cycle = hv.Cycle("Category20")

plots = []
# The loop variable is deliberately NOT called `geohash`: that name is the
# imported geohash module, and rebinding it to a string would break
# `geohash.encode` if an earlier cell were re-run.
for polygon_geohash in water_summaries.geohash.values:
    row = water_summaries.sel(geohash=polygon_geohash)

    # hv.Spread reads its value dimensions as (centre, negative error,
    # positive error) and shades centre - neg .. centre + pos. Express the
    # min/max envelope as offsets from the mean so the band really spans
    # min..max around the mean curve.
    band = row.assign(
        below_mean=row["mean"] - row["min"],
        above_mean=row["max"] - row["mean"],
    )

    label = f"{polygon_geohash}"
    plots.append(
        hv.Spread(band, vdims=["mean", "below_mean", "above_mean"], label=label)
        * hv.Curve(row, vdims="mean", label=label)
    )

hv.Overlay(plots).opts(
    opts.Spread(color=color_cycle),
    opts.Curve(color=color_cycle),
    opts.Overlay(
        show_title=False, frame_width=600, show_legend=True, legend_position="bottom"
    ),
)

In [None]:
# Shared size/colour settings applied to both the curves and the spreads.
options = dict(width=600, height=200, show_legend=False, color=color_cycle)

# One panel per polygon, stacked in a single column.
stacked = hv.Layout(plots).cols(1)
stacked.opts(
    opts.Curve(**options),
    opts.Spread(**options),
)