In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the local `utils` module) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json

import odc.geo  # noqa: F401
from odc.stac import configure_rio, load
from pystac_client import Client
from numpy import sqrt

from utils import hls_config

In [None]:
# NASA CMR STAC endpoint (LPCLOUD provider) and a client connected to it
catalog = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/"
client = Client.open(catalog)

# Query the Sentinel- and Landsat-derived collections together; both are at 30 m
collections = ["HLSS30.v2.0", "HLSL30.v2.0"]

In [None]:
# Area of interest: Precipitous Bluff, Tasmania.
# Corners are written as (lat, lon); the search wants [min_lon, min_lat, max_lon, max_lat].
ll = (-43.55, 146.45)
ur = (-43.35, 146.75)
min_lat, min_lon = ll
max_lat, max_lon = ur
bbox = [min_lon, min_lat, max_lon, max_lat]

# Query the catalog for every item intersecting the box during 2023
search = client.search(
    collections=collections,
    bbox=bbox,
    datetime="2023-01-01/2023-12-31",
)
items = list(search.items())

print(f"Found {len(items)} items")

In [None]:
# Read the Earthdata bearer token from a local `secrets.json` shaped like:
#   {"earthdata": {"token": "<your token>"}}
# The parsed file gets its own name (`secrets`) because `data` is used for the
# loaded dataset later in the notebook; reusing it here would be confusing.
# The encoding is given explicitly so the read does not depend on the platform default.
with open("secrets.json", encoding="utf-8") as f:
    secrets = json.load(f)

token = secrets["earthdata"]["token"]

In [None]:
# Configure GDAL for cloud reads. The Earthdata token read from `secrets.json`
# (held in `token`) is passed to GDAL as an HTTP Authorization header.
header_string = f"Authorization: Bearer {token}"
configure_rio(GDAL_HTTP_HEADERS=header_string, cloud_defaults=True)

# Load the visible, NIR and Fmask bands for the search results over the bbox,
# reprojected to EPSG:6933 at a resolution of 30 and chunked one time step at a time.
data = load(
    items,
    bands=["red", "green", "blue", "nir", "fmask"],
    bbox=bbox,
    crs="epsg:6933",
    resolution=30,
    groupby="solar_day",
    chunks={"x": 2500, "y": 2500, "time": 1},
    stac_cfg=hls_config,
)
data

In [None]:
# Divide each optical band by 10000 (presumably the HLS reflectance scale factor;
# confirm against `hls_config`) and replace every value that is not > 0 with 0.
# NOTE: this rescales `data` in place, so re-running the cell divides the bands again.
for band in ["red", "green", "blue", "nir"]:
    scaled = data[band] / 10000
    data[band] = scaled.where(scaled > 0, 0)

nir = data.nir
red = data.red

# NDVI = (NIR - RED) / (NIR + RED)
data["ndvi"] = (nir - red) / (nir + red)

# EVI2 = 2.5 * (NIR - RED) / (NIR + 2.4 * RED + 1)
data["evi2"] = 2.5 * (nir - red) / (nir + 2.4 * red + 1)

# MSAVI = (2 * NIR + 1 - sqrt((2 * NIR + 1)^2 - 8 * (NIR - RED))) / 2
two_nir_plus_one = 2 * nir + 1
data["msavi"] = (two_nir_plus_one - sqrt(two_nir_plus_one ** 2 - (8 * (nir - red)))) / 2

In [None]:
# Fmask bit flags for reference (bit position -> meaning; 0 = no, 1 = yes):
#   bit 1: cloud
#   bit 2: cloud_or_shadow_adjacent
#   bit 3: cloud_shadow
#   bit 4: snow_ice
#   bit 5: water

# Select the cloud (bit 1) and cloud shadow (bit 3) flags.
# NOTE: `mask_str` and `mask_int` are reassigned in a later cell before they are used.
mask_str = "00001010"
mask_int = int(mask_str, 2)

In [None]:
# Show RGB composites of the first 12 time steps before any cloud masking.
# The bands were divided by 10000 in an earlier cell, so the colour stretch must be
# given in those scaled units: 0.3 here is the equivalent of 3000 in unscaled values.
# (With vmax=3000 on scaled data every pixel would render as black.)
rgb_unmasked = data[["red", "green", "blue"]].isel(time=slice(0, 12)).to_array()
rgb_unmasked.plot.imshow(col="time", col_wrap=4, vmin=0, vmax=0.3);

In [None]:
# Bits 1-3 of Fmask: cloud, cloud/shadow adjacent, and cloud shadow
mask_str = "00001110"
mask_int = int(mask_str, 2)

# A pixel is rejected when any selected flag bit is set, or when Fmask itself is nodata
flagged = (data.fmask & mask_int) != 0
nodata = data.fmask == data.fmask.odc.nodata
mask = flagged | nodata

# Keep values only where the pixel was not rejected; the rest become missing
masked = data.where(~mask)

In [None]:
# NDVI of the masked data at time index 2, drawn on the full -1..1 range
ndvi_t2 = masked.ndvi.isel(time=2)
ndvi_t2.plot.imshow(vmin=-1, vmax=1)

In [None]:
# Plot RGB composites of the first 12 time steps after cloud masking.
# The bands were divided by 10000 in an earlier cell, so the colour stretch must be
# given in those scaled units: 0.3 here is the equivalent of 3000 in unscaled values.
# (With vmax=3000 on scaled data every pixel would render as black.)
rgb_masked = masked[["red", "green", "blue"]].isel(time=slice(0, 12)).to_array()
rgb_masked.plot.imshow(col="time", col_wrap=4, vmin=0, vmax=0.3);

In [None]:
# Pull the three vegetation indices for the first 100 x 100 pixels (by position)
# of the masked data into memory.
index_names = ["ndvi", "evi2", "msavi"]
window = {"x": slice(0, 100), "y": slice(0, 100)}
subset = masked[index_names].isel(**window).compute()
subset

In [None]:
# Average the subset within two-week ("2W") bins along time
two_week_bins = subset.resample(time="2W")
mean = two_week_bins.mean()
mean

In [None]:
# Collapse each two-week mean over x and y, then draw NDVI, EVI2 and MSAVI
# as one line per index against time
spatial_mean = mean[["ndvi", "evi2", "msavi"]].mean(dim=["x", "y"])
spatial_mean.to_array().plot.line(x="time", size=10)

In [None]:
# With clouds masked out, take the median through time to build a simple
# cloud-free composite, and compute it into memory
median_over_time = masked.median(dim="time")
median = median_over_time.compute()

In [None]:
# Explore the median composite on an interactive map. The search above spans all
# of 2023, but some areas may still be missing where pixels were masked as cloud.
# The bands were divided by 10000 in an earlier cell, so the stretch is given in
# those scaled units: 0.1 here is the equivalent of 1000 in unscaled values.
median.odc.explore(vmin=0, vmax=0.1)