## Imports

In [None]:
import json

from odc.stac import configure_rio, load
from pystac_client import Client
import geopandas as gpd
import numpy as np

from utils import hls_config

### Set up Dask client

In [None]:
import dask.distributed
from IPython.display import display

# Keep the Dask client under its own name. A later cell assigns the STAC
# catalogue client to `client`; binding the Dask client to that same name would
# leave it without any reference afterwards.
# NOTE(review): an unreferenced Dask client may be closed on garbage
# collection - confirm against the distributed docs.
dask_client = dask.distributed.Client()
display(dask_client)

## Connect to EarthData STAC

In [None]:
# EarthData CMR STAC endpoint (LPCLOUD)
catalog = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/"

# Searching across both landsat and sentinel at 30 m
collections = ["HLSS30.v2.0", "HLSL30.v2.0"]

# Open the STAC API; `client` is what the item searches below are run against
client = Client.open(catalog)

## Set up query

### Get geometry from area of interest GeoJSON

In [None]:
# Read the area of interest and keep the geometry of its first feature
aoi = gpd.read_file("aoi.geojson")

aoi_geom = aoi.iloc[0].geometry

### Set start and end date

In [None]:
# Search window as ISO dates, joined into a single "start/end" interval string
start_date = "2022-06-01"
end_date = "2023-06-01"
date_range = f"{start_date}/{end_date}"

## Run query to identify STAC items

In [None]:
# Search both collections for items that intersect the AOI geometry within the
# date range, and collect the results into a list
items = list(
    client.search(
        collections=collections, intersects=aoi_geom, datetime=date_range
    ).items()
)

print(f"Found {len(items)} items")

## Load using odc-stac

### Get EarthData token from secrets.json and authenticate

In [None]:
# Parse the secrets file; the token is looked up once the file has been closed
with open("secrets.json") as f:
    data = json.load(f)

token = data["earthdata"]["token"]

In [None]:
# Configure GDAL for cloud access, passing the EarthData token (read from
# secrets.json above) as a bearer token in the HTTP Authorization header.
header_string = f"Authorization: Bearer {token}"
configure_rio(cloud_defaults=True, GDAL_HTTP_HEADERS=header_string)

### Run the load by passing in the items list

In [None]:
# Load the requested bands for the matched items at resolution 30 in EPSG:5530,
# limited to the AOI geometry, with items grouped by solar day.
# `chunks={}` is passed so that the work is deferred to Dask (see the later
# .compute() call).
# NOTE: this rebinds `data`, which until now held the parsed secrets.json.
data = load(
    items,
    resolution=30,
    crs="EPSG:5530",
    chunks={},
    groupby="solar_day",
    stac_cfg=hls_config,
    bands=["red", "green", "blue", "nir", "fmask"],
    geopolygon=aoi_geom,
)
data

## View the data

In [None]:
# Reusable helper for a quick RGB preview
def plot_rgb(data):
    """Plot time steps 4-6 of ``data`` as RGB panels, three per row.

    The red, green and blue variables are stacked into one array and drawn
    with the colour range fixed to 0-3000.
    """
    rgb = data[["red", "green", "blue"]]
    preview = rgb.isel(time=slice(4, 7)).to_array()
    preview.plot.imshow(col="time", col_wrap=3, vmin=0, vmax=3000)


plot_rgb(data)

## Apply masking

In [None]:
# Write a function to reuse


def apply_cloud_mask(data):
    """Set flagged and nodata pixels to NaN using the ``fmask`` variable.

    A pixel is masked when any of bits 1-3 of ``fmask`` is set, or when
    ``fmask`` equals its nodata value.
    NOTE(review): bits 1-3 are presumably cloud, adjacent-to-cloud and cloud
    shadow in the HLS Fmask layer - confirm against the HLS user guide.

    Parameters
    ----------
    data : dataset with an ``fmask`` variable alongside the bands to mask.

    Returns
    -------
    A float copy of ``data`` with masked pixels set to NaN and the ``fmask``
    variable removed.
    """
    mask_int = 0b00001110
    nodata = data.fmask == data.fmask.odc.nodata
    flagged = (data.fmask & mask_int) != 0
    mask = flagged | nodata

    masked = data.astype(float).where(~mask, other=np.nan)

    # drop_vars returns a new object rather than modifying in place, so its
    # result has to be the value that is returned.
    return masked.drop_vars(["fmask"])


masked = apply_cloud_mask(data)

In [None]:
# Show the masked data with the same RGB preview used for the unmasked data
plot_rgb(masked)

## Calculate a vegetation index

In [None]:
def scale_offset(band):
    """Divide ``band`` by 10000 and replace non-positive results with NaN."""
    scaled = band / 10000
    return scaled.where(scaled > 0, other=np.nan)


def calculate_evi2(data):
    """Return EVI2 computed from the ``nir`` and ``red`` variables of ``data``.

    Both bands are passed through ``scale_offset`` first; the index is
    ``2.5 * (nir - red) / (nir + 2.4 * red + 1)``.
    """
    red = scale_offset(data.red)
    nir = scale_offset(data.nir)

    numerator = nir - red
    denominator = nir + 2.4 * red + 1
    return 2.5 * numerator / denominator


masked["evi2"] = calculate_evi2(masked)

In [None]:
def plot_evi2(data):
    """Plot the ``evi2`` variable for time steps 4-6, three panels per row."""
    preview = data.evi2.isel(time=slice(4, 7))
    preview.plot.imshow(col="time", col_wrap=3)


plot_evi2(masked)

# Identify the period of max vegetation index

## Start by computing the monthly median to infill areas that have been affected by cloud

In [None]:
# Create a monthly median now we have masked data: time steps are grouped
# with the "1MS" (month-start) frequency and the median is taken per group
median = masked.resample(time="1MS").median()

# Get Dask to run processing
median = median.compute()

In [None]:
# Show the median data (displayed as the cell output)
median

In [None]:
# One EVI2 panel per monthly median, four panels per row
median.evi2.plot.imshow(col="time", col_wrap=4)

In [None]:
# Average the monthly-median EVI2 over the x and y dimensions, leaving one
# value per time step
mean_median_evi = median["evi2"].mean(["x", "y"])

# Plot the spatial mean through time
mean_median_evi.plot()

## Get historical imagery for a given period

### Set up query and get data

In [None]:
import calendar

year_range = range(2010, 2024)

peak_vegetation_items = []

for year in year_range:
    # Each window runs from 1 December of `year` to the last day of the
    # following February. monthrange returns 29 for leap years, which a
    # hard-coded "02-28" end date would leave out.
    last_feb_day = calendar.monthrange(year + 1, 2)[1]
    start_date = f"{year}-12-01"
    end_date = f"{year + 1}-02-{last_feb_day}"
    date_range = f"{start_date}/{end_date}"

    # Search for items in the collection
    items = list(
        client.search(
            collections=collections, intersects=aoi_geom, datetime=date_range
        ).items()
    )

    # Accumulate every season's items into one flat list for a single load
    peak_vegetation_items.extend(items)

In [None]:
# Load the seasonal items with the same settings as the first load. The extent
# is limited to `aoi_geom`, the same geometry the item searches intersect, so
# the loaded area matches the area that was searched.
trend_data = load(
    peak_vegetation_items,
    resolution=30,
    crs="EPSG:5530",
    chunks={},
    groupby="solar_day",
    stac_cfg=hls_config,
    bands=["red", "green", "blue", "nir", "fmask"],
    geopolygon=aoi_geom,
)
trend_data

## Calculate evi and mask data

In [None]:
# EVI2 is added before masking here; apply_cloud_mask applies its mask to the
# whole dataset, so the new evi2 variable is masked along with the bands.
trend_data["evi2"] = calculate_evi2(trend_data)

trend_data_masked = apply_cloud_mask(trend_data)

## Resample to three-month periods, starting in December of each year

In [None]:
# Resample data to quarters, from beginning of December
quarters = trend_data_masked.resample(time="QS-DEC").mean()

# Only keep quarters where the month start is December
DJF_average = quarters.sel(time=quarters.time.dt.month == 12)

# Compute
DJF_average = DJF_average.compute()

In [None]:
# One EVI2 panel per retained December-start quarter, four panels per row
DJF_average["evi2"].plot.imshow(col="time", col_wrap=4)

## Get as a time series by taking the average EVI value for each year

In [None]:
# Average EVI2 over the x and y dimensions, leaving one value per time step
evi_over_time = DJF_average["evi2"].mean(["x", "y"])

In [None]:
# Scatter plot of the per-period spatial mean EVI2
evi_over_time.plot.scatter()