# Compute Zonal Means

In [None]:
import typing

import geopandas as gpd
import numpy as np
import pandas as pd
import regionmask

# rioxarray is not directly referenced, but its `rio` extension of `xarray` is
import rioxarray
import xarray as xr
import xrspatial.zonal

Read the distinct geometries of our outbreak regions (computed previously and saved to `geolocations.geojson`):


In [None]:
# Load the outbreak-region geometries from the GeoJSON file in the working directory.
outbreak_regions = gpd.read_file("geolocations.geojson")

# The cast only narrows the static type for type checkers; it has no runtime effect.
geolocation_gdf = typing.cast(gpd.GeoDataFrame, outbreak_regions)
geolocation_gdf

Build the URL of the monthly Land Surface Temperature (LST) data file for a
specific year and month, as hosted by the
[Centre for Environmental Data Analysis Archive](https://archive.ceda.ac.uk/)
(CEDA Archive).


In [None]:
def global_lst_url(*, year: int, month: int) -> str:
    """Return the URL for the global average Land Surface Temperature data file
    for a specific year and month.

    Args:
        year: Calendar year of the monthly file; inserted into the URL as-is.
        month: Calendar month, 1 (January) through 12 (December); zero-padded
            to two digits in the URL.

    Returns:
        The URL of the monthly NetCDF file on the CEDA `dap` server, with the
        `#mode=bytes` fragment appended.

    Raises:
        ValueError: If `month` is not in the range 1-12.
    """

    # Fail early with a clear message: an out-of-range month would otherwise
    # produce a URL for a file that cannot exist, and the mistake would only
    # surface later as an opaque error when opening the remote dataset.
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month!r}")

    # Can check status of CEDA core archives at https://stats.uptimerobot.com/vZPgQt7YnO
    # NOTE: the `dap` service was reported down when this was written; check
    # the status page above if opening the URL fails.

    return (
        "https://dap.ceda.ac.uk/neodc/esacci/land_surface_temperature/data/"
        f"MULTISENSOR_IRCDR/L3S/0.01/v2.00/monthly/{year}/{month:02d}/"
        f"ESACCI-LST-L3S-LST-IRCDR_-0.01deg_1MONTHLY_DAY-{year}{month:02d}01000000-fv2.00.nc"
        # Must add `#mode=bytes` to the end.
        # See https://github.com/Unidata/netcdf4-python/issues/1043
        "#mode=bytes"
    )

Read land surface temperatures for a specific month (here, November 2020) using
`xarray` and select only values within the bounding box of all of our outbreak
regions:


In [None]:
# Bounding box enclosing every outbreak-region geometry.
minx, miny, maxx, maxy = geolocation_gdf.total_bounds
lon_window = slice(minx, maxx)
lat_window = slice(miny, maxy)

with xr.open_dataset(global_lst_url(year=2020, month=11)) as ds:
    # Squeeze out length-1 dimensions, then record the CRS via the `rio`
    # accessor before clipping to the bounding box.
    georeferenced_ds = ds.squeeze(drop=True).rio.write_crs("EPSG:4326")

    # NOTE(review): these label slices assume the `lon`/`lat` coordinates are
    # stored in ascending order -- confirm against the data file.
    # NOTE(review): `open_dataset` reads lazily and `africa_ds` is used after
    # this `with` block closes the file -- confirm the values remain readable,
    # or load the subset here.
    africa_ds = typing.cast(
        xr.Dataset,
        georeferenced_ds.sel(lon=lon_window, lat=lat_window),
    )

display(africa_ds.rio.crs)
display(africa_ds)

Extract the `lst` data variable and convert it from Kelvin to Celsius (the plot call is currently commented out):


In [None]:
# Shift the `lst` variable by 273.15 to go from Kelvin to Celsius.
lst_kelvin_da = africa_ds["lst"]
africa_lst_celsius_da = typing.cast(xr.DataArray, lst_kelvin_da - 273.15)
# Plotting is disabled for now; uncomment to render the temperature map.
# africa_lst_celsius_da.plot(cmap="coolwarm")
africa_lst_celsius_da

Build a zone mask by mapping each region geometry onto the LST longitude/latitude grid, so that we can compute zonal statistics:

In [None]:
# Build the region mask on the same lon/lat grid as the LST subset.
region_mask = regionmask.mask_geopandas(
    geolocation_gdf, africa_ds["lon"], africa_ds["lat"]
)
# The cast only narrows the static type for type checkers; it has no runtime effect.
zones_da = typing.cast(xr.DataArray, region_mask)

zones_da

Count the number of distinct zones present in the mask, ignoring `nan` cells:

In [None]:
# Materialize the mask once, discard NaN cells, then count the distinct zone ids.
zone_labels = zones_da.values
labelled_cells = zone_labels[~np.isnan(zone_labels)]
len(np.unique(labelled_cells))

Compute zonal means for LST:


In [None]:
# Per-zone mean of the Celsius LST values, one row per zone.
zonal_stats_df = xrspatial.zonal.stats(
    zones_da,
    africa_lst_celsius_da,
    stats_funcs=["mean"],
)
# Index by zone id and give the statistic a descriptive column name.
zone_indexed_df = zonal_stats_df.set_index("zone")  # type: ignore
mean_lst_df: pd.DataFrame = zone_indexed_df.rename(columns={"mean": "mean_lst"})  # type: ignore

mean_lst_df

Join the means to the geometries and plot the zonal means:

In [None]:
# Index-aligned inner join: keeps only regions that have a zonal statistic.
joined_gdf = geolocation_gdf.join(mean_lst_df, how="inner")
# NOTE(review): `dropna()` without `subset` removes a row when ANY column is
# missing, not just `mean_lst` -- confirm that is intended for this data.
mean_lst_gdf = joined_gdf.dropna()
mean_lst_gdf.plot("mean_lst", cmap="coolwarm", legend=True)  # type: ignore
mean_lst_gdf

Create a new dataframe that does not include the geometries:

In [None]:
# Keep every attribute column but discard the geometry column.
mean_lst_lct_df = mean_lst_gdf.drop(labels=["geometry"], axis="columns")
mean_lst_lct_df