# Use Case: Using land cover products to quantify urbanization trends.

## Import packages

In [None]:
import geopandas as gpd
import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import regionmask
import scipy.stats
import shapely.geometry
import xarray as xr
from c3s_eqc_automatic_quality_control import download, utils

# Apply the "seaborn-v0_8-notebook" matplotlib style sheet to every figure
# created after this point.
plt.style.use("seaborn-v0_8-notebook")

## Define Parameters

In [None]:
# Every second year from 1992 through 2020 (inclusive)
years = range(1992, 2020 + 1, 2)

# Area of interest: Iberian Peninsula.
# The latitude slice is written from 45 down to 35
# (presumably because the dataset stores latitude north-to-south; confirm).
lon_slice = slice(-10, 4)
lat_slice = slice(45, 35)

# Zipped shapefile with the administrative regions (NUTS2 level is selected later)
shapefile_url = (
    "https://gisco-services.ec.europa.eu/distribution/v2/nuts/shp/"
    "NUTS_RG_20M_2021_4326.shp.zip"
)

## Define request

In [None]:
# One request per year; the product version depends on the year:
# "v2.0.7cds" before 2016 and "v2.1.1" from 2016 onwards.
collection_id = "satellite-land-cover"
requests = [
    dict(
        variable="all",
        format="zip",
        version="v2.1.1" if year >= 2016 else "v2.0.7cds",
        year=year,
    )
    for year in years
]

## Download and transform data

In [None]:
# Download every yearly request and crop each one to the area of interest
ds = download.download_and_transform(
    collection_id,
    requests,
    transform_func=utils.regionalise,
    transform_func_kwargs={"lon_slice": lon_slice, "lat_slice": lat_slice},
)

# Index the dataset by calendar year instead of timestamp: add a "year"
# coordinate derived from "time", make it the dimension, and discard "time".
# `drop_vars` replaces the deprecated `Dataset.drop`.
ds = (
    ds.assign_coords(year=ds["time"].dt.year)
    .swap_dims(time="year")
    .drop_vars("time")
)

## Mask regions

In [None]:
# Bounding box (degrees) used to pre-select regions; together with the
# country filter below it presumably leaves out islands and other
# territories outside the peninsula (confirm against the region list).
lon_bounds = [-10.18, 1]
lat_bounds = [36.5, 43]
crs = "epsg:4326"

# Build the rectangle directly from its extremes instead of hand-assembling
# the corner list, so the bounds above are left untouched.
bbox = shapely.geometry.box(
    min(lon_bounds), min(lat_bounds), max(lon_bounds), max(lat_bounds)
)

# Keep NUTS level-2 regions of Spain ("ES") and Portugal ("PT") that
# intersect the bounding box.
gdf = gpd.read_file(shapefile_url)
gdf = gdf[gdf["LEVL_CODE"] == 2]
gdf = gdf[gdf["NUTS_ID"].str.startswith(("ES", "PT"))]
gdf = gdf[gdf.intersects(bbox)]
gdf = gdf.to_crs(crs)

# NOTE(review): the `.rio` accessor is not registered by any import visible
# in this notebook; presumably a dependency imports rioxarray. Confirm.
ds.rio.write_crs(crs, inplace=True)

# Rasterize the regions onto the dataset grid: each cell gets the number of
# the region it falls in.
regions = regionmask.from_geopandas(gdf, names="NUTS_NAME")
mask = regions.mask(ds["longitude"], ds["latitude"])

## Compute cell area

In [None]:
# Grid spacing (presumably in degrees, about 1/360; confirm) and its length
# in kilometres, using 111.195 km per degree.
scaling_factor = 0.002778
resolution_km = 111.195 * scaling_factor

# Rebuild the latitude of every row by stepping down from the northernmost
# latitude. This assumes the rows are evenly spaced and ordered north-to-south.
indexes = xr.DataArray(np.arange(ds.sizes["latitude"]), dims="latitude")
lats = ds["latitude"].max() - scaling_factor * indexes

# Cell area shrinks with the cosine of the latitude
grid_cell_area = np.cos(np.deg2rad(lats)) * resolution_km**2
grid_cell_area.attrs = dict(
    standard_name="cell_area",
    long_name="Area",
    units="km2",
)

## Compute urban area and trends

In [None]:
# Total area per region and year of the cells classified as 190 (urban):
# non-urban cells become NaN and are ignored by the sum.
da_grouped = grid_cell_area.where(ds["lccs_class"] == 190)
da_grouped = da_grouped.groupby(mask.rename("region")).sum(("latitude", "longitude"))
da_grouped.attrs = {"long_name": "Urban Area", "units": "km$^2$"}
# Replace the numeric region labels with the region names
da_grouped = da_grouped.assign_coords(region=regions.names).compute()

# Per-region trend statistics: Theil-Sen slope/intercept and Kendall's tau
# with its p-value, all computed against the year.
df_dict = {"slope": [], "intercept": [], "tau": [], "p-value": []}
for region in gdf["NUTS_NAME"]:
    da_region = da_grouped.sel(region=region).squeeze()
    slope, intercept = scipy.stats.theilslopes(da_region, da_region["year"])[:2]
    tau, p_value = scipy.stats.kendalltau(da_region["year"], da_region)
    for column, statistic in (
        ("slope", slope),
        ("intercept", intercept),
        ("tau", tau),
        ("p-value", p_value),
    ):
        df_dict[column].append(statistic)

# Attach the statistics to the regions table, one column per statistic
gdf[list(df_dict)] = pd.DataFrame(df_dict, index=gdf.index)

## Define plotting functions

In [None]:
def plot_trend(da, df, **kwargs):
    """Plot one time series panel per region with its fitted trend line.

    Parameters
    ----------
    da
        Data with a "region" dimension (used for the panels) and a "year"
        coordinate (used as the x axis).
    df
        Table with a "NUTS_NAME" column matching the region names, plus the
        "slope", "intercept", "tau" and "p-value" columns shown in the legend.
    **kwargs
        Forwarded to ``da.plot``.

    Returns
    -------
    The facet grid returned by ``da.plot``.
    """
    facet = da.plot(col="region", marker="o", sharex=False, **kwargs)
    panels = zip(facet.axs.flatten(), facet.name_dicts.flatten())
    for ax, sel_dict in panels:
        # Panels without a selection carry no data: leave them untouched
        if not sel_dict:
            continue

        region_name = sel_dict["region"]
        years = da.sel(sel_dict)["year"]
        df_region = df[df["NUTS_NAME"] == region_name].squeeze()
        slope = df_region["slope"]
        intercept = df_region["intercept"]

        legend_lines = [
            f"slope = {slope:.3f} km$^2$/year",
            f"p-value = {df_region['p-value']:.3f}",
            f"tau = {df_region['tau']:.3f}",
            f"intercept = {intercept:.3f} km$^2$",
        ]
        # Dashed black line: the linear trend evaluated at every year
        ax.plot(
            years,
            years * slope + intercept,
            label="\n".join(legend_lines),
            color="k",
            ls="--",
        )
        ax.set_xticks(years[::2])
        ax.set_title(region_name)
        ax.legend()
        ax.grid()
    return facet


def plot_maps(da, vmin=None, vmax=None, cmap="viridis"):
    """Draw one choropleth map of the regions per entry along "year".

    The maps share a single colour scale and colorbar, and are laid out in
    rows of at most three panels. The region geometries come from the
    module-level ``gdf``.

    Parameters
    ----------
    da
        Data with a "year" coordinate and one value per region. The values
        are passed to ``gdf.plot`` positionally, so they are assumed to be in
        the same order as the rows of ``gdf``.
        Its "long_name" and "units" attributes label the colorbar.
    vmin, vmax
        Limits of the colour scale. Default to the minimum / maximum of ``da``.
    cmap
        Colormap used for the maps and the colorbar.

    Returns
    -------
    fig, axs
        The figure and the 2D array of map axes.
    """
    if vmin is None:
        vmin = da.min()
    if vmax is None:
        vmax = da.max()
    norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
    mappable = plt.cm.ScalarMappable(norm=norm, cmap=cmap)

    n_years = da.sizes["year"]
    n_cols = min(3, n_years)
    n_rows = int(np.ceil(n_years / n_cols))
    fig, axs = plt.subplots(
        n_rows, n_cols, figsize=(n_cols * 6, n_rows * 3), squeeze=False
    )
    axs_iter = iter(axs.flatten())
    # Select each year explicitly rather than iterating a groupby: whether
    # groupby keeps the grouped dimension depends on the xarray version, and
    # the selection always yields one value per region. Using a separate name
    # also keeps `da` pointing at the full input for the colorbar label.
    for year in da["year"].values.tolist():
        da_year = da.sel(year=year)
        ax = next(axs_iter)
        gdf.plot(da_year.values, cmap=cmap, norm=norm, edgecolor="k", ax=ax)
        ax.axis("off")
        ax.set_title(f"{year=}")
    # Hide the panels left over when the grid is not completely filled
    for ax in axs_iter:
        ax.axis("off")

    # Reserve a strip on the right of the figure for the shared colorbar
    plt.tight_layout()
    fig.subplots_adjust(right=0.85)
    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.7])
    fig.colorbar(
        mappable,
        cax=cbar_ax,
        label=f"{da.attrs['long_name']} [{da.attrs['units']}]",
    )
    return fig, axs

## Plot trends

In [None]:
# One panel per region, three panels per row
_ = plot_trend(
    da_grouped,
    gdf,
    col_wrap=3,
    aspect=1.5,
)

## Plot highest trends

In [None]:
# Names of the three regions with the largest slope
regions_of_interest = (
    gdf.sort_values("slope", ascending=False)["NUTS_NAME"].head(3).values
)
_ = plot_trend(
    da_grouped.sel(region=regions_of_interest),
    gdf,
    col_wrap=3,
    aspect=1.5,
)

## Plot urban area per year

In [None]:
# One map per year, coloured by urban area, with the default colour scale
fig, axs = plot_maps(da=da_grouped)

## Plot urban area change per year maps

In [None]:
# Change between consecutive time steps; the attributes are kept so that the
# long name and units remain available for the plot labels.
with xr.set_options(keep_attrs=True):
    da_grouped_diff = da_grouped.diff(dim="year")
da_grouped_diff.attrs["long_name"] = da_grouped_diff.attrs["long_name"] + " Change"

# Label every step with the pair of years it spans, e.g. "<later> - <earlier>"
da_grouped_diff["year"] = [
    f"{later} - {earlier}"
    for earlier, later in zip(
        da_grouped["year"].values[:-1], da_grouped["year"].values[1:]
    )
]

# Colour scale symmetric around zero for the diverging colormap
vmax = abs(da_grouped_diff).max()
fig, axs = plot_maps(da_grouped_diff, vmin=-vmax, vmax=vmax, cmap="RdBu_r")

## Plot urban area change per region bars

In [None]:
# Grid of bar charts, one per region, with at most three per row
n_cols = min(3, da_grouped_diff.sizes["region"])
n_rows = -(-da_grouped_diff.sizes["region"] // n_cols)  # ceiling division
axs = da_grouped_diff.to_pandas().plot.bar(
    subplots=True,
    layout=(n_rows, n_cols),
    figsize=(n_cols * 6, n_rows * 3),
    legend=False,
    ylabel="{} [{}]".format(
        da_grouped_diff.attrs["long_name"], da_grouped_diff.attrs["units"]
    ),
)
for ax in axs.flat:
    ax.grid()

## Plot trend map

In [None]:
# Map of the regions coloured by the slope of their urban-area trend.
# The colour limits are left to the plotting defaults; the previously computed
# `vmax` was never passed to the plot and has been removed.
ax = gdf.plot(
    gdf["slope"],
    cmap="viridis",
    edgecolor="k",
    legend=True,
    legend_kwds={"label": "Slope [km$^2$/year]"},
)
_ = ax.axis("off")

## Compute and show percentage difference in urban area

In [None]:
# Change between the first and the last year, expressed as a percentage of
# the first year's value.
da_grouped_perc = da_grouped.isel(year=[0, -1]).diff("year")
da_grouped_perc = (100 * da_grouped_perc / da_grouped.isel(year=0)).squeeze()
da_grouped_perc.attrs = dict(
    units="%",
    long_name=f"Percentage Difference of {da_grouped.attrs['long_name']}",
)

# Tabular view; the bare expression on the last line displays it in the notebook
df_grouped_perc = da_grouped_perc.to_pandas()
df_grouped_perc

## Plot percentage difference in urban area

In [None]:
# Urban area in the first and in the last year, side by side
fig, axs = plot_maps(da_grouped.isel(year=[0, -1]))
plt.show()

# Percentage difference per region. The table is indexed by region name so
# that the values, which are labelled by region, can be matched to the geometries.
ax = gdf.set_index("NUTS_NAME").plot(
    column=df_grouped_perc,
    cmap="viridis",
    edgecolor="k",
    legend=True,
    legend_kwds={
        "label": "{} [{}]".format(
            da_grouped_perc.attrs["long_name"], da_grouped_perc.attrs["units"]
        )
    },
)
ax.set_title(
    "From {} to {}".format(
        da_grouped["year"].values[0], da_grouped["year"].values[-1]
    )
)
_ = ax.axis("off")