# Reliability of the Land Cover classification

## Import packages

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import regionmask
import shapely.geometry
import xarray as xr
from c3s_eqc_automatic_quality_control import download, plot, utils

# Apply matplotlib's "seaborn-v0_8-notebook" style sheet to the figures drawn below
plt.style.use("seaborn-v0_8-notebook")

## Define parameters

In [None]:
# Years requested from the land-cover collection
years = [1992, 2020]

# Region of interest, as coordinate slices.
# The latitude slice runs from 45 down to 35 (presumably because the
# dataset stores latitudes north-to-south; confirm against the data).
lat_slice = slice(45, 35)
lon_slice = slice(-10, 4)

# Shapefile with regions
shapefile_url = (
    "https://gisco-services.ec.europa.eu/distribution/v2/nuts/shp/"
    "NUTS_RG_20M_2021_4326.shp.zip"
)

## Define request

In [None]:
collection_id = "satellite-land-cover"

# One request per year; years from 2016 onwards use version "v2.1.1",
# earlier years use "v2.0.7cds".
requests = [
    dict(
        variable="all",
        format="zip",
        version="v2.1.1" if year >= 2016 else "v2.0.7cds",
        year=year,
    )
    for year in years
]

## Download and regionalise

In [None]:
# Download every request and apply `utils.regionalise` with the
# longitude/latitude slices of the region of interest.
ds = download.download_and_transform(
    collection_id,
    requests,
    transform_func=utils.regionalise,
    transform_func_kwargs={"lon_slice": lon_slice, "lat_slice": lat_slice},
)
# Index the data by calendar year instead of timestamp, then discard the
# original "time" coordinate. `drop_vars` is used because `Dataset.drop`
# is deprecated in xarray.
ds = (
    ds.assign_coords(year=ds["time"].dt.year)
    .swap_dims(time="year")
    .drop_vars("time")
)

## Define custom classification

In [None]:
# IPCC categories as (label, hex colour, class codes grouped under the label).
# The codes are presumably LCCS class values of the "lccs_class" variable;
# verify against the plotting helpers that consume this dictionary.
_IPCC_CATEGORIES = (
    ("No Data", "#000000", [0]),
    ("Agriculture", "#ffffcc", [10, 11, 12, 20, 30, 40]),
    (
        "Forest",
        "#4c9900",
        [50, 60, 61, 62, 70, 71, 72, 80, 81, 82, 90, 100, 160, 170],
    ),
    ("Grassland", "#ccff99", [110, 130]),
    ("Settlement", "#ff0000", [190]),
    ("Wetland", "#99ffff", [180]),
    (
        "Other",
        "#0000ff",
        [120, 121, 122, 140, 150, 151, 152, 153, 200, 201, 202, 210],
    ),
)

# Define IPCC Labels dictionary: label -> (colour, class codes)
ipcc_dict = {
    label: (colour, codes) for label, colour, codes in _IPCC_CATEGORIES
}

## Plot maps

In [None]:
# Draw the maps twice, one row per year: first with `legend_dict=None`
# (presumably the plotting helper's built-in legend), then with the IPCC
# grouping defined in `ipcc_dict`.
lccs_class = ds["lccs_class"]
for legend in (None, ipcc_dict):
    plot.lccs_map(
        lccs_class,
        row="year",
        legend_dict=legend,
        interpolation="antialiased",
    )
    plt.show()

## Mask regions

In [None]:
# Bounding box (in `crs` coordinates) used to pre-select the regions
lon_bounds = [-10.18, 1]
lat_bounds = [36.5, 43]
crs = "epsg:4326"

# Build the rectangle with `shapely.geometry.box`, which lives in the
# submodule this file imports; top-level `shapely.Polygon` is only
# available in Shapely >= 2.
bbox = shapely.geometry.box(
    min(lon_bounds), min(lat_bounds), max(lon_bounds), max(lat_bounds)
)

# Reproject first so the geometries and the bounding box are compared in
# the same CRS, then keep NUTS level-2 regions of Spain ("ES") and
# Portugal ("PT") that intersect the bounding box.
gdf = gpd.read_file(shapefile_url).to_crs(crs)
gdf = gdf[gdf["LEVL_CODE"] == 2]
gdf = gdf[gdf.intersects(bbox)]
gdf = gdf[gdf["NUTS_ID"].str.startswith(("ES", "PT"))]

# Record the CRS on the dataset.
# NOTE(review): the `.rio` accessor needs rioxarray to be registered;
# presumably one of the imported packages does that - confirm.
ds.rio.write_crs(crs, inplace=True)

# One mask value per grid cell identifying the region it falls in
regions = regionmask.from_geopandas(gdf, names="NUTS_NAME")
mask = regions.mask(ds["longitude"], ds["latitude"])

## Compute cell area

In [None]:
# Grid spacing in degrees (presumably ~1/360 degree; confirm against the
# product specification).
scaling_factor = 0.002778
# Cell edge length in km; 111.195 is presumably the length of one degree
# of arc on a spherical Earth.
resolution_km = scaling_factor * 111.195

# Scale the squared edge length by cos(latitude) for every row. The
# latitudes are read from the dataset's own coordinate rather than being
# rebuilt as `max - index * step`, which assumed north-to-south ordering
# and accumulated the rounding error of the hard-coded step.
grid_cell_area = resolution_km**2 * np.cos(np.radians(ds["latitude"]))
grid_cell_area.attrs = {
    "standard_name": "cell_area",
    "long_name": "Area",
    "units": "km2",
}
ds = ds.assign_coords(cell_area=grid_cell_area)

## Plot bars

In [None]:
# For every masked region, draw bar charts of area per land-cover class:
# once per legend (None, then IPCC), per year, and per quantity
# (area in km2, then share of the region's total area in %).
for index, ds_region in ds.groupby(mask):
    region = regions[int(index)].name

    # Absolute cell areas of the region and their sum
    area = ds_region["cell_area"]
    total_area = float(area.sum().values)

    # Same areas expressed as a percentage of the region's total
    area_perc = 100 * area / total_area
    area_perc.attrs = dict(long_name="Area Coverage", units="%")

    for labels_dict in (None, ipcc_dict):
        for year, ds_year_region in ds_region.groupby("year"):
            # The title and class map are identical for both quantities
            title = f"{region=} {year=} {total_area=:f} {area.attrs['units']}"
            lccs_class = ds_year_region["lccs_class"]
            for da in (area, area_perc):
                plot.lccs_bar(
                    da,
                    lccs_class,
                    labels_dict,
                    reduction="sum",
                    title=title,
                )
                plt.show()