# FAPAR satellite data completeness

## Import packages

In [None]:
import fsspec
import geopandas
import matplotlib.pyplot as plt
import shapely.geometry
from c3s_eqc_automatic_quality_control import download, utils

# Apply matplotlib's "seaborn-v0_8-notebook" style sheet to the figures below
plt.style.use("seaborn-v0_8-notebook")

## Define variables

In [None]:
# Request parameters
# Years to request; both bounds are included when the request is built
year_start = 2014
year_stop = 2014
# Days of the month to request (zero-padded to strings when the request is built)
nominal_days = [3, 13, 21, 23, 24]
variables = ["fapar"]

# Region
lon_slice = slice(-13, 35)
# NOTE(review): start > stop here, presumably because the latitude coordinate
# is stored in descending order -- confirm against the dataset
lat_slice = slice(72, 30)

# Shapefile read later with geopandas (layer "Continents") to clip the maps
shapefile_url = "https://figshare.com/ndownloader/files/23392280"

## Set the data request

In [None]:
# Collection and request built from the parameters defined above.
# Years, months and nominal days are passed as lists of strings; months and
# nominal days are zero-padded to two digits.
collection_id = "satellite-lai-fapar"
request = {
    "satellite": "proba",
    "sensor": "vgt",
    "horizontal_resolution": "1km",
    "product_version": "V2",
    "year": list(map(str, range(year_start, year_stop + 1))),
    "month": [format(month, "02d") for month in range(1, 13)],
    "nominal_day": [format(day, "02d") for day in nominal_days],
    "format": "zip",
    "variable": variables,
}

## Define function to compute missing values count

In [None]:
def compute_missing_values_count(ds):
    """Return the percentage of time steps with a null ``fAPAR`` value.

    The spatial dimensions ("longitude", "latitude") and the CRS
    ("epsg:4326") are registered on ``ds`` through the ``rio`` accessor with
    ``inplace=True``, so the input dataset itself is modified.

    Parameters
    ----------
    ds: Dataset
        Dataset with a "fAPAR" variable and a "time" dimension.

    Returns
    -------
    Dataset
        Single-variable dataset named "fAPAR" holding, for every position
        along the remaining dimensions, the share of null time steps in %.
    """
    ds.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
    ds.rio.write_crs("epsg:4326", inplace=True)

    # Number of null entries along time, relative to the number of time steps
    null_count = ds["fAPAR"].isnull().sum("time")
    missing_perc = null_count / ds.sizes["time"] * 100
    missing_perc.attrs.update({"long_name": "Missing values", "units": "%"})

    return missing_perc.to_dataset(name="fAPAR")

## Download and preprocess data

In [None]:
# Download and cache; utils.regionalise is supplied as the transform function
# together with the longitude/latitude slices of the region
ds = download.download_and_transform(
    collection_id,
    request,
    chunks={"year": 1, "nominal_day": 1, "variable": 1},
    transform_func=utils.regionalise,
    transform_func_kwargs={"lon_slice": lon_slice, "lat_slice": lat_slice},
    cached_open_mfdataset_kwargs={"combine": "nested", "concat_dim": "time"},
)

# Shapefile: opened through fsspec's "simplecache" protocol, "Continents" layer
with fsspec.open("simplecache::" + shapefile_url) as file:
    world_shape = geopandas.read_file(file, layer="Continents")

## Define plotting function

In [None]:
def imshow_and_hist(da, shape):
    """Draw a map of ``da`` next to a histogram of its values.

    ``da`` is first clipped to the polygons in ``shape``; both panels show
    the clipped data. The histogram title reports the number of clipped
    pixels equal to 100 as a percentage of the non-null clipped pixels.

    Parameters
    ----------
    da: DataArray
        DataArray to plot (the ``rio`` accessor is used for clipping)
    shape: GeoDataFrame
        Geopandas object with the clipping polygons; its ``CONTINENT``
        column provides the figure title

    Returns
    -------
    figure, (map axes, histogram axes)
    """
    fig, axes = plt.subplots(
        nrows=1, ncols=2, figsize=[10, 5], gridspec_kw={"width_ratios": [3, 2]}
    )
    ax_imshow, ax_hist = axes

    # Clip the data to the polygons
    polygons = shape.geometry.apply(shapely.geometry.mapping)
    da = da.rio.clip(polygons, shape.crs, drop=True)

    # Left panel: map
    da.plot.imshow(ax=ax_imshow)
    ax_imshow.set_title("Map")

    # Right panel: histogram with the y axis label and ticks on the right
    da.plot.hist(bins=50, ax=ax_hist)
    ax_hist.set_ylabel("Frequency")
    ax_hist.yaxis.set_label_position("right")
    ax_hist.yaxis.tick_right()

    # Pixels equal to 100 relative to all non-null pixels, in percent
    n_no_data = (da == 100).sum()
    n_not_null = da.notnull().sum()
    missing_data_perc = n_no_data / n_not_null * 100
    ax_hist.set_title(
        f"Percentage of pixels with no data respect the total: {float(missing_data_perc):f} %"
    )

    fig.suptitle(", ".join(shape.CONTINENT))
    return fig, (ax_imshow, ax_hist)

## Plot Europe map

In [None]:
# Share of time steps (in %) with a null fAPAR value
# NOTE(review): this repeats the computation in compute_missing_values_count,
# which is not called anywhere in the visible code -- consider reusing it
da_mvc = (ds["fAPAR"].isnull().sum("time") / ds.sizes["time"]) * 100
da_mvc.attrs.update({"long_name": "Missing values", "units": "%"})

# The CRS is required by the rio-based clipping done in imshow_and_hist
da_mvc.rio.write_crs("epsg:4326", inplace=True)
imshow_and_hist(da_mvc, world_shape[world_shape["CONTINENT"] == "Europe"])