# Data completeness: Check the spatial coverage of satellite soil moisture data

## Import packages

In [None]:
import warnings

import geopandas
import matplotlib.pyplot as plt
import shapely.geometry
from c3s_eqc_automatic_quality_control import download

# Silence every warning category so warnings do not clutter the notebook
# output (note that this also hides deprecation and runtime warnings).
warnings.filterwarnings("ignore")
# Apply the "seaborn-v0_8-notebook" matplotlib style sheet to all figures below.
plt.style.use("seaborn-v0_8-notebook")

## Define variables

In [None]:
# First and last year of the analysed period; both bounds are inclusive
# (the request is built with range(year_start, year_stop + 1)).
year_start = 1997
year_stop = 1998

# Shapefile opened with geopandas; its CONTINENT column is used to select the
# polygons and to title the figures.
shapefile_url = "https://figshare.com/ndownloader/files/23392280"

## Set the data request

In [None]:
# Identifier of the collection the request below is sent to.
collection_id = "satellite-soil-moisture"

# Request parameters: one entry per year in [year_start, year_stop] and all
# twelve months, each as a zero-padded string.
request = {
    "variable": "surface_soil_moisture",
    "type_of_sensor": "active",
    "time_aggregation": "month_average",
    "year": list(map(str, range(year_start, year_stop + 1))),
    "month": [format(month, "02d") for month in range(1, 13)],
    "day": "01",
    "type_of_record": "cdr",
    "version": "v201706",
}

## Define function to compute missing values count (as a percentage of time steps)

In [None]:
def compute_missing_values_count(ds):
    """Return the share of missing ``sm`` values along ``time``, in percent.

    Despite the function name, the result is not a raw count: the number of
    null ``sm`` values along ``time`` is divided by the number of time steps
    and multiplied by 100.

    Parameters
    ----------
    ds: Dataset
        Dataset with an ``sm`` variable and a ``time`` dimension. Its spatial
        dimensions (``longitude``/``latitude``) and CRS (``epsg:4326``) are
        registered in place through the ``rio`` accessor.

    Returns
    -------
    Dataset
        Dataset holding a single variable named ``mvc``.
    """
    # Register the spatial metadata on the incoming dataset (in place).
    ds.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
    ds.rio.write_crs("epsg:4326", inplace=True)

    # Number of null values per grid cell, then scaled to a percentage.
    n_missing = ds["sm"].isnull().sum("time")
    n_steps = ds.sizes["time"]
    percent_missing = n_missing / n_steps * 100
    percent_missing.attrs.update({"long_name": "Missing values", "units": "%"})

    return percent_missing.to_dataset(name="mvc")

## Download and preprocess data

In [None]:
# Dataset
# The result is used below through its "mvc" variable, i.e. the output of
# compute_missing_values_count.
# NOTE(review): chunks={"year": 1} presumably splits the request into one
# download per year, and transform_chunks=False presumably applies the
# transform once to the whole period rather than to each chunk — confirm
# against the c3s_eqc_automatic_quality_control documentation.
ds = download.download_and_transform(
    collection_id,
    request,
    chunks={"year": 1},
    transform_func=compute_missing_values_count,
    transform_chunks=False,
)

# Shapefile
# Read the polygons directly from the URL; they are used to clip the data and
# their CONTINENT column labels the figures.
world_shape = geopandas.read_file(shapefile_url)

## Define plotting function

In [None]:
def imshow_and_hist(da, shape):
    """Draw a clipped map and the histogram of its values in one figure.

    The data are clipped to the polygons of ``shape`` (with ``drop=True``),
    shown as an image in the left panel and as a 50-bin histogram in the
    right panel. The histogram title reports the number of cells equal to
    100 relative to the number of non-null cells after clipping, in percent.
    The figure title lists the ``CONTINENT`` values of ``shape``.

    Parameters
    ----------
    da: DataArray
        Values to plot; must be usable with the ``rio`` accessor
    shape: GeoDataFrame
        Polygons used for clipping; must provide a ``CONTINENT`` column

    Returns
    -------
    figure, (map axes, histogram axes)
    """
    fig, axes = plt.subplots(
        1, 2, figsize=[10, 5], gridspec_kw={"width_ratios": [3, 2]}
    )
    map_ax, hist_ax = axes

    # Keep only the cells that fall inside the polygons.
    geometries = shape.geometry.apply(shapely.geometry.mapping)
    clipped = da.rio.clip(geometries, shape.crs, drop=True)

    # Left panel: map.
    clipped.plot.imshow(ax=map_ax)
    map_ax.set_title("Map")

    # Right panel: histogram, with the y axis moved to the right-hand side.
    clipped.plot.hist(bins=50, ax=hist_ax)
    hist_ax.set_ylabel("Frequency")
    hist_ax.yaxis.set_label_position("right")
    hist_ax.yaxis.tick_right()

    # Cells equal to 100 relative to all non-null cells, as a percentage.
    n_fully_missing = (clipped == 100).sum()
    n_valid = clipped.notnull().sum()
    missing_share = float(n_fully_missing / n_valid * 100)
    hist_ax.set_title(
        f"Percentage of area with missing data: {missing_share:f} %"
    )

    fig.suptitle(", ".join(shape.CONTINENT))
    return fig, (map_ax, hist_ax)

## Plot world

In [None]:
# Global view: clip against all polygons of the shapefile at once.
fig, axes = imshow_and_hist(ds["mvc"], world_shape)

## Plot continents

In [None]:
# One figure per entry of the CONTINENT column, clipped to the matching rows.
for continent in world_shape.CONTINENT:
    imshow_and_hist(
        ds["mvc"],
        world_shape.loc[world_shape.CONTINENT == continent],
    )