# Glaciers Distribution

## Import packages

In [None]:
import fsspec
import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import regionmask
from c3s_eqc_automatic_quality_control import download

# Select the matplotlib style sheet used by the figures drawn below
plt.style.use("seaborn-v0_8-notebook")

## Define Parameters

In [None]:
# First and last hydrological year to request, each written as "YYYY_YYYY"
period_start = "1975_1976"
period_stop = "2020_2021"

# Fail early with an explicit message when a period is not made of two
# 4-digit years joined by an underscore. An explicit raise is used because
# assert statements are skipped when Python runs with -O.
invalid_periods = [
    period
    for period in (period_start, period_stop)
    if not (
        len(period) == 9
        and period[4] == "_"
        and (period[:4] + period[5:]).isdecimal()
    )
]
if invalid_periods:
    raise ValueError(
        f"Invalid period(s) {invalid_periods}: expected the form 'YYYY_YYYY'"
    )

## Define requests

In [None]:
# Each request is a (collection id, parameters) pair that is later unpacked
# into download.download_and_transform.
request_extent = (
    "insitu-glaciers-extent",
    {"variable": "all", "format": "zip", "version": "6_0"},
)
shapefile_url = "https://www.glims.org/RGI/rgi60_files/00_rgi60_regions.zip"

# Split the "YYYY_YYYY" periods into their two integer years
y0_start, y1_start = (int(part) for part in period_start.split("_"))
y0_stop, y1_stop = (int(part) for part in period_stop.split("_"))

# One "YYYY_YY" label per year pair, from period_start to period_stop inclusive
first_years = range(y0_start, y0_stop + 1)
second_years = range(y1_start, y1_stop + 1)
hydrological_years = [
    f"{first}_{str(second)[-2:]}" for first, second in zip(first_years, second_years)
]
request_mass_change = (
    "derived-gridded-glacier-mass-change",
    {
        "variable": "glacier_mass_change",
        "product_version": "wgms_fog_2022_09",
        "format": "zip",
        "hydrological_year": hydrological_years,
    },
)

## Download and open extent data

In [None]:
# Takes a couple of minutes
df = download.download_and_transform(*request_extent).to_pandas()
glacier_points = gpd.points_from_xy(x=df["CENLON"], y=df["CENLAT"])
gdf = gpd.GeoDataFrame(df, geometry=glacier_points, crs="EPSG:4326")


def _to_datetime(raw):
    """Convert a column of "YYYYMMDD" strings to datetimes.

    A month or day equal to "99" is replaced by "01", and any value whose
    string starts with "-" becomes missing.
    NOTE(review): "99" and the leading "-" look like placeholders for unknown
    dates; confirm against the dataset documentation.
    """
    year_part = raw.str[:4]
    month_part = raw.str[4:6].replace("99", "01")
    day_part = raw.str[6:8].replace("99", "01")
    stamp = year_part + month_part + day_part
    return pd.to_datetime(stamp.where(~stamp.str.startswith("-")))


# Digitization year: year of the mean of the begin and end dates
columns = [_to_datetime(gdf[name]) for name in ("BGNDATE", "ENDDATE")]
gdf["year"] = pd.DataFrame(columns).mean().dt.year.astype("Int64")

## Download and transform mass change data

In [None]:
# Get data (the chunks argument is forwarded to download_and_transform)
ds = download.download_and_transform(
    *request_mass_change, chunks={"hydrological_year": 1}
)

# Mask data: merge the shapefile polygons by RGI_CODE and build one mask
# layer per resulting region
shapefile_url = "https://www.glims.org/RGI/rgi60_files/00_rgi60_regions.zip"
gdf_mask = gpd.read_file(shapefile_url).dissolve(by="RGI_CODE", as_index=False)
regions = regionmask.from_geopandas(
    gdf_mask,
    names="FULL_NAME",
    abbrevs="WGMS_CODE",
    numbers="RGI_CODE",
)
mask = regions.mask_3D(ds)
ds = ds.where(mask)

# Sum over the grid and express time as plain years
spatial_dims = ("latitude", "longitude")
ds = ds.sum(spatial_dims, keep_attrs=True)
ds["time"] = ds["time"].dt.year
ds["time"].attrs.update(long_name="Time", units="yr")
for variable in ds.data_vars.values():
    # Yearly values: tag the units as "per year" and prettify the long name
    variable.attrs["units"] = variable.attrs["units"] + " $yr^{-1}$"
    variable.attrs["long_name"] = variable.attrs["long_name"].replace("_", " ").title()

# Compute and add cumulative; its units drop the "per year" suffix added above
glacier = ds["Glacier"]
cumulative = glacier.cumsum("time")
cumulative.attrs = {
    "units": glacier.attrs["units"].split()[0],
    "long_name": f"Cumulative {glacier.attrs['long_name']}",
}
ds["Cumulative"] = cumulative

## Print some statistics

In [None]:
# Number of rows in the extent table and the sum of their AREA column
n_glaciers = len(gdf)
total_area = gdf["AREA"].sum()
print(
    f"A total number of {n_glaciers} glaciers is present in the dataset,"
    f" covering a total area of {total_area:.2f} km^2."
)

## Plotting function

In [None]:
def plot_map(gdf, var_name=None, label=None, title=None, **kwargs):
    """Plot a GeoDataFrame on top of world country boundaries.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Data to draw with ``gdf.plot``.
    var_name : str, optional
        Column used to colour the markers. When given, a legend is requested
        and default ``legend_kwds`` are supplied.
    label : str, optional
        Legend label, used only when ``var_name`` is set and ``legend_kwds``
        does not already contain a ``"label"`` entry.
    title : str, optional
        Axes title.
    **kwargs
        Extra keyword arguments forwarded to ``gdf.plot``; they take
        precedence over the defaults set here.

    Returns
    -------
    The axes holding the country boundaries and the plotted data.
    """
    kwargs = {"markersize": 5, "legend": var_name is not None} | kwargs
    if var_name:
        kwargs = {"c": var_name, "column": var_name} | kwargs
        legend_kwds = kwargs.get("legend_kwds", {"shrink": 0.49, "extend": "both"})
        if label is not None:
            # Build a new dict so a caller-supplied legend_kwds is never
            # mutated; an explicit "label" from the caller still wins.
            legend_kwds = {"label": label, **legend_kwds}
        kwargs["legend_kwds"] = legend_kwds

    # Country outlines, opened through fsspec's "simplecache" protocol
    shapefile_url = "https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_countries.zip"
    with fsspec.open(f"simplecache::{shapefile_url}") as f:
        gdf_countries = gpd.read_file(f)

    ax = gdf_countries.boundary.plot(
        figsize=(20, 16), facecolor="none", edgecolor="black", linewidth=0.25
    )
    gdf.plot(ax=ax, **kwargs)
    ax.axis("off")
    if title:
        ax.set_title(title, fontsize=25)
    return ax

## Plot glaciers location

In [None]:
# Draw every glacier as a red marker (no colour-coded column, hence no legend)
location_title = "Glacier distribution around the year 2000 according to the RGI v6.0"
ax = plot_map(gdf, color="red", title=location_title)

## Plot glaciers distribution over years: Bars

In [None]:
# Number of glaciers per digitization year; years without glaciers are shown as 0
first_year, last_year = gdf["year"].min(), gdf["year"].max()
size = gdf.groupby("year").size()
size = size.reindex(range(first_year, last_year + 1), fill_value=0)

# Glaciers with no digitization year are left out of the bars: report them
missing = gdf["year"].isnull().sum()
missing_perc = 100 * (missing / len(gdf))
note = f"Date of digitization data are missing for {missing} glaciers or {missing_perc:.2f}% of the dataset."

ax = size.plot.bar(figsize=(15, 5), grid=True, ylabel="Number of glaciers")
_ = ax.text(
    0,
    1,
    note,
    transform=ax.transAxes,
    bbox={"facecolor": "white", "edgecolor": "black"},
)

## Plot glaciers distribution over years: Map

In [None]:
# Discrete colour scale with one colour per decade between 1940 and 2020
cmap = mpl.cm.turbo
decade_edges = range(1940, 2021, 10)
norm = mpl.colors.BoundaryNorm(decade_edges, cmap.N)
map_options = {"var_name": "year", "label": "year", "cmap": cmap, "norm": norm}
ax = plot_map(
    gdf,
    title="Glacier distribution around the year 2000 according to the RGI v6.0",
    **map_options,
)

## Plot year VS region

In [None]:
def weighted_average(df, field_name, weights_name):
    """Return the weighted mean of a column, skipping incomplete rows.

    Rows where either ``field_name`` or ``weights_name`` is null are dropped
    before computing ``sum(field * weights) / sum(weights)``.
    """
    complete = df.dropna(subset=[field_name, weights_name])
    values, weights = complete[field_name], complete[weights_name]
    return values.mul(weights).sum() / weights.sum()


# The region number is read from characters 6-7 of the RGIID string
gdf["region"] = gdf["RGIID"].str[6:8].astype(int)

# Per-region mean digitization year: plain mean and AREA-weighted mean
grouped = gdf[["year", "AREA", "region"]].groupby("region")
arithmetic_mean = grouped["year"].mean()
weighted_mean = grouped.apply(
    weighted_average, field_name="year", weights_name="AREA", include_groups=False
)
means = {"arithmetic": arithmetic_mean, "weighted": weighted_mean}

# Draw one line per kind of mean and collect the summary sentences
text = []
for label, mean in means.items():
    ax = mean.plot(label=label)
    overall = mean.mean()
    text.append(f"The global {label} mean year of digitization is {overall:.2f} AD")

ax.set_xlabel("RGI region number")
ax.set_ylabel("Year of digitalization")
ax.grid()
ax.legend()
summary = "\n".join(text)
_ = ax.text(
    0,
    0,
    summary,
    transform=ax.transAxes,
    bbox={"facecolor": "white", "edgecolor": "black"},
)

## Plot cumulative mass change

In [None]:
# One panel per region, five panels per row, each with a grid
facet = ds["Cumulative"].plot(col="region", col_wrap=5)
for ax in facet.axs.flat:
    ax.grid()

## Under/over estimation

In [None]:
# Reference year against which each region's digitization year is compared
year = 2000

# Cumulative mass change of each region at its area-weighted mean year of
# digitization, minus the value at the reference year.
# NOTE(review): the 1.091 factor presumably converts mass to ice volume (km3);
# confirm the constant and its units.
interpolated = ds["Cumulative"].interp(time=means["weighted"].to_xarray())
volume = 1.091 * (interpolated - ds["Cumulative"].sel(time=year)).compute()
for region, result in volume.groupby("region", squeeze=False):
    print(
        f"The volume estimate in RGI region {region:>2}"
        f" for the year {year} AD is"
        f" {'under' if result < 0 else 'over':>5}estimated"
        f" by {abs(float(result)):.2f} km3."
    )

# The global wording must follow the sign of the global total, not of
# `result`, which still holds the last region visited by the loop above.
total = volume.sum()
print(
    f"\nThe volume estimate at the global scale"
    f" for the year {year} AD is"
    f" {'under' if total < 0 else 'over'}estimated"
    f" by {abs(float(total)):.2f} km3."
)