# ERA5 vs CERRA Extremes

## Import packages

In [None]:
import calendar

import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import xarray as xr
from c3s_eqc_automatic_quality_control import download, plot, utils

plt.style.use("seaborn-v0_8-notebook")

## Define Parameters

In [None]:
# Years to analyse; year_stop is included in the range
year_start = 1985
year_stop = 1985

# Calendar month to analyse, as a number from 1 (January) to 12 (December)
month = 8
assert month in range(1, 13), f"{month=} is invalid"

# Available regions, each described by a longitude and a latitude slice.
# NOTE(review): latitude slices are written (upper, lower); presumably the
# datasets store latitude in descending order — confirm.
region_slices = {
    "Alps": dict(lon_slice=slice(5, 15), lat_slice=slice(48, 43)),
    "Scandinavia": dict(lon_slice=slice(10, 42), lat_slice=slice(72, 62)),
    "Lazio": dict(lon_slice=slice(11, 14), lat_slice=slice(43, 41)),
    "Italy": dict(lon_slice=slice(6, 19), lat_slice=slice(48, 36)),
}

# Region to analyse; must be one of the keys of region_slices
region = "Lazio"
assert region in region_slices, f"{region=} is invalid"

## Define request

In [None]:
# Base request for each collection, keyed by collection ID.
# The year/month/day keys are added later, once per requested year.
requests_dict = {
    # ERA5: all 24 hourly time steps
    "reanalysis-era5-single-levels": dict(
        product_type="reanalysis",
        format="grib",
        variable="2m_temperature",
        time=[f"{hour:02d}:00" for hour in range(24)],
    ),
    # CERRA: one time step every 3 hours
    "reanalysis-cerra-single-levels": dict(
        variable="2m_temperature",
        level_type="surface_or_atmosphere",
        data_type="reanalysis",
        product_type="analysis",
        time=[f"{hour:02d}:00" for hour in range(0, 24, 3)],
        format="grib",
    ),
}

## Functions to cache

In [None]:
def regionalised_max(ds, lon_slice, lat_slice):
    """Reduce a dataset to its regional maximum over forecast_reference_time.

    The dataset is passed through ``utils.regionalise`` with the given
    slices, then reduced with ``max`` along ``forecast_reference_time``
    (attributes are kept). The result gets a new length-one ``time``
    dimension whose single value is the mean of the input's
    ``forecast_reference_time`` coordinate.

    Parameters
    ----------
    ds
        Object exposing a ``forecast_reference_time`` variable
        (presumably an ``xarray.Dataset`` — confirm against the caller).
    lon_slice, lat_slice
        Slices forwarded unchanged to ``utils.regionalise``.

    Returns
    -------
    The reduced object with a leading ``time`` dimension of size one.
    """
    # The representative time stamp is taken from the full input dataset
    mean_time = ds["forecast_reference_time"].mean()
    regional = utils.regionalise(ds, lon_slice=lon_slice, lat_slice=lat_slice)
    regional_max = regional.max("forecast_reference_time", keep_attrs=True)
    return regional_max.expand_dims(time=[mean_time.values])

## Download and transform

In [None]:
# For each collection, retrieve the selected month of every requested year
# and reduce it to a single map of maximum "t2m", stored by collection ID.
dataarrays = {}
for collection_id, request in requests_dict.items():
    # One request per year, listing every day of the selected month
    requests = [
        request
        | {
            "year": year,
            "month": month,
            "day": list(range(1, calendar.monthrange(year, month)[1] + 1)),
        }
        for year in range(year_start, year_stop + 1)
    ]
    ds = download.download_and_transform(
        collection_id,
        requests,
        transform_func=regionalised_max,
        transform_func_kwargs=region_slices[region],
    )

    # Maximum along the "time" dimension
    da = ds["t2m"].max("time", keep_attrs=True)

    # Shift by 273.15 and relabel as °C (assumes the data arrive in kelvin),
    # keeping the remaining attributes untouched
    with xr.set_options(keep_attrs=True):
        da -= 273.15
    da.attrs["units"] = "°C"

    dataarrays[collection_id] = da

## Plot maps

In [None]:
# Colour limits shared by all maps so they can be compared directly
vmin = min(da.min().values for da in dataarrays.values())
vmax = max(da.max().values for da in dataarrays.values())

# Projection centred on the midpoint of the selected region
lon_slice = region_slices[region]["lon_slice"]
lat_slice = region_slices[region]["lat_slice"]
projection = ccrs.LambertConformal(
    central_longitude=(lon_slice.start + lon_slice.stop) / 2,
    central_latitude=(lat_slice.start + lat_slice.stop) / 2,
)

# Map extent as [lon_min, lon_max, lat_min, lat_max], whatever the slice order
extent = sorted([lon_slice.start, lon_slice.stop]) + sorted(
    [lat_slice.start, lat_slice.stop]
)

# Draw one map per collection
for collection_id, da in dataarrays.items():
    plot.projected_map(da, projection=projection, vmin=vmin, vmax=vmax, cmap="turbo")
    dataset_name = collection_id.split("-")[1].upper()
    month_name = calendar.month_name[month]
    # plt.title returns the title Text; its .axes is the axes just drawn on
    plot_obj = plt.title(
        f"{dataset_name} Extrema " f"- {month_name} ({year_start}, {year_stop})"
    )
    map_axes = plot_obj.axes
    map_axes.set_extent(extent)
    # NOTE(review): _gridliners is a private attribute of the axes and may
    # not exist in every cartopy version — confirm.
    gl = map_axes._gridliners[0]
    gl.x_inline = False
    gl.xlabel_style = {"rotation": 0}
    plt.show()

## Plot histogram

In [None]:
# Flatten every map into a 1-D array of values, keyed by collection ID
stacked_data = {
    collection_id: da.stack(dim=da.dims).values
    for collection_id, da in dataarrays.items()
}

# Read the axis-label metadata explicitly from the last dataset instead of
# relying on a loop variable left over from a previously executed cell.
label_attrs = list(dataarrays.values())[-1].attrs

fig, ax = plt.subplots()
ax.hist(
    # Pass a concrete list of samples (one per collection) rather than a view
    list(stacked_data.values()),
    bins=30,
    density=True,
    color=["#155084", "#fd3c06"],
    label=[label.split("-")[1].upper() for label in dataarrays],
)
# Logarithmic y-axis for the density values
ax.set_yscale("log")
ax.set_xlabel(f"{label_attrs['long_name']} [{label_attrs['units']}]")
ax.set_ylabel("Probability density")
ax.legend()
_ = ax.set_title(f"Extrema - {calendar.month_name[month]} ({year_start}, {year_stop})")