In [None]:
# data access

import xarray as xr

In [None]:
# visualisation

import display

In [None]:
# Baseline years used for the long-term comparison at the selected location.
reference_period = slice("1991", "2020")

# Catalogue of events that can be analysed. Each entry provides:
#   area     - latitude/longitude slices used to subset the dataset for maps
#              (latitude bounds are written high-to-low; presumably the store
#              keeps latitude in descending order - confirm)
#   location - keyword arguments for a nearest-neighbour point selection
#   time     - date labels selected along `valid_time` for the event itself
#   vmax     - value forwarded as `vmax` to the map plots
events = {
    "italy-2023-05": dict(
        area=dict(latitude=slice(48, 34), longitude=slice(5, 20)),
        location=dict(latitude=44.2, longitude=11.9, method="nearest"),
        time=["2023-05-17", "2023-05-18"],
        vmax=150,
    ),
    "greece-2023-09": dict(
        area=dict(latitude=slice(41, 34), longitude=slice(19, 28)),
        location=dict(latitude=39.25, longitude=21.9, method="nearest"),
        time=["2023-09-06", "2023-09-07"],
        vmax=400,
    ),
    "pakistan-2022-06": dict(
        area=dict(latitude=slice(38, 23), longitude=slice(60, 78)),
        location=dict(latitude=29, longitude=67, method="nearest"),
        # Eight consecutive dates: 23 to 30 August 2022.
        time=[f"2022-08-{day}" for day in range(23, 31)],
        vmax=300,
    ),
}

# Change this key to analyse a different event from the catalogue above.
selected_event = "greece-2023-09"

# Unpack the chosen event into the names the following cells rely on.
area, location, time, vmax = (
    events[selected_event][field]
    for field in ("area", "location", "time", "vmax")
)
# Year and month (as zero-padded strings) of the first event date.
year, month = time[0].split("-")[:2]

In [None]:
# activate dask distributed
#
# import distributed
# client = distributed.Client("tcp://dask-scheduler:8786")
# client

In [None]:
# data access configuration
#
# S3 URL of the Zarr store that the next cell opens with xarray.
# NOTE(review): "antartica" is spelled this way in the path itself; presumably
# that is the real object name, so do not "correct" it without checking the bucket.

dataset = "s3://ecmwf-era5-land/reanalysis-era5-land-no-antartica-v0.zarr"

In [None]:
# Open the Zarr store with dask-backed chunks (`chunks={}`) and cast every
# variable to float32 before any selection is made.
ds = xr.open_dataset(
    dataset,
    engine="zarr",
    chunks={},
).astype("float32")

# Show the dataset summary as the cell output.
ds

In [None]:
%%time

days_ds = ds.sel(valid_time=time, **area)
days_tp = days_ds.tp.sum("valid_time").compute()

CDS - ERA5 precipitation for 16 and 17 May 2023

- **Time to retrieve from CDS: 2 s**
- Number of fields: 48
- Size of original data: 100 Mb
- Size of downloaded data: 250 kb

In [None]:
# Map of the precipitation accumulated over the event dates.
display.map(
    days_tp,
    title="Storm Daniel precipitation",
    vmax=vmax,
)

In [None]:
%%time

years = [
    "1991", "1992", "1993",
    "1994", "1995", "1996",
    "1997", "1998", "1999",
    "2000", "2001", "2002",
    "2003", "2004", "2005",
    "2006", "2007", "2008",
    "2009", "2010", "2011",
    "2012", "2013", "2014",
    "2015", "2016", "2017",
    "2018", "2019", "2020",
]
days = [
    "01", "02", "03",
    "04", "05", "06",
    "07", "08", "09",
    "10", "11", "12",
    "13", "14", "15",
    "16", "17", "18",
    "19", "20", "21",
    "22", "23", "24",
    "25", "26", "27",
    "28", "29", "30",
]

month_reference_time = [f"{y}-{month}-{d}" for y in years for d in days]

month_reference_ds = ds.sel(valid_time=month_reference_time, **area)
month_reference_tp = (month_reference_ds.tp.sum("valid_time") / len(years)).compute()

CDS - ERA5 total precipitation for May 1990-2019

- **Time to retrieve from CDS: 30 m**
- Number of fields: 22,320
- Size of original data: 46 Gb
- Size of downloaded data: 115 Mb

In [None]:
# Side-by-side maps: event accumulation versus the reference monthly average,
# both drawn with the same `vmax`.
panel_titles = ("Storm Daniel precipitation", "Average precipitation in September")
display.maps(
    [days_tp, month_reference_tp],
    vmax=vmax,
    axs_set=[{"title": panel_title} for panel_title in panel_titles],
)

In [None]:
%%time

import datetime

location_year = ds.tp.sel(**location).sel(valid_time=year).groupby("valid_time.time")[datetime.time()].compute()
location_reference = ds.tp.sel(**location).sel(valid_time=reference_period).groupby("valid_time.time")[datetime.time()].compute()
location_reference

CDS - ERA5 precipitation for 1990-2023

- **Time to retrieve from CDS: 7 h**
- Number of fields: 300,000
- Size of original data: 600 Gb
- Size of downloaded data: 1.5 Gb

In [None]:
%%time

display.compare(location_year, location_reference, time="valid_time", ylim=[0, 1600])