In [None]:
# era5land_monthly_download.py
import cdsapi, time, os, xarray as xr


In [None]:

# --- Request configuration ---------------------------------------------------
# CDS dataset identifier passed to the retrieve call.
DATASET = "reanalysis-era5-land-monthly-means"

# Variables to request: two surface fields plus soil-water layers 1-3.
VARS = [
    "soil_temperature_level_1",
    "snow_cover",
    *(f"volumetric_soil_water_layer_{layer}" for layer in (1, 2, 3)),
]

# Years to download (end of range is exclusive); edit as needed.
YEARS = [*range(2024, 2026)]

# Zero-padded month strings "01" .. "12".
MONTHS = [str(month).zfill(2) for month in range(1, 13)]

# Optional bounding box as [N, W, S, E]. Leave as None to request the full grid.
# Examples: [90, -180, -90, 180] (global) or [60, -130, 20, -60] (CONUS-ish).
AREA = None

# --- Output location ---------------------------------------------------------
# Directory that receives the downloaded files; created if it is missing.
OUT_DIR = "era5l_monthly_nc"
os.makedirs(OUT_DIR, exist_ok=True)


In [None]:

def retrieve_year(year, attempt=1, max_attempts=5):
    """Download one year of monthly data into OUT_DIR and return the file path.

    If the yearly file already exists it is reused and no CDS client is
    created. Otherwise the request is downloaded to a temporary ``.part``
    file that is renamed to the final name only after the retrieve call
    succeeds, so an interrupted transfer can never be mistaken for a
    finished download by the exists-check.

    Parameters
    ----------
    year : int or str
        Year to request; also used in the output file name.
    attempt : int, optional
        Number of the first attempt (1-based). It scales the back-off wait
        and counts towards ``max_attempts``.
    max_attempts : int, optional
        Highest attempt number tried before the last error is re-raised.

    Returns
    -------
    str
        Path of the yearly file, ``<OUT_DIR>/era5l_monthly_<year>.nc``.

    Raises
    ------
    Exception
        Whatever the retrieve call raised on the final attempt.
    """
    target = os.path.join(OUT_DIR, f"era5l_monthly_{year}.nc")
    # Check the cache first: a cached year needs neither a client nor credentials.
    if os.path.exists(target):
        print(f"[skip] {target} exists.")
        return target

    req = {
        "product_type": "monthly_averaged_reanalysis",
        "variable": VARS,
        "year": f"{year}",
        "month": MONTHS,
        "time": "00:00",           # monthly means carry a nominal time
        "format": "netcdf",        # NetCDF out
    }
    if AREA:
        req["area"] = AREA        # N, W, S, E (CDS order)

    # Download next to the final file so os.replace stays on one filesystem.
    partial = target + ".part"
    c = cdsapi.Client(timeout=600)  # one client is reused across retries
    while True:
        try:
            print(f"[CDS] Requesting {year} ...")
            c.retrieve(DATASET, req, partial)
            os.replace(partial, target)  # publish only a complete download
            print(f"[ok] Wrote {target}")
            return target
        except Exception as e:
            # Drop whatever was written so a truncated file is never reused.
            try:
                os.remove(partial)
            except FileNotFoundError:
                pass
            if attempt >= max_attempts:
                raise
            wait = 30 * attempt  # linear back-off: 30 s, 60 s, 90 s, ...
            print(f"[warn] {e}\nRetrying year {year} in {wait}s (attempt {attempt+1}/{max_attempts})")
            time.sleep(wait)
            attempt += 1


In [None]:

# 1) fetch every configured year in order and collect the resulting file paths
paths = list(map(retrieve_year, YEARS))


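# Alternative: merge the yearly files from the shell with CDO instead of the xarray cell below:
#   cdo -O mergetime era5l_monthly_*_unzipped.nc ERA5L_monthly_merged.nc
# NOTE: the `_unzipped` glob does not match the `era5l_monthly_<year>.nc` names written above;
# adjust the pattern to the files you actually have.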

In [None]:

# 2) merge to a single file (preserves CF, times)
print("[merge] Concatenating yearly NetCDFs ...")
if not paths:
    raise ValueError("no yearly files to merge; check YEARS")

dsets = []
try:
    for p in paths:
        dsets.append(xr.open_dataset(p))
    # Concatenate along the time dimension that already exists in the files.
    # Passing a name that is not an existing dimension would make xarray stack
    # the years along a brand-new axis, so use "valid_time" when the files
    # carry it and fall back to "time" otherwise.
    time_dim = "valid_time" if "valid_time" in dsets[0].dims else "time"
    # .load() pulls the values into memory so `ds` stays usable once the
    # source files are closed below.
    ds = xr.concat(dsets, dim=time_dim).load()
finally:
    # Close every file that was opened, even if opening or concatenating failed.
    for d in dsets:
        d.close()

# 3) (optional) ensure variable names are exactly what you expect
# print(list(ds.data_vars))

# 4) write one consolidated file
# NOTE(review): if the inputs are int16-packed (scale_factor/add_offset), the
# packing of the first file is presumably reused on write -- confirm that
# values from later years are not clipped.
out_all = os.path.join(OUT_DIR, f"era5l_monthly_{YEARS[0]}_{YEARS[-1]}.nc")
# zlib level 4 on every data variable and on the lat/lon coordinates if present.
encoding = {v: {"zlib": True, "complevel": 4} for v in ds.data_vars}
encoding.update({c: {"zlib": True, "complevel": 4} for c in ["latitude","longitude"] if c in ds.coords})
ds.to_netcdf(out_all, encoding=encoding)
print(f"[done] {out_all}")
