In [1]:
import xarray as xr, pandas as pd, numpy as np, xoak
from statsmodels.nonparametric.smoothers_lowess import lowess

import cartopy, regionmask, geopandas as gpd
from shapely.geometry import Polygon

import glob, re
from matplotlib import pyplot as plt

# directory where downloaded data and derived outputs are stored;
# keep the trailing slash - file paths below are built by plain string concatenation
my_dir = "/home/jovyan/my_materials/"


# method to wrap longitude from (0,360) to (-180,180)
def wrap_lon(ds):
    """Return `ds` with longitudes wrapped to (-180, 180) and sorted coordinates.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object whose coordinates are named either "longitude"/"latitude"
        or "lon"/"lat".

    Returns
    -------
    Same type as `ds`
        A new object; the input is never modified. If no longitude coordinate
        is found the input itself is returned unchanged. When longitude is a
        dimension, longitude (and latitude, if it is a dimension too) are
        sorted in ascending order.
    """
    if "longitude" in ds.coords:
        lon, lat = "longitude", "latitude"
    elif "lon" in ds.coords:
        lon, lat = "lon", "lat"
    else:
        # can only wrap longitude
        return ds

    if ds[lon].max() > 180:
        wrapped = ((ds[lon].values + 180) % 360) - 180
        # assign_coords returns a new object, so the caller's data keeps its
        # original longitudes; dims and attrs of the coordinate are carried over
        ds = ds.assign_coords({lon: (ds[lon].dims, wrapped, ds[lon].attrs)})

    if lon in ds.dims:
        # sortby reorders the data along the dimension and, unlike reindex,
        # does not fail if two longitudes wrap onto the same value
        ds = ds.sortby(lon)
        if lat in ds.dims:
            ds = ds.sortby(lat)
    return ds

# Download data from the Climate Explorer  

We can get observations from [KNMI's Climate Explorer tool](https://climexp.knmi.nl/start.cgi?id=someone@somewhere)
- sign in to save your searches
- go to 'daily fields' > ERA5 0.25°
- enter the lat/lon bounds of a large region including the area you want to study
- select 'subset of the field' and 'convert to Celsius' / 'convert to mm/day'
- click 'create'
- download the file by copying the link into the cell below

In [2]:
! cd $my_dir; wget https://climexp.knmi.nl/data/era5_tmax_daily_eu_5-20E_40-55N_firstyear-lastyear_su.nc

--2025-02-10 10:28:14--  https://climexp.knmi.nl/data/era5_tmax_daily_eu_5-20E_40-55N_firstyear-lastyear_su.nc
Resolving proxy.geomar.de (proxy.geomar.de)... 193.174.124.200
Connecting to proxy.geomar.de (proxy.geomar.de)|193.174.124.200|:8080... connected.
Proxy request sent, awaiting response... 200 OK
Length: 281782710 (269M) [application/x-netcdf]
Saving to: ‘era5_tmax_daily_eu_5-20E_40-55N_firstyear-lastyear_su.nc.1’


2025-02-10 10:28:19 (60.9 MB/s) - ‘era5_tmax_daily_eu_5-20E_40-55N_firstyear-lastyear_su.nc.1’ saved [281782710/281782710]



# Choosing the region

## Large-scale maps of the 'event'

Start by looking at observations to get an idea of the right region

In [3]:
# open the downloaded file, take the tmax variable and pass it through wrap_lon
era5_file = my_dir + "era5_tmax_daily_eu_5-20E_40-55N_firstyear-lastyear_su.nc"
da = wrap_lon(xr.open_dataset(era5_file)["tmax"])

# keep only the days around the event
event_days = slice("2024-07-01", "2024-07-10")
da_event = da.sel(time = event_days)

In [None]:
# lon/lat limits of the box drawn around the area of interest
xn, xx, yn, yx = [8,12,48,52]
box_lons = [xn, xn, xx, xx, xn]
box_lats = [yn, yx, yx, yn, yn]

# one panel per day, five panels per row, on a geographic projection so that map features can be added
facet_kwargs = {"cmap" : "YlOrRd",
                "col" : "time",
                "col_wrap" : 5,
                "subplot_kws" : {"projection" : cartopy.crs.PlateCarree()}}
fig = da_event.plot(**facet_kwargs)

# decorate every panel: coastlines, national borders, labelled gridlines and the box
for ax in fig.axs.flat:
    ax.coastlines()
    ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.5)
    ax.gridlines(draw_labels = ["bottom", "left"], color = "k", alpha = 0.1)

    ax.plot(box_lons, box_lats, color = "blue", lw = 2, alpha = 0.5)

## Define the study region

**Questions to consider**

- where did the impacts actually occur?
- how far did the associated weather system extend?
- is the proposed region relatively homogeneous in terms of topography?
- is the proposed region relatively homogeneous in terms of climatology?

### Rectangular region

In [None]:
# lon/lat limits of the rectangular study region
xn, xx, yn, yx = [8,12,48,52]

# corner points of the rectangle, closed by repeating the first corner
corners = [(xn, yn), (xn, yx), (xx, yx), (xx, yn), (xn, yn)]

# wrap the rectangle in a single-row GeoDataFrame in lon/lat coordinates
gdf = gpd.GeoDataFrame(geometry = [Polygon(corners)], index = [0], crs = 'epsg:4326')

# write it out as a shapefile so the same region can be used for the models
gdf.to_file(my_dir+"sf_studyregion")

### Pre-defined region (e.g. a country)

In [None]:
# all Natural Earth (v5.1.2, 1:50m) country outlines as a GeoDataFrame in lon/lat coordinates
countries = regionmask.defined_regions.natural_earth_v5_1_2.countries_50.to_geodataframe()
countries = countries.set_crs("epsg:4326")

# keep only the country of interest
gdf = countries.loc[countries["names"] == "Austria"]

# write it out as a shapefile so the same region can be used for the models
gdf.to_file(my_dir+"sf_studyregion")

### Load a shapefile

e.g. river basins: https://www.hydrosheds.org/products/hydrobasins

In [None]:
# read the study region from an existing shapefile (replace the placeholder with your file's name)
shapefile_path = my_dir + "your_shapefile_name"
gdf = gpd.read_file(shapefile_path)

## Event maps with study region

In [None]:
# one panel per day, five panels per row, on a geographic projection so that map features can be added
# (YlGnBu is a good colourmap for precip, RdBu_r or YlOrRd for temperature)
facet_kwargs = {"cmap" : "YlOrRd",
                "col" : "time",
                "col_wrap" : 5,
                "subplot_kws" : {"projection" : cartopy.crs.PlateCarree()}}
fig = da_event.plot(**facet_kwargs)

# decorate every panel: coastlines, national borders, labelled gridlines and the study region outline
for ax in fig.axs.flat:
    ax.coastlines()
    ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.5)
    ax.gridlines(draw_labels = ["bottom", "left"], color = "k", alpha = 0.1)

    gdf.boundary.plot(color = "blue", ax = ax)

## Check physical geography

Source data: http://research.jisao.washington.edu/data_sets/elevation/

In [7]:
!wget http://research.jisao.washington.edu/data_sets/elevation/elev.0.25-deg.nc

--2025-02-10 10:29:07--  http://research.jisao.washington.edu/data_sets/elevation/elev.0.25-deg.nc
Resolving proxy.geomar.de (proxy.geomar.de)... 193.174.124.200
Connecting to proxy.geomar.de (proxy.geomar.de)|193.174.124.200|:8080... connected.
Proxy request sent, awaiting response... 200 OK
Length: 2083492 (2.0M) [application/x-netcdf]
Saving to: ‘elev.0.25-deg.nc’


2025-02-10 10:29:08 (1.77 MB/s) - ‘elev.0.25-deg.nc’ saved [2083492/2083492]



In [1]:
# open the elevation file without decoding its time axis, take the variable named "data",
# drop any length-1 dimensions and pass the result through wrap_lon
elev_ds = xr.open_dataset(my_dir+"elev.0.25-deg.nc", decode_times = False)
elev = wrap_lon(elev_ds["data"].squeeze(drop = True))

NameError: name 'wrap_lon' is not defined

In [None]:
# limit the map to the area covered by the observations
map_extent = (da.lon.min(),da.lon.max(),da.lat.min(),da.lat.max())
fig, ax = plt.subplots(subplot_kw = {"projection" : cartopy.crs.PlateCarree(), "extent" : map_extent})

# aim to keep vmin:vmax at 1:4 ratio to change from blue to green at zero
elev_kwargs = dict(cmap = "terrain", vmin = -250, vmax = 1000)
elev.plot(ax = ax, **elev_kwargs)

# labelled gridlines, coastlines and national borders (borders drawn on top via a high zorder)
ax.gridlines(crs = cartopy.crs.PlateCarree(), draw_labels = ["bottom", "left"],
             color = 'black', linewidth = 0.5, alpha = 0.2)
ax.coastlines()
ax.add_feature(cartopy.feature.BORDERS, ls = "--", lw = 1, alpha = 0.7, zorder = 99)

# outline the study region
gdf.boundary.plot(color = "blue", ax = ax)

## Extract daily time series

Once you're happy with the study region, extract a daily time series of the temperature/precipitation averaged over that area.

In [10]:
# region mask on the data grid: cells with centres inside the defined region get a number, all others are NaN
rm = regionmask.mask_geopandas(gdf, da)
inside_region = rm.notnull()

# blank out everything outside the region, then take the plain (unweighted) mean over the remaining cells
ts = da.where(inside_region).mean(["lat", "lon"])

# keep a copy of the daily regional series as a netcdf
ts.to_netcdf(my_dir+"era5_tmax_daily.nc")

# Choosing the duration & season

**Questions to consider**

- did the impacts arise from a short-term event or was the whole month/season unusual?
- was the event unusual only for a given time of year? For a particular month(s) or season?

In [11]:
# reference leap year (plus the following 1 January), used only to build day-of-year axis labels
days = pd.date_range(start = "2020-01-01", end = "2021-01-01")

# (day-of-year, date) for the 15th of each month of the reference year
mid_months = [(doy, day) for doy, day in enumerate(days[:366], start = 1) if day.day == 15]

# tick positions in day-of-year and the matching abbreviated month names
labelticks = [doy for doy, _ in mid_months]
labels = [day.strftime("%b") for _, day in mid_months]

In [None]:
# How unusual was this year?

fig, ax = plt.subplots(ncols = 1, figsize = (5,3), dpi = 100)

# loop over each year & plot the annual time series against day of year, so that the seasons line up
for y in np.unique(ts.time.dt.year):
    ts_y = ts.sel(time = str(y))
    ax.plot(ts_y.time.dt.dayofyear, ts_y, color = "tab:blue", alpha = 0.3)

# make nicer x-axis labels: a faint line on the first day of each month, month names at mid-month.
# days[0] is 1 January, so counting from 1 gives the day of year (the same convention as labelticks)
for doy, day in enumerate(days, start = 1):
    if day.day == 1:
        ax.axvline(doy, alpha = 0.1)
ax.set_xticks(labelticks)
ax.set_xticklabels(labels)

# add the event year in a different colour
ts_y = ts.sel(time = "2024")
ax.plot(ts_y.time.dt.dayofyear, ts_y, color = "k");

In [None]:
# Compare a few accumulations

fig, axs = plt.subplots(ncols = 4, figsize = (20,3), dpi = 100, sharey = True)

# one panel per averaging window; change the numbers of days to compare different accumulations
for ax, ndays in zip(axs, [1,3,5,11]):

    # running mean over the last `ndays` days (the window ends on the labelled day)
    ts_nday = ts.rolling(time = ndays, center = False).mean()

    # loop over each year & plot the annual time series against day of year
    for y in np.unique(ts.time.dt.year):
        ts_y = ts_nday.sel(time = str(y))
        ax.plot(ts_y.time.dt.dayofyear, ts_y, color = "tab:blue", alpha = 0.3)

    # month names at mid-month and a faint line on the first day of each month.
    # days[0] is 1 January, so counting from 1 gives the day of year (the same convention as labelticks)
    ax.set_xticks(labelticks)
    ax.set_xticklabels(labels)
    for doy, day in enumerate(days, start = 1):
        if day.day == 1:
            ax.axvline(doy, alpha = 0.1)

    ax.set_title(str(ndays)+"-day average")

    # # optional: shade the time period of interest, e.g. a 20-day period starting from July 1st
    # start_doy = int(ts.sel(time = "2024-07-01").time.dt.dayofyear)
    # ax.axvspan(start_doy, start_doy + 20, color = "gold", alpha = 0.3)

    # add this year in a different colour
    ts_y = ts_nday.sel(time = "2024")
    ax.plot(ts_y.time.dt.dayofyear, ts_y, color = "k")

# Computing the final time series

In [14]:
# compute the selected n-day average (the window length is the `time` argument)
ts_rolled = ts.rolling(time = 1, center = False).mean()

# choose which months to include in the analysis
# NOTE(review): the event maps above cover early July 2024 but month 9 is selected here - confirm the intended season
in_season = ts_rolled.time.dt.month.isin([9])
ts_nday = ts_rolled.sel(time = in_season)

# get the annual maximum (one value per year, years starting in January)
yearly_max = ts_nday.resample(time = "YS-JAN").max()

# relabel dates as years and rename the axis to match
year_labels = yearly_max.time.dt.year
ts_annual = yearly_max.assign_coords(time = year_labels).rename(time = "year")

In [15]:
# convert the annual series to a table and write it as a .csv, ready to import into R for further analysis
annual_table = ts_annual.to_dataframe()
annual_table.to_csv(my_dir+"ts_era5.csv")

In [None]:
# quick look at the annual series, drawn in black as steps centred on each year
step_style = {"drawstyle" : "steps-mid", "color" : "k"}
ts_annual.plot(**step_style)

# Next steps

Repeat for more observational datasets
- CPC (global, temperature & precipitation)
- E-Obs (Europe, temperature & precipitation)
- CHIRPS (Global up to 60N, precipitation)
- You may be able to find other datasets for specific regions

Do all datasets show similar trends?

**Next: load the data in R and do some statistical analysis**