# Import required packages

In [None]:
import xarray as xr                                                  # handles gridded data
import numpy as np                                                   # numerical functions etc
import pandas as pd                                                  # handles tables of data (.csv, spreadsheets etc)
import re                                                            # allows manipulation of text strings

import matplotlib                                                    # plotting functions generally
import matplotlib.pyplot as plt                                      # specific plotting function we'll use a lot, so we'll give it a shorter name
from matplotlib.patches import Rectangle                             # specific plotting function with an annoyingly long name

import statsmodels.nonparametric.smoothers_lowess as sm_lowess       # nonparametric smoother

import cartopy                                                       # plotting geographical features
import regionmask                                                    # convert shapefile to binary 0,1 mask
import geopandas as gpd                                              # shapefiles

from geopy.geocoders import Nominatim                                # get lat & lon coordinates from a place name
from dateutil.relativedelta import relativedelta                     # calculate differences in times (needed to decode weird timestamps)

# filter out any annoying user warnings
import warnings
warnings.filterwarnings("ignore", category = UserWarning)

# you might get an error like 'ERROR 1: PROJ: proj_create_from_database: Open of /rds/general/user/cb2714/home/anaconda3/envs/wwa/share/proj failed' - you can ignore this!

# Useful functions

In [None]:
def wrap_lon(ds):
    """Wrap longitudes from (0,360) to (-180,180) & sort the grid into ascending order.

    Parameters
    ----------
    ds : xarray Dataset or DataArray
        Data whose coordinates are named either 'longitude'/'latitude' or 'lon'/'lat'.

    Returns
    -------
    The data with longitudes wrapped (only if any longitude exceeds 180) and, where
    longitude / latitude are dimensions, sorted into ascending order. If no longitude
    coordinate is found, the input is returned unchanged.
    """
    # work out which naming convention the data uses
    if "longitude" in ds.coords:
        lon, lat = "longitude", "latitude"
    elif "lon" in ds.coords:
        lon, lat = "lon", "lat"
    else:
        # can only wrap longitude
        return ds

    if ds[lon].max() > 180:
        wrapped = ((ds[lon].values + 180) % 360) - 180
        # assign_coords returns a new object, so the caller's data is not modified in place;
        # the (dims, values, attrs) tuple keeps the original coordinate attributes
        ds = ds.assign_coords({lon: (ds[lon].dims, wrapped, ds[lon].attrs)})

    if lon in ds.dims:
        ds = ds.sortby(lon)
        # latitude can only be sorted if it is actually a dimension of the data
        if lat in ds.dims:
            ds = ds.sortby(lat)
    return ds

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def decode_times(ts):
    """Manually decode a numeric time axis into datetimes.

    Reads `ts.time.units`, which is split into an increment (everything before the
    first space) and a start date (everything after 'since '). Each value on the time
    axis is then converted to start date + that many increments.

    Returns `ts` with the decoded times assigned as its 'time' coordinate. If the
    increment is not one of years / months / days / hours, a 'TBD' message is printed
    and None is returned.
    """
    units = ts.time.units
    inc = re.sub(" .+", "", units)
    origin = re.sub(".+since ", "", units)
    startdate = pd.Timestamp(origin+' 00:00:00.000000').to_pydatetime()

    # increments we don't know how to handle yet
    if inc not in ("years", "months", "days", "hours"):
        print("TBD: " +inc)
        return

    # the increment name doubles as the relativedelta keyword (years=, months=, days=, hours=)
    new_times = [np.datetime64(startdate + relativedelta(**{inc : i})) for i in ts.time.values]

    return ts.assign_coords(time = new_times)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_latlon(city):
    """Retrieve lat & lon for a given place name using the Nominatim geocoder.

    Returns a dict with keys "lon" and "lat"; both values are None if the
    geocoder returns no match for `city`.
    """
    geocoder = Nominatim(user_agent="GetLoc")
    match = geocoder.geocode(city)

    # no match: return the same keys, with empty values
    if match is None:
        return {"lon" : None, "lat" : None}

    return {"lon" : match.longitude, "lat" : match.latitude}
        
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def doy_axes(ax = None):
    """Make nicer x-axis labels when you're plotting against day of year.

    Puts a month-name label at the 16th of each month, draws a faint vertical line
    at the 1st of each month, and clears the x-axis label.

    Parameters
    ----------
    ax : axes to modify; if None, the current axes (plt.gca()) are used.
    """
    if ax is None:
        ax = plt.gca()

    # reference calendar: 2020 is a leap year, so it covers all 366 possible days of the year
    days = pd.date_range(start = "2020-01-01", end = "2020-12-31")

    # day-of-year (counting from 1) of the 1st of each month: where the gridlines go
    lineticks = [doy for doy, d in enumerate(days, start = 1) if d.day == 1]

    # day-of-year and month name for the 16th of each month: where the labels go
    mid_month = [(doy, d.strftime("%b")) for doy, d in enumerate(days, start = 1) if d.day == 16]
    labelticks = [doy for doy, _ in mid_month]
    labels = [name for _, name in mid_month]

    # actually draw the axes & gridlines
    ax.set_xticks(labelticks)
    ax.set_xticklabels(labels)
    for x in lineticks:
        ax.axvline(x, color = "k", alpha = 0.1, lw = 1)
    ax.set_xlabel("")

# Exercises with gridded data

In [None]:
# open the ERA5 file and tidy up its longitudes with wrap_lon
ds = xr.open_dataset("era5_tp_daily_eu_extended_10.7-25E_46-52.3N_1950-2024_su.nc")
ds = wrap_lon(ds)

# from here on, 'ds' is just the 'total precipitation' variable (tp)
ds = ds["tp"]

## Making a map for a single day

The `xarray` package can produce nice maps without much user input, but you can also customise them to show the features you're most interested in.  
- Modify the code below to show one of the days during Storm Boris in mid-September
- Choose a colour scheme and range of values that highlights the severity of the event
- Modify the label on the scale bar to say 'Precipitation (mm/day)'.

In [None]:
# Exercise template: draws a map of precipitation on one day.
# The date, colour scheme, colour range and colourbar label are starting values for you to modify.

# select a single day of data to plot
ds_event = ds.sel(time = "2024-09-01")

# get the max & min lat & lon so we can fix the plotting area - order is (west, east, south, north)
data_area = (ds.lon.min(), ds.lon.max(), ds.lat.min(), ds.lat.max())

# MAKE THE PLOT
# ('fig' holds whatever ds_event.plot() returns; its .axes attribute is used below to get the map axes)
fig = ds_event.plot(cmap = "YlGnBu",                                                       # colour scheme to use for plotting
                    vmin = 0, vmax = 100,                                                  # set min & max values for colour scale
                    subplot_kws = {"projection" : cartopy.crs.PlateCarree(),               # tell it which projection to use
                                   "extent" : data_area                                    # only plot the area covered by the data
                                  },     
                    cbar_kwargs = {"location" : "bottom",                                  # move the colourbar (scale bar) to the bottom
                                   "label" : "precipitation (mmday)"})                     # relabel the colourbar (this is the text shown next to the scale bar)

# Add some extra details to the figure
ax = fig.axes                                                                              # give a name to the map axes so that we can add extra elements
ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.6)                            # add national borders (dashed, slightly transparent)
ax.gridlines(color = "k", alpha = 0.1, draw_labels = ["bottom", "left"])                   # add faint gridlines, labelled on the bottom & left only
ax.coastlines(color = "k")                                                                 # add coastlines

## Plotting several days 

We've been looking at Storm Boris but there was also extensive flooding in the same area in July 1997.  
- Modify the code to identify the 4 wettest days in July 1997 and customise the plot as you did above.  
- Modify the code to show 4 panels on each row, not 3
- Give each panel a title
- If you're feeling brave, try resizing the colourbar by modifying the 'fraction' and 'pad' values in cbar_kwargs

In [None]:
# Exercise template: draws one map panel per time step in the selected date range.
# The dates, col_wrap, colour settings and titles are starting values for you to modify.

# select a range of dates to plot (not too many or the plots will be tiny!)
ds_event = ds.sel(time = slice("2024-09-12", "2024-09-16"))

# get the extent of the data (this will help to avoid messy plots when we add geographic features)
# order is (west, east, south, north)
data_area = (ds.lon.min(), ds.lon.max(), ds.lat.min(), ds.lat.max())

# MAKE THE PLOT
fig = ds_event.plot(col = "time",                                       # each panel should show a different time
                    col_wrap = 3,                                       # number of panels to show per row
                    cmap = "YlGnBu",                                    # colour scheme to use for plotting
                    vmin = 0, vmax = 100,                               # set min & max values for colour scale
                    # specify the projection for each panel
                    subplot_kws = {
                        "projection" : cartopy.crs.PlateCarree(),       # map projection so that we can add geographic features
                        "extent" : data_area                            # only plot the area covered by the data
                    },     
                    # specify how the colour bar should look
                    cbar_kwargs = {
                        "location" : "bottom",                          # move the colourbar (scale bar) to the bottom
                        "label" : "Precipitation (mm/day)",             # relabel the colourbar
                        "fraction" : 0.025,                             # how big the colourbar should be
                        "pad" : 0.1                                     # how much space between the colourbar and the panels
                    })            

# this time, we have to add the borders and gridlines to each panel individually
# (fig.axs is the grid of panel axes; .flat visits every panel in turn)
for ax in fig.axs.flat:
    ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.6)                # add national borders
    ax.gridlines(color = "k", alpha = 0.1)                                         # add gridlines but don't draw the labels
    ax.coastlines(color = "k")                                                     # add coastlines

# add a second, fully transparent (alpha = 0) set of gridlines whose only job is to draw the axis labels
for ax in fig.axs[:,0]: ax.gridlines(alpha = 0, draw_labels = ["left"])            # y-labels on leftmost column
for ax in fig.axs[-1,:]: ax.gridlines(alpha = 0, draw_labels = ["bottom"])         # x-labels on the bottom row 

# manually set the title of each panel (make sure you make as many titles as there are panels!)
# zip() stops at the shorter of the two lists, so any surplus panels or surplus titles are silently skipped
titles = ["Day 1", "Day 2", "Day 3", "Day 4", "Day 5"]
for ax, title in zip(fig.axs.flat, titles):
    ax.set_title(title)

## Plotting an anomaly

We might also want to see how the event compares to the average rainfall during the wet season (approx. May-September in this region).  
- Modify the definition of 'ds_rainyseason' to include all months from May-September
- Change the climatology period to 1990-2020


In [None]:
# keep only the time steps whose month is one of the rainy-season months listed here
ds_rainyseason = ds.sel(time=ds.time.dt.month.isin([6, 7, 8]))

# climatology: restrict the rainy-season days to the reference years, then average over time
ds_clim = ds_rainyseason.sel(time=slice("1980", "2010"))
ds_clim = ds_clim.mean(dim="time")

# daily anomaly = every day in the full record minus the rainy-season climatology
ds_anom = ds - ds_clim

Now plot the anomalies during Storm Boris (you'll need to customise the plots as before)

- Change the colourmap ('cmap') to one centred at zero (['diverging colourmaps'](https://matplotlib.org/stable/users/explain/colors/colormaps.html#diverging))
- Change vmin and vmax so that the scale is centred at zero
- Change the panel titles

In [None]:
# Exercise template: same multi-panel map as before, but plotting the anomalies (ds_anom).
# The colour scheme, colour range and titles are starting values for you to modify.

# select a range of dates to plot (not too many or the plots will be tiny!)
ds_event = ds_anom.sel(time = slice("2024-09-12", "2024-09-16"))

# get the extent of the data (this will help to avoid messy plots when we add geographic features)
# order is (west, east, south, north)
data_area = (ds.lon.min(), ds.lon.max(), ds.lat.min(), ds.lat.max())

# MAKE THE PLOT
fig = ds_event.plot(col = "time",                                       # each panel should show a different time
                    col_wrap = 3,                                       # number of panels to show per row
                    cmap = "YlGnBu",                                    # colour scheme to use for plotting
                    vmin = 0, vmax = 100,                               # set min & max values for colour scale
                    # specify the projection for each panel
                    subplot_kws = {
                        "projection" : cartopy.crs.PlateCarree(),       # map projection so that we can add geographic features
                        "extent" : data_area                            # only plot the area covered by the data
                    },     
                    # specify how the colour bar should look
                    cbar_kwargs = {
                        "location" : "bottom",                          # move the colourbar (scale bar) to the bottom
                        "label" : "Precipitation (mm/day)",             # relabel the colourbar
                        "fraction" : 0.025,                             # how big the colourbar should be
                        "pad" : 0.1                                     # how much space between the colourbar and the panels
                    })            

# this time, we have to add the borders and gridlines to each panel individually
# (fig.axs is the grid of panel axes; .flat visits every panel in turn)
for ax in fig.axs.flat:
    ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.6)                # add national borders
    ax.gridlines(color = "k", alpha = 0.1)                                         # add gridlines but don't draw the labels
    ax.coastlines(color = "k")                                                     # add coastlines

# add a second, fully transparent (alpha = 0) set of gridlines whose only job is to draw the axis labels
for ax in fig.axs[:,0]: ax.gridlines(alpha = 0, draw_labels = ["left"])            # y-labels on leftmost column
for ax in fig.axs[-1,:]: ax.gridlines(alpha = 0, draw_labels = ["bottom"])         # x-labels on the bottom row 

# manually set the title of each panel (make sure you make as many titles as there are panels!)
# zip() stops at the shorter of the two lists, so any surplus panels or surplus titles are silently skipped
titles = ["Day 1", "Day 2", "Day 3", "Day 4", "Day 5"]
for ax, title in zip(fig.axs.flat, titles):
    ax.set_title(title)

Now make a copy of the cell above and do the same for the 1997 storm

## Get the daily mean over a rectangular region

To choose an area, redraw the plots from above, including the box
- (optional) add some more place names where impacts were reported (or just towns/cities in the region)
- move and resize the box until it captures the impacts and/or worst rainfall during the storm

In [None]:
# places where impacts were reported (add more names here if you like)
affected_sites = [
    "Vienna",
    "Graz",
    "Glucholazy",
    "Litovel",
    "Lower Austria",
]

# look up each place in turn and collect the results in a table with one row per place
px = pd.DataFrame([get_latlon(site) for site in affected_sites])

In [None]:
# Exercise template: redraws the multi-panel maps with a rectangular study region marked on each panel.

# set the region we want to draw a box around: (min lon, max lon, min lat, max lat)
xn,xx,yn,yx = [14,18,47,51]
# alternative box to try:
# xn,xx,yn,yx = [20,24,50,52]

# select a range of dates to plot (not too many or the plots will be tiny!)
ds_event = ds.sel(time = slice("2024-09-12", "2024-09-16"))

# get the extent of the data (this will help to avoid messy plots when we add geographic features)
# order is (west, east, south, north)
data_area = (ds.lon.min(), ds.lon.max(), ds.lat.min(), ds.lat.max())

# MAKE THE PLOT
fig = ds_event.plot(col = "time",                                       # each panel should show a different time
                    col_wrap = 3,                                       # number of panels to show per row
                    cmap = "YlGnBu",                                    # colour scheme to use for plotting
                    vmin = 0, vmax = 100,                               # set min & max values for colour scale
                    # specify the projection for each panel
                    subplot_kws = {
                        "projection" : cartopy.crs.PlateCarree(),       # map projection so that we can add geographic features
                        "extent" : data_area                            # only plot the area covered by the data
                    },     
                    # specify how the colour bar should look
                    cbar_kwargs = {
                        "location" : "bottom",                          # move the colourbar (scale bar) to the bottom
                        "label" : "Precipitation (mm/day)",             # relabel the colourbar
                        "fraction" : 0.025,                             # how big the colourbar should be
                        "pad" : 0.1                                     # how much space between the colourbar and the panels
                    })            

# this time, we have to add the borders and gridlines to each panel individually
for ax in fig.axs.flat:
    ax.add_feature(cartopy.feature.BORDERS, ls = "--", alpha = 0.6)                          # add national borders
    ax.gridlines(color = "k", alpha = 0.1)                                                   # add gridlines but don't draw the labels
    ax.coastlines(color = "k")                                                               # add coastlines
    ax.plot([xn,xn,xx,xx,xn], [yn,yx,yx,yn,yn], color = "darkred", lw = 2, alpha = 0.5)      # draw the box: trace the four corners and return to the start

# add a second, fully transparent (alpha = 0) set of gridlines whose only job is to draw the axis labels
for ax in fig.axs[:,0]: ax.gridlines(alpha = 0, draw_labels = ["left"])                      # y-labels on leftmost column
for ax in fig.axs[-1,:]: ax.gridlines(alpha = 0, draw_labels = ["bottom"])                   # x-labels on the bottom row 

Once you're happy with the selected region, get the daily mean and save as a separate file

In [None]:
# cut out the grid cells that fall inside the box (xn-xx in longitude, yn-yx in latitude)
ds_box = ds.sel(lat=slice(yn, yx), lon=slice(xn, xx))

# average over both spatial dimensions (a plain, unweighted mean of the grid cells)
ts = ds_box.mean(dim=["lat", "lon"])

# write the regional time series to file so it can be reloaded later
ts.to_netcdf("timeseries_box.nc")

# Exercises with time series

In [None]:
# read the saved time series back in, keeping only the 'tp' variable
ts = xr.open_dataset("timeseries_box.nc")["tp"]

## Plot the seasonal cycle
First, it's useful to check the seasonal cycle

- Change `sm_days` to calculate a 15-day smoothed seasonal cycle
- Change the climatology period to 1990-2020
- Give the plot a title

In [None]:
# sm_days is the length (in days) of the centred smoothing window applied to the daily series
# (smoothing is mainly useful for precipitation - for temperatures you could use a shorter window; sm_days = 1 means no smoothing at all)
sm_days = 1
ts_sm = ts.rolling(time=sm_days, center=True).mean()

# climatology: keep only the reference years, then average all the values that share a day of the year
ts_clim = ts_sm.sel(time=slice("1980", "2010"))
mean_by_doy = ts_clim.groupby("time.dayofyear").mean()

# plot the mean seasonal cycle
mean_by_doy.plot()

# tidy up the axes: month labels along the x-axis, units on the y-axis, and a title
ax = plt.gca()                                # name the current axes so that we can modify them
doy_axes(ax)                                  # month labels & gridlines from the user-defined function
ax.set_ylabel("Precipitation (mm/day)")       # give proper units to y-axis
ax.set_title("")                              # replace this with a more meaningful title

## Compare 2024 to previous years
Now we check how unusual the n-day average was, compared to previous years
- We start by checking the daily data. Check whether the 3-day, 5-day and 9-day rolling means were also unusual.
- Make the previous years' rainfall more transparent by reducing `alpha`
- Instead of a rolling mean, calculate the rolling sum. What difference does this make?

In [None]:
# number of days in the (trailing) rolling average - ndays = 1 leaves the daily data unchanged
ndays = 1
ts_nday = ts.rolling(time=ndays, center=False).mean()

# name the current axes once, so that every line below is drawn on the same plot
ax = plt.gca()

# this year goes on top in black ('zorder' controls stacking: higher zorders are drawn over lower ones)
ts_y = ts_nday.sel(time="2024")
ax.plot(ts_y.time.dt.dayofyear, ts_y, color="k", zorder=9)

# climatological mean for each day of the year, for reference
ts_nday_mean = ts_nday.sel(time=slice("1990", "2020")).groupby("time.dayofyear").mean()
ax.plot(ts_nday_mean.dayofyear, ts_nday_mean, color="darkblue", alpha=0.7, zorder=5)

# one line per year in the record, all in the same colour
for y in np.unique(ts_nday.time.dt.year):
    ts_y = ts_nday.sel(time=str(y))
    ax.plot(ts_y.time.dt.dayofyear, ts_y, color="tab:blue", alpha=1)

# nicer axis labels and title
doy_axes(ax)                                        # month labels & gridlines from the user-defined function
ax.set_title(f"{ndays}-day precipitation")          # title reflects the chosen window length
ax.set_ylabel("Precipitation (mm/day)")             # give proper units to y-axis

## Annual maximum over selected months
If the event is unusual partly because of the time of year when it occurred, then you might want to look at only a specific month (or months)
- Make a series of the 4-day rolling mean
- Modify the definition of 'ts_rainyseason' to include all months from May-September
- Change the 5-year running mean to a 15-year running mean (in the plot further below) - which is easier to read?

In [None]:
# number of days in the (trailing) rolling average
ndays = 1
ts_nday = ts.rolling(time=ndays, center=False).mean()

# keep only the months we're interested in - for example, just the wet season
ts_rainyseason = ts_nday.sel(time=ts_nday.time.dt.month.isin([7, 8]))

# largest value within each calendar year (yearly bins starting in January)
ts_max = ts_rainyseason.resample(time="YS-JAN").max()

When we plot the data it's useful to include a running mean so that we can more clearly see trends over time
- Change the 5-year running mean to a 15-year running mean - which is easier to read?
- Change the colour of the running mean to dark blue ([available colours](https://matplotlib.org/stable/gallery/color/named_colors.html#css-colors))
- Change the line width of the running mean to 2
- Change the label on the running mean to '15-year running mean'

In [None]:
# centred running mean of the annual data (the window length is in years)
ts_sm = ts_max.rolling(time=5, center=True).mean()

# annual values drawn as steps, with the running mean over the top
ts_max.plot(drawstyle="steps-mid")
ts_sm.plot(label="running mean", color="tab:blue", lw=1)

# show the label of the running mean
plt.legend()

## Plot a linear trend dependent on GMST

To say anything about the effect of climate change, we have to estimate the strength of the relationship between GMST and our data

In [None]:
# load the GMST file without automatic time decoding, decode its time axis with decode_times, and keep the 'Ta' variable
gmst = decode_times(xr.open_dataset("igiss_al_gl_a_4yrlo.nc", decode_times = False)).Ta

# select the same years that are in our annual time series
gmst = gmst.sel(time = gmst.time.dt.year.isin(ts_max.time.dt.year))

# regress the annual maxima on GMST: swapping the time coordinate for the GMST values
# means polyfit fits ts_max as a straight line (deg = 1) in GMST rather than in time
# NOTE(review): this relies on gmst holding exactly one value for every year in ts_max, in the same order - confirm
gmst_coeffs = ts_max.assign_coords(time = gmst.values).polyfit("time", deg = 1).polyfit_coefficients

# evaluate the fitted line at each year's GMST to get the expected annual maximum in that year
gmst_fitted = xr.polyval(gmst, gmst_coeffs)

# print the slope of the fit (rounded to 2 d.p.): the change in annual maxima for each 1 degree of global warming
print("Change per 1deg GMST increase: ", gmst_coeffs.sel(degree = 1).round(2).values)

It's useful to compare this to a nonparametric smoother: if the GMST trend is similar to the nonparametric trend, it's likely that GMST is having an effect
- Try changing `frac` (the fraction of data included in the smoothing) to understand how the value affects the wiggliness of the line

In [None]:
# smooth the annual maxima against the year they occurred in
# 'frac' is the fraction of the data used to smooth each point - smaller frac means wigglier line (default is 2/3)
ts_np = sm_lowess.lowess(ts_max, ts_max.time.dt.year, frac=2/3)

# the smoothed values are in the second column: wrap them in a dataarray with the same coordinates as ts_max so they're easier to plot
ts_np = xr.DataArray(
    data=ts_np[:, 1],
    coords=dict(ts_max.coords),
    dims=["time"],
    name="z",
)

Now we can plot the data with the GMST trend and the nonparametric trend
- Change the colour of the nonparametric smoother or the GMST trend, so that the contrast between the two is stronger
- Do you think the GMST trend fits the data well in this case?

In [None]:
# observations drawn as steps, with both trend estimates over the top
# (if the GMST trend and the nonparametric smoother look similar, that's a useful sanity check on the fit)
ts_max.plot(drawstyle="steps-mid")
ts_np.plot(label="Nonparametric smoother", color="darkgreen", lw=2)
gmst_fitted.plot(label="GMST trend", color="k", lw=2)

# show the labels of the two trend lines
plt.legend()

**Now make a copy of this notebook, download some new data from the Climate Explorer, and try to come up with some useful visualisations!**