# Import modules and define functions

In [1]:
import glob
import holoviews as hv
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
# NOTE(review): recent pandas releases no longer export Panel (it was removed
# by pandas 1.0), so this line raises ImportError there. Series, DataFrame and
# Panel are not referenced in the visible cells -- confirm and drop this import.
from pandas import Series, DataFrame, Panel 
import xarray as xr 

Calculates the daily climatology: the series is smoothed with a centred 15-day rolling mean and then averaged over all years for each day of the year.

In [2]:
def daily_climatology(tmax, window=15):
    """Return the smoothed day-of-year mean climatology of a daily series.

    The series is first smoothed with a centred rolling mean along ``time``
    and then averaged over all years for each calendar day of the year.

    Parameters
    ----------
    tmax : xarray.DataArray
        Daily values with a ``time`` dimension (presumably daily maximum
        temperature; the function does not depend on the variable itself).
    window : int, optional
        Width of the centred rolling window, in time steps. Defaults to 15,
        the value that was previously hard-coded.

    Returns
    -------
    xarray.DataArray
        Mean of the smoothed series for each ``dayofyear`` group.

    Notes
    -----
    With xarray's default ``min_periods``, time steps whose window is
    incomplete (roughly the first and last ``window // 2`` steps) are NaN
    after smoothing, so the input should extend beyond the period of interest.

    NOTE(review): ``dayofyear`` is the calendar day number, so if 29 February
    was dropped upstream, dates after February still land one group later in
    leap years than in other years -- confirm this is acceptable.
    """
    smoothed = tmax.rolling(time=window, center=True).mean()
    return smoothed.groupby('time.dayofyear').mean('time')

Calculates the 90th-percentile threshold for each day of the year, taken across all years from the centred 15-day rolling mean of daily Tmax.

In [3]:
def climatology_90(tmax, window=15, q=0.9):
    """Return a day-of-year percentile threshold from the smoothed series.

    The series is smoothed with a centred rolling mean along ``time`` and the
    requested quantile of the smoothed values is then taken for each calendar
    day of the year.

    Parameters
    ----------
    tmax : xarray.DataArray
        Daily values with a ``time`` dimension.
    window : int, optional
        Width of the centred rolling window, in time steps. Defaults to 15,
        the value that was previously hard-coded.
    q : float, optional
        Quantile in the range [0, 1]. Defaults to 0.9 (the 90th percentile),
        the value that was previously hard-coded.

    Returns
    -------
    xarray.DataArray
        The ``q`` quantile of the smoothed series for each ``dayofyear`` group.

    Notes
    -----
    The quantile is computed from the rolling *mean*, not from the raw daily
    values pooled over the window. NOTE(review): confirm this is the intended
    threshold definition.
    """
    smoothed = tmax.rolling(time=window, center=True).mean()
    return smoothed.groupby('time.dayofyear').quantile(q)

Splits a sequence of day indices into runs of consecutive values; it is used to group the days on which the daily tmax exceeded the threshold into separate events.

In [4]:
def consecutive(data, stepsize=1):
    """Split ``data`` into runs whose neighbouring values differ by ``stepsize``.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        One-dimensional sequence of numbers, e.g. indices of flagged days.
    stepsize : int, optional
        Difference between neighbours that counts as "consecutive".

    Returns
    -------
    list of numpy.ndarray
        The pieces of ``data``, in order, cut wherever the difference between
        two neighbours is not ``stepsize``.
    """
    # True at position i when data[i + 1] does not follow on from data[i].
    is_break = np.diff(data) != stepsize
    # A new run starts at the element just after each break.
    cut_points = np.nonzero(is_break)[0] + 1
    return np.split(data, cut_points)

Keeps an event only if it lasted at least 3 days and returns its first day and its duration; shorter events return `None`. Note: this doesn't work as expected.

In [5]:
def find_heat(x):
    """Summarise a single event if it is at least 3 elements long.

    Parameters
    ----------
    x : numpy.ndarray
        One run of values (one candidate event).

    Returns
    -------
    tuple or None
        ``(x[0], x.size)`` when the run has 3 or more elements, else ``None``.
    """
    return (x[0], x.size) if x.size >= 3 else None

In [6]:
def duration(events):
    """Return the first element and the length of a single run.

    Known limitation: there is no iteration here, so this only summarises
    one array; it does not walk over a list of events.

    Parameters
    ----------
    events : numpy.ndarray
        One run of values.

    Returns
    -------
    tuple
        ``(events[0], events.size)``.
    """
    first_value = events[0]
    n_values = events.size
    return first_value, n_values

In [7]:
def duration2(events):
    """Return ``(first element, length)`` for every run in ``events``.

    Parameters
    ----------
    events : iterable of numpy.ndarray
        The runs to summarise.

    Returns
    -------
    list of tuple
        One ``(run[0], run.size)`` pair per run, in input order.
    """
    summaries = []
    for run in events:
        summaries.append((run[0], run.size))
    return summaries

# Open yearly ERA5 surface Tmax files and drop leap days

In [8]:
# Collect the yearly files in name order.
files = glob.glob('/g/data/e14/cp3790/Charuni/ERA5-new/era5_dailytmax_*.nc')
files.sort()

# Open all files as one dataset, keep a single grid point in NSW and load it
# into memory.
# NOTE(review): the time slice starts 7 days before 1983 and ends 7 days after
# 2012, presumably so the centred 15-day rolling window is complete at both
# ends -- confirm.
era5_dailytmax_aus = (
    xr.open_mfdataset(files, combine='by_coords')
    .sel(
        time=slice('1982-12-25', '2013-01-07'),
        longitude=141.25,
        latitude=-33.75,
    )
    .load()
)

# Drop every 29 February from the "dmax" variable.
daily_tmax = era5_dailytmax_aus["dmax"].sel(
    time=~(
        (era5_dailytmax_aus["dmax"].time.dt.month == 2)
        & (era5_dailytmax_aus["dmax"].time.dt.day == 29)
    )
)

# This only labels the data; no unit conversion is applied here.
# NOTE(review): confirm the values are already in deg C.
daily_tmax.attrs.update(units='deg C')

# Code

## For single year

Calculate daily mean climatology and 90th percentile 

In [9]:
# Day-of-year mean of the smoothed series, and the matching 90th-percentile
# threshold, both computed from the full multi-year record.
daily_mean = daily_climatology(tmax=daily_tmax)
threshold = climatology_90(tmax=daily_tmax)

Select one year (2010 in this case), compare each day of the year against the corresponding threshold value, and flag it as 1 (above the threshold) or 0 (not above).

The days on which the daily tmax exceeded the threshold are then selected.

Runs of consecutive exceedance days are identified.

In [10]:
# Restrict the series to calendar year 2010.
year_2010 = daily_tmax.sel(time='2010')
# 1 where the day's value is above the threshold for its day of year, else 0.
heatwave_days = (year_2010.groupby('time.dayofyear') > threshold).astype(int)
# np.where returns a tuple of index arrays; the indices are 0-based positions
# within heatwave_days, not calendar day-of-year numbers.
heatwave_events = np.where(heatwave_days==1)
# Group the flagged positions into runs of consecutive days.
counters = consecutive(heatwave_events[0].tolist())

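Events that last for at least 3 consecutive days are kept and assigned to the new list `hottest_single`, which gives the start of each event and its duration in days. The start is the 0-based position of the day within the selected year (as returned by `np.where`), not the calendar day of year.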

In [11]:
# Keep runs of at least 3 consecutive days and summarise each one as
# (first index, number of days).
hottest_single = duration2(run for run in counters if run.size >= 3)

In [12]:
# Show the (start index, duration) pairs found for 2010.
hottest_single

[(6, 6),
 (19, 3),
 (29, 4),
 (37, 5),
 (47, 5),
 (55, 3),
 (73, 7),
 (83, 4),
 (106, 7),
 (166, 5),
 (275, 4),
 (282, 3),
 (312, 4),
 (337, 4),
 (362, 3)]

## For multiple years 

In [18]:
# Subset of daily_tmax covering 2010 through 2012 (label-based slice on time).
multiple_years = daily_tmax.sel(time=slice('2010', '2012'))