## Event based

In [43]:
from cartopy import config
import cartopy
import cartopy.crs as ccrs
import climtas
import dask.array
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from scipy import stats
from scipy.stats import ttest_ind 
import seaborn as sns
import sparse
import xarray as xr

In [2]:
#Point of interest for heatwave events
lat = -23.25
lon = 113.5

In [3]:
# opening the input files with heatwave severity data 
thw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-coastal-sev.nc').sel(latitude=lat, longitude=lon)
mhw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-ocean-sev-2.nc').sel(latitude=lat, longitude=lon)

In [4]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 3 consecutive days 
thw_events = climtas.event.find_events(thw.severity > 1, min_duration = 3)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [9]:
# Create a sparse array with (coords, values, shape)
event_duration_array = sparse.COO(thw_events['time'], thw_events['event_duration'], shape=thw.severity.shape)
event_duration_da = xr.DataArray(event_duration_array, coords=thw.severity.coords)
event_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,171
Density,0.012653544472398994
Read-only,True
Size,2.7K
Storage ratio,0.0


In [10]:
def get_coords(da, events):
    # Convert the index values to coordinates
    coords = {}
    for d in da.dims:
        coords[d] = da[d].values[events[d].values]
    
    # Also work out when the event ends
    coords['time_end'] = da['time'].values[events['time'].values + events['event_duration'].values-1]
    #coords['event_duration'] = coords['time_end'] - coords['time'] 
    coords['event_duration'] = events['event_duration'].values
    
    return pd.DataFrame(coords, index=events.index)

In [11]:
thw_new_H = get_coords(event_duration_da, thw_events)
thw_new_H

Unnamed: 0,time,time_end,event_duration
0,1982-12-06,1982-12-08,3
1,1983-06-07,1983-06-11,5
2,1983-08-07,1983-08-10,4
3,1983-09-16,1983-09-21,6
4,1983-10-22,1983-10-25,4
...,...,...,...
166,2018-01-02,2018-01-06,5
167,2018-01-12,2018-01-14,3
168,2018-03-07,2018-03-11,5
169,2018-05-14,2018-05-16,3


In [12]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 5 consecutive days (MHW events)
mhw_events = climtas.event.find_events(mhw.severity > 1, min_duration = 5)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [14]:
mhw_duration_array = sparse.COO(mhw_events['time'], mhw_events['event_duration'], shape=mhw.severity.shape)
mhw_duration_da = xr.DataArray(event_duration_array, coords=mhw.severity.coords)
mhw_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,171
Density,0.012653544472398994
Read-only,True
Size,2.7K
Storage ratio,0.0


In [22]:
mhw_new = get_coords(mhw_duration_da, mhw_events)
mhw_new

Unnamed: 0,time,time_end,event_duration
0,1982-03-15,1982-03-20,6
1,1982-04-16,1982-04-20,5
2,1982-04-28,1982-05-10,13
3,1982-08-07,1982-08-11,5
4,1982-08-15,1982-08-19,5
...,...,...,...
96,2015-09-15,2015-10-17,33
97,2015-10-21,2015-10-30,10
98,2016-07-17,2016-07-21,5
99,2017-02-28,2017-03-06,7


In [23]:
mhw_new.rename(columns={"time": "time_mhw", "time_end": "time_end_mhw", "event_duration": "event_duration_mhw"}, inplace=True)
mhw_new

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1982-03-15,1982-03-20,6
1,1982-04-16,1982-04-20,5
2,1982-04-28,1982-05-10,13
3,1982-08-07,1982-08-11,5
4,1982-08-15,1982-08-19,5
...,...,...,...
96,2015-09-15,2015-10-17,33
97,2015-10-21,2015-10-30,10
98,2016-07-17,2016-07-21,5
99,2017-02-28,2017-03-06,7


In [24]:
# concatenating the THW and MHW dataframes 
df_merge_col = pd.concat([thw_new_H, mhw_new], axis=1)
df_merge_col

Unnamed: 0,time,time_end,event_duration,time_mhw,time_end_mhw,event_duration_mhw
0,1982-12-06,1982-12-08,3,1982-03-15,1982-03-20,6.0
1,1983-06-07,1983-06-11,5,1982-04-16,1982-04-20,5.0
2,1983-08-07,1983-08-10,4,1982-04-28,1982-05-10,13.0
3,1983-09-16,1983-09-21,6,1982-08-07,1982-08-11,5.0
4,1983-10-22,1983-10-25,4,1982-08-15,1982-08-19,5.0
...,...,...,...,...,...,...
166,2018-01-02,2018-01-06,5,NaT,NaT,
167,2018-01-12,2018-01-14,3,NaT,NaT,
168,2018-03-07,2018-03-11,5,NaT,NaT,
169,2018-05-14,2018-05-16,3,NaT,NaT,


In [30]:
#Calculating the overlap between THW and MHW events 
from datetime import datetime
from collections import namedtuple
Range = namedtuple('Range', ['start', 'end'])
overlap = []
for n in range (df_merge_col.shape[0]):
    mhw = Range(start=df_merge_col['time_mhw'].iloc[n], end=df_merge_col['time_end_mhw'].iloc[n])
    thw = Range(start=df_merge_col['time'].iloc[n], end=df_merge_col['time_end'].iloc[n])
    latest_start = max(mhw.start, thw.start)
    earliest_end = min(mhw.end, thw.end)
    delta = (earliest_end - latest_start).days + 1
    b = max(0, delta)
    overlap.append(b)

In [32]:
mod_fd = df_merge_col.assign(overlap_days = overlap)
mod_fd

Unnamed: 0,time,time_end,event_duration,time_mhw,time_end_mhw,event_duration_mhw,overlap_days
0,1982-12-06,1982-12-08,3,1982-03-15,1982-03-20,6.0,0
1,1983-06-07,1983-06-11,5,1982-04-16,1982-04-20,5.0,0
2,1983-08-07,1983-08-10,4,1982-04-28,1982-05-10,13.0,0
3,1983-09-16,1983-09-21,6,1982-08-07,1982-08-11,5.0,0
4,1983-10-22,1983-10-25,4,1982-08-15,1982-08-19,5.0,0
...,...,...,...,...,...,...,...
166,2018-01-02,2018-01-06,5,NaT,NaT,,0
167,2018-01-12,2018-01-14,3,NaT,NaT,,0
168,2018-03-07,2018-03-11,5,NaT,NaT,,0
169,2018-05-14,2018-05-16,3,NaT,NaT,,0


In [84]:
#co-occurring events is where there is a minimum overlap of 1 day between THW and MHW events
co_events = mod_fd[mod_fd.overlap_days != 0].reset_index()
co_events

Unnamed: 0,index,time,time_end,event_duration,time_mhw,time_end_mhw,event_duration_mhw,overlap_days
0,15,1985-02-26,1985-03-04,7,1985-02-12,1985-03-04,21.0,7


In [85]:
del co_events['time_mhw']
del co_events['time_end_mhw']
del co_events['overlap_days']
del co_events['event_duration_mhw']

co_events

Unnamed: 0,index,time,time_end,event_duration
0,15,1985-02-26,1985-03-04,7


In [86]:
#Changing time format to days so that it can be aligned with the mslp data array 

co_events.time = int(((co_events.time - np.datetime64('1982-01-01T00:00:00Z')) / np.timedelta64(1, 'h'))/24)
co_events.time_end = int(((co_events.time_end - np.datetime64('1982-01-01T00:00:00Z')) / np.timedelta64(1, 'h'))/24)
co_events

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


Unnamed: 0,index,time,time_end,event_duration
0,15,1152,1158,7


In [35]:
ds = xr.open_mfdataset('/g/data/e14/cp3790/Charuni/ERA5-MSL/era5_dailymsl_*.nc').sel(longitude=slice(90, 180), latitude=slice(0, -60))
mslp = ds['dmsl']/100
mslp.attrs['units'] = 'hPa'
mslp = mslp.chunk({'time':-1, 'latitude': 1, 'longitude':1})
mslp.load()

In [75]:
mslp_stack = mslp.stack(cell=('latitude', 'longitude'))

In [76]:
# Creating a new data array into which I subsequently save the mslp values
comp = xr.DataArray(np.random.randint(0, 100, size=(86760)),dims=["ncell"], coords=[mslp_stack.cell])

In [79]:
for x in mslp_stack.cell:
    mslp_ = mslp_stack.sel(cell=x)
    mslp_composite = climtas.event.map_events(mslp_, co_events, lambda x: x.mean().values) #this gives the means of individual events at a grid cell
    event_mean_mslp = mslp_composite.mean() #this gives the mean of all means 
    comp.loc[{"ncell":x}] = event_mean_mslp
TypeError: 'numpy.float64' object cannot be interpreted as an integer


TypeError: 'numpy.float64' object cannot be interpreted as an integer