# Event based

## Import packages 

In [1]:
from cartopy import config
import cartopy
import cartopy.crs as ccrs
import climtas
import cmocean
from collections import namedtuple
import dask.array
from datetime import datetime
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
import scipy.stats
from scipy import stats
from scipy.stats import ttest_ind 
import seaborn as sns
import sparse
import xarray as xr

## Identifying heatwave events

In [2]:
#Point of interest for heatwave events in Hobart
lat = -43.00
lon = 148.25

In [3]:
# opening the input files with heatwave severity data 
thw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-coastal-sev.nc').sel(latitude=lat, longitude=lon, method='nearest')
mhw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-ocean-sev-2.nc').sel(latitude=lat, longitude=lon, method='nearest')

In [4]:
oisst = xr.open_dataset('/g/data/e14/cp3790/Charuni/NOAA-OISST/oisst_aus_time.nc').sel(lat=lat, lon=lon, method='nearest')

In [5]:
sst = oisst.sst

In [6]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 3 consecutive days 
thw_events = climtas.event.find_events(thw.severity > 1, min_duration = 3)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [7]:
# Create a sparse array with (coords, values, shape)
event_duration_array = sparse.COO(thw_events['time'], thw_events['event_duration'], shape=thw.severity.shape)
event_duration_da = xr.DataArray(event_duration_array, coords=thw.severity.coords)
event_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,161
Density,0.011913571111439989
Read-only,True
Size,2.5K
Storage ratio,0.0


In [8]:
def get_coords(da, events):
    # Convert the index values to coordinates
    coords = {}
    for d in da.dims:
        coords[d] = da[d].values[events[d].values]
    
    # Also work out when the event ends
    coords['time_end'] = da['time'].values[events['time'].values + events['event_duration'].values-1]
    #coords['event_duration'] = coords['time_end'] - coords['time'] 
    coords['event_duration'] = events['event_duration'].values
    
    return pd.DataFrame(coords, index=events.index)

In [9]:
thw_new = get_coords(event_duration_da, thw_events)
thw_new

Unnamed: 0,time,time_end,event_duration
0,1982-04-13,1982-04-15,3
1,1982-08-07,1982-08-09,3
2,1982-08-27,1982-08-29,3
3,1983-03-04,1983-03-06,3
4,1983-08-14,1983-08-16,3
...,...,...,...
156,2018-02-06,2018-02-10,5
157,2018-09-13,2018-09-15,3
158,2018-10-14,2018-10-16,3
159,2018-12-15,2018-12-17,3


In [10]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 5 consecutive days (MHW events)
mhw_events = climtas.event.find_events(mhw.severity > 1, min_duration = 5)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [11]:
mhw_duration_array = sparse.COO(mhw_events['time'], mhw_events['event_duration'], shape=mhw.severity.shape)
mhw_duration_da = xr.DataArray(event_duration_array, coords=mhw.severity.coords)
mhw_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,161
Density,0.011913571111439989
Read-only,True
Size,2.5K
Storage ratio,0.0


In [12]:
mhw_new = get_coords(mhw_duration_da, mhw_events)
mhw_new

Unnamed: 0,time,time_end,event_duration
0,1982-10-31,1982-11-07,8
1,1983-03-23,1983-03-27,5
2,1985-11-20,1985-12-06,17
3,1985-12-10,1985-12-23,14
4,1987-05-20,1987-05-24,5
...,...,...,...
135,2018-08-11,2018-08-24,14
136,2018-10-15,2018-10-21,7
137,2018-10-26,2018-11-08,14
138,2018-11-25,2018-12-04,10


In [13]:
mhw_new.rename(columns={"time": "time_mhw", "time_end": "time_end_mhw", "event_duration": "event_duration_mhw"}, inplace=True)
mhw_new

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1982-10-31,1982-11-07,8
1,1983-03-23,1983-03-27,5
2,1985-11-20,1985-12-06,17
3,1985-12-10,1985-12-23,14
4,1987-05-20,1987-05-24,5
...,...,...,...
135,2018-08-11,2018-08-24,14
136,2018-10-15,2018-10-21,7
137,2018-10-26,2018-11-08,14
138,2018-11-25,2018-12-04,10


## Stand-alone MHW

In [16]:
# Finding the co-occurring MHWs first  

Range = namedtuple('Range', ['start', 'end'])
mhw_mod = pd.DataFrame(columns=['time', 'time_end', 'event_duration'])
for t in range (thw_new.shape[0]):
    for m in range(mhw_new.shape[0]):
        thw = Range(start=thw_new['time'].iloc[t], end=thw_new['time_end'].iloc[t])
        mhw = Range(start=mhw_new['time_mhw'].iloc[m], end=mhw_new['time_end_mhw'].iloc[m])
        if thw.start >= mhw.start and mhw.end >= thw.end:
            mhw_mod = mhw_mod.append([{'time':mhw_new['time_mhw'].iloc[m], 'time_end':mhw_new['time_end_mhw'].iloc[m], 'event_duration':mhw_new['event_duration_mhw'].iloc[m]}], ignore_index=True)

In [17]:
mhw_mod

Unnamed: 0,time,time_end,event_duration
0,1988-05-03,1988-07-03,62
1,1988-05-03,1988-07-03,62
2,1988-07-06,1988-07-17,12
3,1988-07-06,1988-07-17,12
4,1988-07-20,1988-08-08,20
...,...,...,...
66,2017-11-14,2018-01-11,59
67,2018-01-13,2018-02-16,35
68,2018-01-13,2018-02-16,35
69,2018-01-13,2018-02-16,35


In [18]:
co_events_mhw = mhw_mod.reset_index()
co_events_mhw

Unnamed: 0,index,time,time_end,event_duration
0,0,1988-05-03,1988-07-03,62
1,1,1988-05-03,1988-07-03,62
2,2,1988-07-06,1988-07-17,12
3,3,1988-07-06,1988-07-17,12
4,4,1988-07-20,1988-08-08,20
...,...,...,...,...
66,66,2017-11-14,2018-01-11,59
67,67,2018-01-13,2018-02-16,35
68,68,2018-01-13,2018-02-16,35
69,69,2018-01-13,2018-02-16,35


In [19]:
co_events_mhw = co_events_mhw.rename(columns={"time":"time_mhw", "time_end":"time_end_mhw", "event_duration":"event_duration_mhw"})
co_events_mhw

Unnamed: 0,index,time_mhw,time_end_mhw,event_duration_mhw
0,0,1988-05-03,1988-07-03,62
1,1,1988-05-03,1988-07-03,62
2,2,1988-07-06,1988-07-17,12
3,3,1988-07-06,1988-07-17,12
4,4,1988-07-20,1988-08-08,20
...,...,...,...,...
66,66,2017-11-14,2018-01-11,59
67,67,2018-01-13,2018-02-16,35
68,68,2018-01-13,2018-02-16,35
69,69,2018-01-13,2018-02-16,35


In [20]:
del co_events_mhw['index']
co_events_mhw

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1988-05-03,1988-07-03,62
1,1988-05-03,1988-07-03,62
2,1988-07-06,1988-07-17,12
3,1988-07-06,1988-07-17,12
4,1988-07-20,1988-08-08,20
...,...,...,...
66,2017-11-14,2018-01-11,59
67,2018-01-13,2018-02-16,35
68,2018-01-13,2018-02-16,35
69,2018-01-13,2018-02-16,35


In [21]:
co_events_mhw['time'] = ((co_events_mhw.time_mhw - (pd.to_datetime('1982-1-1')))/np.timedelta64(1, 'D')).astype(int)
co_events_mhw

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw,time
0,1988-05-03,1988-07-03,62,2314
1,1988-05-03,1988-07-03,62,2314
2,1988-07-06,1988-07-17,12,2378
3,1988-07-06,1988-07-17,12,2378
4,1988-07-20,1988-08-08,20,2392
...,...,...,...,...
66,2017-11-14,2018-01-11,59,13101
67,2018-01-13,2018-02-16,35,13161
68,2018-01-13,2018-02-16,35,13161
69,2018-01-13,2018-02-16,35,13161


In [22]:
co_events_mhw = co_events_mhw.rename(columns={"event_duration_mhw":"event_duration"})
del co_events_mhw['time_mhw']
del co_events_mhw['time_end_mhw']
co_events_mhw

Unnamed: 0,event_duration,time
0,62,2314
1,62,2314
2,12,2378
3,12,2378
4,20,2392
...,...,...
66,59,13101
67,35,13161
68,35,13161
69,35,13161


In [23]:
co_events_mhw = co_events_mhw[['time', 'event_duration']]
co_events_mhw

Unnamed: 0,time,event_duration
0,2314,62
1,2314,62
2,2378,12
3,2378,12
4,2392,20
...,...,...
66,13101,59
67,13161,35
68,13161,35
69,13161,35


In [24]:
co_max_sst = climtas.event.map_events(sst, co_events_mhw, lambda x: {'max_sst': x.max().item(), 'time_peak_mhw': x.idxmax().values})
co_max_sst

Unnamed: 0,max_sst,time_peak_mhw
0,17.410000,1988-05-13 12:00:00
1,17.410000,1988-05-13 12:00:00
2,14.200000,1988-07-12 12:00:00
3,14.200000,1988-07-12 12:00:00
4,14.170000,1988-07-23 12:00:00
...,...,...
66,18.670000,2017-12-29 12:00:00
67,20.039999,2018-02-07 12:00:00
68,20.039999,2018-02-07 12:00:00
69,20.039999,2018-02-07 12:00:00


In [25]:
co_max_sst['time_peak_mhw'] = co_max_sst['time_peak_mhw'].dt.date.astype('datetime64')
co_max_sst

Unnamed: 0,max_sst,time_peak_mhw
0,17.410000,1988-05-13
1,17.410000,1988-05-13
2,14.200000,1988-07-12
3,14.200000,1988-07-12
4,14.170000,1988-07-23
...,...,...
66,18.670000,2017-12-29
67,20.039999,2018-02-07
68,20.039999,2018-02-07
69,20.039999,2018-02-07


In [33]:
co_events_mhw = mhw_mod.reset_index()
co_events_mhw = co_events_mhw.rename(columns={"time":"time_mhw", "time_end":"time_end_mhw", "event_duration":"event_duration_mhw"})
del co_events_mhw['index']

In [34]:
co_mhw_peak = co_events_mhw.join(co_max_sst)
co_mhw_peak

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw,max_sst,time_peak_mhw
0,1988-05-03,1988-07-03,62,17.410000,1988-05-13
1,1988-05-03,1988-07-03,62,17.410000,1988-05-13
2,1988-07-06,1988-07-17,12,14.200000,1988-07-12
3,1988-07-06,1988-07-17,12,14.200000,1988-07-12
4,1988-07-20,1988-08-08,20,14.170000,1988-07-23
...,...,...,...,...,...
66,2017-11-14,2018-01-11,59,18.670000,2017-12-29
67,2018-01-13,2018-02-16,35,20.039999,2018-02-07
68,2018-01-13,2018-02-16,35,20.039999,2018-02-07
69,2018-01-13,2018-02-16,35,20.039999,2018-02-07


In [35]:
co_mhw_peak = co_mhw_peak.drop_duplicates()
co_mhw_peak

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw,max_sst,time_peak_mhw
0,1988-05-03,1988-07-03,62,17.41,1988-05-13
2,1988-07-06,1988-07-17,12,14.2,1988-07-12
4,1988-07-20,1988-08-08,20,14.17,1988-07-23
5,1988-09-07,1988-09-18,12,13.45,1988-09-14
6,1999-09-26,1999-10-17,22,14.27,1999-10-09
7,2000-09-12,2000-09-19,8,14.54,2000-09-17
8,2000-11-23,2000-12-22,30,17.84,2000-12-18
10,2001-01-24,2001-02-12,20,19.549999,2001-02-03
12,2001-06-29,2001-08-17,50,15.0,2001-07-17
13,2001-08-26,2001-09-19,25,14.219999,2001-09-14


In [36]:
co_mhw_peak = co_mhw_peak.reset_index()
co_mhw_peak

Unnamed: 0,index,time_mhw,time_end_mhw,event_duration_mhw,max_sst,time_peak_mhw
0,0,1988-05-03,1988-07-03,62,17.41,1988-05-13
1,2,1988-07-06,1988-07-17,12,14.2,1988-07-12
2,4,1988-07-20,1988-08-08,20,14.17,1988-07-23
3,5,1988-09-07,1988-09-18,12,13.45,1988-09-14
4,6,1999-09-26,1999-10-17,22,14.27,1999-10-09
5,7,2000-09-12,2000-09-19,8,14.54,2000-09-17
6,8,2000-11-23,2000-12-22,30,17.84,2000-12-18
7,10,2001-01-24,2001-02-12,20,19.549999,2001-02-03
8,12,2001-06-29,2001-08-17,50,15.0,2001-07-17
9,13,2001-08-26,2001-09-19,25,14.219999,2001-09-14


In [37]:
co_mhw_buildup = co_mhw_peak[['time_mhw', 'time_peak_mhw']].copy()
co_mhw_buildup

Unnamed: 0,time_mhw,time_peak_mhw
0,1988-05-03,1988-05-13
1,1988-07-06,1988-07-12
2,1988-07-20,1988-07-23
3,1988-09-07,1988-09-14
4,1999-09-26,1999-10-09
5,2000-09-12,2000-09-17
6,2000-11-23,2000-12-18
7,2001-01-24,2001-02-03
8,2001-06-29,2001-07-17
9,2001-08-26,2001-09-14


In [38]:
co_mhw_buildup['event_duration'] = (co_mhw_buildup['time_peak_mhw'] - co_mhw_buildup['time_mhw']).dt.days+1
co_mhw_buildup['time'] = ((co_mhw_buildup['time_mhw'] - (pd.to_datetime('1982-1-1')))/np.timedelta64(1, 'D')).astype(int)
co_mhw_buildup

Unnamed: 0,time_mhw,time_peak_mhw,event_duration,time
0,1988-05-03,1988-05-13,11,2314
1,1988-07-06,1988-07-12,7,2378
2,1988-07-20,1988-07-23,4,2392
3,1988-09-07,1988-09-14,8,2441
4,1999-09-26,1999-10-09,14,6477
5,2000-09-12,2000-09-17,6,6829
6,2000-11-23,2000-12-18,26,6901
7,2001-01-24,2001-02-03,11,6963
8,2001-06-29,2001-07-17,19,7119
9,2001-08-26,2001-09-14,20,7177


In [39]:
def trend_function(y,ytime):
    intercept = scipy.stats.linregress(ytime, y)[0]
    return intercept

In [40]:
trend_sst = climtas.event.map_events(sst, co_mhw_buildup, trend_function, sst.time)
trend_sst

UFuncTypeError: ufunc 'add' cannot use operands with types dtype('<M8[ns]') and dtype('<M8[ns]')