# Event based

## Import packages 

In [1]:
from cartopy import config
import cartopy
import cartopy.crs as ccrs
import climtas
import cmocean
from collections import namedtuple
import dask.array
from datetime import datetime
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from scipy import stats
from scipy.stats import ttest_ind 
import seaborn as sns
import sparse
import xarray as xr

## Identifying heatwave events

In [2]:
#Point of interest for heatwave events in Hobart
lat = -43.00
lon = 148.25

In [40]:
# opening the input files with heatwave severity data 
thw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-coastal-sev.nc').sel(latitude=lat, longitude=lon, method='nearest')
mhw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-ocean-sev-2.nc').sel(latitude=lat, longitude=lon, method='nearest')

In [4]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 3 consecutive days 
thw_events = climtas.event.find_events(thw.severity > 1, min_duration = 3)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [5]:
thw_events

Unnamed: 0,time,event_duration
0,102,3
1,218,3
2,238,3
3,427,3
4,590,3
...,...,...
156,13185,5
157,13404,3
158,13435,3
159,13497,3


In [6]:
# Create a sparse array with (coords, values, shape)
event_duration_array = sparse.COO(thw_events['time'], thw_events['event_duration'], shape=thw.severity.shape)
event_duration_da = xr.DataArray(event_duration_array, coords=thw.severity.coords)
event_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,161
Density,0.011913571111439989
Read-only,True
Size,2.5K
Storage ratio,0.0


In [9]:
def get_coords(da, events):
    # Convert the index values to coordinates
    coords = {}
    for d in da.dims:
        coords[d] = da[d].values[events[d].values]
    
    # Also work out when the event ends
    coords['time_end'] = da['time'].values[events['time'].values + events['event_duration'].values-1]
    #coords['event_duration'] = coords['time_end'] - coords['time'] 
    coords['event_duration'] = events['event_duration'].values
    
    return pd.DataFrame(coords, index=events.index)

In [10]:
thw_new = get_coords(event_duration_da, thw_events)
thw_new

Unnamed: 0,time,time_end,event_duration
0,1982-04-13,1982-04-15,3
1,1982-08-07,1982-08-09,3
2,1982-08-27,1982-08-29,3
3,1983-03-04,1983-03-06,3
4,1983-08-14,1983-08-16,3
...,...,...,...
156,2018-02-06,2018-02-10,5
157,2018-09-13,2018-09-15,3
158,2018-10-14,2018-10-16,3
159,2018-12-15,2018-12-17,3


In [11]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 5 consecutive days (MHW events)
mhw_events = climtas.event.find_events(mhw.severity > 1, min_duration = 5)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [12]:
mhw_duration_array = sparse.COO(mhw_events['time'], mhw_events['event_duration'], shape=mhw.severity.shape)
mhw_duration_da = xr.DataArray(event_duration_array, coords=mhw.severity.coords)
mhw_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514,)"
nnz,161
Density,0.011913571111439989
Read-only,True
Size,2.5K
Storage ratio,0.0


In [13]:
mhw_new = get_coords(mhw_duration_da, mhw_events)
mhw_new

Unnamed: 0,time,time_end,event_duration
0,1982-10-31,1982-11-07,8
1,1983-03-23,1983-03-27,5
2,1985-11-20,1985-12-06,17
3,1985-12-10,1985-12-23,14
4,1987-05-20,1987-05-24,5
...,...,...,...
135,2018-08-11,2018-08-24,14
136,2018-10-15,2018-10-21,7
137,2018-10-26,2018-11-08,14
138,2018-11-25,2018-12-04,10


In [16]:
mhw_new.rename(columns={"time": "time_mhw", "time_end": "time_end_mhw", "event_duration": "event_duration_mhw"}, inplace=True)
mhw_new

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1982-10-31,1982-11-07,8
1,1983-03-23,1983-03-27,5
2,1985-11-20,1985-12-06,17
3,1985-12-10,1985-12-23,14
4,1987-05-20,1987-05-24,5
...,...,...,...
135,2018-08-11,2018-08-24,14
136,2018-10-15,2018-10-21,7
137,2018-10-26,2018-11-08,14
138,2018-11-25,2018-12-04,10


In [49]:
max_severity = climtas.event.map_events(mhw.severity, mhw_events, lambda x: {'max_severity': x.max().item()})
max_severity

Unnamed: 0,max_severity
0,1.422852
1,1.747070
2,1.336697
3,1.358765
4,1.111328
...,...
135,1.490479
136,1.458496
137,1.961914
138,1.587891


In [34]:
# Finding the co-occurring MHWs first based on overlap between THW and MHW

Range = namedtuple('Range', ['start', 'end'])
mhw_mod = pd.DataFrame(columns=['time_mhw', 'time_end_mhw', 'event_duration_mhw'])
for n in range (thw_new.shape[0]):
    for b in range(mhw_new.shape[0]):
        mhw = Range(start=mhw_new['time_mhw'].iloc[b], end=mhw_new['time_end_mhw'].iloc[b])
        thw = Range(start=thw_new['time'].iloc[n], end=thw_new['time_end'].iloc[n])
        if thw.start >= mhw.start and mhw.end >= thw.end:
            mhw_mod = mhw_mod.append([{'time_mhw':mhw_new['time_mhw'].iloc[b], 'time_end_mhw':mhw_new['time_end_mhw'].iloc[b], 'event_duration_mhw':mhw_new['event_duration_mhw'].iloc[b]}], ignore_index=True)

In [35]:
mhw_mod

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1988-05-03,1988-07-03,62
1,1988-05-03,1988-07-03,62
2,1988-07-06,1988-07-17,12
3,1988-07-06,1988-07-17,12
4,1988-07-20,1988-08-08,20
...,...,...,...
66,2017-11-14,2018-01-11,59
67,2018-01-13,2018-02-16,35
68,2018-01-13,2018-02-16,35
69,2018-01-13,2018-02-16,35


In [36]:
# There will be duplicates as more than one THW may overlap with a single MHW as MHWs last longer than THWs.
# Therefore these need to be dropped

test = mhw_mod.drop_duplicates()
test

Unnamed: 0,time_mhw,time_end_mhw,event_duration_mhw
0,1988-05-03,1988-07-03,62
2,1988-07-06,1988-07-17,12
4,1988-07-20,1988-08-08,20
5,1988-09-07,1988-09-18,12
6,1999-09-26,1999-10-17,22
7,2000-09-12,2000-09-19,8
8,2000-11-23,2000-12-22,30
10,2001-01-24,2001-02-12,20
12,2001-06-29,2001-08-17,50
13,2001-08-26,2001-09-19,25


In [37]:
co_events_mhw = test.reset_index()
co_events_mhw

Unnamed: 0,index,time_mhw,time_end_mhw,event_duration_mhw
0,0,1988-05-03,1988-07-03,62
1,2,1988-07-06,1988-07-17,12
2,4,1988-07-20,1988-08-08,20
3,5,1988-09-07,1988-09-18,12
4,6,1999-09-26,1999-10-17,22
5,7,2000-09-12,2000-09-19,8
6,8,2000-11-23,2000-12-22,30
7,10,2001-01-24,2001-02-12,20
8,12,2001-06-29,2001-08-17,50
9,13,2001-08-26,2001-09-19,25


In [39]:
# Join max_severity so we have the maximum severity of each event.
# How do I get the date (time_peak_mhw) corresponding to this value? 

co_mhw = co_events_mhw.join(max_severity)
co_mhw

Unnamed: 0,index,time_mhw,time_end_mhw,event_duration_mhw,max_severity
0,0,1988-05-03,1988-07-03,62,1.422852
1,2,1988-07-06,1988-07-17,12,1.74707
2,4,1988-07-20,1988-08-08,20,1.336697
3,5,1988-09-07,1988-09-18,12,1.358765
4,6,1999-09-26,1999-10-17,22,1.111328
5,7,2000-09-12,2000-09-19,8,1.257812
6,8,2000-11-23,2000-12-22,30,1.339844
7,10,2001-01-24,2001-02-12,20,1.813802
8,12,2001-06-29,2001-08-17,50,1.322266
9,13,2001-08-26,2001-09-19,25,2.028537


In [None]:
# Finding THW that occur during build up of MHW
# Calculating the overlap between THW and MHW events - for buildup 
# buildup = time_mhw to time_peak_mhw (time_peak_mhw correspnds to time of max_severity)

Range = namedtuple('Range', ['start', 'end'])
thw_buildup = pd.DataFrame(columns=['time', 'time_end', 'event_duration'])
for n in range (thw_new.shape[0]):
    for b in range(mhw_new.shape[0]):
        mhw = Range(start=mhw_new['time_mhw'].iloc[b], end=mhw_new['time_peak_mhw'].iloc[b])
        thw = Range(start=thw_new['time'].iloc[n], end=thw_new['time_end'].iloc[n])
        if thw.start >= mhw.start and mhw.end >= thw.end:
            thw_buildup = thw_buildup.append([{'time':thw_new['time'].iloc[n], 'time_end':thw_new['time_end'].iloc[n], 'event_duration':thw_new['event_duration'].iloc[n]}], ignore_index=True)

In [None]:
# Finding THW that occur during decay of MHW
# Calculating the overlap between THW and MHW events - for decay
# decay = time_peak_mhw to time_end_mhw (time_peak_mhw correspnds to time of max_severity)

Range = namedtuple('Range', ['start', 'end'])
thw_decay = pd.DataFrame(columns=['time', 'time_end', 'event_duration'])
for n in range (thw_new.shape[0]):
    for b in range(mhw_new.shape[0]):
        mhw = Range(start=mhw_new['time_peak_mhw'].iloc[b], end=mhw_new['time_end_mhw'].iloc[b])
        thw = Range(start=thw_new['time'].iloc[n], end=thw_new['time_end'].iloc[n])
        if thw.start >= mhw.start and mhw.end >= thw.end:
            thw_decay = thw_decay.append([{'time':thw_new['time'].iloc[n], 'time_end':thw_new['time_end'].iloc[n], 'event_duration':thw_new['event_duration'].iloc[n]}], ignore_index=True)