## Event based

In [1]:
from cartopy import config
import cartopy
import cartopy.crs as ccrs
import climtas
import dask.array
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from scipy import stats
from scipy.stats import ttest_ind 
import seaborn as sns
import sparse
import xarray as xr

In [2]:
# opening the input files with heatwave severity data 
thw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-coastal-sev.nc')
mhw = xr.open_dataset('/g/data/e14/cp3790/Charuni/Tasmania/aus-ocean-sev-2.nc')

In [3]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 3 consecutive days 
thw_events = climtas.event.find_events(thw.severity > 1, min_duration = 3)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [4]:
# Create a sparse array with (coords, values, shape)
event_duration_array = sparse.COO([thw_events['time'], thw_events['latitude'], thw_events['longitude']], thw_events['event_duration'], shape=thw.severity.shape)
event_duration_da = xr.DataArray(event_duration_array, coords=thw.severity.coords)
event_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514, 137, 165)"
nnz,165339
Density,0.0005412362553753639
Read-only,True
Size,5.0M
Storage ratio,0.0


In [5]:
def get_coords(da, events):
    # Convert the index values to coordinates
    coords = {}
    for d in da.dims:
        coords[d] = da[d].values[events[d].values]
    
    # Also work out when the event ends
    coords['time_end'] = da['time'].values[events['time'].values + events['event_duration'].values-1]
    #coords['event_duration'] = coords['time_end'] - coords['time'] 
    coords['event_duration'] = events['event_duration'].values
    
    return pd.DataFrame(coords, index=events.index)

In [8]:
thw_new_H = get_coords(event_duration_da, thw_events)
thw_new_H

Unnamed: 0,time,latitude,longitude,time_end,event_duration
0,1982-01-01,-22.00,150.75,1982-01-03,3
1,1982-01-01,-22.00,151.00,1982-01-03,3
2,1982-01-01,-22.50,151.00,1982-01-03,3
3,1982-01-01,-23.50,151.50,1982-01-03,3
4,1982-01-01,-23.50,151.75,1982-01-03,3
...,...,...,...,...,...
165334,2018-12-26,-42.50,148.50,2018-12-31,6
165335,2018-12-26,-42.50,148.75,2018-12-31,6
165336,2018-12-26,-42.75,148.50,2018-12-31,6
165337,2018-12-26,-43.25,148.00,2018-12-31,6


In [14]:
thw_new_df = thw_new_H.reset_index()
thw_new_df.set_index(['latitude', 'longitude'], inplace=True)
thw_new_df

Unnamed: 0_level_0,Unnamed: 1_level_0,index,time,time_end,event_duration
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-22.00,150.75,0,1982-01-01,1982-01-03,3
-22.00,151.00,1,1982-01-01,1982-01-03,3
-22.50,151.00,2,1982-01-01,1982-01-03,3
-23.50,151.50,3,1982-01-01,1982-01-03,3
-23.50,151.75,4,1982-01-01,1982-01-03,3
...,...,...,...,...,...
-42.50,148.50,165334,2018-12-26,2018-12-31,6
-42.50,148.75,165335,2018-12-26,2018-12-31,6
-42.75,148.50,165336,2018-12-26,2018-12-31,6
-43.25,148.00,165337,2018-12-26,2018-12-31,6


In [7]:
# using climtas.event.find_events to identify days where severity>1 for a minimum of 5 consecutive days (MHW events)
mhw_events = climtas.event.find_events(mhw.severity > 1, min_duration = 5)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13514.0), HTML(value='')))




In [9]:
mhw_duration_array = sparse.COO([mhw_events['time'], mhw_events['latitude'], mhw_events['longitude']], mhw_events['event_duration'], shape=mhw.severity.shape)
mhw_duration_da = xr.DataArray(event_duration_array, coords=mhw.severity.coords)
mhw_duration_da

0,1
Format,coo
Data Type,int64
Shape,"(13514, 137, 165)"
nnz,165339
Density,0.0005412362553753639
Read-only,True
Size,5.0M
Storage ratio,0.0


In [10]:
mhw_new = get_coords(mhw_duration_da, mhw_events)
mhw_new

Unnamed: 0,time,latitude,longitude,time_end,event_duration
0,1982-01-01,-10.00,120.25,1982-01-05,5
1,1982-01-01,-10.25,143.75,1982-01-05,5
2,1982-01-01,-10.25,144.00,1982-01-05,5
3,1982-01-01,-11.00,136.50,1982-01-05,5
4,1982-01-01,-11.00,136.75,1982-01-05,5
...,...,...,...,...,...
127727,2018-12-15,-43.75,146.25,2018-12-31,17
127728,2018-12-18,-43.75,146.50,2018-12-31,14
127729,2018-12-18,-43.75,146.75,2018-12-31,14
127730,2018-12-09,-43.75,147.00,2018-12-31,23


In [12]:
mhw_new_df = mhw_new.reset_index()
mhw_new_df.set_index(['latitude', 'longitude'], inplace=True)
mhw_new_df

Unnamed: 0_level_0,Unnamed: 1_level_0,index,time,time_end,event_duration
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-10.00,120.25,0,1982-01-01,1982-01-05,5
-10.25,143.75,1,1982-01-01,1982-01-05,5
-10.25,144.00,2,1982-01-01,1982-01-05,5
-11.00,136.50,3,1982-01-01,1982-01-05,5
-11.00,136.75,4,1982-01-01,1982-01-05,5
...,...,...,...,...,...
-43.75,146.25,127727,2018-12-15,2018-12-31,17
-43.75,146.50,127728,2018-12-18,2018-12-31,14
-43.75,146.75,127729,2018-12-18,2018-12-31,14
-43.75,147.00,127730,2018-12-09,2018-12-31,23


In [15]:
# merging the THW and MHW dataframes based on latitude and longitude 
df_merge_col = pd.merge(thw_new_df, mhw_new_df, on=['latitude', 'longitude'])
df_merge_col

Unnamed: 0_level_0,Unnamed: 1_level_0,index_x,time_x,time_end_x,event_duration_x,index_y,time_y,time_end_y,event_duration_y
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-22.0,150.75,0,1982-01-01,1982-01-03,3,149,1982-01-02,1982-01-08,7
-22.0,150.75,0,1982-01-01,1982-01-03,3,557,1982-01-18,1982-01-26,9
-22.0,150.75,0,1982-01-01,1982-01-03,3,1008,1982-03-04,1982-03-12,9
-22.0,150.75,0,1982-01-01,1982-01-03,3,1291,1982-03-16,1982-03-21,6
-22.0,150.75,0,1982-01-01,1982-01-03,3,1615,1982-03-29,1982-04-05,8
...,...,...,...,...,...,...,...,...,...
-29.5,153.50,164567,2018-12-13,2018-12-15,3,125538,2018-03-30,2018-04-29,31
-29.5,153.50,164567,2018-12-13,2018-12-15,3,126077,2018-06-10,2018-06-16,7
-29.5,153.50,164567,2018-12-13,2018-12-15,3,126430,2018-08-08,2018-08-15,8
-29.5,153.50,164567,2018-12-13,2018-12-15,3,126659,2018-09-21,2018-09-25,5


In [16]:
del df_merge_col['index_y']
del df_merge_col['index_x']

df_merge_col

Unnamed: 0_level_0,Unnamed: 1_level_0,time_x,time_end_x,event_duration_x,time_y,time_end_y,event_duration_y
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
-22.0,150.75,1982-01-01,1982-01-03,3,1982-01-02,1982-01-08,7
-22.0,150.75,1982-01-01,1982-01-03,3,1982-01-18,1982-01-26,9
-22.0,150.75,1982-01-01,1982-01-03,3,1982-03-04,1982-03-12,9
-22.0,150.75,1982-01-01,1982-01-03,3,1982-03-16,1982-03-21,6
-22.0,150.75,1982-01-01,1982-01-03,3,1982-03-29,1982-04-05,8
...,...,...,...,...,...,...,...
-29.5,153.50,2018-12-13,2018-12-15,3,2018-03-30,2018-04-29,31
-29.5,153.50,2018-12-13,2018-12-15,3,2018-06-10,2018-06-16,7
-29.5,153.50,2018-12-13,2018-12-15,3,2018-08-08,2018-08-15,8
-29.5,153.50,2018-12-13,2018-12-15,3,2018-09-21,2018-09-25,5


In [17]:
del df_merge_col['event_duration_y']
del df_merge_col['event_duration_x']

df_merge_col

Unnamed: 0_level_0,Unnamed: 1_level_0,time_x,time_end_x,time_y,time_end_y
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-22.0,150.75,1982-01-01,1982-01-03,1982-01-02,1982-01-08
-22.0,150.75,1982-01-01,1982-01-03,1982-01-18,1982-01-26
-22.0,150.75,1982-01-01,1982-01-03,1982-03-04,1982-03-12
-22.0,150.75,1982-01-01,1982-01-03,1982-03-16,1982-03-21
-22.0,150.75,1982-01-01,1982-01-03,1982-03-29,1982-04-05
...,...,...,...,...,...
-29.5,153.50,2018-12-13,2018-12-15,2018-03-30,2018-04-29
-29.5,153.50,2018-12-13,2018-12-15,2018-06-10,2018-06-16
-29.5,153.50,2018-12-13,2018-12-15,2018-08-08,2018-08-15
-29.5,153.50,2018-12-13,2018-12-15,2018-09-21,2018-09-25


In [18]:
#Calculating the overlap between THW and MHW events 
from datetime import datetime
from collections import namedtuple
Range = namedtuple('Range', ['start', 'end'])
overlap = []
for n in range (22207677):
    mhw = Range(start=df_merge_col['time_y'].iloc[n], end=df_merge_col['time_end_y'].iloc[n])
    thw = Range(start=df_merge_col['time_x'].iloc[n], end=df_merge_col['time_end_x'].iloc[n])
    latest_start = max(mhw.start, thw.start)
    earliest_end = min(mhw.end, thw.end)
    delta = (earliest_end - latest_start).days + 1
    b = max(0, delta)
    overlap.append(b)

In [19]:
mod_fd = df_merge_col.assign(overlap_days = overlap)
mod_fd

Unnamed: 0_level_0,Unnamed: 1_level_0,time_x,time_end_x,time_y,time_end_y,overlap_days
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
-22.0,150.75,1982-01-01,1982-01-03,1982-01-02,1982-01-08,2
-22.0,150.75,1982-01-01,1982-01-03,1982-01-18,1982-01-26,0
-22.0,150.75,1982-01-01,1982-01-03,1982-03-04,1982-03-12,0
-22.0,150.75,1982-01-01,1982-01-03,1982-03-16,1982-03-21,0
-22.0,150.75,1982-01-01,1982-01-03,1982-03-29,1982-04-05,0
...,...,...,...,...,...,...
-29.5,153.50,2018-12-13,2018-12-15,2018-03-30,2018-04-29,0
-29.5,153.50,2018-12-13,2018-12-15,2018-06-10,2018-06-16,0
-29.5,153.50,2018-12-13,2018-12-15,2018-08-08,2018-08-15,0
-29.5,153.50,2018-12-13,2018-12-15,2018-09-21,2018-09-25,0


In [20]:
#co-occurring events is where there is a minimum overlap of 1 day between THW and MHW events
co_events = mod_fd[mod_fd.overlap_days != 0].reset_index()
co_events

Unnamed: 0,latitude,longitude,time_x,time_end_x,time_y,time_end_y,overlap_days
0,-22.0,150.75,1982-01-01,1982-01-03,1982-01-02,1982-01-08,2
1,-22.0,150.75,1982-03-03,1982-03-08,1982-03-04,1982-03-12,5
2,-22.0,150.75,1982-03-12,1982-03-14,1982-03-04,1982-03-12,1
3,-22.0,150.75,1982-04-04,1982-04-06,1982-03-29,1982-04-05,2
4,-22.0,150.75,1983-04-04,1983-04-11,1983-03-31,1983-04-04,1
...,...,...,...,...,...,...,...
63609,-29.5,153.50,2017-09-03,2017-09-06,2017-08-30,2017-09-04,2
63610,-29.5,153.50,2017-09-12,2017-09-14,2017-09-08,2017-09-12,1
63611,-29.5,153.50,2017-09-27,2017-09-30,2017-09-27,2017-10-11,4
63612,-29.5,153.50,2018-02-13,2018-02-16,2018-02-15,2018-02-20,2


In [21]:
del co_events['time_y']
del co_events['time_end_y']
del co_events['overlap_days']

In [22]:
co_events

Unnamed: 0,latitude,longitude,time_x,time_end_x
0,-22.0,150.75,1982-01-01,1982-01-03
1,-22.0,150.75,1982-03-03,1982-03-08
2,-22.0,150.75,1982-03-12,1982-03-14
3,-22.0,150.75,1982-04-04,1982-04-06
4,-22.0,150.75,1983-04-04,1983-04-11
...,...,...,...,...
63609,-29.5,153.50,2017-09-03,2017-09-06
63610,-29.5,153.50,2017-09-12,2017-09-14
63611,-29.5,153.50,2017-09-27,2017-09-30
63612,-29.5,153.50,2018-02-13,2018-02-16


In [23]:
ds = xr.open_mfdataset('/g/data/e14/cp3790/Charuni/ERA5-MSL/era5_dailymsl_*.nc').sel(longitude=slice(90, 180), latitude=slice(0, -60))
mslp = ds['dmsl']/100
mslp.attrs['units'] = 'hPa'
mslp = mslp.chunk({'time':-1, 'latitude': 1, 'longitude':1})
mslp.load()

In [24]:
mslp_stack = mslp.stack(cell=('latitude', 'longitude'))

In [25]:
# Creating a new data array into which I subsequently save the mslp values
comp = xr.DataArray(np.random.randint(0, 100, size=(86760)),dims=["ncell"], coords=[mslp_stack.cell])

In [26]:
for x in mslp_stack.cell:
    mslp_ = mslp_stack.sel(cell=x)
    mslp_composite = climtas.event.map_events(mslp_, co_events, lambda x: x.mean().values) #this gives the means of individual events at a grid cell
    event_mean_mslp = mslp_composite.mean() #this gives the mean of all means 
    comp.loc[{"ncell":x}] = event_mean_mslp

# expectation: data array with average values at every grid cell which can then be plotted directly 

KeyError: 'time'

In [None]:
comp_da = comp.unstack("ncell")
comp_da