# Detecting heatwaves based on Ritwik Misra's code 

## Import modules and define functions 

In [16]:
from datetime import date 
import xarray as xr
import pandas as pd
import numpy as np
import dask as da
import glob 
import scipy.ndimage as ndimage
#import dask_image.ndmeasure <-- Would make computation faster - but not finished yet
from dask.distributed import LocalCluster, Client
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [2]:
local_dir = "/g/data/e14/cp3790/dask-workers"
cluster = LocalCluster(processes=False, local_dir=local_dir)
client = Client(cluster)
client




0,1
Client  Scheduler: inproc://10.0.64.6/7391/1  Dashboard: http://localhost:8787/status,Cluster  Workers: 1  Cores: 8  Memory: 33.67 GB


In [58]:
# This function will make the clim and thresh repeatable so that the shapes of clim, thresh and obs are equal.

def fix(ds):
    trial = ds
    i = 0
    while i < 3:  #<-- Here, 3 corresponds to the number of years in observation dataset 
        trial = xr.concat([trial, ds], 'dayofyear')
        i+=1
    trial = trial.isel(dayofyear = slice(0,1095))
    
    # to specify the dates/time 
    trial.coords['dayofyear'] = np.arange(date(2009,1,1).toordinal(),date(2011,12,31).toordinal())
    
    # This code was used to rename the dayofyear dimension to time.
    trial['time'] = trial['dayofyear']
    del trial['dayofyear']
    trial = trial.rename({'dayofyear': 'time'})
    
    return trial

In [65]:
def duration(severity, join_gaps = True):
    # Finding the dates considered in severity & converting it to nanoseconds
    hw_dates = pd.to_numeric(severity['time'])
    # Creates hw_dur object which has a time series (in nanoseconds) at each grid point
    hw_dur = xr.broadcast(severity, hw_dates)[1]
    hw_dur.data = da.array.from_array(hw_dur.data)
    
    lbl = da.array.map_blocks(remove_false_events, hw_dur.data, severity.data, dtype = 'float')
    
    return xr.where(lbl > 0, severity, 0)

In [60]:
def day2ns(days):
    # Converts days to nanoseconds. Days -> Hours -> Seconds -> nanoseconds 
    return int(days * 24 * 3600 * 1e9)

In [61]:
def remove_false_events(hw_dur, hw_days, minDur = 3):
    struc = np.zeros((3,3,3))
    # Looking for consecutive tmax threshold crosses for only the time dimension:
    struc[:,1,1] = 1
    labeled_array, num_features = ndimage.label(hw_days, structure = struc)   
    indexs = da.array.arange(1, num_features + 1)
   
    # Calculates the max and min time points for all labeled points, i.e. the beggining and end dates for heatwave events 
    maxHW = ndimage.maximum(hw_dur, labels=labeled_array, index=indexs)
    minHW = ndimage.minimum(hw_dur, labels=labeled_array, index=indexs)

    # Calculates the length of all potential heatwave events.
    dur = maxHW - minHW + day2ns(1) 

    # converting minDur (default = 3) days to nanoseconds
    minDur_ns = day2ns(minDur) 

    # Finds the event numbers of heatwave events with duration < minDur_ns.
    # Adding 1 so that event number matches with value in labeled_array. 
    failed_ev_num = da.array.where(dur < minDur_ns)[0]  + 1 
    failed_ev_num = failed_ev_num.compute()

    # objs has the location of each labeled event 
    # i.e objs[0] is the location of the event 1, objs[10] location of event 11 and so on.
    objs = ndimage.find_objects(labeled_array.astype('int'))
    
    # failed_objs has the location for all the failed events in labeled_array 
    failed_objs = [objs[i - 1] for i in failed_ev_num]
    # Removes all the places where we had a failed heatwave
    for loc in failed_objs:
        labeled_array[loc] = 0

    return labeled_array

## Opening files

In [29]:
# Opens Climatology and tmax threshold files, that have been created before.

localDir = "/g/data/e14/cp3790/Charuni/"
thresh = xr.open_dataarray(localDir + 'threshold-australia.nc')
clim = xr.open_dataarray(localDir + 'climatology-australia.nc')

In [13]:
# Opens ERA5 data, from which we need the tmax, for years 2009-2011.

files = sorted(glob.glob('/g/data/e14/cp3790/Charuni/ERA5-new/era5_dailytmax_*.nc'))

era5_dailytmax_aus = xr.open_mfdataset(files, combine='by_coords').sel(time=slice('2009', '2011'), longitude=slice(140, 145), latitude=slice(-32, -34))
daily_tmax = era5_dailytmax_aus["dmax"].load() 
daily_tmax.attrs['units'] = 'deg C'

## Code

### Generate new_climatology and new_threshold so that their shapes are equal to the obs dataset 

In [30]:
new_climatology = fix(clim)
new_threshold = fix(thresh) 

### Generate a data array of 1s and 0s (masking days on which the threshold has been exceeded)

In [66]:
heatwave_days = (daily_tmax > new_threshold).astype(int)

In [67]:
heatwave_events = duration(heatwave_days)

In [68]:
heatwave_events.plot()

Function:  subgraph_callable
args:      (array([[[[1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000000, 1230768000000000000],
         [1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000000, 1230768000000000000],
         [1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000000, 1230768000000000000],
         ...,
         [1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000000, 1230768000000000000],
         [1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000000, 1230768000000000000],
         [1230768000000000000, 1230768000000000000,
          1230768000000000000, ..., 1230768000000000000,
          1230768000000000

Function:  subgraph_callable
args:      (array([[[[1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000000, 1287532800000000000],
         [1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000000, 1287532800000000000],
         [1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000000, 1287532800000000000],
         ...,
         [1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000000, 1287532800000000000],
         [1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000000, 1287532800000000000],
         [1287532800000000000, 1287532800000000000,
          1287532800000000000, ..., 1287532800000000000,
          1287532800000000

RuntimeError: structure and input must have equal rank