# Detecting heatwaves based on Ritwik Misra's MHW code 

## Import modules and define functions 

In [1]:
import dask as da
from dask.distributed import LocalCluster, Client
from datetime import date 
import glob 
import numpy as np
import pandas as pd
import scipy.ndimage as ndimage
import xarray as xr
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
local_dir = "/g/data/e14/cp3790/dask-workers"
cluster = LocalCluster(processes=False, local_dir=local_dir)
client = Client(cluster)
client


Port 8787 is already in use. 
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.


0,1
Client  Scheduler: inproc://10.0.64.10/18235/1  Dashboard: http://localhost:39701/status,Cluster  Workers: 1  Cores: 8  Memory: 33.67 GB


In [4]:
def duration(event_mask):
    # Finding the dates considered in event_mask & converting it to nanoseconds
    hw_dates = pd.to_numeric(event_mask['time'])
    # Creates hw_dur object which has a time series (in nanoseconds) at each grid point
    hw_dur = xr.broadcast(event_mask, hw_dates)[1]
    hw_dur.data = da.array.from_array(hw_dur.data)
    
    lbl = da.array.map_blocks(remove_false_events, hw_dur.data, event_mask.data, dtype = 'float')
    
    return xr.where(lbl > 0, event_mask, 0)

In [5]:
def day2ns(days):
    # Converts days to nanoseconds. Days -> Hours -> Seconds -> nanoseconds 
    return int(days * 24 * 3600 * 1e9)

In [6]:
def remove_false_events(hw_dur, event_mask, minDur = 3):
    struc = np.zeros((3,3,3))
    # Looking for consecutive tmax threshold crosses for only the time dimension:
    struc[:,1,1] = 1 # structure must always be centrosymmetric
    labeled_array, num_features = ndimage.label(event_mask, structure = struc)   
    indexs = da.array.arange(1, num_features + 1)
   
    # Calculates the max and min time points for all labeled points, i.e. the beggining and end dates for heatwave events 
    maxHW = ndimage.maximum(hw_dur, labels=labeled_array, index=indexs)
    minHW = ndimage.minimum(hw_dur, labels=labeled_array, index=indexs)

    # Calculates the length of all potential heatwave events.
    dur = maxHW - minHW + day2ns(1) 

    # converting minDur (default = 3) days to nanoseconds
    minDur_ns = day2ns(minDur) 

    # Finds the event numbers of heatwave events with duration < minDur_ns.
    # Adding 1 so that event number matches with value in labeled_array. 
    failed_ev_num = da.array.where(dur < minDur_ns)[0]  + 1 
    failed_ev_num = failed_ev_num.compute()

    # objs has the location of each labeled event 
    # i.e objs[0] is the location of the event 1, objs[10] location of event 11 and so on.
    objs = ndimage.find_objects(labeled_array.astype('int'))
    
    # failed_objs has the location for all the failed events in labeled_array 
    failed_objs = [objs[i - 1] for i in failed_ev_num]
    # Removes all the places where we had a failed heatwave
    for loc in failed_objs:
        labeled_array[loc] = 0

    return labeled_array

## Opening files

In [3]:
# Opens climatology,tmax threshold and heatwave_days files, that have been created before.
# heatwave_days is a masked data array, with 1s for days where the daily threshold has been exceeded

localDir = "/g/data/e14/cp3790/Charuni/"
thresh = xr.open_dataarray(localDir + 'threshold-australia-365.nc')
clim = xr.open_dataarray(localDir + 'climatology-australia-365.nc')

heatwave_days = xr.open_dataarray(localDir + 'heatwave-days-test.nc')

## Code

### Identifying heatwave events

In [10]:
heatwave_events = duration(heatwave_days)

In [11]:
heatwave_events

<xarray.DataArray (time: 1095, latitude: 137, longitude: 165)>
dask.array<where, shape=(1095, 137, 165), dtype=int64, chunksize=(742, 137, 165), chunktype=numpy.ndarray>
Coordinates:
  * longitude       (longitude) float32 113.0 113.25 113.5 ... 153.75 154.0
  * latitude        (latitude) float32 -10.0 -10.25 -10.5 ... -43.5 -43.75 -44.0
  * time            (time) datetime64[ns] 2009-01-01 2009-01-02 ... 2011-12-31
    dayofyear       (time) int64 ...
    event_duration  (time) int64 ...

In [12]:
heatwave_events.values

Function:  subgraph_callable
args:      (array([[[1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         1230768000000000000],
        [1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         1230768000000000000],
        [1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         1230768000000000000],
        ...,
        [1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         1230768000000000000],
        [1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         1230768000000000000],
        [1230768000000000000, 1230768000000000000, 1230768000000000000,
         ..., 1230768000000000000, 1230768000000000000,
         12307680000000

ValueError: shape mismatch: objects cannot be broadcast to a single shape