#### Calculate soil moisture (SM) deficit

In [3]:
# set up a cluster
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
# Describe the SLURM job used for each dask worker, attach a client to the
# cluster, then ask the cluster to scale to 4 cores in total.
cluster = SLURMCluster(
    cores=2,
    memory="31GB",
    walltime='03:00:00',  # an earlier setting was '01:30:00'
)
client = Client(cluster)
cluster.scale(cores=4)
# bare expression so the notebook renders the client summary
client

  from distributed.utils import tmpfile


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.128.148:44821,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [6]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def create_filepath(ds, prefix='filename', root_path="."):
    """
    Build a netCDF file path named after the time span covered by `ds`.

    Parameters
    ----------
    ds : xarray Dataset or DataArray
        Object with a `time` coordinate; only the first and last time
        values are read.
    prefix : str
        Leading part of the file name.
    root_path : str
        Directory the file name is appended to (joined with a single '/').

    Returns
    -------
    str
        '<root_path>/<prefix>_<YYYY-MM of first time>_<YYYY-MM of last time>.nc'
    """
    def _year_month(stamp):
        # 'YYYY-MM' label of one time value
        return stamp.dt.strftime("%Y-%m").data

    first_label = _year_month(ds.time[0])
    last_label = _year_month(ds.time[-1])
    return f'{root_path}/{prefix}_{first_label}_{last_label}.nc'

# --- paths and file-name prefixes ---
projdir = '/g/data/w97/ad9701/drought_2017to2020/drought_breakProb/awra/'
drght_dir = projdir + 'sm_droughts/'   # the yearly drought-event files are read from here
out_dir = projdir + 'sm_droughts/'     # the sm-deficit files are written to the same directory

prefix_sm = 'sm_deficits'
prefix_events = 'events'

# the Tinderbox drought region
# NOTE(review): the latitude slice runs from -20 to -44, which presumably
# matches a latitude coordinate stored in descending order - confirm.
lat_slice = slice(-20, -44)
lon_slice = slice(135, 154)

# get the full sm dataset
awra_dir = '/g/data/fj8/BoM/AWRA/DATA/SCHEDULED-V6/processed/values/day/'
file_names = 'sm_[1-2]*.nc'
# The spatial dimensions of these files are named 'latitude'/'longitude'
# (they are only renamed to 'lat'/'lon' inside the yearly loop), so the chunk
# sizes have to be keyed by those names to take effect when opening.
ds_temp = xr.open_mfdataset(awra_dir + file_names, chunks = {'latitude':400,'longitude':400})
# converting the datatypes of the SM coordinates (to float32) to match P
lat_new = ds_temp['latitude'].values.astype(np.float32)
lon_new = ds_temp['longitude'].values.astype(np.float32)

# get the threshold of sm required to end the drought from the reference percentile file
perc_drought_end = 0.3
ds_sm_perc = xr.open_dataset(projdir + 'sm_1980to2016_perc/sm_percentiles.nc')
da_sm_perc_end = ds_sm_perc['sm'].sel(quantile = perc_drought_end)

# performing calculations year by year to avoid memory errors
startyr_list = list(range(1911, 2022, 1))
# time scales; each value is multiplied by 7 days when looking ahead in the loop
ts_list = [4, 8, 12]

# The renamed, float32-coordinate view of the full soil moisture record does not
# depend on the year, so it is built once here instead of on every iteration.
ds_sm_all = ds_temp.rename({'latitude':'lat','longitude':'lon'}).assign_coords(lat = lat_new, lon = lon_new)

for styr in startyr_list:
    # get the data indicating drought grids (1 = drought)
    drght_files = prefix_events + '_' + str(styr) + '*.nc'
    # opened as a context manager so the event files are closed once this
    # year's output has been written
    with xr.open_mfdataset(drght_dir + drght_files) as ds_sm_drght:

        # subset the sm data for the same time period as the events
        enyr = ds_sm_drght['time.year'].values[-1]
        time_slice = slice(str(styr) + '-01-01', str(enyr) + '-12-31')
        ds_sm = ds_sm_all.sel(time = time_slice)
        ds_doy = np.array(ds_sm['time.dayofyear'])   # day of the year in ds, the sm dataset
        # soil moisture data array with 'dayofyear' as the co-ordinate variable
        # NOTE(review): this relies on reset_index exposing the former index as
        # 'time_', which depends on the xarray version in use - confirm before upgrading.
        da_sm = ds_sm['sm'].reset_index('time').rename({'time':'dayofyear', 'time_':'time'}).assign_coords({'dayofyear': ds_doy})
        da_sm = da_sm.sel(lat = lat_slice, lon = lon_slice)

        # the drought grids (drought = 1, no-drought = 0, it's a flag); this sets the
        # non-drought days to NaN. It does not depend on the time scale, so it is
        # computed once per year rather than once per time scale.
        da_sm_drght = ds_sm_drght['sm_drought'].where(ds_sm_drght['sm_drought'] > 0)

        sm_deficit_alltimescale = []
        # Now using the time scale under consideration, get the future day & the corresponding drought soil moisture threshold on that day
        for ts in ts_list:
            # time values of the future day (7*ts days ahead). The drought threshold corresponding to this day will be used to calculate the sm deficit.
            da_future_time = (ds_sm['time'] + np.timedelta64((7*ts), 'D'))
            da_future_time = da_future_time.assign_coords({'time': da_future_time.values})
            ds_future_doy = np.array(da_future_time['time.dayofyear'])

            # get the sm value for the drought ending based on future doy - assign the data coordinates of the current day so that it can be used to calculate sm_diff
            da_sm_end = da_sm_perc_end.sel(dayofyear = ds_future_doy).assign_coords({'dayofyear': ds_doy})
            # the sm deficit can be positive or negative
            # NOTE(review): same reset_index naming dependence as above ('dayofyear_').
            da_sm_diff = (da_sm_end - da_sm).reset_index('dayofyear').rename({'dayofyear':'time','dayofyear_':'dayofyear'})

            # get the soil moisture deficit at only the drought grids
            da_sm_diff_drght = (da_sm_diff * da_sm_drght).rename('sm_deficit')
            sm_deficit_alltimescale.append(da_sm_diff_drght)

        # stack the per-time-scale deficits along a new 'timescale' dimension and write one file per start year
        da_sm_deficit = xr.concat(sm_deficit_alltimescale, dim = "timescale").assign_coords({'timescale':ts_list}).rename('sm_deficit').chunk({'timescale':len(ts_list)})
        out_file_sm = create_filepath(da_sm_deficit, prefix = prefix_sm, root_path = out_dir)
        da_sm_deficit.to_netcdf(out_file_sm)

In [None]:
# Scratch inspection: show the soil moisture array of the last processed year
# after the 'time' index has been reset and the names swapped to 'dayofyear'.
(
    ds_sm['sm']
    .reset_index('time')
    .rename({'time':'dayofyear', 'time_':'time'})
)