#### Calculate reference period percentiles of gridded soil moisture (sm) for each day of the year
#### These percentiles will then be used as thresholds to classify sm and analyse relationships between sm & P/P-E

In [1]:
# List the names currently defined in the interactive namespace (none yet at this point)
%who

Interactive namespace is empty.


In [2]:
import xarray as xr
import numpy as np
import climtas
    
# Directory that receives the per-day percentile files
out_dir = '/g/data/w97/ad9701/p_prob_analysis/temp_files/sm_refPeriod_perc/'

# AWRA daily soil moisture (sm) input files, opened as a single dataset
awra_dir = '/g/data/fj8/BoM/AWRA/DATA/SCHEDULED-V6/processed/values/day/'
sm_files = 'sm_[1-2]*.nc'
ds_sm_temp = xr.open_mfdataset(awra_dir + sm_files)

# Reference period and spatial domain.
# For a quick test use a tiny domain instead, e.g.
# lat slice(-36.3, -36.2) and lon slice(148.9, 149).
time_slice = slice('1911-01-01', '2020-05-31')
lat_slice = slice(-32, -39)
lon_slice = slice(139, 152)

# Cast the SM coordinate values to float32 so that their datatype matches P
lat_new = ds_sm_temp['latitude'].values.astype(np.float32)
lon_new = ds_sm_temp['longitude'].values.astype(np.float32)

# Rename the coordinates to lat/lon and swap in the float32 values so they
# match the precip data, then subset to the domain and reference period
ds_sm = (
    ds_sm_temp
    .rename({'latitude': 'lat', 'longitude': 'lon'})
    .assign_coords(lat=lat_new, lon=lon_new)
    .sel(lat=lat_slice, lon=lon_slice, time=time_slice)
)

# function to get the julian day indices of a 31 day window around any selected day
import sys
def get31DayWindow(iday, julianDays = np.arange(366)+1):
    '''
    Return the 31 julian days centred on a selected day.

    The window holds the selected day plus the 15 days on either side of it.
    Near the start or end of the year the window wraps around, so for the
    default ``julianDays`` the window of day 1 runs 352..366 followed by 1..16.

    Parameters
    ----------
    iday : int
        Zero-based index into ``julianDays`` of the central day (0 to 365).
    julianDays : numpy.ndarray, optional
        1-D array of julian days to pick the window from. Defaults to the
        days 1..366. Wrap-around is relative to the length of this array.

    Returns
    -------
    numpy.ndarray
        A new array with the 31 entries of ``julianDays`` in window order
        (15 days before, the selected day, 15 days after). It never shares
        memory with ``julianDays``, so callers may modify it freely.

    Raises
    ------
    ValueError
        If ``iday`` is less than zero or greater than 365.
    '''
    # Raise a regular exception rather than calling sys.exit(), which would
    # raise SystemExit and try to stop the interpreter / notebook kernel.
    if iday < 0:
        raise ValueError("iday cannot be less than zero")
    if iday > 365:
        raise ValueError("iday cannot be greater than 365")

    # Indices of the 15 days before, the day itself and the 15 days after.
    # They may fall outside the array; mode='wrap' maps them back into range
    # (modulo the array length), which handles both ends of the year.
    window_idx = np.arange(iday - 15, iday + 16)

    # np.take returns a new array, so the (shared) default julianDays array
    # can never be modified through the returned window.
    return np.take(julianDays, window_idx, mode='wrap')

# Julian day labels 1..366; julianDays[iday] is the day that index iday refers to
julianDays = np.arange(1, 367)

# The full run will loop iday over np.arange(366); a single day is used for testing now
for iday in [0]:
    # julian days that fall inside the 31-day window around the selected day
    day_window = get31DayWindow(iday)

    # boolean mask along time: True where the day of year lies in the window.
    # Selecting with .sel(time=ds_sm.time.dt.dayofyear.isin(...)) plus rechunking
    # was tried as an alternative and seemed slower.
    time_ind = np.isin(ds_sm['time'].dt.dayofyear.values, day_window)
    da_sm = ds_sm['sm'].isel(time=time_ind)

    # percentiles of sm along time, computed from the days in the window
    da_sm_perc = climtas.approx_percentile(da_sm, [10, 20, 30, 50], 'time')

In [3]:
da_sm_perc

Unnamed: 0,Array,Chunk
Bytes,575.02 kiB,575.02 kiB
Shape,"(4, 141, 261)","(4, 141, 261)"
Count,774 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 575.02 kiB 575.02 kiB Shape (4, 141, 261) (4, 141, 261) Count 774 Tasks 1 Chunks Type float32 numpy.ndarray",261  141  4,

Unnamed: 0,Array,Chunk
Bytes,575.02 kiB,575.02 kiB
Shape,"(4, 141, 261)","(4, 141, 261)"
Count,774 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [4]:
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
# Set up a Dask cluster backed by SLURM; cores=4 and memory="31GB" are
# presumably the resources of each SLURM job - confirm against dask_jobqueue docs
cluster = SLURMCluster(cores=4,memory="31GB")
# Connect a Dask client to that cluster
client = Client(cluster)
# Ask the cluster to scale up to 16 cores in total
cluster.scale(cores=16)
# Show the client summary as the cell output; workers may not have started
# yet when this is displayed (the recorded output shows 0 workers)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.128.152:43737,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
# Write the percentiles to NetCDF in out_dir; the file name ends with the
# julian day, julianDays[iday]. This relies on iday and da_sm_perc still
# holding the values left by the loop cell above.
climtas.io.to_netcdf_throttled(da_sm_perc, f'{out_dir}sm_191101_to_202005_perc_day{julianDays[iday]}.nc')

  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
# Scale the cluster back to zero cores - presumably to release the SLURM
# workers once the write is done; the cluster and client objects are not closed here
cluster.scale(cores=0)