## Finding the start of a flash drought using ET Criteria

In [1]:
#importing packages
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import dask.distributed as dsk

# Start a dask.distributed Client. dask runs together with xarray to improve performance.
# Only initialise it once per session.
# NOTE(review): the recorded output ("Perhaps you already have a cluster running?") suggests
# another cluster was already active when this cell ran - confirm it is executed only once per kernel.
clnt = dsk.Client()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 34781 instead


In [17]:
# Root directory of the AWRA model data.
awra_dir = '/g/data/fj8/BoM/AWRA/DATA/'


# Time slice and the single grid cell (latitude/longitude) this notebook works on.
# lat_sel / lon_sel are passed to .sel() without a tolerance further down, so they are
# looked up as exact coordinate labels.
time_slice = slice('1980-01-01', '2022-12-31')
lat_sel = -37
lon_sel = 148

In [18]:
# Load the filtered flash-drought (FD) start dates produced by the quantiles method.
fd = xr.open_dataset('/g/data/w97/aj8747/fd_summer_project/fd_sm_start_dates.nc')

# Display the non-missing 'sm_pct' values along time, i.e. how many potential
# FD start dates are available.
fd['sm_pct'].dropna(dim = 'time') 

In [19]:
# Convert the FD dataset to a pandas DataFrame.
df = fd.to_dataframe()
# (disabled) optionally drop the constant coordinate columns:
#df.drop(columns = ['latitude','longitude'], inplace=True)
# Add a running integer position for each row: 0, 1, ..., len(df) - 1.
df['num_index'] = range(0,len(df))

In [20]:
# Open the evapotranspiration ('etot') files as a single multi-file dataset.
etot_dir = awra_dir + 'SCHEDULED-V6/etot_*.nc'
ds_et = xr.open_mfdataset(etot_dir, chunks = {'time':'200mb'}, parallel = True)

# Restrict to the chosen grid point and time slice, then load that subset into memory.
da_et = ds_et.sel(latitude = lat_sel, longitude = lon_sel, time = time_slice)
da_et = da_et.load()

# ET data array (the 'etot' variable) at the selected point; shown as the cell output.
ET = da_et['etot']
ET

In [21]:
# Put the ET values into df as a new 'ET' column, aligned on the datetime index.
#
# One vectorised label lookup replaces the original per-row loop, which issued a
# separate ET.sel(time=row) call for every day in df. As with the loop, a date in
# df.index that is missing from ET raises KeyError. The result is stored as float64,
# matching the NaN-initialised column the loop used to fill.
df['ET'] = ET.sel(time=df.index.values).values.astype('float64')

df

Unnamed: 0_level_0,latitude,longitude,sm_pct,num_index,ET
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-01-01,-37.0,148.0,,0,2.306564
1980-01-02,-37.0,148.0,,1,2.605615
1980-01-03,-37.0,148.0,,2,3.518020
1980-01-04,-37.0,148.0,,3,5.368407
1980-01-05,-37.0,148.0,,4,3.738397
...,...,...,...,...,...
2022-12-27,-37.0,148.0,,15701,5.087646
2022-12-28,-37.0,148.0,,15702,3.178666
2022-12-29,-37.0,148.0,,15703,2.691792
2022-12-30,-37.0,148.0,,15704,2.631339


In [22]:
# Mean-ET climatologies over the whole record held in df:
#   clim_doy - mean ET for each day-of-year value of the index
#   clim_mon - mean ET for each calendar month of the index
clim_doy = df.groupby(df.index.dayofyear)['ET'].mean()
clim_mon = df.groupby(df.index.month)['ET'].mean()

In [66]:
# Flag the period leading up to each flash-drought (FD) candidate date whose ET
# meets the criterion.
#
# For every date with a non-missing 'sm_pct' value, if ET on that date is at least
# ET_CLIM_FRACTION of the day-of-year climatology and the look-back window starts
# strictly after the first date in df, copy the ET values of the window
# [date - LOOKBACK, date] into 'ET_above80pct'. Rows never covered by such a
# window stay NaN. Note that the column holds the ET values themselves, not a 0/1 flag.
ET_CLIM_FRACTION = 0.8                        # ET must be >= this fraction of the climatology
LOOKBACK = pd.to_timedelta(20, unit='d')      # length of the window preceding a candidate date

df['ET_above80pct'] = np.nan

# Only dates with a valid sm_pct can satisfy the criterion, so iterate over those
# instead of scanning every row of df.
fd_dates = df.index[df['sm_pct'].notna()]
for row in fd_dates:
    window_start = row - LOOKBACK
    meets_et = df.loc[row, 'ET'] >= clim_doy[row.dayofyear] * ET_CLIM_FRACTION
    if meets_et and window_start > df.index[0]:
        df.loc[window_start:row, 'ET_above80pct'] = df.loc[window_start:row, 'ET']

# dropna() removes rows with NaN in ANY column, so only rows that have both an
# sm_pct value and an ET_above80pct value are displayed here.
df.dropna().head(60)

# TODO: keep a flagged period only if it is 20 days long; discard periods shorter than 20 days.

Unnamed: 0_level_0,latitude,longitude,sm_pct,num_index,ET,ET_above80pct,ET_binary
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-02-15,-37.0,148.0,0.442254,45,3.664665,3.664665,0
1980-02-16,-37.0,148.0,0.430588,46,3.485145,3.485145,0
1980-03-14,-37.0,148.0,0.437388,73,1.840308,1.840308,0
1980-03-15,-37.0,148.0,0.430019,74,1.606545,1.606545,0
1980-03-16,-37.0,148.0,0.422903,75,1.535982,1.535982,0
1980-03-17,-37.0,148.0,0.414275,76,2.261897,2.261897,0
1981-04-11,-37.0,148.0,0.447263,466,1.546262,1.546262,0
1981-04-12,-37.0,148.0,0.440526,467,1.982643,1.982643,0
1981-09-01,-37.0,148.0,0.866418,609,1.464121,1.464121,0
1982-06-17,-37.0,148.0,0.694434,898,0.704742,0.704742,0


SyntaxError: 'return' outside function (3250908839.py, line 4)