This preprocessing notebook takes 6-hourly AR data from tARget v3, and selects only the times where an AR crosses the 1 km threshold in HMA.

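Using the trackID (the `kidmap` variable) from the AR catalog, each AR is then given a start date, end date, and duration within the HMA region, matched to landslide events, and paired with WRF IVT and precipitation statistics.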

### Import

In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
import metpy.calc as mpcalc
from metpy.units import units

# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules
from ar_funcs import get_topo_mask
from timeseries import select_months_ds, select_months_df

In [2]:
# Locations used throughout the notebook
path_to_figs = '../figs/'                # figures
path_to_out  = '../out/'                 # output files (numerical results, intermediate datafiles) -- read & write
path_to_data = '/home/nash/DATA/data/'   # project data -- read only
# machine the notebook runs on; the WRF loading cell picks its data directory from this
server = 'great'

### Get list of AR dates and trackIDs when an AR crosses 1000 m elevation threshold in HMA

In [3]:
# Parameters for picking out DJF ARs over HMA terrain at or above the elevation threshold
bbox = [20, 40, 65, 97]                 # HMA region as [lat1, lat2, lon1, lon2]
lat1, lat2, lon1, lon2 = bbox
start_date, end_date = '1979-12-01 0:00', '2015-02-28 18:00'
start_mon, end_mon = 12, 2
elev_thres = 1000.

# Open the global AR catalog lazily, drop singleton dimensions and the
# leftover lev/ens coordinates, then cut to the date range and bounding box
filename = path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
ds = (
    xr.open_dataset(filename, chunks={'time': 1460}, engine='netcdf4')
    .squeeze()
    .reset_coords(names=['lev', 'ens'], drop=True)
    .sel(time=slice(start_date, end_date), lat=slice(lat1, lat2), lon=slice(lon1, lon2))
)
# keep only the months from start_mon through end_mon
ds = select_months_ds(ds, start_mon, end_mon)

# Blank out every grid cell whose `bedrock` value from get_topo_mask is below elev_thres
# (presumably bedrock is elevation in metres on the same grid as ds -- TODO confirm)
mask = get_topo_mask(ds.lat, ds.lon)
ds = ds.where(mask.bedrock >= elev_thres)

# Flatten kidmap to one row per (time, lat, lon), discard the NaN cells,
# and collect the distinct trackIDs present at each time step
df = ds.kidmap.to_dataframe(dim_order=['time', 'lat', 'lon']).dropna(axis='rows')
trackID = df.groupby('time')['kidmap'].unique()
# trackID # all trackIDs that remain after the elevation mask


In [4]:
# One row per time step, with the array of trackIDs found at that time
id_df = trackID.to_frame().reset_index()
# Daily key (YYYY-MM-DD string) placed in front of the original 6-hourly 'time' column
day_label = pd.to_datetime(id_df['time']).dt.strftime("%Y-%m-%d")
id_df.insert(0, 'date', day_label)
# Unpack the trackID arrays so that every (time, trackID) pair gets its own row
id_df = id_df.explode('kidmap')
# id_df

In [5]:
# AR type catalogue (from Nash et al. 2021), restricted to the analysis months
filepath = path_to_out + 'AR-types_ALLDAYS.csv'
ar_cat = pd.read_csv(filepath).rename(columns={'Unnamed: 0': 'date'})
ar_cat = ar_cat.set_index(pd.to_datetime(ar_cat['date']))
ar_cat = select_months_df(ar_cat, start_mon, end_mon)
# swap the datetime index for YYYY-MM-DD strings and turn it into the 'date' column
ar_cat.index = ar_cat.index.strftime("%Y-%m-%d")
ar_cat = ar_cat.drop(columns=['date']).reset_index()
# keep the rows with a positive AR_CAT
idx = ar_cat['AR_CAT'].gt(0)
ar_cat = ar_cat[idx]

# ar_cat

In [6]:
# Outer-join the per-time trackIDs with the daily AR types on the YYYY-MM-DD 'date' key
merge_ar = id_df.merge(ar_cat, how='outer', on='date')
ar_dates = merge_ar['time'].unique()      # distinct AR date/times (for later)
track_ids = merge_ar['kidmap'].unique()   # distinct AR track IDs
# merge_ar

In [7]:
# One row per (AR type, trackID): first and last time step inside the HMA region
# and the span between them in hours
data = []
for cat in (1, 2, 3):
    cat_rows = merge_ar.loc[merge_ar.AR_CAT == cat]

    for track in track_ids:
        times = cat_rows.loc[cat_rows.kidmap == track, 'time']
        first_step = pd.to_datetime(times.min())
        # the end of the event is taken as 6 hours after its last time step
        last_step = pd.to_datetime(times.max()) + timedelta(hours=6)
        n_hours = (last_step - first_step).total_seconds() / 3600
        data.append([track, cat, first_step, last_step, n_hours])

# (type, trackID) combinations that never occur have NaT dates and are dropped here
duration_df = pd.DataFrame(
    data, columns=['trackID', 'ar_cat', 'start_date', 'end_date', 'duration']
).dropna()
duration_df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration
1,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0
2,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0
3,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0
4,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0
5,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0
...,...,...,...,...,...
2367,114217.0,3,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0
2373,114363.0,3,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0
2374,114402.0,3,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0
2379,114602.0,3,2015-02-24 00:00:00,2015-02-27 06:00:00,78.0


### Landslide DF

In [8]:
def expand_grid(lat,lon):
    '''List every (lat, lon) combination as rows of a DataFrame.

    Parameters
    ----------
    lat, lon : iterable
        Latitude and longitude values to combine.

    Returns
    -------
    pandas.DataFrame
        Columns 'lat' and 'lon', one row per pair, sorted by lat and then lon,
        with a fresh 0..n-1 index. Empty (but with both columns) when either
        input has no elements.
    '''
    pairs = [(A, B) for A in lat for B in lon]
    # reshape(-1, 2) keeps the array two-dimensional even when there are no
    # pairs, so the column slicing below does not fail on empty input
    grid = np.array(pairs).reshape(-1, 2)
    full_grid = pd.DataFrame({'lat': grid[:, 0], 'lon': grid[:, 1]})
    full_grid = full_grid.sort_values(by=['lat', 'lon'])
    return full_grid.reset_index(drop=True)

In [9]:
fname = path_to_data + 'CH2_generated_data/Global_Landslide_Catalog_Export.csv' #TODO check this - is it the raw downloaded data?
landslide = pd.read_csv(fname)

# Bounding box of the Southern Asia region
lonmin, lonmax = 65, 100
latmin, latmax = 20, 42

## Landslides inside the box (both edges inclusive)
idx = landslide.latitude.between(latmin, latmax) & landslide.longitude.between(lonmin, lonmax)
landslide = landslide.loc[idx]
# index by the parsed event time
landslide = landslide.set_index(pd.to_datetime(landslide.event_date))
# landslide.index = landslide.index.normalize()

# December through May only
idx = landslide.index.month.isin([12, 1, 2, 3, 4, 5])
landslide = landslide[idx]

# The raw date string becomes 'event_time'; resetting the index then brings the
# parsed datetimes back as a column named 'event_date'
landslide = (
    landslide
    .rename(columns={"latitude": "lat", "longitude": "lon", "event_date": "event_time"})
    .reset_index()
)

# snap each event to the nearest 6-hourly step and index by it
landslide['time'] = landslide['event_date'].dt.round('6H')
landslide = landslide.set_index(pd.to_datetime(landslide.time))

# Same December-May filter again, now on the rounded times
# (rounding can push a timestamp across a month boundary)
idx = (landslide.index.month >= 12) | (landslide.index.month <= 5)
landslide = landslide[idx]

# landslide

In [10]:
# Goal: find out whether an AR is present at the time and place of each landslide.
# Reopen the AR catalog to get at its trackID field (kidmap).
filename = path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
ar = xr.open_dataset(filename, engine='netcdf4').squeeze()

# December-May time steps only
ar_month = ar.time.dt.month
idx = (ar_month >= 12) | (ar_month <= 5)

# trackID for indexing, cut to the dates so both ds match
kid = ar.kidmap.sel(time=idx).sel(time=slice('1979-12-01 00', '2019-05-31 00:00'))
# kid

In [11]:
## for each landslide, if its lat/lon/time falls within an AR, keep that AR ID and landslide ID
landslideID = []
arID = []
landslide_lat = []
landslide_lon = []
# DataFrame.iterrows() yields the same (index, row) pairs as the former
# landslide.T.iteritems(); iteritems() was removed in pandas 2.0
for i, row in landslide.iterrows():
    # trackID at the catalog grid cell and time step nearest to the landslide
    t = kid.sel(lat=row['lat'], lon=row['lon'], time=row['time'], method='nearest').values
    # NaN (no AR at that cell) compares False here, so only real trackIDs pass
    if t > 0:
        landslideID.append(row['event_id'])
        arID.append(float(t))
        landslide_lat.append(row['lat'])
        landslide_lon.append(row['lon'])

d = {'landslideID': landslideID, 'trackID': arID,
     'landslide_lat': landslide_lat, 'landslide_lon': landslide_lon}
landslide_df = pd.DataFrame(data=d)
# make sure trackID is float64 (also when no landslide matched and the column is empty)
landslide_df = landslide_df.astype({'trackID': 'float64'})

# landslide_df

In [12]:
# Join the AR duration table and the landslide table on their shared trackID
merged_data = duration_df.merge(landslide_df, how='outer', on='trackID')
# merged_data
# Rows that come out of the outer join without a date or time are landslides
# tied to an AR that was not considered a "HMA AR"

In [13]:
## test to make sure merged correctly
# idx = merged_data.landslideID > 0
# test = merged_data[idx]
# test

In [14]:
# keep only the rows that belong to a HMA AR, i.e. have a positive ar_cat
idx = merged_data['ar_cat'].gt(0)
merged_data = merged_data[idx]
merged_data

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,landslideID,landslide_lat,landslide_lon
0,2861.0,1.0,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,,,
1,2861.0,2.0,1979-12-01 00:00:00,1979-12-02 00:00:00,24.0,,,
2,2871.0,1.0,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,,,
3,2871.0,2.0,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,,,
4,2975.0,1.0,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,,,
...,...,...,...,...,...,...,...,...
983,114196.0,3.0,2014-12-31 18:00:00,2015-01-03 12:00:00,66.0,,,
984,114217.0,3.0,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,,,
985,114363.0,3.0,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,,,
986,114402.0,3.0,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,,,


## load 2D WRF data

In [15]:
## Read the WRF lat/lon coordinates once from a single file per variable so they
## can be assigned to every yearly file when the datasets are opened in the loop below
fname = path_to_data + 'wrf_hasia/d01/ivt/3hr/tmp_2015.nc'
# the with-block closes the file as soon as the coordinate arrays are in memory
with xr.open_dataset(fname) as tmp:
    wrflats = tmp.lat.values
    wrflons = tmp.lon.values

## same for the d02 precipitation files
fname = path_to_data + 'wrf_hasia/d02/prec/3hr/tmp_2014.nc'
with xr.open_dataset(fname) as tmp:
    wrflats2 = tmp.lat.values
    wrflons2 = tmp.lon.values



In [16]:
%%time
def _keep_current_year(ds, lons, lats):
    '''Overwrite the lon/lat coordinates of ds and trim it to its latest calendar year.

    The year is the maximum year found in ds.time. A dataset whose latest year
    is 1980 is returned untrimmed (presumably because that file also carries
    the preceding December -- TODO confirm); any other dataset is cut to
    01 Jan 00:00 through 31 Dec 21:00 of that year.
    '''
    year = ds.time.dt.year.max().values
    ds = ds.assign_coords({"lon": lons, "lat": lats})
    if year == 1980:
        return ds
    return ds.sel(time=slice('{0}-01-01 00:00'.format(year), '{0}-12-31 21:00'.format(year)))

def preprocess_ivt(ds):
    '''keep only the current year, using the lon/lat arrays read for the IVT files (wrflons, wrflats)'''
    return _keep_current_year(ds, wrflons, wrflats)

def preprocess_prec(ds):
    '''keep only the current year, using the lon/lat arrays read for the precipitation files (wrflons2, wrflats2)'''
    return _keep_current_year(ds, wrflons2, wrflats2)

domains = ['d01', 'd02']
varname_lst = ['ivt', 'prec']

# AR time steps without missing values. The outer merge that produced ar_dates
# can leave a NaT in the list; it is removed explicitly here instead of assuming
# it is the last element.
valid_ar_dates = ar_dates[~pd.isnull(ar_dates)]

## loop through each ds
ds_lst = []
for dom, varname in zip(domains, varname_lst):
    print(varname)
    if server == 'great':
        data_path = path_to_data + 'wrf_hasia/'
    else:
        data_path = path_to_data + 'wrf_preprocessed_data/wrf_6km/'

    filename_pattern = '{0}/{1}/3hr/tmp_*.nc'.format(dom, varname)
    fname = data_path + filename_pattern

    if varname == 'ivt':
        ds = xr.open_mfdataset(fname, preprocess=preprocess_ivt)
        # IVT magnitude from its two components
        ds = ds.assign(ivt=lambda ds: np.sqrt(ds.ivtu**2 + ds.ivtv**2))
    elif varname == 'prec':
        ds = xr.open_mfdataset(fname, preprocess=preprocess_prec)
        # value at the next time step minus value at the current one, i.e. the
        # change over one output interval (the files sit under 3hr/, so
        # presumably mm per 3 h -- TODO confirm)
        ds = ds.shift(time=-1) - ds
    else:
        # fail loudly rather than silently reusing the previous iteration's ds
        raise ValueError('no loading rule for variable {0!r}'.format(varname))

    # subset to just the AR time steps
    ds = ds.sel(time=valid_ar_dates)

    ds_lst.append(ds)

ivt = ds_lst[0]
prec = ds_lst[1]


ivt
prec
CPU times: user 1.9 s, sys: 114 ms, total: 2.02 s
Wall time: 2.11 s


In [17]:
# ## Having trouble with wrf ds not having all the dates in the ar_dates list 
# ## use this to find out which dates are having a problem
# # make a pandas dataframe of AR Dates
# d = {'dates': ar_dates[:-1]}
# df_A = pd.DataFrame(data=d)
# df_A = df_A.set_index(pd.to_datetime(df_A['dates'])) # reset the index as "dates"


# # make a pandas dataframe of WRF dates
# d = {'dates': ivt.time}
# df_B = pd.DataFrame(data=d)
# df_B = df_B.set_index(pd.to_datetime(df_B['dates'])) # reset the index as "dates"

# # test = df_A.isin(df_B)

# x = df_A.index
# y = df_B.index
# test = x.isin(y)

# idx = (test== False)
# df_A.loc[idx]


In [36]:
# Sub-region boxes, each given as [lonmin, lonmax, latmin, latmax]
# (ar_ivt and ar_precip use items 0-1 for the lon slice and items 2-3 for the lat slice)
domains = [
    [71, 79, 32, 37],   # Western precip anomalies
    [69, 74, 37, 40],   # Northwestern precip anomalies
    [90, 99, 24, 30],   # Eastern precip anomalies
]
ext1, ext2, ext3 = domains

region_name = ['western', 'northwestern', 'eastern']

In [37]:
# %%time
# # make a ds for each subregion
# ds_lst = []
# for i, bnds in enumerate(domains):
#     tmp = ivt.sel(lat=slice(bnds[2], bnds[3]), lon=slice(bnds[0], bnds[1])) 
#     ds_lst.append(tmp)
# ds_lst

In [44]:
def ar_ivt(df, ds, domains):
    '''Find the IVT maximum, and the IVT direction at that maximum, for every AR event.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per AR event with columns `ar_cat`, `start_date` and `end_date`.
    ds : xarray.Dataset
        Holds `ivt`, `ivtu` and `ivtv` and can be selected by `time`, `lat`, `lon`.
    domains : list
        One box per AR type, each `[lonmin, lonmax, latmin, latmax]`; the box for
        an event is `domains[ar_cat - 1]`.

    Returns
    -------
    list
        `[ivtdir_vals, ivt_vals]`, two lists with one entry per row of `df`:
        the direction computed by `metpy.calc.wind_direction` from the u/v
        components at the maximum, and the maximum IVT itself. Events for which
        `ds` has no time step between start and end get NaN in both lists.
    '''
    ivtdir_vals = []
    ivt_vals = []
    events = zip(df.ar_cat.values, df.start_date.values, df.end_date.values)
    # loop through each AR event
    for i, (arcat, start, end) in enumerate(events):
        print(i)
        # get bbox based on ar_cat
        bnds = domains[int(arcat)-1]
        # select only the time steps for AR event and specified domain
        tmp = ds.sel(time=slice(start, end), lat=slice(bnds[2], bnds[3]), lon=slice(bnds[0], bnds[1]))

        if tmp.time.size == 0:
            # nothing to search; record the gap instead of failing further down
            ivtdir_vals.append(np.nan)
            ivt_vals.append(np.nan)
            continue

        # pull the subset into memory once, so the maximum and the mask below
        # are not each computed lazily
        tmp = tmp.load()

        ### localized IVT maxima during event
        event_max = tmp.where(tmp.ivt==tmp.ivt.max(), drop=True).squeeze()
        ## pull IVT and IVTDIR where ivt is max
        # wind_direction needs quantities with units; the same unit label is
        # attached to both components, as before
        uvec = units.Quantity(event_max.ivtu.values, "m/s")
        vvec = units.Quantity(event_max.ivtv.values, "m/s")
        ivtdir = mpcalc.wind_direction(uvec, vvec)
        ivtdir_vals.append(ivtdir.item())
        ivt_vals.append(event_max.ivt.values.tolist())

    final = [ivtdir_vals, ivt_vals]

    return final

In [None]:
%%time
## For every AR event, find the IVT maximum inside its sub-region between the event's start and end
ivt_final = ar_ivt(df=merged_data, ds=ivt, domains=domains)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [48]:
## add the per-event IVT results to the existing df
# ivt_final is [directions, magnitudes]
ivtdir_vals, ivt_vals = ivt_final
merged_data['ivt'] = ivt_vals
merged_data['ivtdir'] = ivtdir_vals

# # Export dataframe as csv
# outfile = path_to_out + 'IVTDIR_IVT_new.csv'     
# merged_data.to_csv(outfile)

Unnamed: 0,ivt,ivtdir,trackID
0,152.087860,224.91021331187994 degree,2861.0
1,245.002441,220.23760859065467 degree,2861.0
2,108.576300,231.68037841051355 degree,2871.0
3,149.770879,237.0652245325259 degree,2871.0
4,69.228194,270.4320425315212 degree,2975.0
...,...,...,...
983,340.884138,240.43672607892813 degree,114196.0
984,375.217412,250.3772879757895 degree,114217.0
985,61.715181,188.15285286031184 degree,114363.0
986,265.416884,277.25970342681023 degree,114402.0


In [54]:
def ar_precip(df, ds, domains, mode):
    '''Calculate one precipitation statistic per AR event for its sub-region.

    For each event the `prec` field of `ds` is summed over the event's time
    steps inside the event's box, and grid cells whose event total is not above
    1 are masked out (1 mm according to the original notes) and excluded from
    every statistic. No area weighting is applied.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per AR event with columns `ar_cat`, `start_date` and `end_date`.
    ds : xarray.Dataset
        Holds `prec` and can be selected by `time`, `lat`, `lon`.
    domains : list
        One box per AR type, each `[lonmin, lonmax, latmin, latmax]`; the box for
        an event is `domains[ar_cat - 1]`.
    mode : str
        'mean-total'       mean of the remaining grid cells
        'max-total'        largest remaining grid cell
        'percentile-total' mean of the grid cells above the 95th percentile
                           of the remaining cells

    Returns
    -------
    list
        One value per row of `df`.

    Raises
    ------
    ValueError
        If `mode` is not one of the three names above.
    '''
    valid_modes = ('mean-total', 'max-total', 'percentile-total')
    if mode not in valid_modes:
        raise ValueError('mode must be one of {0}, got {1!r}'.format(valid_modes, mode))

    # the final precip statistic to retain
    m1_vals = []

    events = zip(df.ar_cat.values, df.start_date.values, df.end_date.values)
    for i, (arcat, start, end) in enumerate(events):
        print(i)
        # get bbox based on ar_cat
        bnds = domains[int(arcat)-1]
        # select only the time steps for AR event and specified domain
        tmp = ds.sel(time=slice(start, end), lat=slice(bnds[2], bnds[3]), lon=slice(bnds[0], bnds[1]))

        ### event-total precipitation for every grid cell
        tmp = tmp.sum('time')
        ### mask out grid cells whose event total is not above 1
        tmp2 = xr.where(cond=(tmp.prec > 1), x=tmp.prec, y=np.nan)

        if mode == 'mean-total':
            # average the masked field (a DataArray); averaging the whole
            # Dataset would include the masked-out cells and has no `.values` array
            stat = tmp2.mean(['lat', 'lon'], skipna=True)
        elif mode == 'max-total':
            ### localized precip maximum during event
            stat = tmp2.max(['lat', 'lon'])
        else:
            ### 95th percentile threshold (linear interpolation is the default)
            q_thres = tmp2.quantile(0.95, dim=['lat', 'lon'])
            ## mask out grid cells at or below the threshold
            perc_prec = xr.where(cond=(tmp2 > q_thres), x=tmp2, y=np.nan)
            # average over the remaining grid cells, skipping nans
            stat = perc_prec.mean(['lat', 'lon'], skipna=True)

        m1_vals.append(stat.values.tolist())

    return m1_vals

In [55]:
%%time
## For every AR event, take the largest event-total precipitation inside its sub-region
prec_final = ar_precip(df=merged_data, ds=prec, domains=domains, mode='max-total')



0
1
2
3
4


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


5
6
7
8
9


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


10
11
12
13
14
15
16
17
18
19
20
21


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


46
47
48
49


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


71


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


98


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


99
100
101
102
103
104
105
106
107
108


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


133
134
135
136
137
138


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


139
140


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


141
142


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


143
144
145
146


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


147
148


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


149
150
151
152
153
154
155
156
157
158


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


159
160
161
162
163
164
165


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


166
167
168
169
170
171
172
173
174
175


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


176
177


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


178
179


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


180


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


181


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


182
183
184
185
186


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


187
188
189
190
191
192
193
194
195
196


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


197
198
199
200
201
202
203


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


204


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


205


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


206
207
208
209
210
211
212
213
214


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


215
216
217


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


218


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


219
220
221
222
223
224
225
226
227
228
229


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


230
231
232


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


233


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


234
235


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


236


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


262
263


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


264
265
266
267
268
269
270
271


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


272
273
274
275
276
277
278
279
280
281


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


282
283
284
285
286
287
288
289


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


309
310
311


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


312


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


313
314
315
316
317
318
319
320
321


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


322
323
324
325
326
327
328


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


355
356


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


357


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


358


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


359
360
361
362


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


363
364
365
366
367
368
369
370
371
372
373
374
375


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


376
377


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


378
379
380
381


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


382
383
384
385
386
387
388


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


389
390
391
392
393
394
395
396


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


397
398
399
400
401
402


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


403
404


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


405
406
407
408
409
410
411
412
413


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


414
415
416
417
418
419


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


420
421
422


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


423


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


460
461
462
463
464
465
466
467
468


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


469


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


470
471
472
473
474


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


475
476
477
478
479
480
481
482
483
484


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


485
486


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


514
515
516


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


517


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


518
519
520
521
522
523
524
525
526
527
528
529
530
531
532


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


533
534


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


535


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


536
537
538
539
540
541
542
543
544
545
546
547


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


548
549
550
551
552
553


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


554
555
556
557
558


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


559
560
561
562
563
564
565
566
567
568
569


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


570
571
572
573
574


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


575
576


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


577
578
579
580
581
582
583
584


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


585
586
587
588
589


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


590
591


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


592
593
594
595
596
597
598
599
600
601


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


619
620
621
622
623
624


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


625
626
627
628
629


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


630
631
632
633
634
635
636
637
638
639
640


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


661
662


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


663


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


664


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


665
666
667
668
669
670
671


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


672


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


673
674
675
676
677
678


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


679
680


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


681
682
683
684
685
686
687
688
689


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


690
691
692
693
694
695
696
697
698
699


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


700


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


701
702
703


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


704


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


705
706


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


707
708


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


709
710


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


711


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


712
713
714
715
716
717


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


718


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


719


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


720


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


721
722
723
724
725
726


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


727
728
729
730
731
732
733
734
735
736
737
738
739
740


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


741


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


742
743
744
745
746


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


747
748
749
750


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


751


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


752
753
754


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


755


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


756
757
758


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


759


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


760


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


761


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


762


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


763
764
765
766
767
768


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


769
770
771


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


772
773
775
776
777
778


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


779
780


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


781
782
783
784
785
786
787


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


788
789
790
791


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


792


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


793
794
795
796
797
798
799
800
801
802
803


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


804
805


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


806
807


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


808
809
810
811
812
813
814
815
816
817


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


818


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


819
820
821
822
823


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


841


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


842
843
844
845


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


846


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


847
848
849
850
851
852


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


853
854
855
856
857
858
859


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


860
861


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


862


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


863


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


864
865
866


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


867
868
869


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


870
871


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


872
873


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


874


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


875


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


876


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


877
878


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


879


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


880
881
882
883
884
885
886
887
888
889


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


890


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


891
892


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


893


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


894
895


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


896


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


897
898
899


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


900
901


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


902


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


903
904


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


905


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


906


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


907
908
909
910
911
912
913
914
915
916
917


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


918


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


919
920
921
922


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


923
924


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


925


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


926


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


927


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


928


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


929
930
931
932


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


933


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


934


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


935


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


936


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


937


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


938


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


939
940
941
942
943


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


944


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


945
946


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


947
948
949
950


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


951
952


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


953
954
955
956
957
958
959
960
961


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


962
963


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


964


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


965


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


966


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


967


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


968
969


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


970


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


971


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


972


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


973
974
975


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


976


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


977


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


978
979
980


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


981


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


982
983
984
985


  return func(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))


986
987
CPU times: user 37min 40s, sys: 1h 38min 52s, total: 2h 16min 32s
Wall time: 2h 22min 59s


In [61]:
# Store the values held in `prec_final` as the 'prec' column of merged_data.
# NOTE(review): presumably prec_final holds one value per row of merged_data,
# in the same row order -- confirm against the cell that fills it.
merged_data['prec'] = prec_final

# Optional CSV export of the merged table (currently disabled)
# # Export dataframe as csv
# outfile = path_to_out + 'IVTDIR_IVT_prec.csv'     
# merged_data.to_csv(outfile)

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,landslideID,landslide_lat,landslide_lon,prec,ivt,ivtdir
0,2861.0,1.0,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,,,,13.718994,152.087860,224.91021331187994 degree
1,2861.0,2.0,1979-12-01 00:00:00,1979-12-02 00:00:00,24.0,,,,17.533569,245.002441,220.23760859065467 degree
2,2871.0,1.0,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,,,,5.676636,108.576300,231.68037841051355 degree
3,2871.0,2.0,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,,,,8.985474,149.770879,237.0652245325259 degree
4,2975.0,1.0,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,,,,,69.228194,270.4320425315212 degree
...,...,...,...,...,...,...,...,...,...,...,...
983,114196.0,3.0,2014-12-31 18:00:00,2015-01-03 12:00:00,66.0,,,,4.890137,340.884138,240.43672607892813 degree
984,114217.0,3.0,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,,,,4.515137,375.217412,250.3772879757895 degree
985,114363.0,3.0,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,,,,,61.715181,188.15285286031184 degree
986,114402.0,3.0,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,,,,48.576172,265.416884,277.25970342681023 degree


In [66]:
%%time
# Variable and WRF domain whose daily files are read
domain = 'd01'
varname = 'zerodegisotherm'

# Glob pattern matching every daily file for that variable/domain
filename_pattern = f'{path_to_data}wrf_hasia/{domain}/{varname}/daily/out.wrf6km.{varname}.daily_*.nc'
print(filename_pattern)

# Open all matching files as a single dataset
ds = xr.open_mfdataset(filename_pattern)

# Keep only the study period (start_date .. end_date) ...
ds = ds.sel(time=slice(start_date, end_date))

# ... and, within it, only the months start_mon .. end_mon
ds = select_months_ds(ds, start_mon, end_mon)

ds

/home/nash/DATA/data/wrf_hasia/d01/zerodegisotherm/daily/out.wrf6km.zerodegisotherm.daily_*.nc
CPU times: user 480 ms, sys: 41.3 ms, total: 521 ms
Wall time: 518 ms


Unnamed: 0,Array,Chunk
Bytes,838.13 MB,23.47 MB
Shape,"(3249, 249, 259)","(91, 249, 259)"
Count,185 Tasks,37 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 838.13 MB 23.47 MB Shape (3249, 249, 259) (91, 249, 259) Count 185 Tasks 37 Chunks Type float32 numpy.ndarray",259  249  3249,

Unnamed: 0,Array,Chunk
Bytes,838.13 MB,23.47 MB
Shape,"(3249, 249, 259)","(91, 249, 259)"
Count,185 Tasks,37 Chunks
Type,float32,numpy.ndarray


In [68]:
## Climatology files for the zero-degree isotherm: filtered daily mean and daily std
clim_dir = path_to_data + 'wrf_hasia/d01/zerodegisotherm/'
clim_mean = xr.open_dataset(clim_dir + 'filtered_daily_mean_clim_zerodegisotherm.nc')
clim_std = xr.open_dataset(clim_dir + 'daily_std_clim_zerodegisotherm.nc')

## Anomaly = daily value minus the climatological mean of the same day of year
## (assumes clim_mean is indexed by dayofyear -- TODO confirm)
anomalies = ds.groupby('time.dayofyear') - clim_mean

In [78]:
# Build a calendar-day key ('date', formatted YYYY-MM-DD) from each AR row's
# start_date; it is needed to pick the matching days out of the daily
# freezing-level data.
df = (
    merged_data
    # temporarily expose start_date under the name 'date' ...
    .rename(columns={'start_date': 'date'})
    # ... so the datetime index built from it is also called 'date'
    .pipe(lambda frame: frame.set_index(pd.to_datetime(frame['date'])))
    # keep only rows that fall in the months start_mon .. end_mon
    .pipe(select_months_df, start_mon, end_mon)
)

# drop the time of day from the index, leaving day strings
df.index = df.index.strftime("%Y-%m-%d")

# give the column its original name back and move the day strings out of
# the index into a regular 'date' column
df = df.rename(columns={'date': 'start_date'}).reset_index()
df

Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,landslideID,landslide_lat,landslide_lon,prec,ivt,ivtdir
0,1979-12-02,2861.0,1.0,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,,,,13.718994,152.087860,224.91021331187994 degree
1,1979-12-01,2861.0,2.0,1979-12-01 00:00:00,1979-12-02 00:00:00,24.0,,,,17.533569,245.002441,220.23760859065467 degree
2,1979-12-09,2871.0,1.0,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,,,,5.676636,108.576300,231.68037841051355 degree
3,1979-12-08,2871.0,2.0,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,,,,8.985474,149.770879,237.0652245325259 degree
4,1979-12-16,2975.0,1.0,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,,,,,69.228194,270.4320425315212 degree
...,...,...,...,...,...,...,...,...,...,...,...,...
983,2014-12-31,114196.0,3.0,2014-12-31 18:00:00,2015-01-03 12:00:00,66.0,,,,4.890137,340.884138,240.43672607892813 degree
984,2015-01-03,114217.0,3.0,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,,,,4.515137,375.217412,250.3772879757895 degree
985,2015-01-21,114363.0,3.0,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,,,,,61.715181,188.15285286031184 degree
986,2015-01-29,114402.0,3.0,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,,,,48.576172,265.416884,277.25970342681023 degree


In [79]:
# one calendar day per row of df, i.e. the days on which an AR is present
ar_dates = pd.to_datetime(df['date']).values

# restrict the freezing-level anomalies to exactly those days
# NOTE: this overwrites `anomalies`, so re-running the cell selects from the
# already-subset data rather than from the full anomaly record
anomalies = anomalies.sel({'time': ar_dates})
anomalies

Unnamed: 0,Array,Chunk
Bytes,509.74 MB,2.58 MB
Shape,"(988, 249, 259)","(5, 249, 259)"
Count,14190 Tasks,917 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 509.74 MB 2.58 MB Shape (988, 249, 259) (5, 249, 259) Count 14190 Tasks 917 Chunks Type float64 numpy.ndarray",259  249  988,

Unnamed: 0,Array,Chunk
Bytes,509.74 MB,2.58 MB
Shape,"(988, 249, 259)","(5, 249, 259)"
Count,14190 Tasks,917 Chunks
Type,float64,numpy.ndarray


In [80]:
%%time
## Threshold, in multiples of the climatological standard deviation, beyond
## which an anomaly is flagged as a low / high freezing level.
## NOTE(review): the previous comments described a 1.5*std test, but the code
## applies 1.0*std. The value actually used is kept here -- confirm which one
## is intended before changing it.
std_thres = 1.

## Group by day of year so each day is compared against the std of the same
## calendar day (assumes clim_std is indexed by dayofyear -- TODO confirm)
anom_by_doy = anomalies.z.groupby('time.dayofyear')

## Low freezing level: keep values where (x - mean) < -std_thres*std, NaN elsewhere
low_freezing = anomalies.where(anom_by_doy < clim_std.z * -std_thres)
## High freezing level: keep values where (x - mean) > std_thres*std, NaN elsewhere
high_freezing = anomalies.where(anom_by_doy > clim_std.z * std_thres)

CPU times: user 2.31 s, sys: 575 ms, total: 2.89 s
Wall time: 3.43 s


In [81]:
%%time
# Cut the low / high freezing-level fields down to each subregion and load the
# values into memory. Each entry of `domains` is indexed as
# [lon_min, lon_max, lat_min, lat_max].
ds_low, ds_high = [], []
for dom in domains:
    region_sel = dict(lon=slice(dom[0], dom[1]), lat=slice(dom[2], dom[3]))
    ds_low.append(low_freezing.sel(**region_sel).load())
    ds_high.append(high_freezing.sel(**region_sel).load())
ds_low

CPU times: user 1min 24s, sys: 59.1 s, total: 2min 23s
Wall time: 1min 56s


[<xarray.Dataset>
 Dimensions:    (time: 988, lat: 29, lon: 39)
 Coordinates:
   * time       (time) datetime64[ns] 1979-12-02 1979-12-01 ... 2015-02-27
   * lat        (lat) float32 32.08 32.25 32.43 32.6 ... 36.37 36.54 36.7 36.87
   * lon        (lon) float32 71.01 71.21 71.42 71.63 ... 78.28 78.48 78.69 78.9
     dayofyear  (time) int64 336 335 343 342 350 351 355 ... 348 365 3 21 29 58
 Data variables:
     z          (time, lat, lon) float64 nan nan nan nan nan ... nan nan nan nan,
 <xarray.Dataset>
 Dimensions:    (time: 988, lat: 19, lon: 24)
 Coordinates:
   * time       (time) datetime64[ns] 1979-12-02 1979-12-01 ... 2015-02-27
   * lat        (lat) float32 37.04 37.2 37.37 37.53 ... 39.48 39.64 39.8 39.96
   * lon        (lon) float32 69.14 69.34 69.55 69.76 ... 73.29 73.5 73.71 73.91
     dayofyear  (time) int64 336 335 343 342 350 351 355 ... 348 365 3 21 29 58
 Data variables:
     z          (time, lat, lon) float64 nan nan nan nan nan ... nan nan nan nan,
 <xarray.Datas

In [82]:
%%time
## For every subregion, add two columns to df: '<region>_low' and
## '<region>_high', the mean anomaly over the subregion's flagged grid cells
for i, region in enumerate(region_name):
    for suffix, subset in (('low', ds_low[i]), ('high', ds_high[i])):
        values = subset.z.values
        # collapse lat/lon into one axis: (ntimes, nlat, nlon) -> (ntimes, nlat*nlon)
        ntimes, nlats, nlons = values.shape
        flat = values.reshape(ntimes, nlats*nlons)

        # spatial mean per time step, ignoring NaN (unflagged) cells; a time
        # step with no flagged cell at all comes out as NaN
        df[region + '_' + suffix] = np.nanmean(flat, axis=1)


df

CPU times: user 127 ms, sys: 18.8 ms, total: 146 ms
Wall time: 156 ms


  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,landslideID,landslide_lat,landslide_lon,prec,ivt,ivtdir,western_low,western_high,northwestern_low,northwestern_high,eastern_low,eastern_high
0,1979-12-02,2861.0,1.0,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,,,,13.718994,152.087860,224.91021331187994 degree,-171.727932,410.474576,-277.262466,254.583280,-556.359117,243.805053
1,1979-12-01,2861.0,2.0,1979-12-01 00:00:00,1979-12-02 00:00:00,24.0,,,,17.533569,245.002441,220.23760859065467 degree,,538.542225,,550.066162,-687.720702,324.721934
2,1979-12-09,2871.0,1.0,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,,,,5.676636,108.576300,231.68037841051355 degree,,570.413285,,264.630210,-638.594000,186.723207
3,1979-12-08,2871.0,2.0,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,,,,8.985474,149.770879,237.0652245325259 degree,-264.049377,562.519768,,525.196476,-593.690705,217.179775
4,1979-12-16,2975.0,1.0,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,,,,,69.228194,270.4320425315212 degree,-232.334706,151.812409,,528.596253,-659.495027,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983,2014-12-31,114196.0,3.0,2014-12-31 18:00:00,2015-01-03 12:00:00,66.0,,,,4.890137,340.884138,240.43672607892813 degree,-181.423361,,,,-153.051697,620.529686
984,2015-01-03,114217.0,3.0,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,,,,4.515137,375.217412,250.3772879757895 degree,-266.148849,,-270.614868,,-153.740778,740.600011
985,2015-01-21,114363.0,3.0,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,,,,,61.715181,188.15285286031184 degree,-535.461933,,-215.308616,,-140.994532,221.152453
986,2015-01-29,114402.0,3.0,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,,,,48.576172,265.416884,277.25970342681023 degree,-658.230455,,-452.952213,,-149.184701,


In [83]:
%%time
## For every subregion, add a '<region>_freeze' column classifying each time step
## by which anomaly type covers more grid cells:
##   -1 : more low-freezing-level cells than high
##    1 : more high-freezing-level cells than low
##    0 : equal counts (including none of either)
for i, region in enumerate(region_name):
    # number of flagged (non-NaN) grid cells at each time step, counted over
    # the lat and lon axes of the (time, lat, lon) arrays
    n_low = np.count_nonzero(~np.isnan(ds_low[i].z.values), axis=(1, 2))
    n_high = np.count_nonzero(~np.isnan(ds_high[i].z.values), axis=(1, 2))

    # sign of the difference gives -1 / 0 / 1 directly, so no scratch
    # 'low' / 'high' columns are written into (and dropped from) df
    df[region + '_freeze'] = np.sign(n_high - n_low)


df

CPU times: user 52.9 ms, sys: 5.34 ms, total: 58.2 ms
Wall time: 55 ms


Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,landslideID,landslide_lat,landslide_lon,prec,...,ivtdir,western_low,western_high,northwestern_low,northwestern_high,eastern_low,eastern_high,western_freeze,northwestern_freeze,eastern_freeze
0,1979-12-02,2861.0,1.0,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,,,,13.718994,...,224.91021331187994 degree,-171.727932,410.474576,-277.262466,254.583280,-556.359117,243.805053,1,1,-1
1,1979-12-01,2861.0,2.0,1979-12-01 00:00:00,1979-12-02 00:00:00,24.0,,,,17.533569,...,220.23760859065467 degree,,538.542225,,550.066162,-687.720702,324.721934,1,1,-1
2,1979-12-09,2871.0,1.0,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,,,,5.676636,...,231.68037841051355 degree,,570.413285,,264.630210,-638.594000,186.723207,1,1,-1
3,1979-12-08,2871.0,2.0,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,,,,8.985474,...,237.0652245325259 degree,-264.049377,562.519768,,525.196476,-593.690705,217.179775,1,1,-1
4,1979-12-16,2975.0,1.0,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,,,,,...,270.4320425315212 degree,-232.334706,151.812409,,528.596253,-659.495027,,1,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983,2014-12-31,114196.0,3.0,2014-12-31 18:00:00,2015-01-03 12:00:00,66.0,,,,4.890137,...,240.43672607892813 degree,-181.423361,,,,-153.051697,620.529686,-1,0,1
984,2015-01-03,114217.0,3.0,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,,,,4.515137,...,250.3772879757895 degree,-266.148849,,-270.614868,,-153.740778,740.600011,-1,-1,1
985,2015-01-21,114363.0,3.0,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,,,,,...,188.15285286031184 degree,-535.461933,,-215.308616,,-140.994532,221.152453,-1,-1,1
986,2015-01-29,114402.0,3.0,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,,,,48.576172,...,277.25970342681023 degree,-658.230455,,-452.952213,,-149.184701,,-1,-1,-1


In [84]:
# Write the final table to a csv file in the output directory
outfile = path_to_out + 'DJF_ivt_ar_types_freezing_level_max_prec_new.csv'
df.to_csv(outfile)