### Import

In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr


# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules


In [2]:
# Set up paths

path_to_data = '/home/nash/DATA/data/'                                      # project data -- read only
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '/home/nash/DATA/repositories/HASIA_ch2_analysis/figs/'      # figures

In [3]:
## vars to update with each iteration
# data source can equal 'ERA5', 'WRF6km', or 'GPM'
data_src = 'ERA5'

### AR Type Data with IVT and landslide info

In [4]:
filepath = path_to_out + 'DJFMAM_ivt_ar_types.csv'
df = pd.read_csv(filepath)
df = df.drop(columns=['Unnamed: 0'])
df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivt,hlat,hlon,tlat,tlon,ivtdir,landslideID,landslide_lat,landslide_lon
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,
1,2861.0,2,1979-12-01 12:00:00,1979-12-02 00:00:00,12.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,
2,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,
3,2871.0,2,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,
4,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,178.765606,40.000000,64.000000,24.000000,58.500000,45.690158,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2722,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,156.680465,40.500000,102.000000,28.500000,94.500000,13.649099,,,
2723,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,229.579773,30.000000,100.500000,28.500000,84.000000,66.230859,,,
2724,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,408.649579,24.000000,100.500000,21.000000,82.500000,81.632143,,,
2725,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,388.614504,44.307692,140.653846,24.115385,113.653846,44.724359,,,


### Subregions to calculate precipitation

In [5]:
# latmin, latmax, lonmin, lonmax
ext1 = [69, 74, 37, 40] # Northwestern precip anomalies
ext2 = [71, 79, 32, 37] # Western precip anomalies
ext3 = [90, 99, 24, 30] # Eastern precip anomalies
ext4 = [48, 53, 30, 35] # zagros mountains precip anomalies
region_name = ['northwestern', 'western', 'eastern', 'zagros']
domains = [ext1, ext2, ext3, ext4]

# domain extent info 
# [xmin, ymin]
sr_xy = []
sr_width = []
sr_height = []

for i, d in enumerate(domains):
    sr_xy.append([d[0], d[2]])
    sr_width.append((d[1] - d[0]))
    sr_height.append((d[3]- d[2]))

print(sr_xy)
print(sr_width)
print(sr_height)


[[69, 37], [71, 32], [90, 24], [48, 30]]
[5, 8, 9, 5]
[3, 5, 6, 5]


## ERA5 Precip

In [6]:
%%time
rename_dict_prec = {'mtpr': 'prec', 
                    'latitude': 'lat',
                    'longitude': 'lon'}

filepath_pattern = path_to_data + 'ERA5/prec/6hr/era5_hma_025dg_6hr_prec_*.nc'
ds = xr.open_mfdataset(filepath_pattern, combine='by_coords')
ds = ds.rename(rename_dict_prec)
ds['time'] = ds.indexes['time'].normalize()
ds = ds.assign(prec=lambda ds: ds.prec*(60*60*6)) # convert to mm accumulated per 6-hours
# ds = ds.sel(lat=slice(40,24), lon=slice(66, 100))

# Select months
idx = (ds.time.dt.month >= 12) | (ds.time.dt.month <= 5)
ds = ds.sel(time=idx)

## create weights for area averaging
## for a rectangular grid the cosine of the latitude is proportional to the grid cell area.
weights = np.cos(np.deg2rad(ds.lat))
weights.name = "weights"
# add weights to ds
ds = ds.assign(weights=lambda ds: weights)



CPU times: user 677 ms, sys: 121 ms, total: 798 ms
Wall time: 3.62 s


**TODO: Add WRF and GPM precip capabilities**

## Get precip within each subregion for each ds

In [7]:
%%time
# make a ds for each subregion
ds_lst = []
for i, bnds in enumerate(domains):
    tmp = ds.sel(lat=slice(bnds[3], bnds[2]), lon=slice(bnds[0], bnds[1]))
    ds_lst.append(tmp.load())
ds_lst

CPU times: user 10min 7s, sys: 14min 22s, total: 24min 30s
Wall time: 5min 47s


[<xarray.Dataset>
 Dimensions:  (lon: 21, lat: 13, time: 29886)
 Coordinates:
   * lon      (lon) float32 69.0 69.25 69.5 69.75 70.0 ... 73.25 73.5 73.75 74.0
   * lat      (lat) float32 40.0 39.75 39.5 39.25 39.0 ... 37.75 37.5 37.25 37.0
   * time     (time) datetime64[ns] 1979-01-01 1979-01-01 ... 2019-12-31
 Data variables:
     prec     (time, lat, lon) float32 -2.012e-05 -2.012e-05 ... 0.2295 0.07278
     weights  (lat) float32 0.766 0.7688 0.7716 0.7744 ... 0.7934 0.796 0.7986
 Attributes:
     Conventions:  CF-1.6
     history:      2020-05-26 21:40:46 GMT by grib_to_netcdf-2.16.0: /opt/ecmw...,
 <xarray.Dataset>
 Dimensions:  (lon: 33, lat: 21, time: 29886)
 Coordinates:
   * lon      (lon) float32 71.0 71.25 71.5 71.75 72.0 ... 78.25 78.5 78.75 79.0
   * lat      (lat) float32 37.0 36.75 36.5 36.25 36.0 ... 32.75 32.5 32.25 32.0
   * time     (time) datetime64[ns] 1979-01-01 1979-01-01 ... 2019-12-31
 Data variables:
     prec     (time, lat, lon) float32 -2.012e-05 0.005371 

In [8]:
def ar_precip(df, ds_lst):
    '''Calculate precipitation statistics for a subregion in a ds and append to dataframe.
     Mode is chosen based on calculation. For each range of AR event dates, we calculate the total accumulated precip for every grid cell. 
     Then we remove all gridcells that had less than 1 mm of rain per event (these are not included in any calc)
     Then we weight the gridcells by the cosine of the latitude.
     Then based on mode selected, different statistics are retained:
         'mean-total' averages all viable gridcells within the subregion and retains this number
         'max-total' selects the maximum gridcell value to append
         'percentile-total' calcuates the 95th percentile and then averages all the grid cells that exceed this threshold
    '''
    # the final precip statistic to retain
    final1 = []
    final2 = []
    final3 = []

    for k, ds1 in enumerate(ds_lst):
        print('loop', k, 'of', len(ds_lst))
        m1_vals = []
        m2_vals = []
        m3_vals = []
        for i, track in enumerate(df.trackID.values):
            start = df.start_date.values[i]
            end = df.end_date.values[i]

            idx = slice(start, end)
            tmp = ds1.sel(time=idx)

            ### event-total precipitation per event for every grid cell
            tmp = tmp.sum('time')
            ### mask out grid cells with less than 1 mm per event
            tmp2 = xr.where(cond=(tmp.prec > 1), x=tmp.prec, y=np.nan)
            
            ### area weighted
            tmp = tmp2.weighted(tmp.weights)
            
            ## mode 1: mean-total
            # average over gridcells in subregion
            mean_tot = tmp.mean(['lat', 'lon'], skipna=True)
            # append to list
            m1_vals.append(mean_tot.values.tolist())
                
            ## mode 2: max-total
            ### localized precip maxima during event
            event_max = tmp2.max(['lat', 'lon'])
            m2_vals.append(event_max.values.tolist())
                
            ## mode 3: percentile-total
            ###  get 95th percentile thres
            q_thres = tmp2.quantile(0.95, dim=['lat', 'lon'], interpolation='linear')
            ## mask out grid cells below threshold
            perc_prec = xr.where(cond=(tmp2 > q_thres), x=tmp2, y=np.nan)
            # average over all grid cells skipping nans
            mean = perc_prec.mean(['lat', 'lon'], skipna=True)
            m3_vals.append(mean.values.tolist())

        final1.append(m1_vals)
        final2.append(m2_vals)
        final3.append(m3_vals)
    final = [final1, final2, final3]
        
    return final

In [9]:
%%time
prec_final = ar_precip(df, ds_lst)
print(len(prec_final))
    


loop 0 of 4


  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interp

loop 1 of 4


  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interp

loop 2 of 4


  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interp

loop 3 of 4


  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interp

3
CPU times: user 3min 17s, sys: 3.54 s, total: 3min 21s
Wall time: 3min 18s


  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation


In [10]:
df_mean = df.copy()
df_max = df.copy()
df_perc = df.copy()

for i, d in enumerate(domains):
    df_mean[region_name[i]] = prec_final[0][i]
    df_max[region_name[i]] = prec_final[1][i]
    df_perc[region_name[i]] = prec_final[2][i]
    
df_max

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivt,hlat,hlon,tlat,tlon,ivtdir,landslideID,landslide_lat,landslide_lon,northwestern,western,eastern,zagros
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,,33.108181,19.348969,98.042885,
1,2861.0,2,1979-12-01 12:00:00,1979-12-02 00:00:00,12.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,,33.108181,19.348969,98.042885,
2,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,,,,,
3,2871.0,2,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,,27.518328,3.974229,32.871208,
4,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,178.765606,40.000000,64.000000,24.000000,58.500000,45.690158,,,,11.874307,6.085221,40.997482,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2722,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,156.680465,40.500000,102.000000,28.500000,94.500000,13.649099,,,,,,,
2723,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,229.579773,30.000000,100.500000,28.500000,84.000000,66.230859,,,,,,,
2724,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,408.649579,24.000000,100.500000,21.000000,82.500000,81.632143,,,,18.653128,43.061131,120.298950,6.096446
2725,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,388.614504,44.307692,140.653846,24.115385,113.653846,44.724359,,,,42.607674,19.890316,209.371338,11.868172


### Export to CSV

In [11]:
# Export dataframes as csv
df_mean.to_csv(path_to_out + 'DJFMAM_ivt_ar_types_' + data_src + '_prec_mean.csv')
df_max.to_csv(path_to_out + 'DJFMAM_ivt_ar_types_' + data_src + '_prec_max.csv')
df_perc.to_csv(path_to_out + 'DJFMAM_ivt_ar_types_' + data_src + '_prec_perc.csv')