### Imports

In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr


# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules


In [2]:
# Directory locations used by the cells below.

# project data -- read only
path_to_data = '/home/nash/DATA/data/'

# output files (numerical results, intermediate datafiles) -- read & write
path_to_out = '/home/nash/DATA/repositories/HASIA_ch2_analysis/out/'

# figures
path_to_figs = '/home/nash/DATA/repositories/HASIA_ch2_analysis/figs/'

### AR Type Data

In [3]:
# Per-track AR table to analyse. Alternative inputs that can be swapped in:
#   path_to_out + 'DJFMAM_ARS_TRACKID_daily.csv'
#   path_to_out + 'HMA_AR_duration.csv'   (dates based on AR Catalog)
filepath = path_to_out + 'HMAAR_duration_DJFMAM.csv'  # just dates in HMA

# Read the table and discard the 'Unnamed: 0' column in one step
# (presumably a row index saved by an earlier to_csv call -- confirm).
df = pd.read_csv(filepath).drop(columns=['Unnamed: 0'])
df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0
1,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0
2,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0
3,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0
4,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0
...,...,...,...,...,...
2684,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0
2685,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0
2686,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0
2687,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0


### AR Data

In [4]:
## Load the AR catalog and subset the per-track variables to DJFMAM
# Open the catalog file and remove any length-1 dimensions.
filename = path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
ds = xr.open_dataset(filename, engine='netcdf4').squeeze()

# Boolean mask over time: True for December and for January through May
idx = ds.time.dt.month.isin([12, 1, 2, 3, 4, 5])

# Apply the same time selection to every variable used further down.
(
    kivty,  # ivty for each trackID
    kivtx,  # ivtx for each trackID
    kid,    # trackID for indexing
    hlat,   # head latitude of each trackID
    hlon,   # head longitude of each trackID
    tlat,   # tail latitude of each trackID
    tlon,   # tail longitude of each trackID
) = (
    ds[name].sel(time=idx)
    for name in ('kivty', 'kivtx', 'kid', 'hlat', 'hlon', 'tlat', 'tlon')
)


In [5]:
%%time
# For every trackID in df, look the track up in the AR catalog variables and
# collect:
#   * the first non-NaN ivtx / ivty value found for that track
#   * the mean of the non-NaN head and tail latitude / longitude values
# Each list ends up with one entry per row of df, in df's row order.
ivty = []
ivtx = []
trID = []
trLAT = []
trLON = []
tlLAT = []
tlLON = []


def _track_values(field, track_mask):
    """Return the non-NaN values of `field` where `track_mask` is True, as a 1-D array."""
    selected = field.where(track_mask, drop=True).values
    return selected[~np.isnan(selected)]


def _first_or_nan(values):
    """Return the first element of `values`, or NaN when `values` is empty."""
    return values[0] if values.size else np.nan


def _mean_or_nan(values):
    """Return the mean of `values`, or NaN (without a RuntimeWarning) when empty."""
    return values.mean() if values.size else np.nan


for track in df.trackID.values:
    # The mask is the same for all six variables, so build it once per track
    # rather than re-evaluating `kid == track` for every lookup.
    track_mask = (kid == track)

    # IVT components: first valid value for this track. A track with no valid
    # match yields NaN instead of raising IndexError, so the lists stay aligned
    # with df.
    ivtx.append(_first_or_nan(_track_values(kivtx, track_mask)))
    ivty.append(_first_or_nan(_track_values(kivty, track_mask)))
    trID.append(track)

    # head lat and lon of each AR (mean over all valid entries of the track)
    trLAT.append(_mean_or_nan(_track_values(hlat, track_mask)))
    trLON.append(_mean_or_nan(_track_values(hlon, track_mask)))

    # tail lat and lon of each AR (mean over all valid entries of the track)
    tlLAT.append(_mean_or_nan(_track_values(tlat, track_mask)))
    tlLON.append(_mean_or_nan(_track_values(tlon, track_mask)))

CPU times: user 11min 1s, sys: 1.01 s, total: 11min 2s
Wall time: 11min 12s


In [6]:
# Attach the per-track lists gathered in the loop above to df as new columns.
for column, values in [('ivty', ivty), ('ivtx', ivtx)]:
    df[column] = values

# IVT magnitude from its two components
df['ivt'] = np.sqrt(df['ivty']**2 + df['ivtx']**2)

# Mean head (h*) and tail (t*) positions of each track
for column, values in [('hlat', trLAT), ('hlon', trLON),
                       ('tlat', tlLAT), ('tlon', tlLON)]:
    df[column] = values
df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivty,ivtx,ivt,hlat,hlon,tlat,tlon
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,84.715817,156.934744,178.340359,54.000000,88.950000,33.300000,54.750000
1,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,140.456836,255.515681,291.575695,58.855263,137.842105,37.657895,165.789474
2,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,123.568145,129.182257,178.765606,40.000000,64.000000,24.000000,58.500000
3,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,118.158087,138.478370,182.037338,33.600000,56.800000,16.600000,45.300000
4,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,95.302288,105.452260,142.136221,28.500000,69.000000,19.500000,49.500000
...,...,...,...,...,...,...,...,...,...,...,...,...
2684,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,152.255674,36.972662,156.680465,40.500000,102.000000,28.500000,94.500000
2685,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,92.532689,210.106101,229.579773,30.000000,100.500000,28.500000,84.000000
2686,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,59.469964,404.299149,408.649579,24.000000,100.500000,21.000000,82.500000
2687,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,269.018570,280.446504,388.614504,44.307692,140.653846,24.115385,113.653846


### Subregions to calculate precipitation

In [7]:
# Subregion bounding boxes, each stored as [lonmin, lonmax, latmin, latmax].
# Elements 0/1 are the x-range and elements 2/3 the y-range used below; the
# first pair must be longitude because 99 (ext3) is not a valid latitude.
# Values are presumably degrees east / degrees north -- confirm.
ext1 = [69, 74, 37, 40] # Northwestern precip anomalies
ext2 = [71, 79, 32, 37] # Western precip anomalies
ext3 = [90, 99, 24, 30] # Eastern precip anomalies
ext4 = [48, 53, 30, 35] # zagros mountains precip anomalies
region_name = ['northwestern', 'western', 'eastern', 'zagros']
domains = [ext1, ext2, ext3, ext4]

# Domain extent info, one entry per subregion (same order as `domains`):
#   sr_xy     -> lower-left corner [xmin, ymin]
#   sr_width  -> xmax - xmin
#   sr_height -> ymax - ymin
sr_xy = [[xmin, ymin] for xmin, _, ymin, _ in domains]
sr_width = [xmax - xmin for xmin, xmax, _, _ in domains]
sr_height = [ymax - ymin for _, _, ymin, ymax in domains]

print(sr_xy)
print(sr_width)
print(sr_height)


[[69, 37], [71, 32], [90, 24], [48, 30]]
[5, 8, 9, 5]
[3, 5, 6, 5]


### Link Landslides to the AR DataFrame

In [8]:
## Flag the AR tracks in df that appear in the landslide table
# Load the landslide table; its `kidmap` column is matched against df.trackID.
fname = path_to_out + 'djfmam_landslide_ars.csv'
landslide = pd.read_csv(fname)

ar_trackID = df.trackID
landslide_tracks = landslide.kidmap.values
landslide_lats = landslide.latitude.values
landslide_lons = landslide.longitude.values

# One slot per row of df. Rows that never match a landslide keep the initial
# 0 flag and 0.0 / 0.0 coordinates.
n_tracks = len(ar_trackID)
arr_allDays = np.zeros(n_tracks, dtype=int)
arr_latitude = np.zeros(n_tracks, dtype=float)
arr_longitude = np.zeros(n_tracks, dtype=float)

# Walk the landslide records and mark every df row with the same track ID.
# When several landslides share a track, the last one processed sets the
# stored coordinates.
for track, lat, lon in zip(landslide_tracks, landslide_lats, landslide_lons):
    is_match = (ar_trackID == track).values
    arr_allDays[is_match] = 1
    arr_latitude[is_match] = lat
    arr_longitude[is_match] = lon

for column, values in [('landslide', arr_allDays),
                       ('landslide_lat', arr_latitude),
                       ('landslide_lon', arr_longitude)]:
    df[column] = values
df


Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivty,ivtx,ivt,hlat,hlon,tlat,tlon,landslide,landslide_lat,landslide_lon
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,84.715817,156.934744,178.340359,54.000000,88.950000,33.300000,54.750000,0,0.0,0.0
1,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,140.456836,255.515681,291.575695,58.855263,137.842105,37.657895,165.789474,0,0.0,0.0
2,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,123.568145,129.182257,178.765606,40.000000,64.000000,24.000000,58.500000,0,0.0,0.0
3,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,118.158087,138.478370,182.037338,33.600000,56.800000,16.600000,45.300000,0,0.0,0.0
4,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,95.302288,105.452260,142.136221,28.500000,69.000000,19.500000,49.500000,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2684,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,152.255674,36.972662,156.680465,40.500000,102.000000,28.500000,94.500000,0,0.0,0.0
2685,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,92.532689,210.106101,229.579773,30.000000,100.500000,28.500000,84.000000,0,0.0,0.0
2686,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,59.469964,404.299149,408.649579,24.000000,100.500000,21.000000,82.500000,0,0.0,0.0
2687,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,269.018570,280.446504,388.614504,44.307692,140.653846,24.115385,113.653846,0,0.0,0.0


### Export to CSV

In [12]:
# Write the combined table to the output directory as CSV.
# index=True is the to_csv default: the row index is written as the first,
# unnamed column.
outfile = path_to_out + 'DJFMAM_ivt_ar_types.csv'
df.to_csv(outfile, index=True)