### Import

In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr


# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules


In [2]:
# Set up paths

path_to_data = '/home/nash/DATA/data/'                                      # project data -- read only
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

### Landslide DF

In [3]:
def expand_grid(lat,lon):
    '''list all combinations of lats and lons using expand_grid(lat,lon)'''
    test = [(A,B) for A in lat for B in lon]
    test = np.array(test)
    test_lat = test[:,0]
    test_lon = test[:,1]
    full_grid = pd.DataFrame({'lat': test_lat, 'lon': test_lon})
    full_grid = full_grid.sort_values(by=['lat','lon'])
    full_grid = full_grid.reset_index(drop=True)
    return full_grid

In [4]:
fname = path_to_data + 'CH2_generated_data/Global_Landslide_Catalog_Export.csv'
landslide = pd.read_csv(fname)

# Select lat/lon grid
lonmin = 65
lonmax = 100
latmin = 20
latmax = 42

## Select Landslides within Southern Asia region
idx = (landslide.latitude >= latmin) & (landslide.latitude <= latmax) & (landslide.longitude >= lonmin) & (landslide.longitude <= lonmax)
landslide = landslide.loc[idx]
# set event time as index
landslide = landslide.set_index(pd.to_datetime(landslide.event_date))
# landslide.index = landslide.index.normalize()

# select only landslide dates that are between december and may
idx = (landslide.index.month >= 12) | (landslide.index.month <= 5)
landslide = landslide[idx]

# rename and reindex
landslide = landslide.rename(columns={"latitude": "lat", "longitude": "lon", "event_date": "event_time"})
landslide = landslide.reset_index()

# round event time to the nearest 6 hours
landslide['time'] = landslide['event_date'].dt.round('6H')
landslide = landslide.set_index(pd.to_datetime(landslide.time))

# select only landslide dates that are between december and may
idx = (landslide.index.month >= 12) | (landslide.index.month <= 5)
landslide = landslide[idx]

landslide

Unnamed: 0_level_0,event_date,source_name,source_link,event_id,event_time,event_time,event_title,event_description,location_description,location_accuracy,...,admin_division_name,admin_division_population,gazeteer_closest_point,gazeteer_distance,submitted_date,created_date,last_edited_date,lon,lat,time
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-03-19 00:00:00,2007-03-19 00:00:00,Tribune India.com,,37,03/19/2007 12:00:00 AM,,Doba,"2 killed, 5 injured",Doba,25km,...,Kashmir,14711.0,Kupwāra,18.10887,04/01/2014 12:00:00 AM,11/20/2017 03:17:00 PM,02/15/2018 03:51:00 PM,74.333300,33.877800,2007-03-19 00:00:00
2017-05-28 12:00:00,2017-05-28 13:34:00,AGU Blogs (Petley),http://blogs.agu.org/landslideblog/2017/05/30/...,9774,05/28/2017 01:34:00 PM,,Mudflow in Northern Tajikistan,"Meltwater causes large mudflow, affects cows",Rasht Valley of Tajikistan (exact coordinates ...,exact,...,,,,,06/16/2017 01:34:00 PM,11/20/2017 03:17:00 PM,02/15/2018 03:51:00 PM,71.275479,39.197800,2017-05-28 12:00:00
2017-05-29 12:00:00,2017-05-29 13:34:00,Eleven Myanmar,http://www.elevenmyanmar.com/local/9759,9771,05/29/2017 01:34:00 PM,,"Hakhha Landslide, Zayhuang Ward","Heavy rains cause landslides, affect houses mi...",Hakha in Zayhuang Ward,5km,...,,,,,06/16/2017 01:34:00 PM,11/20/2017 03:17:00 PM,02/15/2018 03:51:00 PM,93.615899,22.656202,2017-05-29 12:00:00
2017-05-20 18:00:00,2017-05-20 20:14:00,World Hindu News,http://www.worldhindunews.com/2017/05/23/58870...,9802,05/20/2017 08:14:00 PM,,Landslide in Chanoli district,"Boulders roll down mountainside, blocking highway","Rishikesh-Badrinath highway, near Vishnuprayag...",5km,...,,,,,06/20/2017 08:14:00 PM,11/20/2017 03:17:00 PM,02/15/2018 03:51:00 PM,79.561632,30.565047,2017-05-20 18:00:00
2017-05-20 18:00:00,2017-05-20 20:14:00,Etemaa Daily,http://www.en.etemaaddaily.com/World/National/...,9809,05/20/2017 08:14:00 PM,,Badrinath highway landslide,Landslide on highway to Hindu temple Badrinath...,"Highway to Badrinath, Uttarakhand",25km,...,,,,,06/20/2017 08:14:00 PM,11/20/2017 03:17:00 PM,02/15/2018 03:51:00 PM,79.492316,30.733156,2017-05-20 18:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-04-23 18:00:00,2017-04-23 17:58:00,Chandigarh Metro,https://chandigarhmetro.com/manali-rohtang-hig...,10853,04/23/2017 05:58:00 PM,,Landslide near Rani Nullah,"Landslide blocks Malai-Rohtang HWY. Article: ""...","Manali Sarchu Road, Manali Sub-District, Kullu...",1km,...,,,,,07/17/2017 05:58:00 PM,12/05/2017 06:57:00 PM,02/15/2018 03:51:00 PM,77.233987,32.361492,2017-04-23 18:00:00
2017-04-17 06:00:00,2017-04-17 04:00:00,AKI Press,http://akipress.com/news:591338/,10897,04/17/2017 04:00:00 AM,,Landslide in Chyrmash,Landslide hits 3 houses,"Chyrmash (??), Ozgon, Osh, Kyrgyzstan",50km,...,,,,,07/28/2017 01:34:00 PM,12/07/2017 09:19:00 PM,02/15/2018 03:51:00 PM,73.604732,40.804379,2017-04-17 06:00:00
2017-03-25 18:00:00,2017-03-25 17:32:00,Greater Kashmir,http://www.greaterkashmir.com/news/jammu/lands...,10845,03/25/2017 05:32:00 PM,,Barnari Sigdi Landslide,Two teenage girls died after they were buried ...,"Barnari Sigdi area, Tehsil Mughalmaidan, Kisht...",5km,...,,,,,09/21/2017 05:32:00 PM,12/05/2017 06:45:00 PM,02/15/2018 03:51:00 PM,75.680611,33.403080,2017-03-25 18:00:00
2016-12-15 06:00:00,2016-12-15 05:00:00,NBC Daily,http://www.nbcdaily.com/separate-landslides-ki...,10973,12/15/2016 05:00:00 AM,,Landslide at Pub Sarania Hill,An octogenarian was killed when a sudden lands...,"Pub Sarania Hill, Guwahati, Assam, India",1km,...,,,,,07/26/2017 01:22:00 PM,12/08/2017 08:37:00 PM,02/15/2018 03:51:00 PM,91.772042,26.181606,2016-12-15 06:00:00


In [5]:
# now we want to see if there is an AR present at the same time and location as the landslides
# open the trackID for ARs
filename =  path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
ar = xr.open_dataset(filename, engine='netcdf4')
ar = ar.squeeze()

# Select months
idx = (ar.time.dt.month >= 12) | (ar.time.dt.month <= 5)
kid = ar.kidmap.sel(time=idx) # trackID for indexing

# slice the dates so both ds match
kid = kid.sel(time=slice('1979-12-01 00', '2019-05-31 00:00'))
kid

In [6]:
## for each landslide_id, if the lat/lon falls within an AR, keep that AR ID and landslide ID
landslideID = []
arID = []
landslide_lat = []
landslide_lon = []
for i, row in landslide.T.iteritems():
    t = kid.sel(lat=row['lat'], lon=row['lon'], time=row['time'], method='nearest').values
    # print(t)
    if t > 0:
        landslideID.append(row['event_id'])
        arID.append(t)
        landslide_lat.append(row['lat'])
        landslide_lon.append(row['lon'])
        
d = {'landslideID': landslideID, 'trackID': arID, 
     'landslide_lat': landslide_lat, 'landslide_lon': landslide_lon}
landslide_df = pd.DataFrame(data=d)
# convert the dtype for the trackID column
landslide_df = landslide_df.astype({'trackID': 'float64'})

landslide_df

Unnamed: 0,landslideID,trackID,landslide_lat,landslide_lon
0,37,88266.0,33.8778,74.3333
1,3116,101168.0,33.33876,75.192106
2,6020,111907.0,24.5784,91.7227
3,3099,101129.0,34.2256,73.673
4,9405,118455.0,26.7223,95.0243
5,1611,98243.0,25.1553,93.028
6,6019,111907.0,24.4136,91.7561
7,419,90945.0,33.237381,75.245282
8,1526,97918.0,33.0097,74.9402
9,9681,98633.0,27.218988,89.518792


### AR Type Data

In [7]:
# filepath = path_to_out + 'DJFMAM_ARS_TRACKID_daily.csv'
filepath = path_to_out + 'HMAAR_duration_DJFMAM.csv' # just dates in HMA
# filepath = path_to_out + 'HMA_AR_duration.csv' # dates based on AR Catalog
df = pd.read_csv(filepath)
df = df.drop(columns=['Unnamed: 0'])
df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0
1,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0
2,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0
3,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0
4,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0
...,...,...,...,...,...
2684,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0
2685,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0
2686,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0
2687,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0


### AR Data

In [8]:
## duration of ARs based on trackID
# for each AR Track ID, find the duration of that track
filename =  path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
ds = xr.open_dataset(filename, engine='netcdf4')
ds = ds.squeeze()



In [9]:
# Select months
idx = (ds.time.dt.month >= 12) | (ds.time.dt.month <= 5)
kivty = ds.kivty.sel(time=idx) # ivty for each trackID
kivtx = ds.kivtx.sel(time=idx) # ivtx for each trackID
kid = ds.kid.sel(time=idx) # trackID for indexing
hlat = ds.hlat.sel(time=idx) # head latitude of each trackID
hlon = ds.hlon.sel(time=idx) # head longitude of each trackID
tlat = ds.tlat.sel(time=idx) # tail latitude of each trackID
tlon = ds.tlon.sel(time=idx) # tail longitude of each trackID
# width = ds.width.sel(time=idx) # width of AR
ivtdir = ds.ivtdir.sel(time=idx) # direction of AR

In [10]:
%%time
# append ivty and ivtx to df for each trackID
maxivt = []
trID = []
trLAT = []
trLON = []
tlLAT = []
tlLON = []
# ar_width = []
ardirection = []

for i, track in enumerate(df.trackID.values):
    # get average IVT within AR
    idx = kivtx.where(kid == track, drop=True).values
    x = idx[~np.isnan(idx)]
    idy = kivty.where(kid == track, drop=True).values
    y = idy[~np.isnan(idy)]
    # calculate ivt for each time step within AR event
    ivt = np.sqrt(y**2 + x**2)
    # append maximum ivt for that event
    maxivt.append(ivt.max())
    trID.append(track)
    
    # head lat and lon of each AR
    idlat = hlat.where(kid == track, drop=True).values
    trLAT.append(np.asarray(idlat[~np.isnan(idlat)]).mean())
    idlon = hlon.where(kid == track, drop=True).values
    trLON.append(np.asarray(idlon[~np.isnan(idlon)]).mean())
    
    # tail lat and lon of each AR
    idlat = tlat.where(kid == track, drop=True).values
    tlLAT.append(np.asarray(idlat[~np.isnan(idlat)]).mean())
    idlon = tlon.where(kid == track, drop=True).values
    tlLON.append(np.asarray(idlon[~np.isnan(idlon)]).mean())
    
    # # avg width of each AR
    # idwidth = width.where(kid == track, drop=True).values
    # ar_width.append(np.asarray(idwidth[~np.isnan(idwidth)]).mean())
    
    # mode AR direction
    iddir = ivtdir.where(kid == track, drop=True).values
    ardirection.append(np.asarray(iddir[~np.isnan(iddir)]).mean())

CPU times: user 12min 9s, sys: 736 ms, total: 12min 10s
Wall time: 12min 10s


In [11]:
## check to see the AR direction for single AR
# test_track = df.trackID.values[1]
# tmp_iddir = ivtdir.where(kid == test_track, drop=True).values
# np.asarray(tmp_iddir[~np.isnan(tmp_iddir)])

In [12]:
# put collected data in df
df['ivt'] = maxivt
df['hlat'] = trLAT
df['hlon'] = trLON
df['tlat'] = tlLAT
df['tlon'] = tlLON
# df['width'] = ar_width
df['ivtdir'] = ardirection

df

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivt,hlat,hlon,tlat,tlon,ivtdir
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763
1,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943
2,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,178.765606,40.000000,64.000000,24.000000,58.500000,45.690158
3,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,182.037338,33.600000,56.800000,16.600000,45.300000,46.965576
4,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,142.136221,28.500000,69.000000,19.500000,49.500000,47.894359
...,...,...,...,...,...,...,...,...,...,...,...
2684,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,156.680465,40.500000,102.000000,28.500000,94.500000,13.649099
2685,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,229.579773,30.000000,100.500000,28.500000,84.000000,66.230859
2686,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,408.649579,24.000000,100.500000,21.000000,82.500000,81.632143
2687,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,388.614504,44.307692,140.653846,24.115385,113.653846,44.724359


In [13]:
# merge AR df and landslide DF
merged_data = pd.merge(df, landslide_df, how='outer', on='trackID')
merged_data

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivt,hlat,hlon,tlat,tlon,ivtdir,landslideID,landslide_lat,landslide_lon
0,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,
1,2861.0,2,1979-12-01 12:00:00,1979-12-02 00:00:00,12.0,178.340359,54.000000,88.950000,33.300000,54.750000,60.608763,,,
2,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,
3,2871.0,2,1979-12-08 06:00:00,1979-12-09 00:00:00,18.0,291.575695,58.855263,137.842105,37.657895,165.789474,61.661943,,,
4,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,178.765606,40.000000,64.000000,24.000000,58.500000,45.690158,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2722,128613.0,3,2019-05-06 06:00:00,2019-05-06 12:00:00,6.0,156.680465,40.500000,102.000000,28.500000,94.500000,13.649099,,,
2723,128655.0,3,2019-05-12 06:00:00,2019-05-12 12:00:00,6.0,229.579773,30.000000,100.500000,28.500000,84.000000,66.230859,,,
2724,128755.0,3,2019-05-24 00:00:00,2019-05-24 06:00:00,6.0,408.649579,24.000000,100.500000,21.000000,82.500000,81.632143,,,
2725,128763.0,3,2019-05-25 00:00:00,2019-05-25 06:00:00,6.0,388.614504,44.307692,140.653846,24.115385,113.653846,44.724359,,,


In [14]:
## test to make sure merged correctly
idx = merged_data.landslideID > 0
test = merged_data[idx]
test

Unnamed: 0,trackID,ar_cat,start_date,end_date,duration,ivt,hlat,hlon,tlat,tlon,ivtdir,landslideID,landslide_lat,landslide_lon
750,88247.0,1,2007-03-11 00:00:00,2007-03-12 00:00:00,24.0,226.386332,33.000000,72.937500,17.812500,57.937500,53.457743,35.0,34.093500,74.911200
751,88247.0,3,2007-03-12 00:00:00,2007-03-13 00:00:00,24.0,226.386332,33.000000,72.937500,17.812500,57.937500,53.457743,35.0,34.093500,74.911200
752,88266.0,1,2007-03-18 00:00:00,2007-03-20 12:00:00,60.0,244.164847,36.611111,68.055556,17.555556,41.055556,58.147779,37.0,33.877800,74.333300
753,88266.0,1,2007-03-18 00:00:00,2007-03-20 12:00:00,60.0,244.164847,36.611111,68.055556,17.555556,41.055556,58.147779,38.0,33.570800,73.494000
754,88266.0,2,2007-03-15 12:00:00,2007-03-18 00:00:00,60.0,244.164847,36.611111,68.055556,17.555556,41.055556,58.147779,37.0,33.877800,74.333300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2591,114676.0,3,2015-03-01 00:00:00,2015-03-03 18:00:00,66.0,221.376905,29.035714,85.285714,13.285714,72.535714,47.551777,6842.0,33.329600,75.203000
2594,114816.0,3,2015-03-15 00:00:00,2015-03-16 18:00:00,42.0,186.684338,26.785714,94.285714,18.428571,72.000000,49.675832,6908.0,33.097700,75.578700
2634,118455.0,3,2016-04-18 18:00:00,2016-04-20 12:00:00,42.0,503.741125,39.281250,138.281250,24.750000,103.500000,65.439492,9405.0,26.722300,95.024300
2656,121217.0,3,2017-01-25 18:00:00,2017-01-26 00:00:00,6.0,156.453201,34.500000,73.500000,21.000000,63.000000,42.058024,10940.0,34.084526,74.030277


### Export to CSV

In [16]:
# Export dataframe as csv
outfile = path_to_out + 'DJFMAM_ivt_ar_types.csv'     
merged_data.to_csv(outfile)