### Import

In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr

# import personal modules

# Path to modules
sys.path.append('../modules')

# Import my modules


In [2]:
# Directory configuration for this notebook, in order:
#   path_to_data -- project data (read only)
#   path_to_out  -- output files: numerical results, intermediate datafiles (read & write)
#   path_to_figs -- figures
path_to_data, path_to_out, path_to_figs = (
    '/home/nash/DATA/data/',
    '/home/nash/DATA/repositories/HASIA_ch2_analysis/out/',
    '/home/nash/DATA/repositories/HASIA_ch2_analysis/figs/',
)

## Generate the first DataFrame with the head (hlat, hlon) and tail (tlat, tlon) coordinates of each AR

In [3]:
%%time
# open the df with AR track ID and date
filepath = path_to_out + 'DJFMAM_ARS_TRACKID_explode.csv'
df = pd.read_csv(filepath)
# index the rows by timestamp; the original 'time' column is kept as a column too
df = df.set_index(pd.to_datetime(df['time']))

# for each AR Track ID and time step, find the hlat, hlon and tlat, tlon of each AR
filename =  path_to_data + 'ar_catalog/globalARcatalog_ERA-Interim_1979-2019_v3.0.nc'
idx = df.index.values

# The same timestamp can appear on several rows of df (one row per track ID),
# so select each time step only once: passing the repeated labels to .sel()
# would load duplicated copies of the same catalog slices into memory.
unique_times = np.unique(idx)

# Open the catalog in a context manager so the underlying netCDF file handle
# is closed once the selected subset has been loaded into memory.
with xr.open_dataset(filename, engine='netcdf4') as catalog:
    ds = catalog.squeeze()             # drop length-1 dimensions
    ds = ds.sel(time=unique_times)     # keep only the time steps present in df
    ds = ds.load()                     # read the data now, while the file is still open

CPU times: user 3min 27s, sys: 34.9 s, total: 4min 2s
Wall time: 4min 15s


In [4]:
%%time

# append hlat, hlon, tlat, tlon to df for each trackID

def _mean_where(field, mask):
    """Return the mean of the non-NaN values of `field` where `mask` is True.

    Parameters
    ----------
    field : xarray.DataArray
        Catalog variable for the selected time step (e.g. `hlat`).
    mask : xarray.DataArray
        Boolean mask marking the entries that belong to the track.

    Returns
    -------
    float
        Mean of the matching finite values, or NaN when nothing matches.
    """
    vals = field.where(mask, drop=True).values
    vals = np.asarray(vals[~np.isnan(vals)])
    # An empty selection would otherwise give NaN plus a RuntimeWarning from
    # numpy; return the same NaN explicitly instead.
    return vals.mean() if vals.size else np.nan


# head (h*) and tail (t*) coordinates to collect, in output column order
endpoint_vars = ('hlat', 'hlon', 'tlat', 'tlon')
collected = {name: [] for name in endpoint_vars}

for date_idx, track in zip(df.index.values, df.kidmap.values):
    tmp = ds.sel(time=date_idx)

    # entries of this time step that belong to the current track ID;
    # computed once and reused for all four coordinates
    is_track = tmp.kid == track

    for name in endpoint_vars:
        collected[name].append(_mean_where(tmp[name], is_track))

# put collected data in df
for name in endpoint_vars:
    df[name] = collected[name]
df

CPU times: user 4min 35s, sys: 3.88 s, total: 4min 39s
Wall time: 4min 55s


Unnamed: 0_level_0,Unnamed: 0,date,time,kidmap,AR_CAT,hlat,hlon,tlat,tlon
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1979-12-01 12:00:00,0,1979-12-01,1979-12-01 12:00:00,2864.0,2,33.0,109.5,18.0,93.0
1979-12-01 12:00:00,1,1979-12-01,1979-12-01 12:00:00,2861.0,2,40.5,67.5,27.0,51.0
1979-12-01 18:00:00,2,1979-12-01,1979-12-01 18:00:00,2864.0,2,33.0,112.5,16.5,93.0
1979-12-01 18:00:00,3,1979-12-01,1979-12-01 18:00:00,2861.0,2,43.5,70.5,25.5,54.0
1979-12-02 00:00:00,4,1979-12-02,1979-12-02 00:00:00,2861.0,1,51.0,75.0,24.0,54.0
...,...,...,...,...,...,...,...,...,...
2019-05-21 12:00:00,7815,2019-05-21,2019-05-21 12:00:00,128712.0,1,24.0,75.0,24.0,0.0
2019-05-24 00:00:00,7816,2019-05-24,2019-05-24 00:00:00,128755.0,3,24.0,100.5,21.0,82.5
2019-05-25 00:00:00,7817,2019-05-25,2019-05-25 00:00:00,128763.0,3,37.5,118.5,27.0,81.0
2019-05-25 12:00:00,7818,2019-05-25,2019-05-25 12:00:00,128756.0,3,72.0,76.5,36.0,64.5


In [5]:
# Save the dataframe to a CSV file in the output directory
outfile = f"{path_to_out}DJFMAM_ar_trackID_htlatlon.csv"
df.to_csv(path_or_buf=outfile)