In [1]:
# Step one is to identify RI Cases

import tropycal.tracks as tracks
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import xarray as xr

In [2]:
desired_basin = 'east_pacific'

basin = tracks.TrackDataset(basin=desired_basin,source='hurdat',include_btk=False)

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (12.51 seconds)


In [3]:
# Years of Evaluation
start_year = 2000
end_year = 2022
ri_threshold = 30 # kt
time_difference = 24 # hours (setting)

In [4]:
# create a dataset of all storm names in year range

year_range = range(start_year,end_year+1)

all_names_list = []
for year_loop in year_range:
    curr_year_list = []
    sname = basin.get_season(year_loop).to_dataframe()['name'].drop_duplicates(keep=False) # The repeats just drops TDs
    ids = basin.get_season(year_loop).to_dataframe()['id']
    for loop in sname.index:
        if sname[loop] == 'UNNAMED': # skips unnamed storms
            pass
        else:
            if ids[loop][0:2] != 'CP': # don't include central pacific storms
                storm_loop = (sname[loop],year_loop)
                all_names_list.append(storm_loop)
    # all_names_list.append(curr_year_list)
        

In [6]:
# Get individual storm

all_RI_names = []
all_RI_years = []
all_RI_starts = []
all_RI_ends = []
RI_number_all = []
storm_ids_all = []

def my_fun(x): # This function is used to subtract in rolling functions
#     print(x)
    return x.iloc[-1] - x.iloc[0]

def peak_time_in_window(x):
    max_loc = np.where(x == max(x))[0]
    max_loc = max_loc[0]
#     print(x,x.index[max_loc])
    return x.index[0],x.index[max_loc] # returns window start and time of maximum value in wondow

for all_storms_loop in range(len(all_names_list)): # This loops iterates through all storms
# for all_storms_loop in [350]:
    storm_inds = np.where((basin.get_storm(all_names_list[all_storms_loop]).type=='TS') | 
         (basin.get_storm(all_names_list[all_storms_loop]).type=='TD') |
        (basin.get_storm(all_names_list[all_storms_loop]).type=='SD')|
         (basin.get_storm(all_names_list[all_storms_loop]).type=='SS' )|
          (basin.get_storm(all_names_list[all_storms_loop]).type=='HU')|
           (basin.get_storm(all_names_list[all_storms_loop]).type=='TY')|
            (basin.get_storm(all_names_list[all_storms_loop]).type=='ST')) # This ensures we only take TC files, not invests
    
    pd_time = basin.get_storm(all_names_list[all_storms_loop])['time']
    storm_hours = np.asarray([datetime.time(d).hour for d in pd_time])
    synoyptic_inds = np.where((storm_hours == 6) | (storm_hours == 12) | (storm_hours == 18) | (storm_hours == 0))[0]
    time_inds = np.where(np.asarray(np.unique(np.concatenate((synoyptic_inds,storm_inds[0])),return_counts=True)[1])==2)[0]
    
    storm_time = basin.get_storm(all_names_list[all_storms_loop])['time'][time_inds] # calls times of storms
    storm_intensity = basin.get_storm(all_names_list[all_storms_loop])['vmax'][time_inds] # calls intensity of storms
    storm_id = basin.get_storm(all_names_list[all_storms_loop])['id']
    
    storm_pd = pd.DataFrame(storm_intensity,index=storm_time) # Generate a dataframe for intensity

    # Find time differences
    rolling_diff = storm_pd.rolling(window=timedelta(hours=time_difference+6),min_periods=int((time_difference+6)/6),center=False).apply(my_fun).shift(-(int(time_difference/6)))
    max_in_window_list = []
    recorder = 0
    for tloop in range(len(storm_pd)):
        current_ints = storm_pd.loc[storm_pd[0].index[tloop]:storm_pd[0].index[tloop]+timedelta(hours=24)]
        if len(current_ints) == 5:
            peak_in_window_time = peak_time_in_window(current_ints[0])
            max_in_window_list.append(peak_in_window_time)
            recorder = 1 # if this is not triggered, we skip this! For storms < 24 h, this can be not triggered!
    if recorder == 1:
        max_in_window_pd = pd.DataFrame(max_in_window_list)
        max_in_window_pd.index = max_in_window_pd[0]
        max_in_window_dropped = max_in_window_pd[1]
#             print(peak_in_window_time)
            
    #     max_item = storm_pd.rolling(window=timedelta(hours=time_difference+6),min_periods=int((time_difference+6)/6),center=False).apply(peak_time_in_window).shift(-(int(time_difference/6)))

    # This creates a boolean of whether RI threshold is met
    threshold_crossings = np.diff(rolling_diff >= ri_threshold, prepend=False)
    changes = pd.DataFrame(threshold_crossings).rolling(window=2).apply(my_fun) # Identifies when thresholds met and not met
    if threshold_crossings[0] == True: # if TC starts with RI then we need to include it!
        changes.iloc[[0]] = 1
    up_cross = np.where(changes == 1)[0] # Where false -> true
    down_cross = np.where(changes == -1)[0] - 1 # Where true -> false. Minus one gets you last qualifying time
    
    if threshold_crossings.sum() == 0: # No need to record any RIs if there are no RIs detected
        pass
    else:
        RI_start = storm_time[up_cross] # Identify RI start time
        if len(down_cross) == 0: # If there are no recorded false -> true, then uses the time when intensity flatlines or end of 24 h period
            RI_end = max_in_window_dropped.loc[RI_start]
        else: # we take the time when intensity flatlines for the last 24 h period
#             RI_end = storm_time[down_cross] + timedelta(hours=24) # RI end picks up the last time from this RI
            RI_end = np.asarray(max_in_window_dropped.loc[storm_time[down_cross]])
            if len(up_cross) - len(down_cross) == 1: # sometimes the down cross misses the last cross, this adds it in
#                 RI_end = np.append(RI_end,RI_start[-1]+timedelta(hours=24))      
                RI_end = np.append(RI_end,max_in_window_dropped.loc[RI_start[-1]])        

        for RI_same_storm in range(0,len(RI_start)): # This loops unravels the append by RI, so not by storm.
            if (RI_start[RI_same_storm].hour == 0 # Take synoptic times only
               ) or  (RI_start[RI_same_storm].hour == 6
                    ) or (RI_start[RI_same_storm].hour == 12) or(
                RI_start[RI_same_storm].hour == 18):
                
                all_RI_names.append(all_names_list[all_storms_loop][0])
                all_RI_years.append(all_names_list[all_storms_loop][1])
                all_RI_starts.append(RI_start[RI_same_storm])
                all_RI_ends.append(RI_end[RI_same_storm])
                RI_number_all.append(RI_same_storm+1)
                storm_ids_all.append(storm_id)
            else: 
                pass
        
        print('Current Storm: ' + str(all_names_list[all_storms_loop]))
        # interval that meets the set threshold/time interval. Plus 24 hours includes the 24 hours after the last threshold met

Current Storm: ('ALETTA', 2000)
Current Storm: ('CARLOTTA', 2000)
Current Storm: ('DANIEL', 2000)
Current Storm: ('ADOLPH', 2001)
Current Storm: ('FLOSSIE', 2001)
Current Storm: ('JULIETTE', 2001)
Current Storm: ('KIKO', 2001)
Current Storm: ('NARDA', 2001)
Current Storm: ('DOUGLAS', 2002)
Current Storm: ('ELIDA', 2002)
Current Storm: ('FAUSTO', 2002)
Current Storm: ('GENEVIEVE', 2002)
Current Storm: ('HERNAN', 2002)
Current Storm: ('KENNA', 2002)
Current Storm: ('CARLOS', 2003)
Current Storm: ('IGNACIO', 2003)
Current Storm: ('JIMENA', 2003)
Current Storm: ('LINDA', 2003)
Current Storm: ('NORA', 2003)
Current Storm: ('PATRICIA', 2003)
Current Storm: ('DARBY', 2004)
Current Storm: ('FRANK', 2004)
Current Storm: ('HOWARD', 2004)
Current Storm: ('ISIS', 2004)
Current Storm: ('JAVIER', 2004)
Current Storm: ('FERNANDA', 2005)
Current Storm: ('HILARY', 2005)
Current Storm: ('KENNETH', 2005)
Current Storm: ('MAX', 2005)
Current Storm: ('OTIS', 2005)
Current Storm: ('BUD', 2006)
Current Storm

In [7]:
# Create pandas dataframes of RI starts
RI_start_pd = pd.DataFrame(all_RI_starts,index=[all_RI_years,all_RI_names,storm_ids_all,RI_number_all],columns=['RI Start'])
RI_start_pd.index.names = ['Season','Storm_Name','Storm_ID','RI_Number'] # These are the axis

# Create pandas dataframes of RI ends
RI_end_pd = pd.DataFrame(all_RI_ends,index=[all_RI_years,all_RI_names,storm_ids_all,RI_number_all],columns=['RI End'])
RI_end_pd.index.names = ['Season','Storm_Name','Storm_ID','RI_Number'] # These are the axis

# Combines the two pandas dataframes
combined_RI_pd = pd.concat([RI_start_pd,RI_end_pd],axis=1)

# Create Xarray
RI_XR = combined_RI_pd.to_xarray()


In [8]:
combined_RI_pd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2000,ALETTA,EP012000,1,2000-05-23 12:00:00,2000-05-24 18:00:00
2000,CARLOTTA,EP032000,1,2000-06-19 18:00:00,2000-06-21 06:00:00
2000,DANIEL,EP062000,1,2000-07-23 12:00:00,2000-07-25 18:00:00
2001,ADOLPH,EP012001,1,2001-05-26 12:00:00,2001-05-29 00:00:00
2001,FLOSSIE,EP072001,1,2001-08-26 18:00:00,2001-08-27 18:00:00
...,...,...,...,...,...
2022,ESTELLE,EP062022,1,2022-07-15 18:00:00,2022-07-17 00:00:00
2022,HOWARD,EP092022,1,2022-08-07 18:00:00,2022-08-09 00:00:00
2022,KAY,EP122022,1,2022-09-04 18:00:00,2022-09-06 00:00:00
2022,ORLENE,EP162022,1,2022-10-01 00:00:00,2022-10-02 12:00:00


In [9]:
# Save as csv and netCDF. Personally, I think the CSV/Pandas Dataframe looks cleaner.

combined_RI_pd.to_csv('/Users/acheung/data/RI_Cases/RI_Events_'+desired_basin+'.csv')
RI_XR.to_netcdf('/Users/acheung/data/RI_Cases/RI_Events_'+desired_basin+'.nc')

In [10]:
# An example of how to call in multiindex pandas
combined_RI_pd.loc[(combined_RI_pd.index.get_level_values('RI_Number') == 2)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,JULIETTE,EP112001,2,2001-09-24 18:00:00,2001-09-25 06:00:00
2004,DARBY,EP052004,2,2004-07-27 18:00:00,2004-07-29 06:00:00
2005,KENNETH,EP112005,2,2005-09-16 06:00:00,2005-09-17 12:00:00
2006,BUD,EP032006,2,2006-07-12 06:00:00,2006-07-13 06:00:00
2008,NORBERT,EP152008,2,2008-10-10 06:00:00,2008-10-11 06:00:00
2009,CARLOS,EP042009,2,2009-07-13 12:00:00,2009-07-14 12:00:00
2009,GUILLERMO,EP102009,2,2009-08-14 00:00:00,2009-08-15 12:00:00
2010,DARBY,EP052010,2,2010-06-24 12:00:00,2010-06-25 18:00:00
2013,RAYMOND,EP172013,2,2013-10-27 00:00:00,2013-10-28 00:00:00
2015,ANDRES,EP012015,2,2015-05-29 18:00:00,2015-05-30 18:00:00


In [11]:
pd.set_option('display.max_rows', 500)

combined_RI_pd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2000,ALETTA,EP012000,1,2000-05-23 12:00:00,2000-05-24 18:00:00
2000,CARLOTTA,EP032000,1,2000-06-19 18:00:00,2000-06-21 06:00:00
2000,DANIEL,EP062000,1,2000-07-23 12:00:00,2000-07-25 18:00:00
2001,ADOLPH,EP012001,1,2001-05-26 12:00:00,2001-05-29 00:00:00
2001,FLOSSIE,EP072001,1,2001-08-26 18:00:00,2001-08-27 18:00:00
2001,JULIETTE,EP112001,1,2001-09-22 18:00:00,2001-09-24 06:00:00
2001,JULIETTE,EP112001,2,2001-09-24 18:00:00,2001-09-25 06:00:00
2001,KIKO,EP122001,1,2001-09-22 12:00:00,2001-09-23 12:00:00
2001,NARDA,EP162001,1,2001-10-20 18:00:00,2001-10-22 00:00:00
2002,DOUGLAS,EP052002,1,2002-07-21 12:00:00,2002-07-22 18:00:00


In [12]:
combined_RI_pd.shape

(164, 2)