In [1]:
# Step 1: identify rapid intensification (RI) cases

import tropycal.tracks as tracks
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import xarray as xr

In [2]:
# Basin to analyse; this string is also reused later when naming the output files.
desired_basin = 'north_atlantic'

# Load the track dataset for the chosen basin from the 'hurdat' source.
# NOTE(review): include_btk=False presumably leaves out preliminary best-track data — confirm against tropycal docs.
basin = tracks.TrackDataset(basin=desired_basin,source='hurdat',include_btk=False)

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (2.52 seconds)


In [3]:
# Years of Evaluation (both ends inclusive: the season loop uses range(start_year, end_year+1))
start_year = 2000
end_year = 2022
ri_threshold = 30 # kt; minimum intensity increase over `time_difference` hours for a time to be flagged as RI
time_difference = 24 # hours (setting); length of the window over which the intensity change is measured

In [4]:
# Build a list of (storm name, season year) tuples for every storm in the year range
# that should be evaluated. A storm is skipped when:
#   * its name occurs more than once within the season (see drop_duplicates below),
#   * it is named 'UNNAMED', or
#   * its ID starts with 'CP' (central Pacific).

year_range = range(start_year,end_year+1)

all_names_list = []
for year_loop in year_range:
    # Convert the season to a dataframe once and reuse it for both columns
    season_df = basin.get_season(year_loop).to_dataframe()

    # keep=False discards *every* occurrence of a name that appears more than once in the
    # season (per the original author, these repeats are tropical depressions)
    sname = season_df['name'].drop_duplicates(keep=False)
    ids = season_df['id']

    for loop in sname.index:
        if sname[loop] == 'UNNAMED': # skips unnamed storms
            continue
        if ids[loop][0:2] == 'CP': # don't include central pacific storms
            continue
        all_names_list.append((sname[loop],year_loop))
        

In [5]:
# Get individual storm

# Parallel accumulators, one entry per recorded RI event:
# storm name, season, RI start time, RI end time, RI number within the storm, storm ID.
(all_RI_names,
 all_RI_years,
 all_RI_starts,
 all_RI_ends,
 RI_number_all,
 storm_ids_all) = [[] for _ in range(6)]

def my_fun(x): # Helper passed to rolling(...).apply
    """Return the net change across a window: last element minus first element.

    ``x`` must support positional indexing through ``.iloc`` (e.g. a pandas Series).
    """
    first_value = x.iloc[0]
    last_value = x.iloc[-1]
    return last_value - first_value

# Detect RI periods storm by storm and append one record per RI event to the
# all_RI_* / RI_number_all / storm_ids_all accumulators.

# Record types that are kept; every other record type is filtered out
# (original author's intent: only take TC records, not invests)
tc_types = ['TS', 'TD', 'SD', 'SS', 'HU', 'TY', 'ST']

# An RI period ends `time_difference` hours after the last time that meets the threshold
ri_duration = timedelta(hours=time_difference)

for all_storms_loop in range(len(all_names_list)): # This loop iterates through all storms
    # Look the storm up once and reuse the object for every field below
    storm = basin.get_storm(all_names_list[all_storms_loop])

    storm_inds = np.where(np.isin(storm.type, tc_types))

    storm_time = storm['time'][storm_inds] # times of the retained records
    storm_intensity = storm['vmax'][storm_inds] # intensity of the retained records
    storm_id = storm['id']

    storm_pd = pd.DataFrame(storm_intensity,index=storm_time) # Intensity indexed by time

    # Intensity change across each window (last minus first value, via my_fun).
    # The window spans time_difference+6 hours and needs (time_difference+6)/6 records;
    # NOTE(review): this presumably relies on 6-hourly records and on pandas' default
    # right-closed offset windows — confirm. The shift moves each result back by
    # time_difference/6 rows so it sits on the row where the change begins.
    rolling_diff = (
        storm_pd.rolling(window=timedelta(hours=time_difference+6),
                         min_periods=int((time_difference+6)/6),
                         center=False)
        .apply(my_fun)
        .shift(-(int(time_difference/6)))
    )

    # Boolean mask, shape (n_records, 1): True where the change meets the RI threshold.
    # (NaN rows compare as False.)
    threshold_crossings = (rolling_diff >= ri_threshold).to_numpy()

    # Row-to-row difference of the mask: +1 marks False -> True, -1 marks True -> False
    changes = pd.DataFrame(threshold_crossings).rolling(window=2).apply(my_fun)
    if threshold_crossings[0, 0]: # if the storm starts with RI then we need to include it!
        changes.iloc[[0]] = 1
    up_cross = np.where(changes == 1)[0] # Where false -> true
    down_cross = np.where(changes == -1)[0] - 1 # Where true -> false. Minus one gets you last qualifying time

    if threshold_crossings.sum() == 0: # No need to record any RIs if there are no RIs detected
        continue

    RI_start = storm_time[up_cross] # Identify RI start time
    if len(down_cross) == 0:
        # No true -> false transition was found: end each RI one window after its start
        RI_end = RI_start + ri_duration
    else:
        # RI end is the last qualifying time plus the window length
        RI_end = storm_time[down_cross] + ri_duration
        if len(up_cross) - len(down_cross) == 1: # the final RI has no matching down cross; add its end
            RI_end = np.append(RI_end,RI_start[-1] + ri_duration)

    for RI_same_storm in range(0,len(RI_start)): # Unravel so that there is one record per RI, not per storm
        # Only RI periods starting at hour 0, 6, 12 or 18 are recorded (synoptic times)
        if RI_start[RI_same_storm].hour in (0, 6, 12, 18):
            all_RI_names.append(all_names_list[all_storms_loop][0])
            all_RI_years.append(all_names_list[all_storms_loop][1])
            all_RI_starts.append(RI_start[RI_same_storm])
            all_RI_ends.append(RI_end[RI_same_storm])
            # Numbering counts every detected period, including any skipped above
            RI_number_all.append(RI_same_storm+1)
            storm_ids_all.append(storm_id)

    print('Current Storm: ' + str(all_names_list[all_storms_loop]))

Current Storm: ('ALBERTO', 2000)
Current Storm: ('DEBBY', 2000)
Current Storm: ('FLORENCE', 2000)
Current Storm: ('GORDON', 2000)
Current Storm: ('HELENE', 2000)
Current Storm: ('ISAAC', 2000)
Current Storm: ('JOYCE', 2000)
Current Storm: ('KEITH', 2000)
Current Storm: ('MICHAEL', 2000)
Current Storm: ('DEAN', 2001)
Current Storm: ('ERIN', 2001)
Current Storm: ('FELIX', 2001)
Current Storm: ('HUMBERTO', 2001)
Current Storm: ('IRIS', 2001)
Current Storm: ('MICHELLE', 2001)
Current Storm: ('GUSTAV', 2002)
Current Storm: ('ISIDORE', 2002)
Current Storm: ('LILI', 2002)
Current Storm: ('FABIAN', 2003)
Current Storm: ('ISABEL', 2003)
Current Storm: ('ALEX', 2004)
Current Storm: ('CHARLEY', 2004)
Current Storm: ('DANIELLE', 2004)
Current Storm: ('FRANCES', 2004)
Current Storm: ('IVAN', 2004)
Current Storm: ('JEANNE', 2004)
Current Storm: ('KARL', 2004)
Current Storm: ('LISA', 2004)
Current Storm: ('CINDY', 2005)
Current Storm: ('DENNIS', 2005)
Current Storm: ('EMILY', 2005)
Current Storm: ('K

In [6]:
# Both tables share the same four-level index: one row per RI event
ri_index_arrays = [all_RI_years,all_RI_names,storm_ids_all,RI_number_all]
ri_index_names = ['Season','Storm_Name','Storm_ID','RI_Number'] # These are the axis

# Table of RI start times
RI_start_pd = pd.DataFrame(all_RI_starts,index=ri_index_arrays,columns=['RI Start'])
RI_start_pd.index.names = ri_index_names

# Table of RI end times
RI_end_pd = pd.DataFrame(all_RI_ends,index=ri_index_arrays,columns=['RI End'])
RI_end_pd.index.names = ri_index_names

# Place the start and end columns side by side
combined_RI_pd = pd.concat([RI_start_pd,RI_end_pd],axis=1)

# Same data as an xarray object
RI_XR = combined_RI_pd.to_xarray()


In [7]:
# Write the RI events to disk as both CSV and netCDF; the two files share one stem
# and differ only in extension. Personally, I think the CSV/Pandas Dataframe looks cleaner.
# NOTE(review): the output directory is a hard-coded absolute path.

output_stem = '/Users/acheung/data/RI_Cases/RI_Events_'+desired_basin
combined_RI_pd.to_csv(output_stem+'.csv')
RI_XR.to_netcdf(output_stem+'.nc')

In [8]:
# Example of selecting from the MultiIndex table: keep only rows whose RI_Number level equals 2
is_second_ri = combined_RI_pd.index.get_level_values('RI_Number') == 2
combined_RI_pd.loc[is_second_ri]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2000,KEITH,AL152000,2,2000-10-04 00:00:00,2000-10-05 18:00:00
2001,HUMBERTO,AL102001,2,2001-09-22 18:00:00,2001-09-24 00:00:00
2002,LILI,AL132002,2,2002-10-01 18:00:00,2002-10-03 06:00:00
2004,CHARLEY,AL032004,2,2004-08-13 00:00:00,2004-08-14 06:00:00
2004,JEANNE,AL112004,2,2004-09-19 18:00:00,2004-09-20 18:00:00
2005,DENNIS,AL042005,2,2005-07-09 06:00:00,2005-07-10 12:00:00
2005,EMILY,AL052005,2,2005-07-15 18:00:00,2005-07-17 00:00:00
2005,KATRINA,AL122005,2,2005-08-26 06:00:00,2005-08-27 06:00:00
2007,DEAN,AL042007,2,2007-08-16 18:00:00,2007-08-18 12:00:00
2008,GUSTAV,AL072008,2,2008-08-29 12:00:00,2008-08-31 06:00:00


In [9]:
# Raise pandas' row display limit to 500 so the table below is not truncated.
# This is a global option and stays in effect for the rest of the session.
pd.set_option('display.max_rows', 500)

# Display the full table of RI events
combined_RI_pd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2000,ALBERTO,AL032000,1,2000-08-11 12:00:00,2000-08-12 12:00:00
2000,DEBBY,AL072000,1,2000-08-20 06:00:00,2000-08-21 18:00:00
2000,FLORENCE,AL102000,1,2000-09-10 18:00:00,2000-09-12 06:00:00
2000,GORDON,AL112000,1,2000-09-15 12:00:00,2000-09-16 18:00:00
2000,HELENE,AL122000,1,2000-09-20 18:00:00,2000-09-22 00:00:00
2000,ISAAC,AL132000,1,2000-09-22 18:00:00,2000-09-24 12:00:00
2000,JOYCE,AL142000,1,2000-09-26 12:00:00,2000-09-27 18:00:00
2000,KEITH,AL152000,1,2000-09-29 12:00:00,2000-10-02 00:00:00
2000,KEITH,AL152000,2,2000-10-04 00:00:00,2000-10-05 18:00:00
2000,MICHAEL,AL172000,1,2000-10-16 18:00:00,2000-10-18 00:00:00


In [10]:
# Table dimensions as (rows, columns); each row is one recorded RI event
combined_RI_pd.shape

(146, 2)