In [1]:
# Step one is to identify RI Cases

import tropycal.tracks as tracks
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import xarray as xr

In [2]:
# Basin to analyse; the same string is reused later when naming the output files.
desired_basin = 'east_pacific'

# Read the HURDAT2 track archive for that basin.
basin = tracks.TrackDataset(
    basin=desired_basin,
    source='hurdat',
    include_btk=False,
)

--> Starting to read in HURDAT2 data
--> Completed reading in HURDAT2 data (1.11 seconds)


In [3]:
# Evaluation settings
start_year, end_year = 2000, 2022  # first and last seasons examined
ri_threshold = 30     # kt: intensity change that must be met or exceeded
time_difference = 24  # hours: interval over which the intensity change is measured

In [4]:
# Build a flat list of (storm name, season) tuples for every storm kept in the year range.

year_range = range(start_year, end_year + 1)

all_names_list = []
for year_loop in year_range:
    # Fetch and convert the season once per year and reuse it for both columns.
    season_df = basin.get_season(year_loop).to_dataframe()

    # keep=False removes every name that occurs more than once within the season;
    # the surviving rows keep their original index labels, so they still line up with `ids`.
    sname = season_df['name'].drop_duplicates(keep=False)
    ids = season_df['id']

    for loop in sname.index:
        if sname[loop] == 'UNNAMED':  # skip unnamed storms
            continue
        if ids[loop][0:2] == 'CP':  # skip storms whose ID starts with CP (central Pacific)
            continue
        all_names_list.append((sname[loop], year_loop))
        

In [8]:
# Accumulators for the storm loop below: each list receives one entry per recorded RI event.
all_RI_names, all_RI_years = [], []
all_RI_starts, all_RI_ends = [], []
RI_number_all, storm_ids_all = [], []

def my_fun(x):
    """Return the change across a rolling window: its last value minus its first.

    `x` is the window handed over by a pandas rolling ``apply`` and is indexed
    positionally through ``.iloc``.
    """
    first_value = x.iloc[0]
    last_value = x.iloc[-1]
    return last_value - first_value

# Status codes kept for the analysis; records with any other status are dropped.
tc_types = ['TS', 'TD', 'SD', 'SS', 'HU', 'TY', 'ST']

for all_storms_loop, storm_key in enumerate(all_names_list):  # storm_key is a (name, season) tuple
    # Look the storm up once per iteration and reuse it for every field.
    storm = basin.get_storm(storm_key)

    storm_inds = np.where(np.isin(storm.type, tc_types))  # positions of the records to keep

    storm_time = storm['time'][storm_inds]  # times of the kept records
    storm_intensity = storm['vmax'][storm_inds]  # intensity at those times
    storm_id = storm['id']

    if len(storm_time) == 0:  # nothing left after filtering, so there is nothing to evaluate
        continue

    storm_pd = pd.DataFrame(storm_intensity, index=storm_time)  # intensity indexed by time

    # Intensity change over `time_difference` hours, stored at the START of each interval.
    # The rolling window is 6 h longer than the interval so that it holds both endpoints
    # (assumes 6-hourly records), and shift() moves the result back from the end of the
    # interval to its start.
    window_hours = time_difference + 6
    rolling_diff = (
        storm_pd
        .rolling(window=timedelta(hours=window_hours),
                 min_periods=int(window_hours / 6),
                 center=False)
        .apply(my_fun)
        .shift(-(int(time_difference / 6)))
    )

    # 1-D boolean mask: True where the change meets the RI threshold (NaN compares as False).
    threshold_crossings = (rolling_diff >= ri_threshold).to_numpy().ravel()

    if not threshold_crossings.any():  # no RI detected for this storm
        continue

    # Pad the mask with a 0 on both sides before differencing so that a run of True values
    # touching the first or last record still produces a start and an end. Every start is
    # therefore matched by exactly one end.
    changes = np.diff(threshold_crossings.astype(int), prepend=0, append=0)
    up_cross = np.where(changes == 1)[0]  # first qualifying record of each RI period
    down_cross = np.where(changes == -1)[0] - 1  # last qualifying record of each RI period

    RI_start = storm_time[up_cross]
    # The last qualifying record starts an interval that itself meets the threshold, so the
    # RI period ends `time_difference` hours after it.
    RI_end = storm_time[down_cross] + timedelta(hours=time_difference)

    for RI_same_storm in range(len(RI_start)):  # one output row per RI period, not per storm
        if RI_start[RI_same_storm].hour not in (0, 6, 12, 18):  # keep synoptic start hours only
            continue

        all_RI_names.append(storm_key[0])
        all_RI_years.append(storm_key[1])
        all_RI_starts.append(RI_start[RI_same_storm])
        all_RI_ends.append(RI_end[RI_same_storm])
        RI_number_all.append(RI_same_storm + 1)
        storm_ids_all.append(storm_id)

    print('Current Storm: ' + str(storm_key))

Current Storm: ('ALETTA', 2000)
Current Storm: ('CARLOTTA', 2000)
Current Storm: ('DANIEL', 2000)
Current Storm: ('ADOLPH', 2001)
Current Storm: ('FLOSSIE', 2001)
Current Storm: ('JULIETTE', 2001)
Current Storm: ('KIKO', 2001)
Current Storm: ('NARDA', 2001)
Current Storm: ('DOUGLAS', 2002)
Current Storm: ('ELIDA', 2002)
Current Storm: ('FAUSTO', 2002)
Current Storm: ('GENEVIEVE', 2002)
Current Storm: ('HERNAN', 2002)
Current Storm: ('KENNA', 2002)
Current Storm: ('CARLOS', 2003)
Current Storm: ('IGNACIO', 2003)
Current Storm: ('JIMENA', 2003)
Current Storm: ('LINDA', 2003)
Current Storm: ('NORA', 2003)
Current Storm: ('PATRICIA', 2003)
Current Storm: ('DARBY', 2004)
Current Storm: ('FRANK', 2004)
Current Storm: ('HOWARD', 2004)
Current Storm: ('ISIS', 2004)
Current Storm: ('JAVIER', 2004)
Current Storm: ('FERNANDA', 2005)
Current Storm: ('HILARY', 2005)
Current Storm: ('KENNETH', 2005)
Current Storm: ('MAX', 2005)
Current Storm: ('OTIS', 2005)
Current Storm: ('BUD', 2006)
Current Storm

In [9]:
# Index levels shared by both tables, and the names given to them
index_levels = [all_RI_years, all_RI_names, storm_ids_all, RI_number_all]
level_names = ['Season', 'Storm_Name', 'Storm_ID', 'RI_Number']

# Table of RI start times
RI_start_pd = pd.DataFrame(all_RI_starts, index=index_levels, columns=['RI Start'])
RI_start_pd.index.names = level_names

# Table of RI end times, indexed the same way
RI_end_pd = pd.DataFrame(all_RI_ends, index=index_levels, columns=['RI End'])
RI_end_pd.index.names = level_names

# Place the start and end columns side by side
combined_RI_pd = pd.concat([RI_start_pd, RI_end_pd], axis=1)

# Same table as an xarray object
RI_XR = combined_RI_pd.to_xarray()


In [10]:
# Write the RI events to disk as both CSV and netCDF; the two files differ only in extension.
# Personally, I think the CSV/Pandas Dataframe looks cleaner.

output_stem = '/Users/acheung/data/RI_Events_' + desired_basin

combined_RI_pd.to_csv(output_stem + '.csv')
RI_XR.to_netcdf(output_stem + '.nc')

In [11]:
# Example of selecting from the MultiIndex table: rows whose RI_Number level equals 2
is_second_ri = combined_RI_pd.index.get_level_values('RI_Number') == 2
combined_RI_pd.loc[is_second_ri]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RI Start,RI End
Season,Storm_Name,Storm_ID,RI_Number,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,JULIETTE,EP112001,2,2001-09-24 18:00:00,2001-09-25 18:00:00
2005,KENNETH,EP112005,2,2005-09-16 06:00:00,2005-09-17 18:00:00
2008,NORBERT,EP152008,2,2008-10-10 06:00:00,2008-10-11 06:00:00
2009,CARLOS,EP042009,2,2009-07-13 12:00:00,2009-07-14 12:00:00
2009,GUILLERMO,EP102009,2,2009-08-14 00:00:00,2009-08-15 12:00:00
2015,BLANCA,EP022015,2,2015-06-05 12:00:00,2015-06-06 12:00:00
2015,IGNACIO,EP122015,2,2015-08-28 18:00:00,2015-08-30 06:00:00
2016,LESTER,EP132016,2,2016-08-28 12:00:00,2016-08-30 00:00:00
2018,NORMAN,EP162018,2,2018-09-01 18:00:00,2018-09-02 18:00:00
2018,OLIVIA,EP172018,2,2018-09-06 00:00:00,2018-09-07 00:00:00


In [20]:
# Display the storm ID stored for each recorded RI event (an ID repeats when a storm has several events).
storm_ids_all

['EP012000',
 'EP032000',
 'EP062000',
 'EP012001',
 'EP072001',
 'EP112001',
 'EP112001',
 'EP122001',
 'EP162001',
 'EP052002',
 'EP092002',
 'EP142002',
 'EP092003',
 'EP102003',
 'EP122003',
 'EP142003',
 'EP052004',
 'EP112004',
 'EP122004',
 'EP132004',
 'EP112005',
 'EP112005',
 'EP132005',
 'EP152005',
 'EP032006',
 'EP052006',
 'EP092006',
 'EP112006',
 'EP132006',
 'EP172006',
 'EP212006',
 'EP062007',
 'EP092007',
 'EP122007',
 'EP092008',
 'EP152008',
 'EP152008',
 'EP042009',
 'EP042009',
 'EP042009',
 'EP102009',
 'EP102009',
 'EP042010',
 'EP052010',
 'EP022011',
 'EP032011',
 'EP042011',
 'EP052011',
 'EP072011',
 'EP092011',
 'EP102011',
 'EP132011',
 'EP022012',
 'EP032012',
 'EP042012',
 'EP052012',
 'EP072012',
 'EP132012',
 'EP162012',
 'EP022013',
 'EP112013',
 'EP132013',
 'EP172013',
 'EP012014',
 'EP032014',
 'EP072014',
 'EP082014',
 'EP092014',
 'EP142014',
 'EP152014',
 'EP192014',
 'EP212014',
 'EP012015',
 'EP022015',
 'EP022015',
 'EP052015',
 'EP092015',