In [1]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Grab with regular SHIPS
#
# For every RI event, extract each SHIPS variable from 24 h before RI start
# through RI end, and combine all variables into one table indexed by
# (Storm_ID, Time). The result is written to a CSV.

# Variables from SHIPS. HSTA and HEND are not SHIPS variables: they are derived
# below as (time - RI start) and (time - RI end), stored as timedeltas.
variables = ['VMAX','SHRD','SHTD','VMPI','RSST','RHLO','IR00','DTL','HSTA','HEND']
TIME_VARIABLES = ('HSTA', 'HEND')

basin = 'north_atlantic' # east_pacific or north_atlantic

# Root folder of the input/output data (single place to change the location).
DATA_DIR = '/Users/acheung/data'

# Read in RI cases
RI = pd.read_csv(DATA_DIR + '/RI_Cases/RI_Events_' + basin + '.csv')

# Convert columns to datetime format
RI["RI Start"] = pd.to_datetime(RI["RI Start"])
RI["RI End"] = pd.to_datetime(RI["RI End"])

# np.concatenate below cannot handle an empty list, so fail early and clearly.
if RI.empty:
    raise ValueError('No RI events found for basin ' + repr(basin))

# Per-event values, pulled out once instead of re-indexing RI inside the loops.
ri_storm_ids = RI['Storm_ID'].values
ri_starts = RI['RI Start'].values
ri_ends = RI['RI End'].values
window_starts = (RI['RI Start'] + timedelta(hours=-24)).values  # 24 h before RI start

ships_cache = {}  # SHIPS tables already read, keyed by the variable name in the file name
list_pds = []  # One single-column DataFrame per variable, all sharing the same index layout

for var_name in variables:  # Loops through all variables
    # The time-hour variables only need the time stamps, so they borrow the
    # table of the first variable instead of having a file of their own.
    source_var = variables[0] if var_name in TIME_VARIABLES else var_name

    if source_var not in ships_cache:
        ships_table = pd.read_csv(DATA_DIR + '/SHIPS/SHIPS_' + source_var + '_' + basin + '.csv')
        ships_table['Time'] = pd.to_datetime(ships_table['Time'])
        ships_cache[source_var] = ships_table
    SHIPS_data = ships_cache[source_var]

    all_data_arrays = []
    all_storm_ids = []
    all_storm_times = []

    for storm_id, window_start, ri_start, ri_end in zip(
            ri_storm_ids, window_starts, ri_starts, ri_ends):
        # Grab current storm data. `.where` blanks out every other storm's rows
        # and `.dropna()` removes them, together with any row of this storm
        # that has a missing value.
        current_storm_data = SHIPS_data.where(SHIPS_data['Storm_ID'] == storm_id).dropna()
        current_storm_data_indexed = current_storm_data.set_index('Time')
        current_storm_data_indexed.index = pd.to_datetime(current_storm_data_indexed.index)

        # Grab only the times of interest: from 24 h before RI start to RI end.
        # Sometimes, due to data availability or storm intensity (e.g. the
        # system was not officially a TC that early), the window is shorter.
        data_array = current_storm_data_indexed.loc[window_start:ri_end]

        # Save storm IDs
        all_storm_ids.append(data_array['Storm_ID'].values)

        # Save datetime
        all_storm_times.append(data_array.index.values)

        if var_name == 'HSTA':  # time relative to RI start (negative before the start)
            all_data_arrays.append(data_array.index - ri_start)
        elif var_name == 'HEND':  # time relative to RI end (negative before the end)
            all_data_arrays.append(data_array.index - ri_end)
        else:  # Append data for when not time-data variables
            all_data_arrays.append(data_array[var_name].values)

    # Creates a pandas dataframe with datetime and atcf id as axis for an individual variable
    alldata_pandad = pd.DataFrame(np.concatenate(all_data_arrays),
                              index=[np.concatenate(all_storm_ids),
                                     np.concatenate(all_storm_times)],columns=[var_name])

    # Save all pandas dataframes of each variable with the same axis to a big list!
    list_pds.append(alldata_pandad)

# Concatenate all the per-variable frames side by side (aligned on the index)
SHIPS_concat = pd.concat(list_pds,axis=1)

# Rename the axis
SHIPS_concat = SHIPS_concat.rename_axis(('Storm_ID','Time'))

# Save to CSV
SHIPS_concat.to_csv(DATA_DIR + "/RI_Cases/RI_Events_with_SHIPS_Data_" + basin + ".csv")


In [5]:
# Show the combined SHIPS table; the notebook renders a truncated preview for long frames.
SHIPS_concat

Unnamed: 0_level_0,Unnamed: 1_level_0,VMAX,SHRD,SHTD,VMPI,RSST,RHLO,IR00,DTL,HSTA,HEND
Storm_ID,Time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AL032000,2000-08-10 12:00:00,65.0,0.9,244.0,143.0,28.4,65.0,999.0,278.0,-1 days +00:00:00,-2 days +00:00:00
AL032000,2000-08-10 18:00:00,65.0,5.6,294.0,142.0,28.5,62.0,999.0,332.0,-1 days +06:00:00,-2 days +06:00:00
AL032000,2000-08-11 00:00:00,70.0,3.1,262.0,141.0,28.6,61.0,999.0,397.0,-1 days +12:00:00,-2 days +12:00:00
AL032000,2000-08-11 06:00:00,75.0,8.8,186.0,142.0,28.6,60.0,999.0,496.0,-1 days +18:00:00,-2 days +18:00:00
AL032000,2000-08-11 12:00:00,80.0,9.4,188.0,137.0,28.3,59.0,999.0,398.0,0 days 00:00:00,-1 days +00:00:00
...,...,...,...,...,...,...,...,...,...,...,...
AL162022,2022-11-01 18:00:00,45.0,32.1,56.0,111.0,26.0,52.0,26.0,208.0,0 days 00:00:00,-1 days +00:00:00
AL162022,2022-11-02 00:00:00,50.0,31.9,67.0,111.0,25.9,54.0,30.0,274.0,0 days 06:00:00,-1 days +06:00:00
AL162022,2022-11-02 06:00:00,55.0,26.6,73.0,127.0,25.8,59.0,30.0,291.0,0 days 12:00:00,-1 days +12:00:00
AL162022,2022-11-02 12:00:00,65.0,25.0,48.0,123.0,25.5,64.0,18.0,296.0,0 days 18:00:00,-1 days +18:00:00


In [29]:
# Grab with regular EC-SHIPS
#
# For every RI event from season 2016 onward, look up the EC-SHIPS deep-layer
# shear at the RI start time and save the RI table with that extra column.

basin_EC = 'north_atlantic'

# Root folder of the input/output data (single place to change the location).
DATA_DIR = '/Users/acheung/data'

# EC-SHIPS shear file for each supported basin.
EC_SHEAR_FILES = {
    'north_atlantic': 'EC-SHIPS_deep_layer_shear_atl.csv',
    'east_pacific': 'EC-SHIPS_deep_layer_shear_epac.csv',
}

# Fail loudly on an unsupported basin instead of leaving EC_SHIPS_shear
# undefined (or silently reusing one left over from an earlier run).
if basin_EC not in EC_SHEAR_FILES:
    raise ValueError('Unsupported basin_EC ' + repr(basin_EC) +
                     '; expected one of ' + repr(sorted(EC_SHEAR_FILES)))

RI_EC_pre = pd.read_csv(DATA_DIR + '/RI_Cases/RI_Events_' + basin_EC + '.csv')
EC_SHIPS_shear = pd.read_csv(DATA_DIR + '/EC_SHIPS/' + EC_SHEAR_FILES[basin_EC])

# Keep only events from season 2016 onward. `.where` blanks the other rows and
# `.dropna()` removes them, along with any event that has a missing value.
RI_EC = RI_EC_pre.where(RI_EC_pre['Season'] >= 2016).dropna()

# NOTE(review): 'Time' and 'RI Start' are compared exactly as read from the CSVs
# (this cell does no datetime conversion), so a match relies on both files using
# the same timestamp format - confirm.
all_EC_shears = []
for storm_id, ri_start in zip(RI_EC['Storm_ID'].values, RI_EC['RI Start'].values):
    current_storm_shear_EC = EC_SHIPS_shear.where(
        EC_SHIPS_shear['Storm_ID'] == storm_id).dropna()
    shear_at_ri_start = current_storm_shear_EC.where(
        current_storm_shear_EC['Time'] == ri_start).dropna()['Deep_Layer_Shear']

    if len(shear_at_ri_start) == 0:
        # Some data is missing from EC-SHIPS, so we record NaN in these cases
        all_EC_shears.append(np.nan)
    else: # When data from EC-SHIPS is not missing
        all_EC_shears.append(shear_at_ri_start.values[0])

RI_EC.insert(6, "Deep-Layer Shear (kt)", all_EC_shears)
RI_EC.to_csv(DATA_DIR + "/RI_Cases/RI_Events_with_EC_SHIPS_shear_" + basin_EC + ".csv",index=False)
