# Rainband Data

This notebook is used to process the shear dataset and perform subsequent processing needed for rainband lightning burst analysis.
This notebook includes the following sections:
- Combining Shear Data Files
- Calculating Shear Quadrant
- Binning the Lightning Data
- Joining the Datasets
- Calculating Current Category and Intensification Change

### Combining Shear Data Files
We start by combining the individual shear data files.

In [2]:
import os
import glob
import pandas as pd
import polars as pl
from scipy.io import loadmat
import numpy as np

Import the filtered trackfile data to extract a list of unique storm codes for storms category 1 or higher. We use this to limit the shear files we process to streamline the process and reduce the workload.

In [3]:
# Import the filtered trackfile for the list of storm codes in analysis
trackfile = pd.read_csv("intermediate_data/Filtered_Reduced_Trackfile.csv")
tc_list = trackfile[["storm_code", "storm_name"]].drop_duplicates()
tc_list.reset_index(drop=True, inplace=True)
tc_list.head()

Unnamed: 0,storm_code,storm_name
0,ATL_20_28,Zeta
1,ATL_20_28,Twenty-Eig
2,SHEM_20_4,Sarai
3,SHEM_16_7,Victor
4,ATL_17_8,Gert


In [None]:
# Export filtered TC list for later use
tc_list.to_csv("data/filtered_tc_list.csv", index=False)

In [4]:
# Restart kernel and import unique value csv
tc_list = pd.read_csv("data/filtered_tc_list.csv")

Using the storm codes, we look for each TC's shear file in the base path (refers to thumb drive). If the TC has a shear file, we append it to the dataframe. We drop rows with null shear values.

In [5]:
# Define base path and list of storm codes
base_path = "/mnt/d/WWLLN_TC_Data_2010_2020/"
storm_codes = tc_list["storm_code"]

In [None]:
# List to store data efficiently
data_list = []
processed_storms = []

# Process each storm code
for storm_code in storm_codes:
    if storm_code in processed_storms:
        continue
    basin, year, number = storm_code.split("_")
    folder_path = os.path.join(base_path, year, basin, number)
    file_pattern = os.path.join(folder_path, "*_Intensity_Shear.mat")
    files = glob.glob(file_pattern)

    if files:
        filename = files[0]  # Pick the first matching file
        mat_data = loadmat(filename)

        # Print keys to inspect the structure
        print(f"Loaded file: {filename}")

        # Create a DataFrame
        if 'cg_IntenShear' in mat_data:
            data_array = mat_data['cg_IntenShear']
            # Convert NumPy array to a Polars DataFrame directly
            df_temp = pd.DataFrame(data_array, columns=['year', 'month', 'day', 'hour', 'min', 'second', 'lat',
              'long', 'distance_east', 'distance_north', 'category', 'intensity_change',
              'trackfile_id', 'shear_magnitude', 'shear_angle'])
            # Drop NaN (null) rows before appending
            df_temp = df_temp.dropna()
            # Add a new column for storm_code
            df_temp["storm_code"] = storm_code
            data_list.append(df_temp)
        else:
            print("Variable not found in .mat file")
    else:
        print(f"{storm_code} does not have a shear file.")

# Efficiently concatenate all Polars DataFrames
shear_data = pd.concat(data_list, ignore_index=True)

# printed output log saved in separate txt file for reference

In [None]:
# Display the final DataFrame
shear_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,category,intensity_change,trackfile_id,shear_magnitude,shear_angle,storm_code
0,2016.0,1.0,14.0,3.0,0.0,6.4994,-7.2386,-170.1965,-591.885,623.735,0.0,1.0,121.0,221.0,147.0,SHEM_16_7
1,2016.0,1.0,14.0,3.0,0.0,6.6831,-7.1552,-170.2882,-602.111,633.008,0.0,1.0,121.0,221.0,147.0,SHEM_16_7
2,2016.0,1.0,14.0,3.0,0.0,6.8064,-7.2494,-170.2057,-592.885,622.534,0.0,1.0,121.0,221.0,147.0,SHEM_16_7
3,2016.0,1.0,14.0,3.0,0.0,6.9387,-7.2733,-170.1481,-586.501,619.876,0.0,1.0,121.0,221.0,147.0,SHEM_16_7
4,2016.0,1.0,14.0,3.0,0.0,6.719,-7.2224,-170.1888,-591.057,625.536,0.0,1.0,121.0,221.0,147.0,SHEM_16_7


We export the unbinned shear data for future reference.

In [None]:
# Export the unbinned shear data as a tab-separated txt file
shear_data.to_csv("intermediate_data/unbinned_shear_data.txt", sep="\t", index=False)

### Calculating Shear Quadrant
We can restart the kernel here and read in the data again to free up space.

In [8]:
# Import unbinned shear data file
#shear_data = pd.read_csv("intermediate_data/unbinned_shear_data.txt", delimiter="\t")
shear_data = pd.read_csv(r"C:\Users\user\Desktop\25 WI\unbinned_shear_data.txt",delimiter="\t")



In [9]:
# Assign column names to the data
shear_data[['year', 'month', 'day', 'hour','min','category','intensity_change','trackfile_id','shear_magnitude','shear_angle']] = shear_data[['year', 'month', 'day', 'hour','min','category','intensity_change','trackfile_id','shear_magnitude','shear_angle']].astype(int)
shear_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,category,intensity_change,trackfile_id,shear_magnitude,shear_angle,storm_code
0,2016,1,14,3,0,6.4994,-7.2386,-170.1965,-591.885,623.735,0,1,121,221,147,SHEM_16_7
1,2016,1,14,3,0,6.6831,-7.1552,-170.2882,-602.111,633.008,0,1,121,221,147,SHEM_16_7
2,2016,1,14,3,0,6.8064,-7.2494,-170.2057,-592.885,622.534,0,1,121,221,147,SHEM_16_7
3,2016,1,14,3,0,6.9387,-7.2733,-170.1481,-586.501,619.876,0,1,121,221,147,SHEM_16_7
4,2016,1,14,3,0,6.719,-7.2224,-170.1888,-591.057,625.536,0,1,121,221,147,SHEM_16_7


Calculate the direct distance to storm center of each lightning event. Since the distance to center is generally close enough, we use a simplified hypotenuse calculation method instead of the Great Circle method.

In [10]:
# Calculate distance center using Pythagorean theorem
shear_data['distance_center'] = np.sqrt(shear_data['distance_east'] ** 2 + shear_data['distance_north'] ** 2)

Filter the data to rainband only. We define rainband as 200-400km of storm center.

?? need to confirm if rainband is only dependent on the direct distance to center we calculated before or if we need to use distanceX/distanceY in any way??? -> if we only need to depend on the hypotenuse distance we calculated we can filter before angle calculation.

In [11]:
# Create a rainband indicator, rainband is between 200 and 400 km of storm center
shear_data["rainband_ind"] = shear_data["distance_center"].apply(
    lambda x: 1 if (x >= 200 and x <= 400) else 0
)

# Filter the dataset to where rainband indicator = 1
rainband_data = shear_data[shear_data['rainband_ind'] == 1].copy()

Calculate shear angle for each lightning event

!! clean up notes !!

shearAngleCG=geoAngleCG - shearAngle; geoAngleCG is geographic angle of lightning to storm center relative to N = 0 deg; shearAngle is column 14 above

convert NS, EW, to Cartesian relative to shear vector to the right

distanceY=dist_center.*sind(shearAngleCG); dist_center is distance in km of lightning from storm center (direct distance)
distanceX=dist_center.*cosd(shearAngleCG);        

Create a function to calculate the geographic angle relative to north. Apply it to the dataframe and then use the calculated geo angle to get the shearAngleCG part of the equation.

In [12]:
def get_lightning_angle(north_south, east_west):
    angle_rad = np.arctan2(east_west, north_south)  # atan2(y, x)
    angle_deg = np.degrees(angle_rad)  # Convert radians to degrees
    return angle_deg % 360  # Ensure the angle is in [0, 360] range

In [13]:
# Apply function to data
rainband_data["geo_angle"] = get_lightning_angle(rainband_data["distance_north"], rainband_data["distance_east"])
rainband_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,category,intensity_change,trackfile_id,shear_magnitude,shear_angle,storm_code,distance_center,rainband_ind,geo_angle
25,2016,1,14,3,1,34.2911,-10.1733,-165.2575,-46.7057,297.411,0,1,121,221,147,SHEM_16_7,301.056017,1,351.075109
59,2016,1,14,3,3,32.6359,-10.4485,-166.4983,-182.348,266.81,0,1,121,221,147,SHEM_16_7,323.169258,1,325.649828
60,2016,1,14,3,3,32.7588,-10.4338,-166.6904,-203.363,268.445,0,1,121,221,147,SHEM_16_7,336.777713,1,322.853913
61,2016,1,14,3,3,32.6358,-10.4354,-166.7021,-204.642,268.267,0,1,121,221,147,SHEM_16_7,337.410035,1,322.662541
62,2016,1,14,3,3,32.6992,-10.4359,-166.693,-203.646,268.211,0,1,121,221,147,SHEM_16_7,336.762281,1,322.791493


In [14]:
# Calculate shearAngleCG by subtracting the angles, make sure output is positive
rainband_data["shearAngleCG"] = (rainband_data["geo_angle"] - rainband_data["shear_angle"]) % 360
rainband_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,category,intensity_change,trackfile_id,shear_magnitude,shear_angle,storm_code,distance_center,rainband_ind,geo_angle,shearAngleCG
25,2016,1,14,3,1,34.2911,-10.1733,-165.2575,-46.7057,297.411,0,1,121,221,147,SHEM_16_7,301.056017,1,351.075109,204.075109
59,2016,1,14,3,3,32.6359,-10.4485,-166.4983,-182.348,266.81,0,1,121,221,147,SHEM_16_7,323.169258,1,325.649828,178.649828
60,2016,1,14,3,3,32.7588,-10.4338,-166.6904,-203.363,268.445,0,1,121,221,147,SHEM_16_7,336.777713,1,322.853913,175.853913
61,2016,1,14,3,3,32.6358,-10.4354,-166.7021,-204.642,268.267,0,1,121,221,147,SHEM_16_7,337.410035,1,322.662541,175.662541
62,2016,1,14,3,3,32.6992,-10.4359,-166.693,-203.646,268.211,0,1,121,221,147,SHEM_16_7,336.762281,1,322.791493,175.791493


Next, let's calculate the distance X and Y of each lightning event relative to center, rotated on the shear angle.

?? is this necessary for rainband classification?

convert NS, EW, to Cartesian relative to shear vector to the right

distanceY=dist_center.*sind(shearAngleCG); dist_center is distance in km of lightning from storm center (direct distance)
distanceX=dist_center.*cosd(shearAngleCG);   

We start by defining a sind and cosd function since Python's built-in sin function only accepts input in radians (not degrees).

In [15]:
def sind(angle_degrees):
    return np.sin(np.radians(angle_degrees))

In [16]:
def cosd(angle_degrees):
    return np.cos(np.radians(angle_degrees))

Next, let's use these functions to calculate the distanceX and distanceY.

In [17]:
# Calculate distanceX and distanceY using formula
rainband_data["distanceX"] = rainband_data["distance_center"]*sind(rainband_data["shearAngleCG"])
rainband_data["distanceY"] = rainband_data["distance_center"]*cosd(rainband_data["shearAngleCG"])
rainband_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,...,trackfile_id,shear_magnitude,shear_angle,storm_code,distance_center,rainband_ind,geo_angle,shearAngleCG,distanceX,distanceY
25,2016,1,14,3,1,34.2911,-10.1733,-165.2575,-46.7057,297.411,...,121,221,147,SHEM_16_7,301.056017,1,351.075109,204.075109,-122.810944,-274.8676
59,2016,1,14,3,3,32.6359,-10.4485,-166.4983,-182.348,266.81,...,121,221,147,SHEM_16_7,323.169258,1,325.649828,178.649828,7.61476,-323.079533
60,2016,1,14,3,3,32.7588,-10.4338,-166.6904,-203.363,268.445,...,121,221,147,SHEM_16_7,336.777713,1,322.853913,175.853913,24.348937,-335.896349
61,2016,1,14,3,3,32.6358,-10.4354,-166.7021,-204.642,268.267,...,121,221,147,SHEM_16_7,337.410035,1,322.662541,175.662541,25.518542,-336.443659
62,2016,1,14,3,3,32.6992,-10.4359,-166.693,-203.646,268.211,...,121,221,147,SHEM_16_7,336.762281,1,322.791493,175.791493,24.713726,-335.854233


Let's assign each lightning event to a shear quadrant using the shear angle calculated earlier.

Based off shearAngleCG:
- 0-90 - Downshear Left (DL)
- 90-180 - Downshear Right (DR)
- 180-270 - Upshear Right (UR)
- 270 -360 - Upshear Left (UL)

In [18]:
# Define conditions and choices for the 4 quadrants
conditions = [
    (rainband_data["shearAngleCG"] >= 0) & (rainband_data["shearAngleCG"] < 90),
    (rainband_data["shearAngleCG"] >= 90) & (rainband_data["shearAngleCG"] < 180),
    (rainband_data["shearAngleCG"] >= 180) & (rainband_data["shearAngleCG"] < 270),
    (rainband_data["shearAngleCG"] >= 270) & (rainband_data["shearAngleCG"] < 360),
]
choices = ["DR", "UR", "UL", "DL"]

# Apply to new column shear_quad
rainband_data["shear_quad"] = np.select(conditions, choices, default="Unknown")
rainband_data.head()

Unnamed: 0,year,month,day,hour,min,second,lat,long,distance_east,distance_north,...,shear_magnitude,shear_angle,storm_code,distance_center,rainband_ind,geo_angle,shearAngleCG,distanceX,distanceY,shear_quad
25,2016,1,14,3,1,34.2911,-10.1733,-165.2575,-46.7057,297.411,...,221,147,SHEM_16_7,301.056017,1,351.075109,204.075109,-122.810944,-274.8676,UL
59,2016,1,14,3,3,32.6359,-10.4485,-166.4983,-182.348,266.81,...,221,147,SHEM_16_7,323.169258,1,325.649828,178.649828,7.61476,-323.079533,UR
60,2016,1,14,3,3,32.7588,-10.4338,-166.6904,-203.363,268.445,...,221,147,SHEM_16_7,336.777713,1,322.853913,175.853913,24.348937,-335.896349,UR
61,2016,1,14,3,3,32.6358,-10.4354,-166.7021,-204.642,268.267,...,221,147,SHEM_16_7,337.410035,1,322.662541,175.662541,25.518542,-336.443659,UR
62,2016,1,14,3,3,32.6992,-10.4359,-166.693,-203.646,268.211,...,221,147,SHEM_16_7,336.762281,1,322.791493,175.791493,24.713726,-335.854233,UR


### Binning the Lightning Data

Next, we'll create 30-minute timebins sectored by the shear quadrants. This means each TC will have 4 rows for the same timebin, one for each shear quadrant. We start by importing the min and max timestamps for each storm calculated using the trackfile.

In [19]:
full_time_period_df = pd.read_csv('intermediate_data/storm_time_period.csv')

In [20]:
# Ensure sec column is valid
rainband_data['sec'] = rainband_data['second'].apply(lambda x: 0 if x == 60 else x)

# Create a datetime column
rainband_data['datetime'] = pd.to_datetime(
    rainband_data['year'].astype(str) + '-' +
    rainband_data['month'].astype(str).str.zfill(2) + '-' +
    rainband_data['day'].astype(str).str.zfill(2) + ' ' +
    rainband_data['hour'].astype(str).str.zfill(2) + ':' +
    rainband_data['min'].astype(str).str.zfill(2) + ':' +
    rainband_data['sec'].astype(str).str.zfill(2)
)

# Define a function to apply the 30-minute binning for each storm_code group
def add_time_bin(group):
    group['time_bin'] = group['datetime'].dt.floor('30min')
    return group

# Group by storm_code and apply the binning function
rainband_data = rainband_data.groupby('storm_code', group_keys=False).apply(add_time_bin)

# Group by bins and get the count per 30-minute bin
rainband_data_grouped = rainband_data.groupby(['storm_code', 'time_bin', 'shear_quad']).size().reset_index(name='lightning_count')

# Define all shear_quad options
shear_quad_options = ["DR", "UR", "UL", "DL"]

# Function to ensure all time bins have all shear_quad values
def add_missing_bins(group):
    storm_code = group['storm_code'].iloc[0]
    # Use track file start and end dates to retain entirety of storm length
    min_time = full_time_period_df.loc[full_time_period_df['storm_code'] == storm_code, 'min_time'].values[0]
    max_time = full_time_period_df.loc[full_time_period_df['storm_code'] == storm_code, 'max_time'].values[0]

    # Create a full range of 30-minute bins for the time period of this storm
    full_bins = pd.DataFrame({'time_bin': pd.date_range(min_time, max_time, freq='30min')})

    # Create all possible combinations of (time_bin, shear_quad)
    shear_quad_expanded = full_bins.assign(key=1).merge(
        pd.DataFrame({'shear_quad': shear_quad_options, 'key': 1}), on='key'
    ).drop(columns=['key'])

    # Merge with original data
    merged = shear_quad_expanded.merge(group[['storm_code', 'time_bin', 'shear_quad', 'lightning_count']],
                                       how='left', on=['time_bin', 'shear_quad'])

    # Fill missing storm_code with the first valid entry in the group
    merged['storm_code'] = group['storm_code'].iloc[0]

    # Fill missing lightning_count with 0
    merged['lightning_count'] = merged['lightning_count'].fillna(0).astype(int)

    return merged

# Apply the function to ensure all (time_bin, shear_quad) combinations exist
rainband_data_timebin = rainband_data_grouped.groupby('storm_code', group_keys=False).apply(add_missing_bins)

# Sort the final result
rainband_data_timebin = rainband_data_timebin.sort_values(by=['storm_code', 'time_bin', 'shear_quad'])

# Print the resulting DataFrame with the new 'time_bin' and 'lightning_count' columns
print(rainband_data_timebin.head())

  rainband_data = rainband_data.groupby('storm_code', group_keys=False).apply(add_time_bin)


             time_bin shear_quad storm_code  lightning_count
3 2010-06-20 18:00:00         DL   ATL_10_1                0
0 2010-06-20 18:00:00         DR   ATL_10_1                0
2 2010-06-20 18:00:00         UL   ATL_10_1                0
1 2010-06-20 18:00:00         UR   ATL_10_1                0
7 2010-06-20 18:30:00         DL   ATL_10_1                0


  rainband_data_timebin = rainband_data_grouped.groupby('storm_code', group_keys=False).apply(add_missing_bins)


### Joining the Datasets
We join the binned lightning data with the trackfile data to get the closest wind and pressure data. Start by reading in the trackfile data and converting the data types.

In [21]:
# Import trackfile
reduced_track_file = pd.read_csv("intermediate_data/Filtered_Reduced_Trackfile.csv")

In [22]:
reduced_track_file.head()

Unnamed: 0,year,month,day,hour,lat,lon,pressure,knots,storm_code,storm_name,category,basin
0,2020,10,20,0,12.1,-80.0,0,15,ATL_20_28,Zeta,2,ATL
1,2020,10,20,6,12.5,-80.1,0,15,ATL_20_28,Zeta,2,ATL
2,2020,10,20,12,12.8,-80.2,0,15,ATL_20_28,Zeta,2,ATL
3,2020,10,20,18,13.2,-80.3,0,15,ATL_20_28,Zeta,2,ATL
4,2020,10,21,0,13.8,-80.4,0,15,ATL_20_28,Zeta,2,ATL


In [23]:
# Convert the datetime columns to the correct data type
rainband_data_timebin["time_bin"] = pd.to_datetime(rainband_data_timebin["time_bin"])

rainband_data_timebin["year"] = rainband_data_timebin["time_bin"].dt.year.astype('int64')
rainband_data_timebin["month"] = rainband_data_timebin["time_bin"].dt.month.astype('int64')
rainband_data_timebin["day"] = rainband_data_timebin["time_bin"].dt.day.astype('int64')
rainband_data_timebin["hour"] = rainband_data_timebin["time_bin"].dt.hour.astype('int64')
rainband_data_timebin["minute"] = rainband_data_timebin["time_bin"].dt.minute.astype('int64')

In [24]:
rainband_data_timebin_pl = pl.from_pandas(rainband_data_timebin)
reduced_track_file_pl = pl.from_pandas(reduced_track_file)

In [25]:
rainband_data_timebin_pl.head()

time_bin,shear_quad,storm_code,lightning_count,year,month,day,hour,minute
datetime[ns],str,str,i64,i64,i64,i64,i64,i64
2010-06-20 18:00:00,"""DL""","""ATL_10_1""",0,2010,6,20,18,0
2010-06-20 18:00:00,"""DR""","""ATL_10_1""",0,2010,6,20,18,0
2010-06-20 18:00:00,"""UL""","""ATL_10_1""",0,2010,6,20,18,0
2010-06-20 18:00:00,"""UR""","""ATL_10_1""",0,2010,6,20,18,0
2010-06-20 18:30:00,"""DL""","""ATL_10_1""",0,2010,6,20,18,30


Next, we join the rainband track data with the binned lightning data using the asof method. We look for the nearest track data to the lightning timebin.

In [26]:
rainband_data_timebin_joined = rainband_data_timebin_pl.join_asof(
    reduced_track_file_pl,
    on="hour",
    by=["year", "month", "day","storm_code"],
    strategy="nearest",
    tolerance=24
)


This can lead to invalid results. Ensure the asof key is sorted
  rainband_data_timebin_joined = rainband_data_timebin_pl.join_asof(

This can lead to invalid results. Ensure the asof key is sorted
  rainband_data_timebin_joined = rainband_data_timebin_pl.join_asof(


In [27]:
rainband_data_timebin_joined = rainband_data_timebin_joined.to_pandas()

In [28]:
rainband_data_timebin_joined.head(20)

Unnamed: 0,time_bin,shear_quad,storm_code,lightning_count,year,month,day,hour,minute,lat,lon,pressure,knots,storm_name,category,basin
0,2010-06-20 18:00:00,DL,ATL_10_1,0,2010,6,20,18,0,12.0,-64.3,1011,15,Alex,2,ATL
1,2010-06-20 18:00:00,DR,ATL_10_1,0,2010,6,20,18,0,12.0,-64.3,1011,15,Alex,2,ATL
2,2010-06-20 18:00:00,UL,ATL_10_1,0,2010,6,20,18,0,12.0,-64.3,1011,15,Alex,2,ATL
3,2010-06-20 18:00:00,UR,ATL_10_1,0,2010,6,20,18,0,12.0,-64.3,1011,15,Alex,2,ATL
4,2010-06-20 18:30:00,DL,ATL_10_1,0,2010,6,20,18,30,12.0,-64.3,1011,15,Alex,2,ATL
5,2010-06-20 18:30:00,DR,ATL_10_1,0,2010,6,20,18,30,12.0,-64.3,1011,15,Alex,2,ATL
6,2010-06-20 18:30:00,UL,ATL_10_1,0,2010,6,20,18,30,12.0,-64.3,1011,15,Alex,2,ATL
7,2010-06-20 18:30:00,UR,ATL_10_1,0,2010,6,20,18,30,12.0,-64.3,1011,15,Alex,2,ATL
8,2010-06-20 19:00:00,DL,ATL_10_1,0,2010,6,20,19,0,12.0,-64.3,1011,15,Alex,2,ATL
9,2010-06-20 19:00:00,DR,ATL_10_1,0,2010,6,20,19,0,12.0,-64.3,1011,15,Alex,2,ATL


In [29]:
rainband_data_timebin_joined.isnull().sum()

time_bin           0
shear_quad         0
storm_code         0
lightning_count    0
year               0
month              0
day                0
hour               0
minute             0
lat                0
lon                0
pressure           0
knots              0
storm_name         0
category           0
basin              0
dtype: int64

In [30]:
# calculate 24-hour intensity change
def knot_category(row):
    if 64 <= row['knots'] < 83:
        return 1
    elif 83 <= row['knots'] < 96:
        return 2
    elif 96 <= row['knots'] < 113:
        return 3
    elif 113 <= row['knots'] < 136:
        return 4
    elif row['knots'] >= 137:
        return 5
    else:
        return 'Unidentified'

def intensification(row):
    if row['24_hour_knots_diff'] < -30:
        return 'Rapidly Weakening'
    elif -30 <= row['24_hour_knots_diff'] < -10:
        return 'Weakening'
    elif -10 <= row['24_hour_knots_diff'] < 10:
        return 'Neutral'
    elif 10 <= row['24_hour_knots_diff'] < 30:
        return 'Intensifying'
    elif 30 <= row['24_hour_knots_diff']:
        return 'Rapidly Intensifying'
    else:
        return 'Unidentified'

In [31]:
pd.set_option('display.max_rows', 500)

# Sort by 'storm_code' and 'time_bin'
rainband_data_timebin_joined = rainband_data_timebin_joined.sort_values(by=['storm_code', 'time_bin','shear_quad'])

rainband_data_timebin_joined['24_hour_knots_diff'] = rainband_data_timebin_joined.groupby(['storm_code','shear_quad'])['knots'].shift(periods=-48) - rainband_data_timebin_joined['knots']
rainband_data_timebin_joined['24_hour_pressure_diff'] = rainband_data_timebin_joined.groupby(['storm_code','shear_quad'])['pressure'].shift(periods=-48) - rainband_data_timebin_joined['pressure']

# Save or display the DataFrame
rainband_data_timebin_joined.groupby('storm_code')['24_hour_knots_diff'].max()
rainband_data_timebin_joined.groupby('storm_code')['24_hour_pressure_diff'].max()

storm_code
ATL_10_1        29.0
ATL_10_11       15.0
ATL_10_12       26.0
ATL_10_13       24.0
ATL_10_14       18.0
ATL_10_17       12.0
ATL_10_18       16.0
ATL_10_19       22.0
ATL_10_20        1.0
ATL_10_21       20.0
ATL_10_6        18.0
ATL_10_7        28.0
ATL_11_12       23.0
ATL_11_14        4.0
ATL_11_16       42.0
ATL_11_17       13.0
ATL_11_18       18.0
ATL_11_9        20.0
ATL_12_11       19.0
ATL_12_12        7.0
ATL_12_13       14.0
ATL_12_14       17.0
ATL_12_17       16.0
ATL_12_18       33.0
ATL_12_3         3.0
ATL_12_5        15.0
ATL_12_8        17.0
ATL_12_9        21.0
ATL_13_10       18.0
ATL_13_9        19.0
ATL_14_1         8.0
ATL_14_3         9.0
ATL_14_4         2.0
ATL_14_6        23.0
ATL_14_7         7.0
ATL_14_8        22.0
ATL_15_11       24.0
ATL_15_12        6.0
ATL_15_4        20.0
ATL_15_6      1008.0
ATL_16_1         8.0
ATL_16_15       32.0
ATL_16_16     1008.0
ATL_16_5      1011.0
ATL_16_7        21.0
ATL_16_9        15.0
ATL_17_11     1008.0
AT

In [32]:
rainband_data_timebin_joined['TC_Category'] = rainband_data_timebin_joined.apply(knot_category, axis=1)
rainband_data_timebin_joined['Intensification_Category'] = rainband_data_timebin_joined.apply(intensification, axis=1)

In [33]:
rainband_data_timebin_joined.shape

(584416, 20)

In [34]:
rainband_data_timebin_joined.to_csv("data/rainband_shear_timebin_joined.csv", index=False)