In [100]:
import pandas as pd
import geopandas as gpd
import os, us, warnings, datetime
from sqlalchemy import create_engine  
from datetime import datetime
from pathlib import Path
warnings.filterwarnings("ignore")

### Prepare US state geo data

In [122]:
def generate_state_abbreviations():
    return {state.name: state.abbr for state in us.STATES_AND_TERRITORIES}

state_abbreviations = generate_state_abbreviations()
print(state_abbreviations)

{'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY', 'American Samoa': 'AS', 'Guam': 'GU', 'Northern Mariana Islands': 'MP', 'Puer

In [130]:
# Load US states GeoDataFrame
us_states = gpd.read_file("https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_040_00_5m.json")
us_states['area'] = us_states.geometry.area
us_states = us_states.sort_values(by='area', ascending=True)
us_states['st'] = us_states['NAME'].map(state_abbreviations)
us_states.head(5)

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry,area,st
8,0400000US11,11,District of Columbia,,61.048,"POLYGON ((-77.03860 38.79151, -77.03890 38.800...",0.01838,
39,0400000US44,44,Rhode Island,,1033.814,"MULTIPOLYGON (((-71.38359 41.46478, -71.38928 ...",0.309236,RI
7,0400000US10,10,Delaware,,1948.543,"MULTIPOLYGON (((-75.56493 39.58325, -75.57627 ...",0.545358,DE
51,0400000US72,72,Puerto Rico,,3423.775,"MULTIPOLYGON (((-65.32770 18.29584, -65.33745 ...",0.76508,PR
6,0400000US09,9,Connecticut,,4842.355,"POLYGON ((-71.79924 42.00807, -71.79792 41.935...",1.395161,CT


### For each US state, spatial join and aggregate Tornado data into H3 hexagons

In [124]:
def calculate_years_diff(dates_list):
    if len(dates_list) > 1:
        years_diff = [(dates_list[i] - dates_list[i - 1]).days / 365.25 for i in range(1, len(dates_list))]
        return [round(year_diff, 3) for year_diff in years_diff]
    else:
        return []

def calculate_days_diff(dates):
    if len(dates) > 1:
        sorted_dates = sorted(dates)
        return [(sorted_dates[i + 1] - sorted_dates[i]).days for i in range(len(sorted_dates) - 1)]
    else:
        return []

from datetime import datetime

def convert_to_date(dates_list):
    return [datetime.strptime(date_str, '%Y-%m-%d').date() for date_str in dates_list]

# Define a function that calculates the years between the event date and the present date
def years_since_event(dates):
    if len(dates) == 1:
        # if there is only one event, calculate the years since that event
        return (datetime.now().date() - dates[0]).days / 365.25
    else:
        # if there are multiple events, return None (or whatever you want to return in this case)
        return None

In [137]:
def load_geodata(state_name, directory):
    print(f"Read in geoDataframe for : {state_name}")
    st_boundary = us_states[us_states['st'] == state_name]
    st_boundary = st_boundary.to_crs(4326)
    print(f"Loading H3 id's and polygons for: {state_name}")
    try:
        full_name = st_boundary['NAME'].iloc[0]
    except IndexError:
        raise Exception("IndexError: The DataFrame st_boundary is likely empty or the index is out of bounds.")

    print("We've created the two state objects: full_name and abbreviated name")
    hexagons = gpd.read_file(f"{directory}/h3_by_state/h3_{full_name}.gpkg")
    hexagons['state'] = state_name
    lookup = hexagons['state'][0]
    st_boundary = st_boundary.dropna()

    return hexagons, lookup, st_boundary

def load_event_data(directory, lookup, st_boundary):
    event = gpd.read_file(f"{directory}/input/tornado_tracks.shp")
    event = event.to_crs(4326)
    event_df = event[event['st'] == lookup]
    print("Created event geoDataframe")
    event_df = gpd.clip(event_df, st_boundary)
    return event_df

In [138]:
def process_event_history(s):
    # Regular expression pattern to match the dates
    pattern = r'datetime\.date\((\d{4}), (\d{1,2}), (\d{1,2})\)'

    # Extract all matching date components
    matches = re.findall(pattern, s)

    # Convert to formatted date strings
    formatted_dates = [datetime(int(year), int(month), int(day)).strftime('%m/%d/%Y') for year, month, day in matches]

    return formatted_dates

In [140]:
print(us_states['st'].unique())


[nan 'RI' 'DE' 'PR' 'CT' 'HI' 'NJ' 'MA' 'NH' 'VT' 'MD' 'WV' 'SC' 'ME' 'IN'
 'KY' 'VA' 'TN' 'OH' 'LA' 'MS' 'PA' 'NC' 'AL' 'AR' 'FL' 'NY' 'GA' 'IL'
 'IA' 'WI' 'MI' 'OK' 'MO' 'WA' 'NE' 'ND' 'KS' 'SD' 'UT' 'ID' 'MN' 'WY'
 'CO' 'OR' 'AZ' 'NV' 'NM' 'CA' 'MT' 'TX' 'AK']


In [141]:
# Set directory path
directory = Path("/Users//Documents/engineering//tornado/data")
output_directory = "h3_hexagons_geopackage"

# Loop through each state name
for state_name in us_states['st'].dropna().unique():
    try:
        print("________________________________________________________________")
        hexagons, lookup, st_boundary = load_geodata(state_name, directory)
        event_df = load_event_data(directory, lookup, st_boundary)

        # Rest of the processing steps would go here...
        # Clip the data using GeoPandas clip
        event_df = gpd.clip(event_df, st_boundary)
        event_df['buffer_geom'] = event_df.buffer(0.008)
        event_df = event_df.drop('geometry', axis=1)
        event_df = event_df.rename(columns={'buffer_geom': 'geometry'})
        print(f"Loading Tornado event layer by the state of: {lookup}")

        # Convert column A from float to string and remove ".0"
        event_df['yr'] = event_df['yr'].astype(int).astype(str).str.replace('\.0', '')
        event_df['mo'] = event_df['mo'].astype(int).astype(str).str.replace('\.0', '')
        event_df['dy'] = event_df['dy'].astype(int).astype(str).str.replace('\.0', '')

                # select only float columns
        float_cols = event_df.select_dtypes(include=['float'])

        # round float columns to 2 decimal places
        event_df[float_cols.columns] = float_cols.round(2)

        #event_df['date'] = pd.to_datetime(event_df['date'], errors='coerce').dt.date
        event_df = event_df[['geometry', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'mag', 'inj',
        'fat', 'loss', 'closs', 'len', 'wid']]
        print("Start Join of geoDataframe")

        # Use geopandas to spatial join *intersect* the two tables
        join_df = gpd.sjoin(hexagons, event_df, how='inner', predicate='intersects')
        print(f"Resulted join has a record count of: {len(join_df.index)}")

        # Sort and group the data to produce aggregate layer
        join_df = join_df.sort_values(by=['h3_hexagon', 'date'], ascending=True)
        group_df = join_df.groupby('h3_hexagon').agg({
            'yr': list, 
            'mo': list, 
            'date': list, 
            'mag': list, 
            'loss': list, 
            'closs': list, 
            'inj': list, 
            'fat': list, 
            'len': list, 
            'wid': list
            })
        print("Aggregating event geoDataframe")

        # Convert the strings in the 'date' column to datetime.date objects
        group_df['date'] = group_df['date'].apply(convert_to_date)

        # Calculate days difference between dates in the 'date' column
        group_df['days_diff'] = group_df['date'].apply(calculate_days_diff)
        group_df['years_diff'] = group_df['date'].apply(calculate_years_diff)

        # Calculate total number of tornado events and sum of years difference
        group_df['num_events'] = group_df['date'].apply(len)
        group_df['sum_years_diff'] = group_df['years_diff'].apply(sum)
        group_df['sum_property_loss'] = group_df['loss'].apply(sum)

        # Calculate the average loss per year
        group_df['total_loss'] = group_df['loss'].apply(sum)
        group_df['total_years'] = group_df['date'].apply(len)
        group_df['avg_loss_per_year'] = group_df['total_loss'] / group_df['total_years']

        ######## Addition
        # Apply the function to the 'date' column
        group_df['years_since_event'] = group_df['date'].apply(years_since_event)

        # replace zero or null values with np.nan
        group_df['sum_years_diff'] = group_df['sum_years_diff'].replace({0: np.nan, None: np.nan})

        # Now use 'years_since_event' where 'sum_years_diff' is null
        group_df['sum_years_diff'] = group_df['sum_years_diff'].where(group_df['sum_years_diff'].notnull(), group_df['years_since_event'])

        # Now calculate average frequency as before
        group_df['avg_frequency'] = group_df['num_events'] / group_df['sum_years_diff']
        ######## End Addition

        # Calculate average frequency
        group_df['avg_property_loss'] = group_df['sum_property_loss'] / group_df['num_events']
        # Calculate average frequency of mag_stats#
        group_df['avg_mag_freq'] = group_df['num_events'] / group_df['sum_years_diff']

        print("Dissolving event geoDataframe")
        # Run a Group By and Sum operation to produce aggregate layer
        dissolve_df = join_df.dissolve(
                by="h3_hexagon",
                aggfunc={'state': 'count',
                        'mag':'mean', 'inj': 'sum', 
                        'fat': 'sum', 'loss': 'sum', 
                        'closs': 'sum', 'len': 'mean', 'wid': 'mean'})

        dissolve_df = dissolve_df.rename(columns={'h3_hexagon':'h3_id', 'name':'tornado_count'})

        # Merge the two dataframes to produce final aggregate layer
        final = dissolve_df.join(group_df, lsuffix='_h3_history', rsuffix='_stats')
        final = final.rename(columns={'state': 'tornado_count'})

        df_melt = final.assign(names=final.date.str.split(","))
                
        final = df_melt.date.apply(pd.Series) \
            .merge(df_melt, right_index=True, left_index=True)

        final['date'] = final['date'].apply(convert_to_string)


        ##############################

        print("Successfully dissolved...")
        final.to_csv(f"{directory}/h3_hexagons_tornado/{lookup}_final_geom.csv")
        print("Wrote to csv...")
    except FileNotFoundError as e:
        print(f"Error processing {state_name}: {e}")
    except ValueError as e:
        print(f"Error processing {state_name}: {e}")

________________________________________________________________
Read in geoDataframe for : RI
Loading H3 id's and polygons for: RI
We've created the two state objects: full_name and abbreviated name
Created event geoDataframe
Loading Tornado event layer by the state of: RI
Start Join of geoDataframe
Resulted join has a record count of: 944
Aggregating event geoDataframe


AttributeError: type object 'datetime.datetime' has no attribute 'datetime'

## Step-by-Step Model: DO NOT DELETE

In [134]:
from pathlib import Path
import glob

# Set directory path
directory = Path("/alsaceh/tornado/data")
output_directory = "h3_hexagons_geopackage"


# Specify the state you want to process
state_name = 'AR'

hexagons, lookup, st_boundary = load_geodata(state_name, directory)
event_df = load_event_data(directory, lookup, st_boundary)

Read in geoDataframe for : AR
Loading H3 id's and polygons for: AR
We've created the two state objects: full_name and abbreviated name
Created event geoDataframe


In [68]:
# Rest of the processing steps would go here...
# Clip the data using GeoPandas clip
event_df = gpd.clip(event_df, st_boundary)
event_df['buffer_geom'] = event_df.buffer(0.008)
event_df = event_df.drop('geometry', axis=1)
event_df = event_df.rename(columns={'buffer_geom': 'geometry'})
print(f"Loading Tornado event layer by the state of: {lookup}")

Loading Tornado event layer by the state of: AR



  event_df['buffer_geom'] = event_df.buffer(0.008)


In [69]:
event_df.head(3)

Unnamed: 0,OBJECTID,om,yr,mo,dy,date,time,tz,st,stf,...,slon,elat,elon,len,wid,fc,Shape__Len,Month_Calc,Date_Calc,geometry
9635,9636.0,127.0,1967.0,4.0,13.0,1967-04-13,18:00:00,3.0,AR,5.0,...,-91.9,33.0201,-91.8999,0.1,10.0,0.0,17.325684,3.0,-85863600000.0,"POLYGON ((-91.90556 33.02576, -91.90498 33.026..."
62941,62942.0,616091.0,2018.0,4.0,14.0,2018-04-14,01:42:00,3.0,AR,5.0,...,-92.0066,33.0325,-91.9634,2.72,600.0,0.0,5230.834624,3.0,1523678000000.0,"POLYGON ((-91.96610 33.04003, -91.96535 33.040..."
52575,52576.0,200.0,2009.0,4.0,9.0,2009-04-09,22:27:00,3.0,AR,5.0,...,-92.0012,33.0365,-91.7681,13.6,600.0,0.0,26129.19792,3.0,1239250000000.0,"POLYGON ((-91.76889 33.04446, -91.76810 33.044..."


In [70]:
# Convert column A from float to string and remove ".0"
event_df['yr'] = event_df['yr'].astype(int).astype(str).str.replace('\.0', '')
event_df['mo'] = event_df['mo'].astype(int).astype(str).str.replace('\.0', '')
event_df['dy'] = event_df['dy'].astype(int).astype(str).str.replace('\.0', '')

In [71]:
event_df.head(3)

Unnamed: 0,OBJECTID,om,yr,mo,dy,date,time,tz,st,stf,...,slon,elat,elon,len,wid,fc,Shape__Len,Month_Calc,Date_Calc,geometry
9635,9636.0,127.0,1967,4,13,1967-04-13,18:00:00,3.0,AR,5.0,...,-91.9,33.0201,-91.8999,0.1,10.0,0.0,17.325684,3.0,-85863600000.0,"POLYGON ((-91.90556 33.02576, -91.90498 33.026..."
62941,62942.0,616091.0,2018,4,14,2018-04-14,01:42:00,3.0,AR,5.0,...,-92.0066,33.0325,-91.9634,2.72,600.0,0.0,5230.834624,3.0,1523678000000.0,"POLYGON ((-91.96610 33.04003, -91.96535 33.040..."
52575,52576.0,200.0,2009,4,9,2009-04-09,22:27:00,3.0,AR,5.0,...,-92.0012,33.0365,-91.7681,13.6,600.0,0.0,26129.19792,3.0,1239250000000.0,"POLYGON ((-91.76889 33.04446, -91.76810 33.044..."


In [72]:
# select only float columns
float_cols = event_df.select_dtypes(include=['float'])

# round float columns to 2 decimal places
event_df[float_cols.columns] = float_cols.round(2)

#event_df['date'] = pd.to_datetime(event_df['date'], errors='coerce').dt.date
event_df = event_df[['geometry', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'mag', 'inj',
'fat', 'loss', 'closs', 'len', 'wid']]
print("Start Join of geoDataframe")

Start Join of geoDataframe


In [73]:
# Use geopandas to spatial join *intersect* the two tables
join_df = gpd.sjoin(hexagons, event_df, how='inner', predicate='intersects')
print(f"Resulted join has a record count of: {len(join_df.index)}")

# Sort and group the data to produce aggregate layer
join_df = join_df.sort_values(by=['h3_hexagon', 'date'], ascending=True)

join_df.head(5)

Resulted join has a record count of: 368711


Unnamed: 0,h3_hexagon,geometry,state,index_right,yr,mo,dy,date,time,tz,st,mag,inj,fat,loss,closs,len,wid
541011,89264ad92d3ffff,"POLYGON ((-89.74671 35.92758, -89.74525 35.928...",AR,36907,1997,3,1,1997-03-01,18:20:00,3.0,AR,4.0,0.0,0.0,0.03,0.0,20.0,200.0
1236875,89264ad92dbffff,"POLYGON ((-89.75034 35.92670, -89.74888 35.928...",AR,36907,1997,3,1,1997-03-01,18:20:00,3.0,AR,4.0,0.0,0.0,0.03,0.0,20.0,200.0
82334,89264ad9403ffff,"POLYGON ((-89.71177 35.94471, -89.71031 35.946...",AR,36907,1997,3,1,1997-03-01,18:20:00,3.0,AR,4.0,0.0,0.0,0.03,0.0,20.0,200.0
800306,89264ad940bffff,"POLYGON ((-89.71541 35.94384, -89.71395 35.945...",AR,36907,1997,3,1,1997-03-01,18:20:00,3.0,AR,4.0,0.0,0.0,0.03,0.0,20.0,200.0
66661,89264ad940fffff,"POLYGON ((-89.71252 35.94161, -89.71106 35.942...",AR,36907,1997,3,1,1997-03-01,18:20:00,3.0,AR,4.0,0.0,0.0,0.03,0.0,20.0,200.0


In [74]:
group_df = join_df.groupby('h3_hexagon').agg({
    'yr': list, 
    'mo': list, 
    'date': list, 
    'mag': list, 
    'loss': list, 
    'closs': list, 
    'inj': list, 
    'fat': list, 
    'len': list, 
    'wid': list
    })
print("Aggregating event geoDataframe")

group_df.head(5)

Aggregating event geoDataframe


Unnamed: 0_level_0,yr,mo,date,mag,loss,closs,inj,fat,len,wid
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
89264ad92d3ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0]
89264ad92dbffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0]
89264ad9403ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0]
89264ad940bffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0]
89264ad940fffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0]


In [None]:
import datetime

# Convert the strings in the 'date' column to datetime.date objects
group_df['date'] = group_df['date'].apply(convert_to_date)

# Calculate days difference between dates in the 'date' column
group_df['days_diff']_df['date'].apply(calculate_days_diff)

group_df['years_diff'] = group_df['date'].apply(calculate_years_diff)

In [46]:
group_df.head(5)

Unnamed: 0_level_0,yr,mo,date,mag,loss,closs,inj,fat,len,wid,days_diff,years_diff
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
89264ad92d3ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[]
89264ad92dbffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[]
89264ad9403ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[]
89264ad940bffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[]
89264ad940fffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[]


In [47]:
# Calculate total number of tornado events and sum of years difference
group_df['num_events'] = group_df['date'].apply(len)
group_df['sum_years_diff'] = group_df['years_diff'].apply(sum)
group_df['sum_property_loss'] = group_df['loss'].apply(sum)

group_df.head(5)

In [48]:
# Calculate the average loss per year
group_df['total_loss'] = group_df['loss'].apply(sum)
group_df['total_years'] = group_df['date'].apply(len)
group_df['avg_loss_per_year'] = group_df['total_loss'] / group_df['total_years']

group_df.head(5)

Unnamed: 0_level_0,yr,mo,date,mag,loss,closs,inj,fat,len,wid,days_diff,years_diff,num_events,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
89264ad92d3ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[],1,0.0,0.03,0.03,1,0.03
89264ad92dbffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[],1,0.0,0.03,0.03,1,0.03
89264ad9403ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[],1,0.0,0.03,0.03,1,0.03
89264ad940bffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[],1,0.0,0.03,0.03,1,0.03
89264ad940fffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],[],[],1,0.0,0.03,0.03,1,0.03


In [53]:
from datetime import datetime


# replace zero or null values with np.nan
group_df['sum_years_diff'] = group_df['sum_years_diff'].replace({0: np.nan, None: np.nan})

# Apply the function to the 'date' column
group_df['years_since_event'] = group_df['date'].apply(years_since_event)

# Now use 'years_since_event' where 'sum_years_diff' is null
group_df['sum_years_diff'] = group_df['sum_years_diff'].where(group_df['sum_years_diff'].notnull(), group_df['years_since_event'])

# Now calculate average frequency as before
group_df['avg_frequency'] = group_df['num_events'] / group_df['sum_years_diff']

In [54]:
group_df.head(5)

Unnamed: 0_level_0,yr,mo,date,mag,loss,closs,inj,fat,len,wid,...,num_events,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year,avg_frequency,avg_property_loss,avg_mag_freq,years_since_event
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89264ad92d3ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,inf,26.236824
89264ad92dbffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,inf,26.236824
89264ad9403ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,inf,26.236824
89264ad940bffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,inf,26.236824
89264ad940fffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,inf,26.236824


In [55]:
group_df['avg_property_loss'] = group_df['sum_property_loss'] / group_df['num_events']
# Calculate average frequency of mag_stats
group_df['avg_mag_freq'] = group_df['num_events'] / group_df['sum_years_diff']

group_df.head(5)

Unnamed: 0_level_0,yr,mo,date,mag,loss,closs,inj,fat,len,wid,...,num_events,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year,avg_frequency,avg_property_loss,avg_mag_freq,years_since_event
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89264ad92d3ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad92dbffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad9403ffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad940bffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad940fffff,[1997],[3],[1997-03-01],[4.0],[0.03],[0.0],[0.0],[0.0],[20.0],[200.0],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824


In [56]:
print("Dissolving event geoDataframe")
# Run a Group By and Sum operation to produce aggregate layer
dissolve_df = join_df.dissolve(
        by="h3_hexagon",
        aggfunc={'state': 'count',
                'mag':'mean', 'inj': 'sum', 
                'fat': 'sum', 'loss': 'sum', 
                'closs': 'sum', 'len': 'mean', 'wid': 'mean'})


dissolve_df = dissolve_df.rename(columns={'h3_hexagon':'h3_id', 'name':'tornado_count'})

dissolve_df.head(3)

Dissolving event geoDataframe


Unnamed: 0_level_0,geometry,state,mag,inj,fat,loss,closs,len,wid
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
89264ad92d3ffff,"POLYGON ((-89.74671 35.92758, -89.74525 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0
89264ad92dbffff,"POLYGON ((-89.75034 35.92670, -89.74888 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0
89264ad9403ffff,"POLYGON ((-89.71177 35.94471, -89.71031 35.946...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0


In [57]:
# Merge the two dataframes to produce final aggregate layer
final = dissolve_df.join(group_df, lsuffix='_h3_history', rsuffix='_stats')
final = final.rename(columns={'state': 'tornado_count'})

final.head(5)

Unnamed: 0_level_0,geometry,tornado_count,mag_h3_history,inj_h3_history,fat_h3_history,loss_h3_history,closs_h3_history,len_h3_history,wid_h3_history,yr,...,num_events,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year,avg_frequency,avg_property_loss,avg_mag_freq,years_since_event
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89264ad92d3ffff,"POLYGON ((-89.74671 35.92758, -89.74525 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad92dbffff,"POLYGON ((-89.75034 35.92670, -89.74888 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad9403ffff,"POLYGON ((-89.71177 35.94471, -89.71031 35.946...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad940bffff,"POLYGON ((-89.71541 35.94384, -89.71395 35.945...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824
89264ad940fffff,"POLYGON ((-89.71252 35.94161, -89.71106 35.942...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,1,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824


In [58]:
df_melt = final.assign(names=final.date.str.split(","))

df_melt.head(5)

Unnamed: 0_level_0,geometry,tornado_count,mag_h3_history,inj_h3_history,fat_h3_history,loss_h3_history,closs_h3_history,len_h3_history,wid_h3_history,yr,...,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year,avg_frequency,avg_property_loss,avg_mag_freq,years_since_event,names
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89264ad92d3ffff,"POLYGON ((-89.74671 35.92758, -89.74525 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824,
89264ad92dbffff,"POLYGON ((-89.75034 35.92670, -89.74888 35.928...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824,
89264ad9403ffff,"POLYGON ((-89.71177 35.94471, -89.71031 35.946...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824,
89264ad940bffff,"POLYGON ((-89.71541 35.94384, -89.71395 35.945...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824,
89264ad940fffff,"POLYGON ((-89.71252 35.94161, -89.71106 35.942...",1,4.0,0.0,0.0,0.03,0.0,20.0,200.0,[1997],...,26.236824,0.03,0.03,1,0.03,0.038114,0.03,0.038114,26.236824,


In [33]:
final = df_melt.date.apply(pd.Series) \
    .merge(df_melt, right_index=True, left_index=True)

final.head(5)

Unnamed: 0_level_0,0,1,2,3,4,5,6,geometry,tornado_count,mag_h3_history,...,num_events,sum_years_diff,sum_property_loss,total_loss,total_years,avg_loss_per_year,avg_frequency,avg_property_loss,avg_mag_freq,names
h3_hexagon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89264ad92d3ffff,1997-03-01,,,,,,,"POLYGON ((-89.74671 35.92758, -89.74525 35.928...",1,4.0,...,1,0.0,0.03,0.03,1,0.03,inf,0.03,inf,
89264ad92dbffff,1997-03-01,,,,,,,"POLYGON ((-89.75034 35.92670, -89.74888 35.928...",1,4.0,...,1,0.0,0.03,0.03,1,0.03,inf,0.03,inf,
89264ad9403ffff,1997-03-01,,,,,,,"POLYGON ((-89.71177 35.94471, -89.71031 35.946...",1,4.0,...,1,0.0,0.03,0.03,1,0.03,inf,0.03,inf,
89264ad940bffff,1997-03-01,,,,,,,"POLYGON ((-89.71541 35.94384, -89.71395 35.945...",1,4.0,...,1,0.0,0.03,0.03,1,0.03,inf,0.03,inf,
89264ad940fffff,1997-03-01,,,,,,,"POLYGON ((-89.71252 35.94161, -89.71106 35.942...",1,4.0,...,1,0.0,0.03,0.03,1,0.03,inf,0.03,inf,


In [76]:
def convert_to_string(dates_list):
    return [date.strftime('%Y-%m-%d') for date in dates_list]

final['date'] = final['date'].apply(convert_to_string)


In [79]:
final.columns

Index(['geometry', 'tornado_count', 'mag_h3_history', 'inj_h3_history',
       'fat_h3_history', 'loss_h3_history', 'closs_h3_history',
       'len_h3_history', 'wid_h3_history', 'yr', 'mo', 'date', 'mag_stats',
       'loss_stats', 'closs_stats', 'inj_stats', 'fat_stats', 'len_stats',
       'wid_stats', 'days_diff', 'years_diff', 'num_events', 'sum_years_diff',
       'sum_property_loss', 'total_loss', 'total_years', 'avg_loss_per_year',
       'avg_frequency', 'avg_property_loss', 'avg_mag_freq',
       'years_since_event'],
      dtype='object')

In [81]:
#final_stats = final.drop([0, 1, 'geometry'], axis=1)
#final_geom = final.drop([0], axis=1)

##############################

print("Successfully dissolved...")
final.to_csv(directory / f"h3_hexagons_tornado/{lookup}_final_geom.csv")
print("Wrote to csv...")

Successfully dissolved...
Wrote to csv...
