This file builds off of the code originally put together in the 2022_Tornado_All.ipynb file from Version 2 of the build.

The CSV file used in this code comes from the National Oceanic and Atmospheric Administration, and was downloaded here:
https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/

In [1]:
import pandas as pd
import datetime
import pprint

In [2]:
# Load the NOAA Storm Events details file (the d2023 extract) into a DataFrame
storm_events_csv = "../../Resources/StormEvents_details-ftp_v1.0_d2023_c20240317.csv"
allstorms_df = pd.read_csv(storm_events_csv)

# Preview the first rows
allstorms_df.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,202310,25,230,202310,27,551,186682,1145781,NORTH DAKOTA,38,...,,,,,,,,"In late October, a winter storm dumped heavy s...",Public reports 7.5 inches at Black Tiger Bay C...,CSV
1,202310,25,230,202310,27,1437,186682,1145783,NORTH DAKOTA,38,...,,,,,,,,"In late October, a winter storm dumped heavy s...",Local Police Department relays storm total sno...,CSV
2,202310,25,230,202310,27,1126,186682,1145784,NORTH DAKOTA,38,...,,,,,,,,"In late October, a winter storm dumped heavy s...",Public reports 10 inches of storm total snowfa...,CSV
3,202310,25,230,202310,27,1301,186682,1145796,NORTH DAKOTA,38,...,,,,,,,,"In late October, a winter storm dumped heavy s...",Emergency Manager reports 6 inches of storm to...,CSV
4,202310,25,230,202310,27,600,186682,1145884,NORTH DAKOTA,38,...,,,,,,,,"In late October, a winter storm dumped heavy s...",CoCoRaHS Station ND-GF-23 reports 8.8 inches o...,CSV


In [3]:
# Columns that are not needed for the tornado clean-up below
columns_drop = ["BEGIN_YEARMONTH", "BEGIN_DAY","BEGIN_TIME", "END_YEARMONTH", "END_DAY",
                "END_TIME", "EPISODE_ID", "EVENT_ID", "STATE_FIPS", 
                "END_RANGE", "END_AZIMUTH", "EPISODE_NARRATIVE", "EVENT_NARRATIVE",
                "DATA_SOURCE", "CZ_TYPE", "CZ_FIPS", "CZ_NAME", "TOR_OTHER_CZ_FIPS", 
                "TOR_OTHER_CZ_NAME", "BEGIN_RANGE", "BEGIN_AZIMUTH", "BEGIN_LOCATION",
                "WFO", "CZ_TIMEZONE", "TOR_OTHER_WFO", "TOR_OTHER_CZ_STATE",
                "END_LOCATION", "SOURCE", "INJURIES_INDIRECT", "DEATHS_INDIRECT", 
                "DAMAGE_CROPS", "MAGNITUDE", "MAGNITUDE_TYPE", "FLOOD_CAUSE", "CATEGORY",
                "END_DATE_TIME", "MONTH_NAME", "TOR_LENGTH"]

# Drop the columns by rebinding the name rather than mutating in place.
# errors="ignore" skips any column that is already gone, so re-running this
# cell after it has executed once no longer raises a KeyError.
allstorms_df = allstorms_df.drop(columns=columns_drop, errors="ignore")

# Display DataFrame
allstorms_df.head()

Unnamed: 0,STATE,YEAR,EVENT_TYPE,BEGIN_DATE_TIME,INJURIES_DIRECT,DEATHS_DIRECT,DAMAGE_PROPERTY,TOR_F_SCALE,TOR_WIDTH,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
0,NORTH DAKOTA,2023,Heavy Snow,10/25/23 2:30,0,0,,,,,,,
1,NORTH DAKOTA,2023,Heavy Snow,10/25/23 2:30,0,0,,,,,,,
2,NORTH DAKOTA,2023,Heavy Snow,10/25/23 2:30,0,0,,,,,,,
3,NORTH DAKOTA,2023,Heavy Snow,10/25/23 2:30,0,0,,,,,,,
4,NORTH DAKOTA,2023,Heavy Snow,10/25/23 2:30,0,0,,,,,,,


In [4]:
# Keep only the rows whose event type is "Tornado"
is_tornado = allstorms_df["EVENT_TYPE"].eq("Tornado")
tornado_df = allstorms_df.loc[is_tornado]

# Preview the filtered rows
tornado_df.head()

Unnamed: 0,STATE,YEAR,EVENT_TYPE,BEGIN_DATE_TIME,INJURIES_DIRECT,DEATHS_DIRECT,DAMAGE_PROPERTY,TOR_F_SCALE,TOR_WIDTH,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
28,ILLINOIS,2023,Tornado,8/5/23 16:30,0,0,0.00K,EFU,20.0,39.02,-87.97,39.0214,-87.9689
61,ALABAMA,2023,Tornado,1/12/23 11:10,0,0,0.00K,EF0,360.0,32.723,-87.474,32.7427,-87.4061
64,ALABAMA,2023,Tornado,1/3/23 13:40,0,0,0.00K,EF0,150.0,32.993,-86.515,33.0059,-86.4899
72,OKLAHOMA,2023,Tornado,6/17/23 20:24,0,0,0.00K,EFU,30.0,36.914,-98.422,36.922,-98.398
87,ILLINOIS,2023,Tornado,1/3/23 16:33,0,0,0.00K,EF0,25.0,40.0109,-89.0144,40.0136,-89.0096


In [5]:
# Source column -> new column name; the insertion order is also the
# left-to-right column order of the resulting DataFrame
column_names = dict(
    YEAR="Year",
    BEGIN_DATE_TIME="Date",
    STATE="State",
    TOR_F_SCALE="Rating",
    INJURIES_DIRECT="Injuries",
    DEATHS_DIRECT="Death",
    BEGIN_LAT="Start_Lat",
    BEGIN_LON="Start_Lon",
    END_LAT="End_Lat",
    END_LON="End_Lon",
    TOR_WIDTH="Width",
)

# Select the wanted columns in the order above, then apply the new names
source_columns = list(column_names)
selected_df = tornado_df[source_columns]
tornado_df = selected_df.rename(columns=column_names)

# Preview the renamed frame
tornado_df.head()

Unnamed: 0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
28,2023,8/5/23 16:30,ILLINOIS,EFU,0,0,39.02,-87.97,39.0214,-87.9689,20.0
61,2023,1/12/23 11:10,ALABAMA,EF0,0,0,32.723,-87.474,32.7427,-87.4061,360.0
64,2023,1/3/23 13:40,ALABAMA,EF0,0,0,32.993,-86.515,33.0059,-86.4899,150.0
72,2023,6/17/23 20:24,OKLAHOMA,EFU,0,0,36.914,-98.422,36.922,-98.398,30.0
87,2023,1/3/23 16:33,ILLINOIS,EF0,0,0,40.0109,-89.0144,40.0136,-89.0096,25.0


In [6]:
# Parse the "month/day/2-digit-year hour:minute" strings into datetimes
parsed_dates = pd.to_datetime(tornado_df["Date"], format="%m/%d/%y %H:%M")

# Keep only the calendar date. normalize() resets the time of day to midnight
# while staying in a datetime dtype, which replaces the earlier
# datetime -> .dt.date (object dtype) -> to_datetime round trip.
tornado_df["Date"] = parsed_dates.dt.normalize()

# Convert Width to integer. The width is pinned to int64 because a bare "int"
# can resolve to a platform-dependent size.
# NOTE: astype raises if Width contains missing values; the .info() summary
# below shows the column is fully populated for this file.
tornado_df["Width"] = tornado_df["Width"].astype("int64")

# Summarise dtypes and non-null counts to confirm the conversions
tornado_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1491 entries, 28 to 74907
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Year       1491 non-null   int64         
 1   Date       1491 non-null   datetime64[ns]
 2   State      1491 non-null   object        
 3   Rating     1491 non-null   object        
 4   Injuries   1491 non-null   int64         
 5   Death      1491 non-null   int64         
 6   Start_Lat  1491 non-null   float64       
 7   Start_Lon  1491 non-null   float64       
 8   End_Lat    1491 non-null   float64       
 9   End_Lon    1491 non-null   float64       
 10  Width      1491 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(4), object(2)
memory usage: 139.8+ KB


In [7]:
# Collect the distinct state names present in the tornado data
states = tornado_df["State"].unique()

# Show them in alphabetical order
states_sorted = sorted(states)
print(states_sorted)

['ALABAMA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO', 'CONNECTICUT', 'DELAWARE', 'FLORIDA', 'GEORGIA', 'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MARYLAND', 'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW HAMPSHIRE', 'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NORTH CAROLINA', 'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA', 'PUERTO RICO', 'RHODE ISLAND', 'SOUTH CAROLINA', 'SOUTH DAKOTA', 'TENNESSEE', 'TEXAS', 'VERMONT', 'VIRGINIA', 'WASHINGTON', 'WEST VIRGINIA', 'WISCONSIN', 'WYOMING']


In [8]:
# Full state/territory name -> two-letter postal abbreviation.
# Covers all 50 states plus DC and Puerto Rico, so a file from another year
# that contains a state absent from this one still maps.
state_abbr = {
    'ALABAMA': 'AL', 'ALASKA': 'AK', 'ARIZONA': 'AZ', 'ARKANSAS': 'AR',
    'CALIFORNIA': 'CA', 'COLORADO': 'CO', 'CONNECTICUT': 'CT', 'DELAWARE': 'DE',
    'DISTRICT OF COLUMBIA': 'DC', 'FLORIDA': 'FL', 'GEORGIA': 'GA',
    'HAWAII': 'HI', 'IDAHO': 'ID', 'ILLINOIS': 'IL', 'INDIANA': 'IN',
    'IOWA': 'IA', 'KANSAS': 'KS', 'KENTUCKY': 'KY', 'LOUISIANA': 'LA',
    'MAINE': 'ME', 'MARYLAND': 'MD', 'MASSACHUSETTS': 'MA', 'MICHIGAN': 'MI',
    'MINNESOTA': 'MN', 'MISSISSIPPI': 'MS', 'MISSOURI': 'MO', 'MONTANA': 'MT',
    'NEBRASKA': 'NE', 'NEVADA': 'NV', 'NEW HAMPSHIRE': 'NH', 'NEW JERSEY': 'NJ',
    'NEW MEXICO': 'NM', 'NEW YORK': 'NY', 'NORTH CAROLINA': 'NC',
    'NORTH DAKOTA': 'ND', 'OHIO': 'OH', 'OKLAHOMA': 'OK', 'OREGON': 'OR',
    'PENNSYLVANIA': 'PA', 'PUERTO RICO': 'PR', 'RHODE ISLAND': 'RI',
    'SOUTH CAROLINA': 'SC', 'SOUTH DAKOTA': 'SD', 'TENNESSEE': 'TN',
    'TEXAS': 'TX', 'UTAH': 'UT', 'VERMONT': 'VT', 'VIRGINIA': 'VA',
    'WASHINGTON': 'WA', 'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI',
    'WYOMING': 'WY'
}

# Upper-case the names so the lookup does not depend on the source casing
state_upper = tornado_df["State"].str.upper()

# A plain .map() turns every value missing from the dict into NaN without any
# warning. Stop with a clear error if a value is neither a known full name nor
# already one of the abbreviations.
known_values = set(state_abbr) | set(state_abbr.values())
unmapped = sorted(set(state_upper.dropna()) - known_values)
if unmapped:
    raise ValueError(f"No abbreviation defined for: {unmapped}")

# Replace state names with their abbreviations. Values that are already
# abbreviations fall through fillna unchanged, so re-running this cell does
# not blank the column.
tornado_df["State"] = state_upper.map(state_abbr).fillna(state_upper)

# Display DataFrame
tornado_df.head()

Unnamed: 0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
28,2023,2023-08-05,IL,EFU,0,0,39.02,-87.97,39.0214,-87.9689,20
61,2023,2023-01-12,AL,EF0,0,0,32.723,-87.474,32.7427,-87.4061,360
64,2023,2023-01-03,AL,EF0,0,0,32.993,-86.515,33.0059,-86.4899,150
72,2023,2023-06-17,OK,EFU,0,0,36.914,-98.422,36.922,-98.398,30
87,2023,2023-01-03,IL,EF0,0,0,40.0109,-89.0144,40.0136,-89.0096,25


In [9]:
# Discard the row labels left over from the full storm table, renumber the
# rows from 0, and label the new index "index_name"
tornado_df = tornado_df.reset_index(drop=True).rename_axis("index_name")

# Preview the re-indexed frame
tornado_df.head()

Unnamed: 0_level_0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2023,2023-08-05,IL,EFU,0,0,39.02,-87.97,39.0214,-87.9689,20
1,2023,2023-01-12,AL,EF0,0,0,32.723,-87.474,32.7427,-87.4061,360
2,2023,2023-01-03,AL,EF0,0,0,32.993,-86.515,33.0059,-86.4899,150
3,2023,2023-06-17,OK,EFU,0,0,36.914,-98.422,36.922,-98.398,30
4,2023,2023-01-03,IL,EF0,0,0,40.0109,-89.0144,40.0136,-89.0096,25


In [10]:
# Strip the literal text "EF" from each rating for the later merge with the
# 1950-2021 dataset
ratings_without_ef = tornado_df["Rating"].str.replace("EF", "", regex=False)
tornado_df["Rating"] = ratings_without_ef

# Preview the updated ratings
tornado_df.head()

Unnamed: 0_level_0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2023,2023-08-05,IL,U,0,0,39.02,-87.97,39.0214,-87.9689,20
1,2023,2023-01-12,AL,0,0,0,32.723,-87.474,32.7427,-87.4061,360
2,2023,2023-01-03,AL,0,0,0,32.993,-86.515,33.0059,-86.4899,150
3,2023,2023-06-17,OK,U,0,0,36.914,-98.422,36.922,-98.398,30
4,2023,2023-01-03,IL,0,0,0,40.0109,-89.0144,40.0136,-89.0096,25


In [11]:
# List the distinct rating values that remain after the clean-up
rating_values = tornado_df["Rating"].unique()
print(rating_values)

['U' '0' '1' '2' '3' '4']


In [12]:
# Save the cleaned tornado table as a CSV file in the Resources folder
output_csv = "../../Resources/2023_tornado_all.csv"
tornado_df.to_csv(output_csv)