This notebook builds on the code originally written in the 2022_Tornado.ipynb notebook from Version 1 of the build.

The CSV file used in this notebook (StormEvents_details-ftp_v1.0_d2022_c20240216.csv) comes from the National Oceanic and Atmospheric Administration (NOAA) and was downloaded from:
https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/

In [1]:
import pandas as pd
import datetime
import pprint

In [2]:
# Load the 2022 storm events details CSV into a DataFrame
storm_details_csv = "../../Resources/StormEvents_details-ftp_v1.0_d2022_c20240216.csv"
allstorms_df = pd.read_csv(storm_details_csv)

# Preview the first rows
allstorms_df.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,202202,20,2118,202202,20,2218,165464,999902,NEVADA,32,...,,,,,,,,Strong winds increased ahead of an approaching...,"Station (UP994) 3.1 SE West Wendover, Elevatio...",CSV
1,202202,21,800,202202,22,1000,165465,999903,NEVADA,32,...,,,,,,,,A low centered over northern and central Nevad...,Thirteen inches fell at station (BCSN2) Big Cr...,CSV
2,202202,22,200,202202,22,900,165465,999904,NEVADA,32,...,,,,,,,,A low centered over northern and central Nevad...,Fifteen inches fell at station (TJMN2) Toe Jam...,CSV
3,202202,18,1609,202202,18,1609,165611,1001181,ATLANTIC SOUTH,87,...,7.0,SE,PONTE VEDRA,30.05,-81.17,30.05,-81.17,Pre-frontal showers and thunderstorms moved so...,A brief waterspout was observed offshore of So...,CSV
4,202202,2,0,202202,3,0,165668,1001527,AMERICAN SAMOA,97,...,5.0,NNW,VAITOGI,-14.333,-170.7157,-14.3393,-170.7268,A surface trough over the Islands held the po...,"Over a 24-hour period, WSO Pago Pago recorded ...",CSV


In [3]:
# Columns to remove from the storm data
columns_drop = ["BEGIN_YEARMONTH", "BEGIN_DAY","BEGIN_TIME", "END_YEARMONTH", "END_DAY",
                "END_TIME", "EPISODE_ID", "EVENT_ID", "STATE_FIPS", 
                "END_RANGE", "END_AZIMUTH", "EPISODE_NARRATIVE", "EVENT_NARRATIVE",
                "DATA_SOURCE", "CZ_TYPE", "CZ_FIPS", "CZ_NAME", "TOR_OTHER_CZ_FIPS", 
                "TOR_OTHER_CZ_NAME", "BEGIN_RANGE", "BEGIN_AZIMUTH", "BEGIN_LOCATION",
                "WFO", "CZ_TIMEZONE", "TOR_OTHER_WFO", "TOR_OTHER_CZ_STATE",
                "END_LOCATION", "SOURCE", "INJURIES_INDIRECT", "DEATHS_INDIRECT", 
                "DAMAGE_CROPS", "MAGNITUDE", "MAGNITUDE_TYPE", "FLOOD_CAUSE", "CATEGORY",
                "END_DATE_TIME", "MONTH_NAME", "TOR_LENGTH"]

# Drop the columns by reassignment rather than inplace=True.
# errors="ignore" skips any column that is already gone, so re-running this
# cell after it has executed once no longer raises a KeyError.
allstorms_df = allstorms_df.drop(columns=columns_drop, errors="ignore")

# Display DataFrame
allstorms_df.head()

Unnamed: 0,STATE,YEAR,EVENT_TYPE,BEGIN_DATE_TIME,INJURIES_DIRECT,DEATHS_DIRECT,DAMAGE_PROPERTY,TOR_F_SCALE,TOR_WIDTH,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
0,NEVADA,2022,High Wind,2/20/22 21:18,0,0,0.00K,,,,,,
1,NEVADA,2022,Heavy Snow,2/21/22 8:00,0,0,0.00K,,,,,,
2,NEVADA,2022,Heavy Snow,2/22/22 2:00,0,0,0.00K,,,,,,
3,ATLANTIC SOUTH,2022,Waterspout,2/18/22 16:09,0,0,0.00K,,,30.05,-81.17,30.05,-81.17
4,AMERICAN SAMOA,2022,Heavy Rain,2/2/22 0:00,0,0,50.00K,,,-14.333,-170.7157,-14.3393,-170.7268


In [4]:
# Keep only the rows whose EVENT_TYPE is exactly "Tornado"
is_tornado = allstorms_df["EVENT_TYPE"] == "Tornado"
tornado_df = allstorms_df[is_tornado]

# Preview the filtered rows
tornado_df.head()

Unnamed: 0,STATE,YEAR,EVENT_TYPE,BEGIN_DATE_TIME,INJURIES_DIRECT,DEATHS_DIRECT,DAMAGE_PROPERTY,TOR_F_SCALE,TOR_WIDTH,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
43,LOUISIANA,2022,Tornado,3/30/22 13:03,0,0,50.00K,EF0,25.0,32.3733,-91.7256,32.3934,-91.7065
44,LOUISIANA,2022,Tornado,3/30/22 13:10,0,0,85.00K,EF1,350.0,32.1463,-91.6868,32.2071,-91.5956
45,MISSISSIPPI,2022,Tornado,3/30/22 19:01,0,0,35.00K,EF0,125.0,32.186,-88.8433,32.208,-88.8179
48,MISSISSIPPI,2022,Tornado,3/30/22 16:36,0,0,150.00K,EF1,700.0,32.931,-89.512,32.9881,-89.4407
49,MISSISSIPPI,2022,Tornado,3/30/22 15:50,0,0,30.00K,EF0,100.0,32.341,-90.117,32.3517,-90.062


In [5]:
# Source column -> output column name.
# The insertion order of this mapping is also the output column order.
column_names = dict(
    YEAR="Year",
    BEGIN_DATE_TIME="Date",
    STATE="State",
    TOR_F_SCALE="Rating",
    INJURIES_DIRECT="Injuries",
    DEATHS_DIRECT="Death",
    BEGIN_LAT="Start_Lat",
    BEGIN_LON="Start_Lon",
    END_LAT="End_Lat",
    END_LON="End_Lon",
    TOR_WIDTH="Width",
)

# Select the source columns in the order above, then apply the new names
ordered_source_columns = list(column_names)
tornado_df = tornado_df[ordered_source_columns].rename(columns=column_names)

# Preview the result
tornado_df.head()

Unnamed: 0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
43,2022,3/30/22 13:03,LOUISIANA,EF0,0,0,32.3733,-91.7256,32.3934,-91.7065,25.0
44,2022,3/30/22 13:10,LOUISIANA,EF1,0,0,32.1463,-91.6868,32.2071,-91.5956,350.0
45,2022,3/30/22 19:01,MISSISSIPPI,EF0,0,0,32.186,-88.8433,32.208,-88.8179,125.0
48,2022,3/30/22 16:36,MISSISSIPPI,EF1,0,0,32.931,-89.512,32.9881,-89.4407,700.0
49,2022,3/30/22 15:50,MISSISSIPPI,EF0,0,0,32.341,-90.117,32.3517,-90.062,100.0


In [6]:
# Parse the "month/day/2-digit-year hour:minute" strings and keep only the date.
# .dt.normalize() sets the time of day to midnight while keeping the datetime64
# dtype, which replaces the datetime -> Python date objects -> datetime round trip.
tornado_df["Date"] = pd.to_datetime(
    tornado_df["Date"], format="%m/%d/%y %H:%M"
).dt.normalize()

# Convert Width to an explicit 64-bit integer; plain "int" can resolve to a
# 32-bit type on some platform / NumPy combinations.
# NOTE: this cast raises if Width contains missing values.
tornado_df["Width"] = tornado_df["Width"].astype("int64")

# Confirm the resulting dtypes and non-null counts
tornado_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1379 entries, 43 to 69873
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Year       1379 non-null   int64         
 1   Date       1379 non-null   datetime64[ns]
 2   State      1379 non-null   object        
 3   Rating     1379 non-null   object        
 4   Injuries   1379 non-null   int64         
 5   Death      1379 non-null   int64         
 6   Start_Lat  1379 non-null   float64       
 7   Start_Lon  1379 non-null   float64       
 8   End_Lat    1379 non-null   float64       
 9   End_Lon    1379 non-null   float64       
 10  Width      1379 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(4), object(2)
memory usage: 129.3+ KB


In [7]:
# List the distinct State values in the tornado DataFrame, in sorted order
states = tornado_df["State"].unique()
states_sorted = sorted(states)
print(states_sorted)

['ALABAMA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO', 'CONNECTICUT', 'FLORIDA', 'GEORGIA', 'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MARYLAND', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEW HAMPSHIRE', 'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NORTH CAROLINA', 'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA', 'PUERTO RICO', 'SOUTH CAROLINA', 'SOUTH DAKOTA', 'TENNESSEE', 'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON', 'WEST VIRGINIA', 'WISCONSIN', 'WYOMING']


In [8]:
# Changing the state names to their abbreviations
state_abbr = { 'ALABAMA':'AL', 'ARIZONA':'AZ', 'ARKANSAS':'AR', 
              'CALIFORNIA':'CA', 'COLORADO':'CO', 'CONNECTICUT':'CT', 
              'FLORIDA':'FL', 'GEORGIA':'GA', 'IDAHO':'ID', 
              'ILLINOIS':'IL', 'INDIANA':'IN', 'IOWA':'IA', 
              'KANSAS':'KS', 'KENTUCKY':'KY', 'LOUISIANA':'LA', 
              'MARYLAND':'MD', 'MICHIGAN':'MI', 'MINNESOTA':'MN', 
              'MISSISSIPPI':'MS', 'MISSOURI':'MO', 'MONTANA':'MT', 
              'NEBRASKA':'NE', 'NEW HAMPSHIRE':'NH', 'NEW JERSEY':'NJ', 
              'NEW MEXICO':'NM', 'NEW YORK':'NY', 'NORTH CAROLINA':'NC', 
              'NORTH DAKOTA':'ND', 'OHIO':'OH', 'OKLAHOMA':'OK', 
              'OREGON':'OR', 'PENNSYLVANIA':'PA', 'PUERTO RICO':'PR', 
              'SOUTH CAROLINA':'SC', 'SOUTH DAKOTA':'SD', 'TENNESSEE':'TN', 
              'TEXAS':'TX', 'UTAH':'UT', 'VERMONT':'VT', 
              'VIRGINIA':'VA', 'WASHINGTON':'WA', 'WEST VIRGINIA':'WV', 
              'WISCONSIN':'WI', 'WYOMING':'WY'    
}

# Replacing state names with the abbreviations.
# A value with no entry in state_abbr keeps its current text instead of becoming
# NaN, so re-running this cell on already-abbreviated values leaves them unchanged.
state_upper = tornado_df["State"].str.upper()
tornado_df["State"] = state_upper.map(state_abbr).fillna(state_upper)

# Fail loudly if a state name has no abbreviation, rather than silently
# carrying an unabbreviated (or missing) value into later steps.
unmapped_states = sorted(set(tornado_df["State"].dropna()) - set(state_abbr.values()))
if unmapped_states:
    raise ValueError(f"No abbreviation defined for state(s): {unmapped_states}")

# Display DataFrame
tornado_df.head()

Unnamed: 0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
43,2022,2022-03-30,LA,EF0,0,0,32.3733,-91.7256,32.3934,-91.7065,25
44,2022,2022-03-30,LA,EF1,0,0,32.1463,-91.6868,32.2071,-91.5956,350
45,2022,2022-03-30,MS,EF0,0,0,32.186,-88.8433,32.208,-88.8179,125
48,2022,2022-03-30,MS,EF1,0,0,32.931,-89.512,32.9881,-89.4407,700
49,2022,2022-03-30,MS,EF0,0,0,32.341,-90.117,32.3517,-90.062,100


In [9]:
# Renumber the rows from 0 (discarding the old index) and name the index "index_name"
tornado_df = tornado_df.reset_index(drop=True).rename_axis("index_name")

# Preview the result
tornado_df.head()

Unnamed: 0_level_0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2022,2022-03-30,LA,EF0,0,0,32.3733,-91.7256,32.3934,-91.7065,25
1,2022,2022-03-30,LA,EF1,0,0,32.1463,-91.6868,32.2071,-91.5956,350
2,2022,2022-03-30,MS,EF0,0,0,32.186,-88.8433,32.208,-88.8179,125
3,2022,2022-03-30,MS,EF1,0,0,32.931,-89.512,32.9881,-89.4407,700
4,2022,2022-03-30,MS,EF0,0,0,32.341,-90.117,32.3517,-90.062,100


In [10]:
# Removing "EF" from the Rating values for later merging with the 1950-2021 dataset.
# regex=False makes the literal (non-regex) match explicit, because the default
# value of `regex` for Series.str.replace differs between pandas versions.
tornado_df["Rating"] = tornado_df["Rating"].str.replace("EF", "", regex=False)

# Display DataFrame
tornado_df.head()

Unnamed: 0_level_0,Year,Date,State,Rating,Injuries,Death,Start_Lat,Start_Lon,End_Lat,End_Lon,Width
index_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2022,2022-03-30,LA,0,0,0,32.3733,-91.7256,32.3934,-91.7065,25
1,2022,2022-03-30,LA,1,0,0,32.1463,-91.6868,32.2071,-91.5956,350
2,2022,2022-03-30,MS,0,0,0,32.186,-88.8433,32.208,-88.8179,125
3,2022,2022-03-30,MS,1,0,0,32.931,-89.512,32.9881,-89.4407,700
4,2022,2022-03-30,MS,0,0,0,32.341,-90.117,32.3517,-90.062,100


In [11]:
# Show the distinct Rating values that remain after the cleanup
rating_values = tornado_df["Rating"].unique()
print(rating_values)

['0' '1' '3' '2' '4' 'U']


In [12]:
# Save the cleaned tornado DataFrame (including its named index) as a CSV file
tornado_output_csv = "../../Resources/2022_tornado_all.csv"
tornado_df.to_csv(tornado_output_csv)