In [739]:
import pandas as pd
import numpy as np
import os
#import re

In [740]:
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

In [753]:
# Get all files in the Details folder
details_path = "../Data/Details/"
files = [f"{details_path}{filename}" for filename in os.listdir(details_path) if os.path.isfile((os.path.join(details_path, filename)))]

In [754]:
cleaned_path = "../Data/Cleaned/"

#file_path = "../Data/"
#filename_start_details = "StormEvents_details-ftp_v1.0"
#filename_start_fatalities = "StormEvents_fatalities-ftp_v1.0"
#filename_start_locations = "StormEvents_locations-ftp_v1.0"

#filename = "StormEvents_details-ftp_v1.0_d2024_c20250122"
#extension = ".csv"

tornado_id = 1

In [755]:
def process_file(file):
    print(f"Processing file {file}")
    # Read file
    detail_data_raw_pd = pd.read_csv(file)

    # Filter tornadoes
    detail_data_clean_pd = detail_data_raw_pd[detail_data_raw_pd["EVENT_TYPE"] == "Tornado"]

    # Process timestamps
    detail_data_clean_pd.loc[:,"B_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"BEGIN_DATE_TIME"])
    detail_data_clean_pd.loc[:,"E_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"END_DATE_TIME"])
    detail_data_clean_pd.loc[:,"BEGIN_TIMESTAMP"] = detail_data_clean_pd.loc[:,"B_DATE_TIME"].astype("int64") // 10**9
    detail_data_clean_pd.loc[:,"END_TIMESTAMP"] = detail_data_clean_pd.loc[:,"E_DATE_TIME"].astype("int64") // 10**9

    detail_data_clean_pd = detail_data_clean_pd.sort_values("BEGIN_TIMESTAMP")
    detail_data_clean_pd = detail_data_clean_pd.reset_index()

    # Accumulate Deaths & Injuries
    detail_data_clean_pd["DEATHS"] = detail_data_clean_pd["DEATHS_DIRECT"] + detail_data_clean_pd["DEATHS_INDIRECT"]
    detail_data_clean_pd["INJURIES"] = detail_data_clean_pd["INJURIES_DIRECT"] + detail_data_clean_pd["INJURIES_INDIRECT"]

    # CONVERT DAMAGE_PROPERTY TO NUMERIC
    detail_data_clean_pd["DAMAGE_PROPERTY"] = detail_data_clean_pd["DAMAGE_PROPERTY"].astype(str)
    detail_data_clean_pd.loc[:,"DAMAGE_PROPERTY"] = detail_data_clean_pd.loc[:,"DAMAGE_PROPERTY"].fillna("0.00K")
    detail_data_clean_pd["DAMAGE_PROPERTY"] = detail_data_clean_pd["DAMAGE_PROPERTY"].where(detail_data_clean_pd["DAMAGE_PROPERTY"] == 0, "0.00K")

    pattern = r'(\d+[.]*\d*)[KM]'
    detail_data_clean_pd["DMG_PRP"] = detail_data_clean_pd["DAMAGE_PROPERTY"].str.extract(pattern).astype(float)
    pattern = r'\d+[.]*\d*([KM])'
    detail_data_clean_pd["DMG_PRP_MULT_STR"] = detail_data_clean_pd["DAMAGE_PROPERTY"].str.extract(pattern)
    detail_data_clean_pd["DMG_PRP"] = (detail_data_clean_pd["DMG_PRP"] * 
                                        np.where(detail_data_clean_pd["DMG_PRP_MULT_STR"] == "K", 1000, 1))
    detail_data_clean_pd["DMG_PRP"] = (detail_data_clean_pd["DMG_PRP"] * 
                                        np.where(detail_data_clean_pd["DMG_PRP_MULT_STR"] == "M", 1000000, 1))
    
    # CONVERT DAMAGE_CROPS TO NUMERIC
    detail_data_clean_pd["DAMAGE_CROPS"] = detail_data_clean_pd["DAMAGE_CROPS"].astype(str)
    detail_data_clean_pd.loc[:,"DAMAGE_CROPS"] = detail_data_clean_pd.loc[:,"DAMAGE_CROPS"].fillna("0.00K")
    detail_data_clean_pd["DAMAGE_CROPS"] = detail_data_clean_pd["DAMAGE_CROPS"].where(detail_data_clean_pd["DAMAGE_CROPS"] == 0, "0.00K")

    pattern = r'(\d+[.]*\d*)[KM]'
    detail_data_clean_pd["DMG_CRP"] = detail_data_clean_pd["DAMAGE_CROPS"].str.extract(pattern).astype(float)
    pattern = r'\d+[.]*\d*([KM])'
    detail_data_clean_pd["DMG_CRP_MULT_STR"] = detail_data_clean_pd["DAMAGE_CROPS"].str.extract(pattern)
    detail_data_clean_pd["DMG_CRP"] = (detail_data_clean_pd["DMG_CRP"] * 
                                        np.where(detail_data_clean_pd["DMG_CRP_MULT_STR"] == "K", 1000, 1))
    detail_data_clean_pd["DMG_CRP"] = (detail_data_clean_pd["DMG_CRP"] * 
                                        np.where(detail_data_clean_pd["DMG_CRP_MULT_STR"] == "M", 1000000, 1))

    # Trim columns
    detail_data_clean_pd = detail_data_clean_pd[[
       'EVENT_ID', "STATE_FIPS", 'CZ_FIPS', 'WFO',
       'CZ_TIMEZONE', 'BEGIN_TIMESTAMP', 'END_TIMESTAMP',
       'DEATHS', 'INJURIES', 'DMG_PRP', 'DMG_CRP',
       'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_WFO',
       'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'BEGIN_LOCATION',
       'END_RANGE', 'END_AZIMUTH', 'END_LOCATION',
       'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EVENT_NARRATIVE']]
    
    # Rename the property and crop damage columns back to their original names
    detail_data_clean_pd = detail_data_clean_pd.rename({"DMG_PRP": "DAMAGE_PROPERTY", "DMG_CRP": "DAMAGE_CROPS"}, axis=1)

    # Fix numeric dtypes
    detail_data_clean_pd["CZ_FIPS"] = detail_data_clean_pd["CZ_FIPS"].astype("int64")
    detail_data_clean_pd["TOR_OTHER_CZ_FIPS"] = detail_data_clean_pd["TOR_OTHER_CZ_FIPS"].astype("Int64")
    detail_data_clean_pd["DAMAGE_PROPERTY"] = detail_data_clean_pd["DAMAGE_PROPERTY"].astype("int64")
    detail_data_clean_pd["DAMAGE_CROPS"] = detail_data_clean_pd["DAMAGE_CROPS"].astype("int64")

    detail_data_clean_pd["TORNADO_ID"] = 0

    return detail_data_clean_pd


In [756]:
df_list = [process_file(file) for file in files]
details_full_df = pd.concat(df_list)

print(details_full_df.info)
details_full_df.to_csv(f"{cleaned_path}/Tornadoes_1950_2024.csv", index=False)

Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1950_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1951_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1952_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1953_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1954_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1955_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1956_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1957_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1958_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1959_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1960_c20210803.csv
Processing file ../Data/Details/StormEvents_details-ftp_v1.0_d1961_c20210803.csv
Processing file ../Data/Deta

In [670]:
detail_data_raw_pd.columns

Index(['BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH',
       'END_DAY', 'END_TIME', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_FIPS',
       'YEAR', 'MONTH_NAME', 'EVENT_TYPE', 'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME',
       'WFO', 'BEGIN_DATE_TIME', 'CZ_TIMEZONE', 'END_DATE_TIME',
       'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
       'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE',
       'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE',
       'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH',
       'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE'],
      dtype='object')

In [671]:
# Filter to only tornadoes
detail_data_clean_pd = detail_data_raw_pd[detail_data_raw_pd["EVENT_TYPE"] == "Tornado"]
#detail_data_clean_pd = detail_data_clean_pd.sort_values("EVENT_ID")
#detail_data_clean_pd = detail_data_clean_pd.reset_index()
detail_data_clean_pd["EVENT_TYPE"]

78       Tornado
79       Tornado
84       Tornado
91       Tornado
98       Tornado
          ...   
64069    Tornado
64076    Tornado
64099    Tornado
64111    Tornado
64115    Tornado
Name: EVENT_TYPE, Length: 1901, dtype: object

In [672]:
detail_data_clean_pd.loc[:,"B_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"BEGIN_DATE_TIME"])
detail_data_clean_pd.loc[:,"E_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"END_DATE_TIME"])
detail_data_clean_pd.loc[:,"BEGIN_TIMESTAMP"] = detail_data_clean_pd.loc[:,"B_DATE_TIME"].astype("int64") // 10**9
detail_data_clean_pd.loc[:,"END_TIMESTAMP"] = detail_data_clean_pd.loc[:,"E_DATE_TIME"].astype("int64") // 10**9

detail_data_clean_pd = detail_data_clean_pd.sort_values("BEGIN_TIMESTAMP")
detail_data_clean_pd = detail_data_clean_pd.reset_index()

#print(detail_data_clean_pd["BEGIN_TIMESTAMP"])
#print(detail_data_clean_pd["END_TIMESTAMP"])

  detail_data_clean_pd.loc[:,"B_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"BEGIN_DATE_TIME"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  detail_data_clean_pd.loc[:,"B_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"BEGIN_DATE_TIME"])
  detail_data_clean_pd.loc[:,"E_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"END_DATE_TIME"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  detail_data_clean_pd.loc[:,"E_DATE_TIME"] = pd.to_datetime(detail_data_clean_pd.loc[:,"END_DATE_TIME"])
A value is trying to be set on a copy of a sli

In [673]:
detail_data_clean_pd.columns

Index(['index', 'BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH',
       'END_DAY', 'END_TIME', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_FIPS',
       'YEAR', 'MONTH_NAME', 'EVENT_TYPE', 'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME',
       'WFO', 'BEGIN_DATE_TIME', 'CZ_TIMEZONE', 'END_DATE_TIME',
       'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
       'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE',
       'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE',
       'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH',
       'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE', 'B_DATE_TIME',
       'E_DATE_TIME', 'BEGIN_TIMESTAMP', 'END_TIMESTAMP'],
      dtype='object')

In [674]:
#detail_data_clean_pd = detail_data_clean_pd.drop(columns=["MONTH_NAME",  "SOURCE",
#                                                          "MAGNITUDE", "MAGNITUDE_TYPE", "FLOOD_CAUSE", "CATEGORY",
#                                                          "DATA_SOURCE"])
#detail_data_clean_pd.columns

In [675]:
#detail_data_clean_pd["BEGIN_YEARMONTH"] = detail_data_clean_pd["BEGIN_YEARMONTH"].astype(str)
#detail_data_clean_pd["BEGIN_YEARMONTH"]

In [676]:
#pattern = r"(\d{4})"
#detail_data_clean_pd["BEGIN_YEAR"] = detail_data_clean_pd["BEGIN_YEARMONTH"].str.extract(pattern)
#detail_data_clean_pd["BEGIN_YEAR"]

In [677]:
#pattern = r"\d{4}(\d{2})"
#detail_data_clean_pd["BEGIN_MONTH"] = detail_data_clean_pd["BEGIN_YEARMONTH"].str.extract(pattern)
#detail_data_clean_pd["BEGIN_MONTH"]

In [678]:
#detail_data_clean_pd["END_YEARMONTH"] = detail_data_clean_pd["END_YEARMONTH"].astype(str)
#detail_data_clean_pd["END_YEARMONTH"]

In [679]:
#pattern = r"(\d{4})"
#detail_data_clean_pd["END_YEAR"] = detail_data_clean_pd["END_YEARMONTH"].str.extract(pattern)
#detail_data_clean_pd["END_YEAR"]

In [680]:
#pattern = r"\d{4}(\d{2})"
#detail_data_clean_pd["END_MONTH"] = detail_data_clean_pd["END_YEARMONTH"].str.extract(pattern)
#detail_data_clean_pd["END_MONTH"]

In [681]:
#detail_data_clean_pd["BEGIN_YEAR"] = detail_data_clean_pd["BEGIN_YEAR"].astype("Int64")
#detail_data_clean_pd["BEGIN_MONTH"] = detail_data_clean_pd["BEGIN_MONTH"].astype("Int64")
#detail_data_clean_pd["END_YEAR"] = detail_data_clean_pd["END_YEAR"].astype("Int64")
#detail_data_clean_pd["END_MONTH"] = detail_data_clean_pd["END_MONTH"].astype("Int64")
#detail_data_clean_pd = detail_data_clean_pd.drop(columns=["BEGIN_YEARMONTH", "END_YEARMONTH"])

In [682]:
#detail_data_clean_pd["BEGIN_LOC"] = detail_data_clean_pd['BEGIN_RANGE'].astype(str) + " miles " + detail_data_clean_pd['BEGIN_AZIMUTH'] + " of " + detail_data_clean_pd['BEGIN_LOCATION'] + ", " + detail_data_clean_pd['STATE']
#detail_data_clean_pd["BEGIN_LOC"]

In [683]:
#detail_data_clean_pd["END_LOC"] = detail_data_clean_pd['END_RANGE'].astype(str) + " miles " + detail_data_clean_pd['BEGIN_AZIMUTH'] + " of " + detail_data_clean_pd['END_LOCATION'] + ", " + detail_data_clean_pd['STATE']
#detail_data_clean_pd["END_LOC"]

In [684]:
detail_data_clean_pd["DEATHS"] = detail_data_clean_pd["DEATHS_DIRECT"] + detail_data_clean_pd["DEATHS_INDIRECT"]

In [685]:
detail_data_clean_pd["INJURIES"] = detail_data_clean_pd["INJURIES_DIRECT"] + detail_data_clean_pd["INJURIES_INDIRECT"]

In [686]:
# CONVERT DAMAGE_PROPERTY TO NUMERIC
detail_data_clean_pd.loc[:,"DAMAGE_PROPERTY"] = detail_data_clean_pd.loc[:,"DAMAGE_PROPERTY"].fillna("0.00K")

pattern = r'(\d+[.]*\d+)[KM]'
detail_data_clean_pd["DMG_PRP"] = detail_data_clean_pd["DAMAGE_PROPERTY"].str.extract(pattern).astype(float)
pattern = r'\d+[.]*\d+([KM])'
detail_data_clean_pd["DMG_PRP_MULT_STR"] = detail_data_clean_pd["DAMAGE_PROPERTY"].str.extract(pattern)
detail_data_clean_pd["DMG_PRP"] = (detail_data_clean_pd["DMG_PRP"] * 
                                   np.where(detail_data_clean_pd["DMG_PRP_MULT_STR"] == "K", 1000, 1))
detail_data_clean_pd["DMG_PRP"] = (detail_data_clean_pd["DMG_PRP"] * 
                                   np.where(detail_data_clean_pd["DMG_PRP_MULT_STR"] == "M", 1000000, 1))

In [687]:
# CONVERT DAMAGE_CROPS TO NUMERIC
detail_data_clean_pd.loc[:,"DAMAGE_CROPS"] = detail_data_clean_pd.loc[:,"DAMAGE_CROPS"].fillna("0.00K")

pattern = r'(\d+[.]*\d+)[KM]'
detail_data_clean_pd["DMG_CRP"] = detail_data_clean_pd["DAMAGE_CROPS"].str.extract(pattern).astype(float)

pattern = r'\d+[.]*\d+([KM])'
detail_data_clean_pd["DMG_CRP_MULT_STR"] = detail_data_clean_pd["DAMAGE_CROPS"].str.extract(pattern)
detail_data_clean_pd["DMG_CRP"] = (detail_data_clean_pd["DMG_CRP"] * 
                                   np.where(detail_data_clean_pd["DMG_CRP_MULT_STR"] == "K", 1000, 1))
detail_data_clean_pd["DMG_CRP"] = (detail_data_clean_pd["DMG_CRP"] * 
                                   np.where(detail_data_clean_pd["DMG_CRP_MULT_STR"] == "M", 1000000, 1))

In [688]:
detail_data_clean_pd = detail_data_clean_pd[[
       'EVENT_ID', "STATE_FIPS", 'CZ_FIPS', 'WFO',
       'CZ_TIMEZONE', 'BEGIN_TIMESTAMP', 'END_TIMESTAMP',
       'DEATHS', 'INJURIES', 'DMG_PRP', 'DMG_CRP',
       'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_WFO',
       'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'BEGIN_LOCATION',
       'END_RANGE', 'END_AZIMUTH', 'END_LOCATION',
       'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EVENT_NARRATIVE']]

In [689]:
# Rename the property and crop damage columns back to their original names
detail_data_clean_pd = detail_data_clean_pd.rename({"DMG_PRP": "DAMAGE_PROPERTY", "DMG_CRP": "DAMAGE_CROPS"}, axis=1)

In [690]:
# Fix numeric dtypes
detail_data_clean_pd["CZ_FIPS"] = detail_data_clean_pd["CZ_FIPS"].astype("int64")
detail_data_clean_pd["TOR_OTHER_CZ_FIPS"] = detail_data_clean_pd["TOR_OTHER_CZ_FIPS"].astype("Int64")
detail_data_clean_pd["DAMAGE_PROPERTY"] = detail_data_clean_pd["DAMAGE_PROPERTY"].astype("int64")
detail_data_clean_pd["DAMAGE_CROPS"] = detail_data_clean_pd["DAMAGE_CROPS"].astype("int64")

In [691]:
# Add TORNADO_ID column
detail_data_clean_pd["TORNADO_ID"] = 0

In [692]:
def find_next_tornado_segment(current_index):    
    iterrable_df = detail_data_clean_pd.copy()
    
    iterrable_df = iterrable_df.iloc[current_index:,:]
    current_row = iterrable_df.iloc[0:1,:]

    next_fips = current_row.loc[:,"TOR_OTHER_CZ_FIPS"].values[0]
    next_wfo = current_row.loc[:,"TOR_OTHER_WFO"].values[0]
    next_timestamp = current_row.loc[:,"END_TIMESTAMP"].values[0]
    
    #print(next_fips, next_wfo, next_timestamp)
    found = iterrable_df[(iterrable_df["CZ_FIPS"] == next_fips) &
                          (iterrable_df["WFO"] == next_wfo) &
                          (iterrable_df["BEGIN_TIMESTAMP"] == next_timestamp)]
    
    #found = iterrable_df[(iterrable_df["CZ_FIPS"] == next_fips) &
    #                      (iterrable_df["WFO"] == next_wfo)]
    print(found)
    return found.index[0]
    #print(current_row)
    #for index, row in iterrable_df.iterrows():
    #    if (current_row["TOR_OTHER_CZ_FIPS"] == row["CZ_FIPS"]):
    #        print("found FIPS match!")

    

In [693]:
def identify_segments():
    segments_df = detail_data_clean_pd.copy()
    segments_df = segments_df[segments_df["TOR_OTHER_CZ_FIPS"].notna()]
    return segments_df.index

In [694]:
def idenfity_tornadoes():
    segment_indices = identify_segments()
    #print(segment_indices)
    global tornado_id
    work_df = detail_data_clean_pd.copy()
    #global detail_data_clean_pd

    for index, row in work_df.iterrows():
        if row["TORNADO_ID"] == 0:
            this_row = work_df.iloc[index:index+1,:]
            work_df.loc[index:index+1,"TORNADO_ID"] = tornado_id
            #print(this_row["TOR_OTHER_CZ_FIPS"])
            is_segment = this_row["TOR_OTHER_CZ_FIPS"].notna()
            if is_segment.values[0]:
                #print(is_segment.values[0])
                print(f"Index: {index}")
                next_segment_index = find_next_tornado_segment(index)
                print(f"   Next segment: {next_segment_index}")
                #print(f"Next: {find_next_tornado_segment(index)}")

            tornado_id = tornado_id + 1

    #tornado_id = tornado_id_temp
    #print(tornado_id)
    return work_df


In [696]:
#detail_data_clean_pd = idenfity_tornadoes()

In [488]:
detail_data_clean_pd.dtypes

EVENT_ID               int64
STATE_FIPS             int64
CZ_FIPS                int64
WFO                   object
CZ_TIMEZONE           object
BEGIN_TIMESTAMP        int64
END_TIMESTAMP          int64
DEATHS                 int64
INJURIES               int64
DAMAGE_PROPERTY        int64
DAMAGE_CROPS           int64
TOR_F_SCALE           object
TOR_LENGTH           float64
TOR_WIDTH            float64
TOR_OTHER_CZ_FIPS      Int64
TOR_OTHER_WFO         object
BEGIN_RANGE          float64
BEGIN_AZIMUTH         object
BEGIN_LOCATION        object
END_RANGE            float64
END_AZIMUTH           object
END_LOCATION          object
BEGIN_LAT            float64
BEGIN_LON            float64
END_LAT              float64
END_LON              float64
EVENT_NARRATIVE       object
TORNADO_ID             int64
dtype: object

In [489]:
detail_data_clean_pd.head()

Unnamed: 0,EVENT_ID,STATE_FIPS,CZ_FIPS,WFO,CZ_TIMEZONE,BEGIN_TIMESTAMP,END_TIMESTAMP,DEATHS,INJURIES,DAMAGE_PROPERTY,...,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EVENT_NARRATIVE,TORNADO_ID
0,1147213,45,63,CAE,EST-5,1704809400,1704809760,0,1,0,...,SYRUP,1.0,SSE,SYRUP,34.0518,-81.3548,34.0923,-81.3252,A National Weather Service storm survey team c...,0
1,1147216,45,9,CAE,EST-5,1704811560,1704811680,0,0,1800000,...,BAMBERG,0.0,NW,BAMBERG,33.2729,-81.0435,33.3027,-81.0331,A National Weather storm survey team confirmed...,0
2,1149117,12,11,MFL,EST-5,1704563220,1704563400,0,0,150000,...,PORT EVERGLADES,2.0,ESE,FT LAUDERDALE,26.1076,-80.1297,26.1199,-80.1086,A tornado affected the downtown Fort Lauderdal...,0
3,1149494,45,81,CAE,EST-5,1706386680,1706386920,0,0,0,...,MERCHANT,3.0,S,PERRYS XRDS,34.06,-81.67,34.0821,-81.6607,A National Weather Service storm survey team c...,0
4,1149609,45,79,CAE,EST-5,1706370480,1706370600,0,0,0,...,PONTIAC,3.0,N,PONTIAC,34.1614,-80.859,34.169,-80.8464,A National Weather Service storm survey team c...,0


In [490]:
#detail_data_clean_pd.to_csv(f"{file_path}Cleaned/{filename}_CLN{extension}", index=False)