In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from path import Path

### Import Data

In [2]:
# Read tornadoes csv into DataFrame
file_path = Path('Raw_Data/1950-2020_torn.csv')
tornadoes_df = pd.read_csv(file_path)
tornadoes_df

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,len,wid,ns,sn,sg,f1,f2,f3,f4,fc
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,15.80,10,1,1,1,25,0,0,0,0
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,2.00,880,1,1,1,47,0,0,0,0
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,0.10,10,1,1,1,177,0,0,0,0
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,0.10,10,1,1,1,209,0,0,0,0
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,2.00,37,1,1,1,101,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67499,67500,619522,2020,9,1,9/1/2020,18:10:00,3,TX,48,...,0.01,30,1,1,1,191,0,0,0,0
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,6.05,100,1,1,1,3,0,0,0,0
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,12.02,50,1,1,1,39,0,0,0,0
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,1.12,25,1,1,1,19,0,0,0,0


In [3]:
tornadoes_df.columns

Index(['surr_key', 'om', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'stf',
       'stn', 'f', 'inj', 'fat', 'loss', 'closs', 'slat', 'slon', 'elat',
       'elon', 'len', 'wid', 'ns', 'sn', 'sg', 'f1', 'f2', 'f3', 'f4', 'fc'],
      dtype='object')

In [4]:
tornadoes_df.dtypes

surr_key      int64
om            int64
yr            int64
mo            int64
dy            int64
date         object
time         object
tz            int64
st           object
stf           int64
stn           int64
f             int64
inj           int64
fat           int64
loss        float64
closs       float64
slat        float64
slon        float64
elat        float64
elon        float64
len         float64
wid           int64
ns            int64
sn            int64
sg            int64
f1            int64
f2            int64
f3            int64
f4            int64
fc            int64
dtype: object

In [5]:
# Sort DF by Year then State
tornadoes_df.sort_values(by=['yr','mo','st'])

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,len,wid,ns,sn,sg,f1,f2,f3,f4,fc
10,11,4,1950,1,13,1/13/1950,5:25:00,3,AR,5,...,0.60,17,1,1,1,113,0,0,0,0
12,13,6,1950,1,25,1/25/1950,21:00:00,3,IL,17,...,0.10,100,1,1,1,91,0,0,0,0
16,17,1,1950,1,3,1/3/1950,11:10:00,3,IL,17,...,3.30,100,2,1,2,119,0,0,0,0
17,18,2,1950,1,3,1/3/1950,11:55:00,3,IL,17,...,3.60,130,1,1,1,135,0,0,0,0
11,12,5,1950,1,25,1/25/1950,19:30:00,3,MO,29,...,2.30,300,1,1,1,93,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66437,66438,619601,2020,12,23,12/23/2020,15:54:00,3,TX,48,...,1.23,20,1,1,1,351,0,0,0,0
66438,66439,619602,2020,12,23,12/23/2020,18:59:00,3,TX,48,...,0.39,45,1,1,1,241,0,0,0,0
66459,66460,619623,2020,12,30,12/30/2020,10:41:00,3,TX,48,...,0.98,75,1,1,1,349,0,0,0,0
66457,66458,619621,2020,12,24,12/24/2020,22:21:00,3,VA,51,...,0.30,150,1,1,1,800,0,0,0,0


### Clean the whole dataset

In [6]:
# Merge Date & Time 
tornadoes_clean = tornadoes_df.copy()
tornadoes_clean["Timestamp"] = tornadoes_clean["date"].str.cat(tornadoes_clean["time"], sep=' ')

In [7]:
# convert timestamp to datetime
tornadoes_clean['Timestamp']= pd.to_datetime(tornadoes_clean['Timestamp'])
tornadoes_clean

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67499,67500,619522,2020,9,1,9/1/2020,18:10:00,3,TX,48,...,30,1,1,1,191,0,0,0,0,2020-09-01 18:10:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [8]:
# Filter out any bad data (ie 0<=EF<=5)
tornadoes_clean = tornadoes_clean[ (tornadoes_df['f'] >= 0) & ( tornadoes_df["f"] <= 5)]
tornadoes_clean

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,619521,2020,9,1,9/1/2020,16:24:00,3,TN,47,...,50,1,1,1,95,0,0,0,0,2020-09-01 16:24:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [9]:
# Filter for Lower 48 (exclude PR, VI, HI, AK)
tornadoes_clean = tornadoes_clean[tornadoes_clean['st'] != 'PR']
tornadoes_clean = tornadoes_clean[tornadoes_clean['st'] != 'VI']
tornadoes_clean = tornadoes_clean[tornadoes_clean['st'] != 'HI']
tornadoes_clean = tornadoes_clean[tornadoes_clean['st'] != 'AK']
tornadoes_clean

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,619521,2020,9,1,9/1/2020,16:24:00,3,TN,47,...,50,1,1,1,95,0,0,0,0,2020-09-01 16:24:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [10]:
# Replace loss and closs 0 with NaN ( 0 means no data avaliable NOT $0 in loss)
tornadoes_clean['closs']=tornadoes_clean['closs'].replace(0, np.nan)
tornadoes_clean['loss']=tornadoes_clean['loss'].replace(0, np.nan)
tornadoes_clean

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,619521,2020,9,1,9/1/2020,16:24:00,3,TN,47,...,50,1,1,1,95,0,0,0,0,2020-09-01 16:24:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [11]:
# Starting in 2016 the loss data is in whole dollar amounts need to convert rows with years prior to 2016 to whole dollar
# original loss and closs were in millions of dollars
# Multiply loss and closs by $1,000,000 to get whole number values
loss = tornadoes_clean.loc[tornadoes_clean["yr"]<2016]
loss

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61212,61213,601870,2015,9,26,9/26/2015,14:48:00,3,TX,48,...,50,1,1,1,249,0,0,0,0,2015-09-26 14:48:00
61213,61214,598567,2015,9,29,9/29/2015,8:00:00,3,FL,12,...,10,1,1,1,21,0,0,0,0,2015-09-29 08:00:00
61214,61215,599512,2015,9,29,9/29/2015,20:02:00,3,MD,24,...,75,1,1,1,33,27,0,0,0,2015-09-29 20:02:00
61215,61216,597478,2015,9,29,9/29/2015,22:40:00,3,PA,42,...,75,1,1,1,71,0,0,0,0,2015-09-29 22:40:00


In [12]:
loss["loss"] = loss["loss"]*1000000
loss["closs"] = loss["closs"]*1000000
loss

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61212,61213,601870,2015,9,26,9/26/2015,14:48:00,3,TX,48,...,50,1,1,1,249,0,0,0,0,2015-09-26 14:48:00
61213,61214,598567,2015,9,29,9/29/2015,8:00:00,3,FL,12,...,10,1,1,1,21,0,0,0,0,2015-09-29 08:00:00
61214,61215,599512,2015,9,29,9/29/2015,20:02:00,3,MD,24,...,75,1,1,1,33,27,0,0,0,2015-09-29 20:02:00
61215,61216,597478,2015,9,29,9/29/2015,22:40:00,3,PA,42,...,75,1,1,1,71,0,0,0,0,2015-09-29 22:40:00


In [13]:
# drop all rows in tor_EF between 2008-2015
tornadoes_clean_2016_2020 = tornadoes_clean[tornadoes_clean["yr"] >= 2016]
tornadoes_clean_2016_2020

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
61217,61218,614398,2016,10,12,10/12/2016,16:26:00,3,IN,18,...,30,1,1,1,141,0,0,0,0,2016-10-12 16:26:00
61218,61219,614399,2016,10,12,10/12/2016,16:30:00,3,IN,18,...,40,1,1,1,141,0,0,0,0,2016-10-12 16:30:00
61219,61220,614400,2016,10,14,10/14/2016,9:18:00,3,OR,41,...,225,1,1,1,57,0,0,0,0,2016-10-14 09:18:00
61221,61222,614382,2016,10,4,10/4/2016,16:32:00,3,KS,20,...,40,1,1,1,143,0,0,0,0,2016-10-04 16:32:00
61222,61223,614383,2016,10,4,10/4/2016,17:27:00,3,KS,20,...,75,1,1,1,27,0,0,0,0,2016-10-04 17:27:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,619521,2020,9,1,9/1/2020,16:24:00,3,TN,47,...,50,1,1,1,95,0,0,0,0,2020-09-01 16:24:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [14]:
tornadoes_clean = loss.append(tornadoes_clean_2016_2020)
tornadoes_clean

Unnamed: 0,surr_key,om,yr,mo,dy,date,time,tz,st,stf,...,wid,ns,sn,sg,f1,f2,f3,f4,fc,Timestamp
0,1,192,1950,10,1,10/1/1950,21:00:00,3,OK,40,...,10,1,1,1,25,0,0,0,0,1950-10-01 21:00:00
1,2,193,1950,10,9,10/9/1950,2:15:00,3,NC,37,...,880,1,1,1,47,0,0,0,0,1950-10-09 02:15:00
2,3,195,1950,11,20,11/20/1950,2:20:00,3,KY,21,...,10,1,1,1,177,0,0,0,0,1950-11-20 02:20:00
3,4,196,1950,11,20,11/20/1950,4:00:00,3,KY,21,...,10,1,1,1,209,0,0,0,0,1950-11-20 04:00:00
4,5,197,1950,11,20,11/20/1950,7:30:00,3,MS,28,...,37,1,1,1,101,0,0,0,0,1950-11-20 07:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,619521,2020,9,1,9/1/2020,16:24:00,3,TN,47,...,50,1,1,1,95,0,0,0,0,2020-09-01 16:24:00
67500,67501,619523,2020,9,3,9/3/2020,15:57:00,3,MD,24,...,100,1,1,1,3,0,0,0,0,2020-09-03 15:57:00
67501,67502,619524,2020,9,5,9/5/2020,16:17:00,3,CA,6,...,50,1,1,1,39,0,0,0,0,2020-09-05 16:17:00
67502,67503,619525,2020,9,5,9/5/2020,18:29:00,3,CA,6,...,25,1,1,1,19,0,0,0,0,2020-09-05 18:29:00


In [15]:
tornadoes_clean.columns

Index(['surr_key', 'om', 'yr', 'mo', 'dy', 'date', 'time', 'tz', 'st', 'stf',
       'stn', 'f', 'inj', 'fat', 'loss', 'closs', 'slat', 'slon', 'elat',
       'elon', 'len', 'wid', 'ns', 'sn', 'sg', 'f1', 'f2', 'f3', 'f4', 'fc',
       'Timestamp'],
      dtype='object')

### Filter for years of interest & Export Cleaned Data

In [16]:
# Delete columns not needed
tornadoes_export = tornadoes_clean[['surr_key','yr', 'mo', 'dy','Timestamp','st','stf','f1','slat', 'slon', 'elat','elon',
                                    'f','inj', 'fat', 'loss', 'closs','len', 'wid']]
tornadoes_export

Unnamed: 0,surr_key,yr,mo,dy,Timestamp,st,stf,f1,slat,slon,elat,elon,f,inj,fat,loss,closs,len,wid
0,1,1950,10,1,1950-10-01 21:00:00,OK,40,25,36.7300,-102.5200,36.8800,-102.3000,1,0,0,4000000.0,,15.80,10
1,2,1950,10,9,1950-10-09 02:15:00,NC,37,47,34.1700,-78.6000,0.0000,0.0000,3,3,0,5000000.0,,2.00,880
2,3,1950,11,20,1950-11-20 02:20:00,KY,21,177,37.3700,-87.2000,0.0000,0.0000,2,0,0,5000000.0,,0.10,10
3,4,1950,11,20,1950-11-20 04:00:00,KY,21,209,38.2000,-84.5000,0.0000,0.0000,1,0,0,5000000.0,,0.10,10
4,5,1950,11,20,1950-11-20 07:30:00,MS,28,101,32.4200,-89.1300,0.0000,0.0000,1,3,0,4000000.0,,2.00,37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,2020,9,1,2020-09-01 16:24:00,TN,47,95,36.3290,-89.4981,36.3517,-89.4729,0,0,0,,2000.0,2.10,50
67500,67501,2020,9,3,2020-09-03 15:57:00,MD,24,3,38.9480,-76.5695,38.9233,-76.4615,0,0,0,1250000.0,,6.05,100
67501,67502,2020,9,5,2020-09-05 16:17:00,CA,6,39,37.3500,-119.3400,37.5218,-119.3056,2,0,0,,,12.02,50
67502,67503,2020,9,5,2020-09-05 18:29:00,CA,6,19,37.2500,-119.2000,37.2598,-119.1838,1,0,0,,,1.12,25


In [17]:
# Rename columns
tornadoes_export.rename(columns = {'yr':'Year','mo':'Month','dy':'Day','st':'State','stf':'State_Fips','f1':'County_Fips',
                                   'slat':'Start_Lat','slon':'Start_Lon','elat':'End_Lat','elon':'End_Lon','f':'EF',
                                   'inj':'Injuries','fat':'Fatalities','loss':'Loss','closs':'Crop_Loss','len':'Length','wid':'Width'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [18]:
# Export All Years Cleaned Data 1950-2020
tornadoes_export
tornadoes_export.to_csv("Cleaned_Data/1950-2020_tornadoes_cleaned.csv")

In [19]:
# Filter for years 1950-2007 --> F Scale
tornadoes_F = tornadoes_export[tornadoes_export['Year'] < 2008]
tornadoes_F

Unnamed: 0,surr_key,Year,Month,Day,Timestamp,State,State_Fips,County_Fips,Start_Lat,Start_Lon,End_Lat,End_Lon,EF,Injuries,Fatalities,Loss,Crop_Loss,Length,Width
0,1,1950,10,1,1950-10-01 21:00:00,OK,40,25,36.73,-102.52,36.88,-102.30,1,0,0,4000000.0,,15.80,10
1,2,1950,10,9,1950-10-09 02:15:00,NC,37,47,34.17,-78.60,0.00,0.00,3,3,0,5000000.0,,2.00,880
2,3,1950,11,20,1950-11-20 02:20:00,KY,21,177,37.37,-87.20,0.00,0.00,2,0,0,5000000.0,,0.10,10
3,4,1950,11,20,1950-11-20 04:00:00,KY,21,209,38.20,-84.50,0.00,0.00,1,0,0,5000000.0,,0.10,10
4,5,1950,11,20,1950-11-20 07:30:00,MS,28,101,32.42,-89.13,0.00,0.00,1,3,0,4000000.0,,2.00,37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51187,51188,2007,9,6,2007-09-06 13:45:00,NM,35,17,32.65,-108.50,32.61,-108.46,0,0,0,,,3.05,40
51188,51189,2007,9,6,2007-09-06 16:27:00,NE,31,53,41.69,-96.50,41.73,-96.48,0,0,0,,,3.16,50
51189,51190,2007,9,6,2007-09-06 17:28:00,IL,17,167,39.95,-89.57,39.95,-89.57,0,0,0,,,0.10,10
51190,51191,2007,9,7,2007-09-07 14:43:00,TX,48,69,34.57,-102.04,34.55,-102.03,1,0,0,75000.0,,1.50,75


In [20]:
# Export Cleaned Data to CSV
tornadoes_F.to_csv("Cleaned_Data/1950-2007_tornadoes_cleaned.csv")

In [21]:
# Filter for years 2008-2020 --> EF Scale
tornadoes_EF = tornadoes_export[tornadoes_export['Year'] >= 2008]
tornadoes_EF

Unnamed: 0,surr_key,Year,Month,Day,Timestamp,State,State_Fips,County_Fips,Start_Lat,Start_Lon,End_Lat,End_Lon,EF,Injuries,Fatalities,Loss,Crop_Loss,Length,Width
51192,51193,2008,10,11,2008-10-11 18:33:00,NM,35,49,35.1500,-105.9400,0.0000,0.0000,0,0,0,12000.0,,0.02,10
51193,51194,2008,10,11,2008-10-11 21:54:00,KS,20,181,39.3300,-101.5500,0.0000,0.0000,0,0,0,,,0.25,10
51194,51195,2008,10,24,2008-10-24 16:55:00,GA,13,39,31.0200,-81.8100,0.0000,0.0000,0,0,0,1000.0,,0.25,25
51195,51196,2008,10,6,2008-10-06 13:46:00,AR,5,113,34.5800,-94.1500,34.6300,-94.1200,1,0,0,75000.0,,3.41,250
51196,51197,2008,10,6,2008-10-06 16:05:00,TX,48,67,33.0700,-94.1000,33.0900,-94.0800,0,0,0,75000.0,,1.50,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67498,67499,2020,9,1,2020-09-01 16:24:00,TN,47,95,36.3290,-89.4981,36.3517,-89.4729,0,0,0,,2000.0,2.10,50
67500,67501,2020,9,3,2020-09-03 15:57:00,MD,24,3,38.9480,-76.5695,38.9233,-76.4615,0,0,0,1250000.0,,6.05,100
67501,67502,2020,9,5,2020-09-05 16:17:00,CA,6,39,37.3500,-119.3400,37.5218,-119.3056,2,0,0,,,12.02,50
67502,67503,2020,9,5,2020-09-05 18:29:00,CA,6,19,37.2500,-119.2000,37.2598,-119.1838,1,0,0,,,1.12,25


In [22]:
# Export Cleaned Data to CSV
tornadoes_EF.to_csv("Cleaned_Data/2008-2020_tornadoes_cleaned.csv")