## Severe Weather Data Cleanup
#### CSVs come from: https://www.ncdc.noaa.gov/data-access

* 210 files total | 3 types
* Storm Event Details
* Storm Event Locations
* Storm Event Fatalities
* Dates: 1950-2019(Jan)

In [1]:
# Import Dependencies
import glob
import os
import pandas as pd
import numpy as np

# Merging & Cleaning | StormEventsDetails CSVs

In [2]:
# Reading StormEventsLocations CSVs in using glob

path = '../Resources/StormData/StormEventsDetails'

dfiles = glob.glob(os.path.join(path, '*.csv'))

ddata = []
for dfile in dfiles:
    record = pd.read_csv(dfile)
    ddata.append(record)

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Concat CSVs into single DataFrame & check for duplicate columns
dfulldata = pd.concat(ddata, ignore_index=True)
dfulldata.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,196007,11,2141,196007,11,2141,,10071755,NEBRASKA,31.0,...,0.0,,,41.0,-98.28,,,,,PUB
1,196006,15,2030,196006,15,2030,,10071700,NEBRASKA,31.0,...,0.0,,,40.58,-98.4,,,,,PUB
2,196007,22,1525,196007,22,1525,,10071180,MONTANA,30.0,...,0.0,,,46.6,-112.0,,,,,PUB
3,196003,29,1900,196003,29,1900,,10018150,IOWA,19.0,...,0.0,,,40.78,-91.08,,,,,PUB
4,196005,5,1550,196005,5,1550,,10093963,OKLAHOMA,40.0,...,0.0,,,35.12,-97.05,35.25,-96.97,,,PUB


In [4]:
# Drop low numbers and temperature-related events
to_drop = ['TORNADOES, TSTM WIND, HAIL', 'HAIL/ICY ROADS', 'THUNDERSTORM WINDS/FLOODING', 'THUNDERSTORM WINDS/ FLOOD', 
          'THUNDERSTORM WINDS HEAVY RAIN', 'THUNDERSTORM WINDS/HEAVY RAIN', 'TORNADO/WATERSPOUT', 'THUNDERSTORM WINDS FUNNEL CLOU', 
          'THUNDERSTORM WIND/ TREE', 'THUNDERSTORM WIND/ TREES', 'THUNDERSTORM WINDS LIGHTNING', 'THUNDERSTORM WINDS/FLASH FLOOD', 
          'HAIL FLOODING', 'OTHER', 'Marine Tropical Depression', 'Marine Lightning', 'Northern Lights', 'Heavy Wind', 'High Snow', 
          'Marine Dense Fog', 'Extreme Cold/Wind Chill', 'Excessive Heat', 'Heat', 'Cold/Wind Chill']

dfulldata = dfulldata[~dfulldata['EVENT_TYPE'].isin(to_drop)]

In [5]:
# Combine Volcanic Ash & Volcanic Ashfall as one EVENT TYPE
dfulldata["EVENT_TYPE"] = dfulldata["EVENT_TYPE"].replace(
{"Volcanic Ash": "Volcanic Ashfall"})

In [6]:
# Create a new column for WX_CATEGORIES using binning on the EVENT TYPE column
thunderstormWind = ["Thunderstorm Wind", "High Wind", "Marine Thunderstorm Wind", "Strong Wind", "Lightning", "Waterspout", "Marine High Wind", "Marine Strong Wind"]
tornado = ["Tornado", "Funnel Cloud"]
hurricaneTropical = ["Tropical Storm", "Hurricane (Typhoon)", "Tropical Depression", "Hurricane", "Marine Tropical Storm", "Marine Hurricane/Typhoon"]
hail = ["Hail", "Marine Hail"]
droughtFire = ["Drought", "Wildfire", "Dust Storm", "Debris Flow", "Dust Devil", "Dense Smoke", "Volcanic Ashfall"]
flooding = ["Flash Flood","Flood", "Heavy Rain", "Coastal Flood", "Landslide", "Lakeshore Flood"]
ocean = ["High Surf", "Storm Surge/Tide", "Rip Current", "Astronomical Low Tide", "Seiche", "Tsunami", "Sneakerwave"]
winter = ["Winter Storm", "Heavy Snow", "Winter Weather", "Blizzard", "Frost/Freeze", "Ice Storm", "Lake-Effect Snow", "Sleet", "Avalanche"]
fog = ["Dense Fog", "Freezing Fog"]

wxevents = dfulldata["EVENT_TYPE"]
categories = []

for wxevent in wxevents:
    if wxevent in thunderstormWind:
        categories.append("Thunderstorm/Wind")
    elif wxevent in tornado:
        categories.append("Tornado")
    elif wxevent in hurricaneTropical:
        categories.append("Hurricane/Tropical")
    elif wxevent in hail:
        categories.append("Hail")
    elif wxevent in droughtFire:
        categories.append("Drought/Fire")
    elif wxevent in flooding:
        categories.append("Flooding")
    elif wxevent in ocean:
        categories.append("Ocean")
    elif wxevent in winter:
        categories.append("Winter")
    else:
        categories.append("Fog")

dfulldata["WX_CATEGORY"] = categories

In [7]:
# Create a new column for Decades using binning on the YEAR column
bins = [1950, 1960, 1970, 1980, 1990, 2000, 2010, 2019]
decades = ['1950-1959', '1960-1969', '1970-1979', '1980-1989', '1990-1999', '2000-2010', '2010-2019']

dfulldata['DECADE'] = pd.cut(dfulldata["YEAR"], bins, labels=decades)

In [8]:
# Drop unnecessary columns
dfulldata = dfulldata.drop(['BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH', 'END_DAY', 
                              'END_TIME', 'CATEGORY', 'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'END_RANGE', 'END_AZIMUTH', 
                              'DATA_SOURCE', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 
                              'TOR_OTHER_CZ_NAME'], axis=1)

In [9]:
# Rename columns
dfulldata = dfulldata.rename(columns={"STATE_FIPS":"STATE_ID", "MONTH_NAME":"MONTH", "EVENT_TYPE":"WX_EVENT", 
                                        "CZ_TYPE":"COUNTY+ZONE+MARINE", "CZ_FIPS":"COUNTY_ID", "CZ_NAME":"COUNTY_NAME", 
                                        "WFO":"NWS_STATION", "BEGIN_DATE_TIME":"BEGIN_DATETIME", 
                                        "CZ_TIMEZONE":"TIMEZONE", "END_DATE_TIME":"END_DATETIME", 
                                        "MAGNITUDE":"WIND+HAIL_MAGNITUDE", "MAGNITUDE_TYPE":"WIND_TYPE", 
                                        "TOR_F_SCALE":"TORNADO_FSCALE", "TOR_LENGTH":"TORNADO_LENGTH", 
                                        "TOR_WIDTH":"TORNADO_WIDTH", "TOR_OTHER_WFO":"NEXT_NWS_STATION", 
                                        "TOR_OTHER_CZ_STATE":"NEXT_STATE", "TOR_OTHER_CZ_FIPS":"NEXT_COUNTY_ID", 
                                        "TOR_OTHER_CZ_NAME":"NEXT_COUNTY_NAME"})

In [10]:
# Re-order columns
dfulldata = dfulldata[['EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_ID', 'DECADE', 'YEAR', 'MONTH', 
                         'WX_EVENT', 'COUNTY+ZONE+MARINE', 'COUNTY_ID', 'COUNTY_NAME', 'NWS_STATION', 
                         'BEGIN_DATETIME', 'TIMEZONE', 'END_DATETIME', 'INJURIES_DIRECT', 
                         'INJURIES_INDIRECT', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 
                         'DAMAGE_CROPS', 'SOURCE', 'WIND+HAIL_MAGNITUDE', 'WIND_TYPE', 'FLOOD_CAUSE', 
                         'TORNADO_FSCALE', 'TORNADO_LENGTH', 'TORNADO_WIDTH', 'BEGIN_LOCATION', 'BEGIN_LAT', 
                         'BEGIN_LON', 'END_LOCATION', 'END_LAT', 'END_LON', 'EPISODE_NARRATIVE', 
                         'EVENT_NARRATIVE', 'WX_CATEGORY']]

In [11]:
# Changing DAMAGE_CROPS to object
dfulldata["DAMAGE_CROPS"] = dfulldata["DAMAGE_CROPS"].astype(object)

In [12]:
# Fill NaN values with 0
dfulldata[["DAMAGE_PROPERTY", "DAMAGE_CROPS"]] = dfulldata[["DAMAGE_PROPERTY", "DAMAGE_CROPS"]].fillna(0)

In [13]:
# Process of changing damages to numeric data

# Extracting the K, M or B unit from DAMAGE_PROPERTY & DAMAGE_CROPS
dfulldata["DAMAGE_P_UNIT"] = dfulldata["DAMAGE_PROPERTY"].str.extract(r'([KMB])')
dfulldata["DAMAGE_C_UNIT"] = dfulldata["DAMAGE_CROPS"].str.extract(r'([KMB])')

In [14]:
# Splitting the values at the '.'  & removing the K, M or B unit from DAMAGE PROPERTY & DAMAGE_CROPS
dfulldata["DAMAGE_P_VALUE"] = dfulldata["DAMAGE_PROPERTY"].astype(str).str.split('.').str[0]
dfulldata["DAMAGE_P_VALUE"] = dfulldata["DAMAGE_P_VALUE"].replace(r'[KMB]', '', regex=True)
dfulldata["DAMAGE_C_VALUE"] = dfulldata["DAMAGE_CROPS"].astype(str).str.split('.').str[0]
dfulldata["DAMAGE_C_VALUE"] = dfulldata["DAMAGE_CROPS"].replace(r'[KMB]', '', regex=True)

In [15]:
# Changing the K, M or B unit into a numeric value in DAMAGE_P_UNIT & DAMAGE_C_UNIT
dfulldata["DAMAGE_P_UNIT"] = dfulldata["DAMAGE_P_UNIT"].replace(['K', 'M', 'B'], [1000, 1000000, 1000000000])
dfulldata["DAMAGE_C_UNIT"] = dfulldata["DAMAGE_C_UNIT"].replace(['K', 'M', 'B'], [1000, 1000000, 1000000000])

In [16]:
# Making DAMAGE_P_VALUE & DAMAGE_C_VALUE columns a float/numeric
dfulldata["DAMAGE_P_VALUE"] = pd.to_numeric(dfulldata["DAMAGE_P_VALUE"], errors='coerce')
dfulldata["DAMAGE_C_VALUE"] = pd.to_numeric(dfulldata["DAMAGE_C_VALUE"], errors='coerce')

In [17]:
# Multiply the DAMAGE_P_UNIT * DAMAGE_P_VALUE to get a numeric value in DAMAGE_PROPERTY_NEW
dfulldata["DAMAGE_PROPERTY_NEW"] = dfulldata["DAMAGE_P_UNIT"] * dfulldata["DAMAGE_P_VALUE"]

# Multiply the DAMAGE_C_UNIT * DAMAGE_C_VALUE to get a numeric value in DAMAGE_CROPS_NEW
dfulldata["DAMAGE_CROPS_NEW"] = dfulldata["DAMAGE_C_UNIT"] * dfulldata["DAMAGE_C_VALUE"]

In [18]:
# Fill NaN values with 0
dfulldata[["DAMAGE_PROPERTY_NEW", "DAMAGE_CROPS_NEW"]] = dfulldata[["DAMAGE_PROPERTY_NEW", "DAMAGE_CROPS_NEW"]].fillna(0)

In [19]:
# Totaling deaths, injuries & damages totals
dfulldata["DEATHS_TOTAL"] = dfulldata["DEATHS_DIRECT"] + dfulldata["DEATHS_INDIRECT"]
dfulldata["INJURIES_TOTAL"] = dfulldata["INJURIES_DIRECT"] + dfulldata["INJURIES_INDIRECT"]
dfulldata["DAMAGES_TOTAL"] = dfulldata["DAMAGE_PROPERTY_NEW"] + dfulldata["DAMAGE_CROPS_NEW"]

In [20]:
# Re-order columns
dfulldata = dfulldata[['DAMAGES_TOTAL', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_ID', 'DECADE', 'YEAR', 'MONTH', 
                                   'WX_EVENT', 'COUNTY+ZONE+MARINE', 'COUNTY_ID', 'COUNTY_NAME', 'NWS_STATION', 
                                   'BEGIN_DATETIME', 'TIMEZONE', 'END_DATETIME', 'INJURIES_TOTAL', 'DEATHS_TOTAL', 
                                   'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE', 'WIND+HAIL_MAGNITUDE', 
                                   'WIND_TYPE', 'FLOOD_CAUSE', 'TORNADO_FSCALE', 'TORNADO_LENGTH', 'TORNADO_WIDTH', 
                                   'BEGIN_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LOCATION', 'END_LAT', 'END_LON', 
                                   'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'WX_CATEGORY']]

In [21]:
# Create new columns for Damage and Fatal score using binning on the DAMAGE_SCORE & DEATHS_TOTAL columns
damagebins = [0, 25000, 100000, 500000, 5000000, 500000000, 50000000000]
fatalbins = [0, 2, 4, 6, 8, 10, 100000]

damagelabels = ['0', '1', '2', '3', '4', '5']
fatallabels = ['0', '1', '2', '3', '4', '5']

dfulldata['DAMAGE_SCORE'] = pd.cut(dfulldata["DAMAGES_TOTAL"], damagebins, labels=damagelabels)
dfulldata['FATAL_SCORE'] = pd.cut(dfulldata["DEATHS_TOTAL"], fatalbins, labels=fatallabels)

In [22]:
# Turn scores into numeric data, then add them together to get the AGGREGATED_DEVASTATION_SCORE for total damages & fatalities
dfulldata["DAMAGE_SCORE"] = pd.to_numeric(dfulldata["DAMAGE_SCORE"])
dfulldata["FATAL_SCORE"] = pd.to_numeric(dfulldata["FATAL_SCORE"])
dfulldata["AGGREGATED_DEVASTATION_SCORE"] = dfulldata[["DAMAGE_SCORE", "FATAL_SCORE"]].sum(axis=1)

In [23]:
dfulldata.head()

Unnamed: 0,DAMAGES_TOTAL,EPISODE_ID,EVENT_ID,STATE,STATE_ID,DECADE,YEAR,MONTH,WX_EVENT,COUNTY+ZONE+MARINE,...,BEGIN_LON,END_LOCATION,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,WX_CATEGORY,DAMAGE_SCORE,FATAL_SCORE,AGGREGATED_DEVASTATION_SCORE
0,0.0,,10071755,NEBRASKA,31.0,1950-1959,1960,July,Thunderstorm Wind,C,...,-98.28,,,,,,Thunderstorm/Wind,,,0.0
1,0.0,,10071700,NEBRASKA,31.0,1950-1959,1960,June,Thunderstorm Wind,C,...,-98.4,,,,,,Thunderstorm/Wind,,,0.0
2,0.0,,10071180,MONTANA,30.0,1950-1959,1960,July,Tornado,C,...,-112.0,,,,,,Tornado,,,0.0
3,0.0,,10018150,IOWA,19.0,1950-1959,1960,March,Hail,C,...,-91.08,,,,,,Hail,,,0.0
4,250000.0,,10093963,OKLAHOMA,40.0,1950-1959,1960,May,Tornado,C,...,-97.05,,35.25,-96.97,,,Tornado,2.0,,2.0


In [24]:
dfulldata.columns

Index(['DAMAGES_TOTAL', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_ID',
       'DECADE', 'YEAR', 'MONTH', 'WX_EVENT', 'COUNTY+ZONE+MARINE',
       'COUNTY_ID', 'COUNTY_NAME', 'NWS_STATION', 'BEGIN_DATETIME', 'TIMEZONE',
       'END_DATETIME', 'INJURIES_TOTAL', 'DEATHS_TOTAL', 'DAMAGE_PROPERTY',
       'DAMAGE_CROPS', 'SOURCE', 'WIND+HAIL_MAGNITUDE', 'WIND_TYPE',
       'FLOOD_CAUSE', 'TORNADO_FSCALE', 'TORNADO_LENGTH', 'TORNADO_WIDTH',
       'BEGIN_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LOCATION', 'END_LAT',
       'END_LON', 'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'WX_CATEGORY',
       'DAMAGE_SCORE', 'FATAL_SCORE', 'AGGREGATED_DEVASTATION_SCORE'],
      dtype='object')

In [25]:
# Re-order columns
dfulldata = dfulldata[['EVENT_ID', 'EPISODE_ID', 'WX_EVENT', 'WX_CATEGORY', 'BEGIN_DATETIME', 'BEGIN_LOCATION', 
                      'COUNTY_NAME', 'COUNTY_ID', 'STATE', 'STATE_ID', 'NWS_STATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_DATETIME', 
                      'END_LOCATION', 'END_LAT', 'END_LON', 'TORNADO_FSCALE', 'WIND_TYPE', 'WIND+HAIL_MAGNITUDE', 
                      'INJURIES_TOTAL', 'DEATHS_TOTAL', 'FATAL_SCORE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'DAMAGES_TOTAL', 
                      'DAMAGE_SCORE', 'AGGREGATED_DEVASTATION_SCORE', 'YEAR', 'DECADE']]

dfulldata.head()

Unnamed: 0,EVENT_ID,EPISODE_ID,WX_EVENT,WX_CATEGORY,BEGIN_DATETIME,BEGIN_LOCATION,COUNTY_NAME,COUNTY_ID,STATE,STATE_ID,...,INJURIES_TOTAL,DEATHS_TOTAL,FATAL_SCORE,DAMAGE_PROPERTY,DAMAGE_CROPS,DAMAGES_TOTAL,DAMAGE_SCORE,AGGREGATED_DEVASTATION_SCORE,YEAR,DECADE
0,10071755,,Thunderstorm Wind,Thunderstorm/Wind,11-JUL-60 21:41:00,,MERRICK,121,NEBRASKA,31.0,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
1,10071700,,Thunderstorm Wind,Thunderstorm/Wind,15-JUN-60 20:30:00,,ADAMS,1,NEBRASKA,31.0,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
2,10071180,,Tornado,Tornado,22-JUL-60 15:25:00,,LEWIS AND CLARK,49,MONTANA,30.0,...,0,0,,0K,0,0.0,,0.0,1960,1950-1959
3,10018150,,Hail,Hail,29-MAR-60 19:00:00,,,0,IOWA,19.0,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
4,10093963,,Tornado,Tornado,05-MAY-60 15:50:00,,CLEVELAND,27,OKLAHOMA,40.0,...,0,0,,250K,0,250000.0,2.0,2.0,1960,1950-1959


In [26]:
# Drop any entries that do not have BEGIN_LAT or BEGIN_LON values
dfulldata2 = dfulldata.dropna(subset=['BEGIN_LAT', 'BEGIN_LON'])

In [27]:
# Merging COUNTY_ID & STATE_ID to make linkable COUNTY_ID_NEW
# Checking dtypes
dfulldata2[["COUNTY_ID", "STATE_ID"]].dtypes

COUNTY_ID      int64
STATE_ID     float64
dtype: object

In [28]:
# Changing COUNTY_ID dtype
dfulldata2['COUNTY_ID'] = dfulldata2['COUNTY_ID'].astype('float64')
dfulldata2[["COUNTY_ID", "STATE_ID"]].dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


COUNTY_ID    float64
STATE_ID     float64
dtype: object

In [29]:
# Check numbers for next step
dfulldata2[["COUNTY_ID", "STATE_ID"]].head()

Unnamed: 0,COUNTY_ID,STATE_ID
0,121.0,31.0
1,1.0,31.0
2,49.0,30.0
3,0.0,19.0
4,27.0,40.0


In [30]:
# Format Numbers to combine into new COUNTY_ID_NEW column
dfulldata2[["COUNTY_ID", "STATE_ID"]] = dfulldata2[["COUNTY_ID", "STATE_ID"]].fillna(0)
dfulldata2["COUNTY_ID"] = dfulldata2["COUNTY_ID"].map('{:03.0f}'.format)
dfulldata2["STATE_ID"] = dfulldata2["STATE_ID"].map('{:,.0f}'.format)
dfulldata2[["COUNTY_ID", "STATE_ID"]].head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,COUNTY_ID,STATE_ID
0,121,31
1,1,31
2,49,30
3,0,19
4,27,40


In [31]:
# Combine STATE_ID & COUNTY_ID into new COUNTY_ID_NEW
dfulldata2["COUNTY_ID_NEW"] = dfulldata2["STATE_ID"].map(str) + dfulldata2["COUNTY_ID"].map(str) 
dfulldata2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,EVENT_ID,EPISODE_ID,WX_EVENT,WX_CATEGORY,BEGIN_DATETIME,BEGIN_LOCATION,COUNTY_NAME,COUNTY_ID,STATE,STATE_ID,...,DEATHS_TOTAL,FATAL_SCORE,DAMAGE_PROPERTY,DAMAGE_CROPS,DAMAGES_TOTAL,DAMAGE_SCORE,AGGREGATED_DEVASTATION_SCORE,YEAR,DECADE,COUNTY_ID_NEW
0,10071755,,Thunderstorm Wind,Thunderstorm/Wind,11-JUL-60 21:41:00,,MERRICK,121,NEBRASKA,31,...,0,,0,0,0.0,,0.0,1960,1950-1959,31121
1,10071700,,Thunderstorm Wind,Thunderstorm/Wind,15-JUN-60 20:30:00,,ADAMS,1,NEBRASKA,31,...,0,,0,0,0.0,,0.0,1960,1950-1959,31001
2,10071180,,Tornado,Tornado,22-JUL-60 15:25:00,,LEWIS AND CLARK,49,MONTANA,30,...,0,,0K,0,0.0,,0.0,1960,1950-1959,30049
3,10018150,,Hail,Hail,29-MAR-60 19:00:00,,,0,IOWA,19,...,0,,0,0,0.0,,0.0,1960,1950-1959,19000
4,10093963,,Tornado,Tornado,05-MAY-60 15:50:00,,CLEVELAND,27,OKLAHOMA,40,...,0,,250K,0,250000.0,2.0,2.0,1960,1950-1959,40027


In [32]:
# Re-order columns
dfulldata2 = dfulldata2[['EVENT_ID', 'EPISODE_ID', 'WX_EVENT', 'WX_CATEGORY', 'BEGIN_DATETIME', 'BEGIN_LOCATION', 
                      'COUNTY_NAME', 'COUNTY_ID_NEW', 'STATE', 'NWS_STATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_DATETIME', 
                      'END_LOCATION', 'END_LAT', 'END_LON', 'TORNADO_FSCALE', 'WIND_TYPE', 'WIND+HAIL_MAGNITUDE', 
                      'INJURIES_TOTAL', 'DEATHS_TOTAL', 'FATAL_SCORE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'DAMAGES_TOTAL', 
                      'DAMAGE_SCORE', 'AGGREGATED_DEVASTATION_SCORE', 'YEAR', 'DECADE']]

dfulldata2.head()

Unnamed: 0,EVENT_ID,EPISODE_ID,WX_EVENT,WX_CATEGORY,BEGIN_DATETIME,BEGIN_LOCATION,COUNTY_NAME,COUNTY_ID_NEW,STATE,NWS_STATION,...,INJURIES_TOTAL,DEATHS_TOTAL,FATAL_SCORE,DAMAGE_PROPERTY,DAMAGE_CROPS,DAMAGES_TOTAL,DAMAGE_SCORE,AGGREGATED_DEVASTATION_SCORE,YEAR,DECADE
0,10071755,,Thunderstorm Wind,Thunderstorm/Wind,11-JUL-60 21:41:00,,MERRICK,31121,NEBRASKA,,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
1,10071700,,Thunderstorm Wind,Thunderstorm/Wind,15-JUN-60 20:30:00,,ADAMS,31001,NEBRASKA,,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
2,10071180,,Tornado,Tornado,22-JUL-60 15:25:00,,LEWIS AND CLARK,30049,MONTANA,,...,0,0,,0K,0,0.0,,0.0,1960,1950-1959
3,10018150,,Hail,Hail,29-MAR-60 19:00:00,,,19000,IOWA,,...,0,0,,0,0,0.0,,0.0,1960,1950-1959
4,10093963,,Tornado,Tornado,05-MAY-60 15:50:00,,CLEVELAND,40027,OKLAHOMA,,...,0,0,,250K,0,250000.0,2.0,2.0,1960,1950-1959


In [33]:
# Export the ffulldata as 1 CSV file
dfulldata2.to_csv('../Resources/StormEventsDetailsALL.csv')