In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
import hvplot.pandas

In [4]:
#merging all the datasets into one
mergestorm_df = pd.concat( 
    map(pd.read_csv, ["resources/stormevents_2022.csv", 
                      "resources/stormevents_2021.csv",
                      "resources/stormevents_2020.csv",
                      "resources/stormevents_2019.csv",
                      "resources/stormevents_2018.csv", 
                      "resources/stormevents_2017.csv", 
                      "resources/stormevents_2016.csv", 
                      "resources/stormevents_2015.csv", 
                      "resources/stormevents_2014.csv", 
                      "resources/stormevents_2013.csv", 
                      "resources/stormevents_2012.csv", 
                      "resources/stormevents_2011.csv", 
                      "resources/stormevents_2010.csv", 
                      "resources/stormevents_2009.csv", 
                      "resources/stormevents_2008.csv"]), ignore_index=True) 
mergestorm_df.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,202202,20,2118,202202,20,2218,165464,999902,NEVADA,32,...,,,,,,,,Strong winds increased ahead of an approaching...,"Station (UP994) 3.1 SE West Wendover, Elevatio...",CSV
1,202202,21,800,202202,22,1000,165465,999903,NEVADA,32,...,,,,,,,,A low centered over northern and central Nevad...,Thirteen inches fell at station (BCSN2) Big Cr...,CSV
2,202202,22,200,202202,22,900,165465,999904,NEVADA,32,...,,,,,,,,A low centered over northern and central Nevad...,Fifteen inches fell at station (TJMN2) Toe Jam...,CSV
3,202202,18,1609,202202,18,1609,165611,1001181,ATLANTIC SOUTH,87,...,7.0,SE,PONTE VEDRA,30.05,-81.17,30.05,-81.17,Pre-frontal showers and thunderstorms moved so...,A brief waterspout was observed offshore of So...,CSV
4,202202,2,0,202202,3,0,165668,1001527,AMERICAN SAMOA,97,...,5.0,NNW,VAITOGI,-14.333,-170.7157,-14.3393,-170.7268,A surface trough over the Islands held the po...,"Over a 24-hour period, WSO Pago Pago recorded ...",CSV


In [5]:
#merging all the datasets into one for earliest available data years 
mergestorm1950_1964_df = pd.concat( 
    map(pd.read_csv, ["resources/stormevents_1950.csv", 
                      "resources/stormevents_1951.csv", 
                      "resources/stormevents_1952.csv", 
                      "resources/stormevents_1953.csv", 
                      "resources/stormevents_1954.csv", 
                      "resources/stormevents_1955.csv",
                      "resources/stormevents_1956.csv", 
                      "resources/stormevents_1957.csv", 
                      "resources/stormevents_1958.csv",
                      "resources/stormevents_1959.csv", 
                      "resources/stormevents_1960.csv", 
                      "resources/stormevents_1961.csv",
                      "resources/stormevents_1962.csv", 
                      "resources/stormevents_1963.csv", 
                      "resources/stormevents_1964.csv"]),
                      ignore_index=True)
mergestorm1950_1964_df.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,195004,28,1445,195004,28,1445,,10096222,OKLAHOMA,40,...,0,,,35.12,-99.2,35.17,-99.2,,,PUB
1,195004,29,1530,195004,29,1530,,10120412,TEXAS,48,...,0,,,31.9,-98.6,31.73,-98.6,,,PUB
2,195007,5,1800,195007,5,1800,,10104927,PENNSYLVANIA,42,...,0,,,40.58,-75.7,40.65,-75.47,,,PUB
3,195007,5,1830,195007,5,1830,,10104928,PENNSYLVANIA,42,...,0,,,40.6,-76.75,,,,,PUB
4,195007,24,1440,195007,24,1440,,10104929,PENNSYLVANIA,42,...,0,,,41.63,-79.68,,,,,PUB


In [10]:
# Create a DataFrame with selected columns 
clean_mergestorm_df = mergestorm_df[["YEAR", 
                                     "STATE", 
                                     "EVENT_TYPE",  
                                     "INJURIES_DIRECT", 
                                     "INJURIES_INDIRECT",
                                     "DEATHS_DIRECT", 
                                     "DEATHS_INDIRECT",
                                     "DAMAGE_PROPERTY",
                                     "DAMAGE_CROPS", 
                                     "SOURCE", 
                                     "MAGNITUDE", 
                                     "MAGNITUDE_TYPE", 
                                     "TOR_F_SCALE", 
                                     "TOR_LENGTH",
                                     "TOR_WIDTH",
                                     "TOR_OTHER_CZ_STATE",
                                     "BEGIN_LOCATION", 
                                     "BEGIN_LAT", 
                                     "BEGIN_LON", 
                                     "END_LAT", 
                                     "END_LON", 
                                     "EPISODE_ID", 
                                     "EVENT_ID",]]
clean_mergestorm_df

Unnamed: 0,YEAR,STATE,EVENT_TYPE,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,...,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_STATE,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_ID,EVENT_ID
0,2022,NEVADA,High Wind,0,0,0,0,0.00K,0.00K,Mesonet,...,,,,,,,,,165464,999902
1,2022,NEVADA,Heavy Snow,0,0,0,0,0.00K,0.00K,SNOTEL,...,,,,,,,,,165465,999903
2,2022,NEVADA,Heavy Snow,0,0,0,0,0.00K,0.00K,SNOTEL,...,,,,,,,,,165465,999904
3,2022,ATLANTIC SOUTH,Waterspout,0,0,0,0,0.00K,0.00K,Trained Spotter,...,,,,PONTE VEDRA,30.0500,-81.1700,30.0500,-81.1700,165611,1001181
4,2022,AMERICAN SAMOA,Heavy Rain,0,0,0,0,50.00K,0.00K,Official NWS Observations,...,,,,VAITOGI,-14.3330,-170.7157,-14.3393,-170.7268,165668,1001527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
948281,2008,FLORIDA,Hail,0,0,0,0,0.00K,0.00K,Public,...,,,,RITAL,28.5233,-82.2320,28.5233,-82.2320,18919,111312
948282,2008,FLORIDA,Thunderstorm Wind,0,0,0,0,0.50K,0.00K,Public,...,,,,HULL,27.1323,-81.9050,27.1323,-81.9050,18495,108243
948283,2008,GULF OF MEXICO,Marine Thunderstorm Wind,0,0,0,0,0.00K,0.00K,ASOS,...,,,,MIDDLE TAMPA BAY,27.7651,-82.6270,27.7651,-82.6270,18708,109851
948284,2008,FLORIDA,Hail,0,0,0,0,0.00K,0.00K,Public,...,,,,THONOTOSASSA,28.0762,-82.2492,28.0762,-82.2492,16372,94501


In [11]:
# Create a DataFrame with selected columns 
mergestorm1950_1964_df = mergestorm1950_1964_df[["YEAR", 
                                     "STATE", 
                                     "EVENT_TYPE",  
                                     "INJURIES_DIRECT", 
                                     "INJURIES_INDIRECT",
                                     "DEATHS_DIRECT", 
                                     "DEATHS_INDIRECT",
                                     "DAMAGE_PROPERTY",
                                     "DAMAGE_CROPS", 
                                     "SOURCE", 
                                     "MAGNITUDE", 
                                     "MAGNITUDE_TYPE", 
                                     "TOR_F_SCALE", 
                                     "TOR_LENGTH",
                                     "TOR_WIDTH",
                                     "TOR_OTHER_CZ_STATE",
                                     "BEGIN_LOCATION", 
                                     "BEGIN_LAT", 
                                     "BEGIN_LON", 
                                     "END_LAT", 
                                     "END_LON", 
                                     "EPISODE_ID", 
                                     "EVENT_ID",]]
mergestorm1950_1964_df

Unnamed: 0,YEAR,STATE,EVENT_TYPE,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,...,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_STATE,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_ID,EVENT_ID
0,1950,OKLAHOMA,Tornado,0,0,0,0,250K,0,,...,3.4,400,,,35.12,-99.20,35.17,-99.20,,10096222
1,1950,TEXAS,Tornado,0,0,0,0,25K,0,,...,11.5,200,,,31.90,-98.60,31.73,-98.60,,10120412
2,1950,PENNSYLVANIA,Tornado,2,0,0,0,25K,0,,...,12.9,33,,,40.58,-75.70,40.65,-75.47,,10104927
3,1950,PENNSYLVANIA,Tornado,0,0,0,0,2.5K,0,,...,0.0,13,,,40.60,-76.75,,,,10104928
4,1950,PENNSYLVANIA,Tornado,0,0,0,0,2.5K,0,,...,0.0,33,,,41.63,-79.68,,,,10104929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22082,1964,KANSAS,Hail,0,0,0,0,0,0,,...,0.0,0,,,39.38,-99.28,,,,10025243
22083,1964,KANSAS,Tornado,2,0,0,0,0K,0,,...,25.7,880,,,37.95,-98.88,38.27,-98.63,,10025244
22084,1964,KANSAS,Tornado,0,0,0,0,250K,0,,...,34.0,880,,,38.27,-98.63,38.70,-98.33,,10025245
22085,1964,KANSAS,Hail,0,0,0,0,0,0,,...,0.0,0,,,37.88,-95.40,,,,10025246


In [12]:
clean_mergestorm_df["DAMAGE_PROPERTY"] = clean_mergestorm_df["DAMAGE_PROPERTY"].fillna("0.00K")
clean_mergestorm_df["DAMAGE_CROPS"] = clean_mergestorm_df["DAMAGE_CROPS"].fillna("0.00K")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_mergestorm_df["DAMAGE_PROPERTY"] = clean_mergestorm_df["DAMAGE_PROPERTY"].fillna("0.00K")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_mergestorm_df["DAMAGE_CROPS"] = clean_mergestorm_df["DAMAGE_CROPS"].fillna("0.00K")


In [15]:
mergestorm1950_1964_df["DAMAGE_PROPERTY"] = mergestorm1950_1964_df["DAMAGE_PROPERTY"].fillna("0.00K")
mergestorm1950_1964_df["DAMAGE_CROPS"] = mergestorm1950_1964_df["DAMAGE_CROPS"].fillna("0.00K")

In [16]:
clean_mergestorm_df["TOTAL DEATHS"] = clean_mergestorm_df["DEATHS_DIRECT"] + clean_mergestorm_df["DEATHS_INDIRECT"]
clean_mergestorm_df["TOTAL INJURIES"] = clean_mergestorm_df["INJURIES_DIRECT"] + clean_mergestorm_df["INJURIES_INDIRECT"]
clean_mergestorm_df["TOTAL DAMAGES"] = clean_mergestorm_df["DAMAGE_PROPERTY"] + clean_mergestorm_df["DAMAGE_CROPS"]
clean_mergestorm_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_mergestorm_df["TOTAL DEATHS"] = clean_mergestorm_df["DEATHS_DIRECT"] + clean_mergestorm_df["DEATHS_INDIRECT"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_mergestorm_df["TOTAL INJURIES"] = clean_mergestorm_df["INJURIES_DIRECT"] + clean_mergestorm_df["INJURIES_INDIRECT"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#

Unnamed: 0,YEAR,STATE,EVENT_TYPE,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,...,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_ID,EVENT_ID,TOTAL DEATHS,TOTAL INJURIES,TOTAL DAMAGES
0,2022,NEVADA,High Wind,0,0,0,0,0.00K,0.00K,Mesonet,...,,,,,,165464,999902,0,0,0.00K0.00K
1,2022,NEVADA,Heavy Snow,0,0,0,0,0.00K,0.00K,SNOTEL,...,,,,,,165465,999903,0,0,0.00K0.00K
2,2022,NEVADA,Heavy Snow,0,0,0,0,0.00K,0.00K,SNOTEL,...,,,,,,165465,999904,0,0,0.00K0.00K
3,2022,ATLANTIC SOUTH,Waterspout,0,0,0,0,0.00K,0.00K,Trained Spotter,...,PONTE VEDRA,30.05,-81.17,30.05,-81.17,165611,1001181,0,0,0.00K0.00K
4,2022,AMERICAN SAMOA,Heavy Rain,0,0,0,0,50.00K,0.00K,Official NWS Observations,...,VAITOGI,-14.333,-170.7157,-14.3393,-170.7268,165668,1001527,0,0,50.00K0.00K


In [None]:
clean_mergestorm_df["TOTAL DEATHS"] = clean_mergestorm_df["DEATHS_DIRECT"] + clean_mergestorm_df["DEATHS_INDIRECT"]
clean_mergestorm_df["TOTAL INJURIES"] = clean_mergestorm_df["INJURIES_DIRECT"] + clean_mergestorm_df["INJURIES_INDIRECT"]
clean_mergestorm_df["TOTAL DAMAGES"] = clean_mergestorm_df["DAMAGE_PROPERTY"] + clean_mergestorm_df["DAMAGE_CROPS"]
clean_mergestorm_df.head()