In [None]:
from pathlib import Path
from urllib.request import urlretrieve

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

### Get Storm Events Data from 2010 to 2019 from the National Centers for Environmental Information (NCEI)

# Download the yearly Storm Events "details" files and store each one as
# StormEvents_<year>.csv.gz in the working directory.
#
# A file that is already present is not fetched again, so the notebook can be
# re-run (Restart & Run All) without repeating ten downloads.

STORM_EVENTS_BASE_URL = 'https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/'

# year -> the "c<date>" stamp that appears in the remote file name for that year
STORM_EVENTS_FILE_STAMP = {
    2010: '20191116',
    2011: '20180718',
    2012: '20200317',
    2013: '20170519',
    2014: '20191116',
    2015: '20191116',
    2016: '20190817',
    2017: '20200121',
    2018: '20200317',
    2019: '20200317',
}

for year, stamp in STORM_EVENTS_FILE_STAMP.items():
    target = Path('StormEvents_{}.csv.gz'.format(year))
    if target.exists():
        continue

    remote_name = 'StormEvents_details-ftp_v1.0_d{}_c{}.csv.gz'.format(year, stamp)

    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer never leaves a truncated file that a later run would skip.
    partial = target.with_name(target.name + '.part')
    urlretrieve(STORM_EVENTS_BASE_URL + remote_name, str(partial))
    partial.replace(target)


In [None]:
# Columns kept from each yearly Storm Events file (order is preserved in the
# combined data frame).

selected_columns = [
    # where / when / what
    'STATE', 'STATE_FIPS', 'YEAR', 'MONTH_NAME',
    'EVENT_TYPE',
    'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME',
    'WFO',
    'BEGIN_DATE_TIME', 'END_DATE_TIME',
    # injuries, deaths and damage
    'INJURIES_DIRECT', 'INJURIES_INDIRECT',
    'DEATHS_DIRECT', 'DEATHS_INDIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS',
    # source and magnitude information
    'SOURCE',
    'MAGNITUDE', 'MAGNITUDE_TYPE',
    'FLOOD_CAUSE',
    # TOR_* columns
    'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH',
    'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE',
    'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME',
    # begin / end location fields
    'BEGIN_LOCATION', 'END_LOCATION',
    'BEGIN_RANGE', 'END_RANGE',
    'BEGIN_AZIMUTH', 'END_AZIMUTH',
    'BEGIN_LAT', 'BEGIN_LON',
    'END_LAT', 'END_LON',
]

In [None]:
# read and concatenate yearly Storm Events Datasets into a single data frame

# Parse only the columns of interest from each yearly file (usecols), restore
# the order given in selected_columns, and concatenate all years in one call
# instead of growing the frame inside the loop.
yearly_frames = [
    pd.read_csv('StormEvents_{}.csv.gz'.format(year),
                compression='gzip',
                usecols=selected_columns)[selected_columns]
    for year in range(2010, 2020)
]

# ignore_index gives the combined frame one unique 0..n-1 row index; without it
# every yearly file would contribute its own 0-based labels again.
df = pd.concat(yearly_frames, ignore_index=True)
   

In [None]:
df.shape

In [None]:
df.head()

In [None]:
# Report how many different EVENT_TYPE values occur (a missing value, if any,
# counts as one of them).
print("There are {} storm event types.".format(df['EVENT_TYPE'].nunique(dropna=False)))

## Event type

In [None]:
# Bar chart of the ten most frequent event types (value_counts is already
# sorted from most to least frequent).
df['EVENT_TYPE'].value_counts().head(10)\
.plot(kind = 'bar', title ='Top 10 Storm Events from 2010 to 2019')

## Fatalities

In [None]:
def fatality(col_name, data=None):
    """Return direct, indirect, and total fatalities grouped by col_name.

    Parameters
    ----------
    col_name : str or list of str
        Column(s) to group by.
    data : pandas.DataFrame, optional
        Frame holding the 'DEATHS_DIRECT' and 'DEATHS_INDIRECT' columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the groups of ``col_name`` with the columns
        'DEATHS_DIRECT', 'DEATHS_INDIRECT' and 'DEATHS_TOTAL' (their sum).
    """
    frame = df if data is None else data

    # Select the two death columns *before* summing so that only they are
    # aggregated; summing the whole frame would also try to add up every
    # other (including text) column.
    df_fatality = frame.groupby(col_name)[['DEATHS_DIRECT', 'DEATHS_INDIRECT']].sum()
    df_fatality['DEATHS_TOTAL'] = (df_fatality['DEATHS_DIRECT']
                                   + df_fatality['DEATHS_INDIRECT'])

    return df_fatality

### Total fatalities by event type

In [None]:
# Fatalities per event type, ordered from most to fewest total deaths.
df_fatality_event = (
    fatality('EVENT_TYPE')
    .sort_values(by='DEATHS_TOTAL', ascending=False)
)

In [None]:
df_fatality_event.head()

In [None]:
# Bar chart of total deaths, one bar per event type.
(
    df_fatality_event['DEATHS_TOTAL']
    .plot.bar(title='Total Fatalities', figsize=(15, 7))
)

### Total fatalities by state

In [None]:
# Fatalities per state, ordered from most to fewest total deaths.
df_fatality_state = (
    fatality('STATE')
    .sort_values(by='DEATHS_TOTAL', ascending=False)
)

In [None]:
df_fatality_state.head()

In [None]:
# Bar chart of total deaths, one bar per state.
(
    df_fatality_state['DEATHS_TOTAL']
    .plot.bar(title='Total Fatalities', figsize=(15, 7))
)

### Total fatalities by year

In [None]:
# Fatalities per year, ordered from most to fewest total deaths
# (i.e. not chronologically).
df_fatality_year = (
    fatality('YEAR')
    .sort_values(by='DEATHS_TOTAL', ascending=False)
)

In [None]:
df_fatality_year.head()

In [None]:
# Bar chart of total deaths, one bar per year.
(
    df_fatality_year['DEATHS_TOTAL']
    .plot.bar(title='Total Fatalities', figsize=(15, 7))
)

## Direct fatalities by event type

In [None]:
# Fatalities per event type, ordered from most to fewest direct deaths.
df_fatalityD_event = (
    fatality('EVENT_TYPE')
    .sort_values(by='DEATHS_DIRECT', ascending=False)
)

In [None]:
df_fatalityD_event.head()

In [None]:
# Bar chart of direct deaths, one bar per event type.
(
    df_fatalityD_event['DEATHS_DIRECT']
    .plot.bar(title='Direct Fatalities', figsize=(15, 7))
)

In [None]:
# Event types examined in the yearly direct-fatality breakdown below.
top5_events = [
    'Tornado',
    'Flash Flood',
    'Excessive Heat',
    'Heat',
    'Rip Current',
]

## Yearly direct fatalities by tornados

In [None]:
# Direct tornado deaths per year. The DEATHS_DIRECT column is selected before
# summing so that only that column is aggregated.
df_tornados = df[df['EVENT_TYPE'] == 'Tornado'].groupby('YEAR')['DEATHS_DIRECT'].sum()
print(df_tornados)
df_tornados.plot(kind='bar', logy = False, title = 'Direct Fatalities by Tornados')

## Top 5 direct fatalities by year

In [None]:
# Yearly direct deaths for each event type in top5_events: one column per
# event, one row per year.
#
# Passing a dict to pd.concat names each column after its event directly, so
# the columns cannot get out of step with the list, and every event in
# top5_events (including the first) is handled the same way. The
# DEATHS_DIRECT column is selected before summing so only it is aggregated.
df_yearly_deaths = pd.concat(
    {
        event: df[df['EVENT_TYPE'] == event].groupby('YEAR')['DEATHS_DIRECT'].sum()
        for event in top5_events
    },
    axis=1,
)

In [None]:
df_yearly_deaths

In [None]:
df_yearly_deaths.describe().transpose()

In [None]:
# Grouped (not stacked) bars per year, one bar per event type, on a log y-axis.
df_yearly_deaths.plot(kind = 'bar',
                      stacked = False,
                      logy = True, title = 'Direct fatalities by top 5 storm events',
                      figsize = (15,7))

## Property Damage

In [None]:
# Columns needed for the property-damage analysis; rows with a missing value
# in any of them are dropped.
df_damage = (
    df.loc[:, ['STATE', 'EVENT_TYPE', 'YEAR', 'MONTH_NAME', 'DAMAGE_PROPERTY']]
    .dropna()
)

In [None]:
df_damage.shape

In [None]:
df_damage.head()

In [None]:
# Keep only rows whose DAMAGE_PROPERTY text ends in a K, M or B suffix.
# The pattern is anchored at the end because the later conversion reads the
# last character as the unit; na=False treats non-text values as "no match"
# instead of relying on a comparison with True.
df_damage_1 = df_damage[df_damage['DAMAGE_PROPERTY'].str.contains(r'[KMB]$', na=False)].copy()

In [None]:
# check if any DAMAGE_PROPERTY values do not end in K, M, or B
# (na=False plus ~ also lists non-text values; comparing the match result with
# False would silently leave those rows out of the check)
df_damage[~df_damage['DAMAGE_PROPERTY'].str.contains(r'[KMB]$', na=False)]

In [None]:
df_damage_1.head()

In [None]:
# Create a numeric column for DAMAGE_PROPERTY in million.
# The last character of the text is the unit (K, M or B) and everything before
# it is the amount.
dict_1 ={'K':0.001, 'M':1, 'B':1000}

# Vectorized conversion: an amount that is not a number (for example a value
# consisting of the unit letter only) or an unknown unit becomes NaN for that
# row instead of raising and aborting the whole column.
damage_amount = pd.to_numeric(df_damage_1['DAMAGE_PROPERTY'].str[:-1], errors='coerce')
damage_unit = df_damage_1['DAMAGE_PROPERTY'].str[-1].map(dict_1)

df_damage_1['DAMAGE_PROPERTY_million'] = damage_amount * damage_unit

## Distribution of property damage by event type

In [None]:
# Total property damage per event type, 15 largest. The damage column is
# selected before summing so the text columns of df_damage_1 are not
# aggregated as well.
df_damage_1.groupby('EVENT_TYPE')['DAMAGE_PROPERTY_million']\
.sum()\
.sort_values(ascending = False)[:15]\
.plot(kind = 'bar', figsize = (12,5), title = 'Damage_property in million $')


In [None]:
# Event types examined in the yearly property-damage breakdown below.
top5_damage = [
    'Flash Flood',
    'Wildfire',
    'Flood',
    'Tornado',
    'Coastal Flood',
]

In [None]:
# yearly distribution of top 5 property damage
#
# One column per event type in top5_damage, one row per year. Passing a dict
# to pd.concat names each column after its event directly, so the columns
# cannot get out of step with the list, and every event in top5_damage
# (including the first) is handled the same way. The damage column is selected
# before summing so the text columns of df_damage_1 are not aggregated as well.

df_yearly_damage = pd.concat(
    {
        event: df_damage_1[df_damage_1['EVENT_TYPE'] == event]
               .groupby('YEAR')['DAMAGE_PROPERTY_million']
               .sum()
        for event in top5_damage
    },
    axis=1,
)

In [None]:
df_yearly_damage

In [None]:
# Grouped (not stacked) bars per year, one bar per event type, on a log y-axis.
df_yearly_damage.plot(kind = 'bar',
                      stacked = False,
                      logy = True,
                      title = 'Property damage in $millions by top 5 storm events',
                      figsize = (15,7))

In [None]:
# Total property damage per state, 10 largest. The damage column is selected
# before summing so the text columns of df_damage_1 are not aggregated as well.
# (A Series has a single set of values to plot, so no y= argument is passed.)
df_damage_1.groupby('STATE')['DAMAGE_PROPERTY_million']\
.sum()\
.sort_values(ascending = False)[:10]\
.plot(kind = 'bar', figsize = (12,5), title = 'Property damage in $millions')

In [None]:
# Total property damage per month name, largest first. The damage column is
# selected before summing so the text columns of df_damage_1 are not
# aggregated as well.
df_damage_1.groupby('MONTH_NAME')['DAMAGE_PROPERTY_million']\
.sum()\
.sort_values(ascending = False)\
.plot(kind = 'bar', figsize = (12,5), title = 'Property damage in $ million')