In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymannkendall as mk
plt.rcParams['figure.figsize']=(12,8)

In [2]:
def replace_string(data, column, keep_unsuffixed=False):
    """Convert magnitude-suffixed strings in ``column`` to floats.

    A value such as ``'1.5K'``, ``'2M'`` or ``'3B'`` is stripped of its
    suffix, parsed as a float and multiplied by 1e3, 1e6 or 1e9 respectively.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; ``data[column]`` must hold strings. It is not modified.
    column : str
        Name of the column to convert.
    keep_unsuffixed : bool, default False
        With the default, rows whose value contains none of K/M/B (for
        example a plain ``'0'``) are dropped from the result. Pass ``True``
        to keep those rows when the value parses as a plain number; values
        that do not parse are still dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the converted rows, with ``column`` as
        float, sorted by index. Rows whose value is exactly ``'K'``, ``'M'``
        or ``'B'`` (a suffix with no number) and rows with a missing value
        are always dropped.

    Raises
    ------
    ValueError
        If a value contains a suffix letter but what remains after stripping
        that letter from both ends is not a valid float.
    """
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    raw = data[column]

    pieces = []
    has_suffix = np.zeros(len(data), dtype=bool)
    for suffix, factor in multipliers.items():
        # na=False: a missing value simply does not match, instead of
        # producing a NaN in the mask that boolean indexing rejects.
        contains = raw.str.contains(suffix, regex=False, na=False).to_numpy(dtype=bool)
        has_suffix |= contains
        bare_suffix = raw.eq(suffix).fillna(False).to_numpy(dtype=bool)
        keep = contains & ~bare_suffix
        if not keep.any():
            continue  # do not feed empty frames to concat
        # Work on an explicit copy so the assignment below never targets a
        # view of ``data`` (the SettingWithCopyWarning pattern).
        piece = data.loc[keep].copy()
        converted = raw.loc[keep].str.strip(suffix).astype('float') * factor
        piece[column] = converted.to_numpy()
        pieces.append(piece)

    if keep_unsuffixed:
        # Values with no K/M/B at all: keep them if they are plain numbers.
        numeric = pd.to_numeric(raw.where(~has_suffix), errors='coerce')
        plain = numeric.notna().to_numpy(dtype=bool)
        if plain.any():
            piece = data.loc[plain].copy()
            piece[column] = numeric.loc[plain].to_numpy(dtype='float')
            pieces.append(piece)

    if not pieces:
        # Nothing matched: return an empty frame with the converted dtype.
        empty = data.iloc[:0].copy()
        empty[column] = empty[column].astype('float')
        return empty

    # Stable sort: rows sharing an index label keep their concatenation
    # order (K rows, then M, then B, then unsuffixed) on every run.
    return pd.concat(pieces, axis=0).sort_index(kind='mergesort')

In [5]:
%%time
storm_events = []
init_path = '/Users/allanbellahsene/Desktop/THESIS/DATA/NCFEI/StormEvents_'
for years in list(np.arange(1962,2020)):
    path = init_path + str(years) + '.csv'
    storm_data = pd.read_csv(path, usecols=['BEGIN_DAY', 'MONTH_NAME', 'YEAR', 'STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'EVENT_TYPE', 'EPISODE_ID', 'EVENT_ID', 'DEATHS_DIRECT', 'DEATHS_INDIRECT'])
    storm_data.index = pd.to_datetime(storm_data['BEGIN_DAY'].astype('str')+ '-' + storm_data['MONTH_NAME'] + '-' + storm_data.YEAR.astype('str'))
    storm_data = storm_data[['STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'EVENT_TYPE', 'EVENT_ID', 'EPISODE_ID']] #keep only relevant columns
    storm_data.dropna(inplace=True) #drop missing values
    storm_data.sort_index(inplace=True) #sort data by date
    storm_events.append(storm_data)
    #print(years)
STORMS = pd.concat([storm_events[0], storm_events[1]], axis=0)
for i in range(2, len(storm_events)):
    STORMS = pd.concat([STORMS, storm_events[i]], axis=0)
print(STORMS.shape)
df = STORMS
df['DAMAGE_PROPERTY'] = df['DAMAGE_PROPERTY'].astype('str')
df['DAMAGE_CROPS'] = df['DAMAGE_CROPS'].astype('str')
STORMS = df
STORMS = STORMS.loc[((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS =='0')) | ((STORMS.DAMAGE_PROPERTY=='0') & (STORMS.DAMAGE_CROPS !='0')) | ((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS !='0'))]
STORMS = STORMS.loc[STORMS.DAMAGE_PROPERTY!='K'] #delete rows that only contain "K"
STORMS = STORMS.loc[STORMS.DAMAGE_CROPS!='K']
STORMS = replace_string(STORMS, 'DAMAGE_PROPERTY')
STORMS = replace_string(STORMS, 'DAMAGE_CROPS')
STORMS['TOTAL DAMAGES'] = STORMS['DAMAGE_PROPERTY'] + STORMS['DAMAGE_CROPS']
STORMS['Date'] = STORMS.index
STORMS = STORMS[['Date','STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'TOTAL DAMAGES', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'EVENT_TYPE', 'EVENT_ID', 'EPISODE_ID']]
STORMS = STORMS.loc[STORMS['DAMAGE_PROPERTY']!=0]
df = STORMS.groupby(['STATE', 'Date']).agg({'DAMAGE_PROPERTY':sum})
States = list(STORMS.STATE.unique())

(708372, 8)
CPU times: user 18.1 s, sys: 2.62 s, total: 20.8 s
Wall time: 21.7 s


In [17]:
# Ten episodes with the largest property damage summed over their events.
# 'sum' (string) replaces the builtin `sum`, whose use in agg is deprecated.
STORMS.groupby(['EPISODE_ID']).agg({'DAMAGE_PROPERTY': 'sum'}).nlargest(10, 'DAMAGE_PROPERTY')

Unnamed: 0_level_0,DAMAGE_PROPERTY
EPISODE_ID,Unnamed: 1_level_1
119753.0,38257190000.0
68471.0,24959090000.0
120357.0,18263750000.0
131864.0,17000000000.0
109514.0,6983176000.0
1198432.0,5884300000.0
1178038.0,5382000000.0
119746.0,5290005000.0
1181034.0,4830000000.0
130185.0,4200849000.0


In [13]:
# Sum of TOTAL DAMAGES over every event belonging to episode 119753.
STORMS.loc[STORMS['EPISODE_ID'] == 119753, 'TOTAL DAMAGES'].sum()

38257531000.0

In [24]:
# The ten individual events with the highest TOTAL DAMAGES.
STORMS.nlargest(n=10, columns='TOTAL DAMAGES')

Unnamed: 0,Date,STATE,DAMAGE_PROPERTY,DAMAGE_CROPS,TOTAL DAMAGES,EVENT_TYPE,EVENT_ID,EPISODE_ID
2018-11-08,2018-11-08,CALIFORNIA,17000000000.0,0.0,17000000000.0,Wildfire,788675,131864.0
2017-08-26,2017-08-26,TEXAS,10000000000.0,100000.0,10000100000.0,Flash Flood,720861,119753.0
2017-08-26,2017-08-26,TEXAS,10000000000.0,10000.0,10000010000.0,Flash Flood,720871,119753.0
2017-08-26,2017-08-26,TEXAS,8000000000.0,50000.0,8000050000.0,Flash Flood,720852,119753.0
2012-10-29,2012-10-29,NEW JERSEY,7500000000.0,0.0,7500000000.0,Coastal Flood,416942,68471.0
2017-08-27,2017-08-27,TEXAS,7000000000.0,10000.0,7000010000.0,Flash Flood,728349,119753.0
2012-10-28,2012-10-28,NEW JERSEY,5000000000.0,0.0,5000000000.0,Coastal Flood,416948,68471.0
2012-10-28,2012-10-28,NEW JERSEY,5000000000.0,0.0,5000000000.0,Coastal Flood,416946,68471.0
2004-09-13,2004-09-13,FLORIDA,4000000000.0,25000000.0,4025000000.0,Hurricane (Typhoon),5422303,1179851.0
1999-09-15,1999-09-15,NORTH CAROLINA,3000000000.0,500000000.0,3500000000.0,Hurricane (Typhoon),5724413,1088948.0


In [9]:
# The ten individual events with the highest DAMAGE_CROPS.
STORMS.nlargest(n=10, columns='DAMAGE_CROPS')

Unnamed: 0,Date,STATE,DAMAGE_PROPERTY,DAMAGE_CROPS,TOTAL DAMAGES,EVENT_TYPE
2011-06-01,2011-06-01,TEXAS,200000000.0,1050000000.0,1250000000.0,Drought
2007-01-11,2007-01-11,CALIFORNIA,150000.0,568600000.0,568750000.0,Frost/Freeze
1999-09-15,1999-09-15,NORTH CAROLINA,3000000000.0,500000000.0,3500000000.0,Hurricane (Typhoon)
2000-10-03,2000-10-03,FLORIDA,440000000.0,500000000.0,940000000.0,Flood
2007-07-01,2007-07-01,MISSOURI,5000.0,500000000.0,500005000.0,Flood
2018-10-10,2018-10-10,GEORGIA,150000000.0,500000000.0,650000000.0,Hurricane
2018-10-10,2018-10-10,GEORGIA,250000000.0,500000000.0,750000000.0,Hurricane
2018-10-10,2018-10-10,GEORGIA,300000000.0,500000000.0,800000000.0,Hurricane
2018-10-10,2018-10-10,GEORGIA,300000000.0,500000000.0,800000000.0,Hurricane
2005-08-25,2005-08-25,FLORIDA,64000000.0,423000000.0,487000000.0,Hurricane (Typhoon)


In [17]:
# Sanity check: the float literal 1.7e10 equals the integer 17,000,000,000.
17_000_000_000 == 1.7e10

True

In [8]:
# Ten episodes with the most indirect deaths summed over their events.
# 'sum' (string) replaces the builtin `sum`, whose use in agg is deprecated.
STORMS.groupby(['EPISODE_ID']).agg({'DEATHS_INDIRECT': 'sum'}).nlargest(10, 'DEATHS_INDIRECT')

Unnamed: 0_level_0,DEATHS_INDIRECT
EPISODE_ID,Unnamed: 1_level_1
130185.0,32
68471.0,19
2326.0,14
119520.0,13
68867.0,12
1525.0,11
25500.0,10
68481.0,10
118772.0,10
50455.0,9


In [3]:
%%time
storm_events = []
init_path = '/Users/allanbellahsene/Desktop/THESIS/DATA/NCFEI/StormEvents_'
for years in list(np.arange(1962,2020)):
    path = init_path + str(years) + '.csv'
    storm_data = pd.read_csv(path, usecols=['BEGIN_DAY', 'MONTH_NAME', 'YEAR', 'STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'EVENT_TYPE', 'EPISODE_ID', 'EVENT_ID', 'DEATHS_DIRECT', 'DEATHS_INDIRECT'])
    storm_data.index = pd.to_datetime(storm_data['BEGIN_DAY'].astype('str')+ '-' + storm_data['MONTH_NAME'] + '-' + storm_data.YEAR.astype('str'))
    storm_data = storm_data[['STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'EVENT_TYPE', 'EPISODE_ID']] #keep only relevant columns
    storm_data.dropna(inplace=True) #drop missing values
    storm_data.sort_index(inplace=True) #sort data by date
    storm_events.append(storm_data)
    #print(years)
STORMS = pd.concat([storm_events[0], storm_events[1]], axis=0)
for i in range(2, len(storm_events)):
    STORMS = pd.concat([STORMS, storm_events[i]], axis=0)
print(STORMS.shape)
df = STORMS
df['DAMAGE_PROPERTY'] = df['DAMAGE_PROPERTY'].astype('str')
df['DAMAGE_CROPS'] = df['DAMAGE_CROPS'].astype('str')
STORMS = df
STORMS = STORMS.loc[((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS =='0')) | ((STORMS.DAMAGE_PROPERTY=='0') & (STORMS.DAMAGE_CROPS !='0')) | ((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS !='0'))]
STORMS = STORMS.loc[STORMS.DAMAGE_PROPERTY!='K'] #delete rows that only contain "K"
STORMS = STORMS.loc[STORMS.DAMAGE_CROPS!='K']
STORMS = replace_string(STORMS, 'DAMAGE_PROPERTY')
STORMS = replace_string(STORMS, 'DAMAGE_CROPS')
STORMS['TOTAL DAMAGES'] = STORMS['DAMAGE_PROPERTY'] + STORMS['DAMAGE_CROPS']
STORMS['Date'] = STORMS.index
STORMS = STORMS[['Date', 'STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'EVENT_TYPE', 'EPISODE_ID']]
STORMS = STORMS.loc[STORMS['DAMAGE_PROPERTY']!=0]
df = STORMS.groupby(['STATE', 'Date']).agg({'DAMAGE_PROPERTY':sum})
States = list(STORMS.STATE.unique())

(708372, 5)
CPU times: user 16.8 s, sys: 2.21 s, total: 19 s
Wall time: 19.7 s
