In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress



In [3]:
#Filtering down columns
#Disaster Decloration Summaries - disasterNumber, state, declarationType, fyDeclared, incidentType,  
#Hazard Mitigation Assistance Projects - disasterNumber, projectType, numberOfProperties, numberOfFinalProperties, projectAmount, federalShareObligated, benefitCostRatio, netValueBenefits
#Hazard Mitigation Grant Program Disaster Summaries - disasterNumber and obligatedTotalAmount

In [4]:
#Disaster Decloration Summaries - disasterNumber, state, declarationType, fyDeclared, incidentType,  
disaster_sum = pd.read_csv('DisasterDeclarationsSummaries.csv')
disaster_sum_clean = disaster_sum[['disasterNumber', 'state', 'declarationType', 'fyDeclared', 'incidentType']]
disaster_sum_clean.head()

Unnamed: 0,disasterNumber,state,declarationType,fyDeclared,incidentType
0,1,GA,DR,1953,Tornado
1,4,MI,DR,1953,Tornado
2,3,LA,DR,1953,Flood
3,6,MI,DR,1953,Tornado
4,2,TX,DR,1953,Tornado


In [5]:
#Hazard Mitigation Assistance Projects - disasterNumber, projectType, numberOfProperties, numberOfFinalProperties, projectAmount, federalShareObligated, benefitCostRatio, netValueBenefits
haz_project = pd.read_csv('HazardMitigationAssistanceProjects.csv')
haz_project_clean = haz_project[['disasterNumber', 'projectType', 'numberOfProperties', 'numberOfFinalProperties', 'projectAmount', 'federalShareObligated', 'benefitCostRatio', 'netValueBenefits']]
haz_project_clean.head()


Unnamed: 0,disasterNumber,projectType,numberOfProperties,numberOfFinalProperties,projectAmount,federalShareObligated,benefitCostRatio,netValueBenefits
0,1971.0,206.1: Safe Room (Tornado and Severe Wind Shel...,5,4,31477.0,20086.0,1.371,21420.0
1,1971.0,206.1: Safe Room (Tornado and Severe Wind Shel...,4,4,19032.0,13650.0,1.371,21420.0
2,1971.0,206.2: Safe Room (Tornado and Severe Wind Shel...,1,0,769334.0,577000.0,4.46,3181553.0
3,1971.0,601.1: Generators,0,0,42578.0,31934.0,0.0,0.0
4,1971.0,206.2: Safe Room (Tornado and Severe Wind Shel...,1,1,143899.0,107924.0,3.097,500916.0


In [6]:
#grouby disasterNumber
haz_project_clean_gr = haz_project_clean.groupby('disasterNumber').sum()
haz_project_clean_gr.head()

Unnamed: 0_level_0,numberOfProperties,numberOfFinalProperties,projectAmount,federalShareObligated,benefitCostRatio,netValueBenefits
disasterNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
820.0,1,1,292000.0,87769.0,2.0,292000.0
821.0,17,16,1092740.0,542043.0,0.0,0.0
822.0,0,0,406213.0,206105.0,6.406,7865253.0
824.0,7,7,491845.0,239923.0,4.0,351845.0
825.0,0,0,381741.0,122871.0,4.0,366741.0


In [7]:
#Hazard Mitigation Grant Program Disaster Summaries
hazard_sum = pd.read_csv('HazardMitigationGrantProgramDisasterSummaries.csv')
hazard_sum_clean = hazard_sum[['disasterNumber', 'obligatedTotalAmount']]
hazard_sum_clean.head()


Unnamed: 0,disasterNumber,obligatedTotalAmount
0,1071,2834575.0
1,1337,782081.0
2,1326,333150.0
3,1506,1791043.0
4,1211,413384.0


In [8]:
#merge Disaster Decloration Summaries and Hazard Mitigation Assistance Projects
disaster_projects = pd.merge(haz_project_clean_gr, disaster_sum_clean, on='disasterNumber')
disaster_projects.head()

Unnamed: 0,disasterNumber,numberOfProperties,numberOfFinalProperties,projectAmount,federalShareObligated,benefitCostRatio,netValueBenefits,state,declarationType,fyDeclared,incidentType
0,820.0,1,1,292000.0,87769.0,2.0,292000.0,UT,DR,1989,Flood
1,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood
2,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood
3,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood
4,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood


In [9]:
len(disaster_projects)

27575

In [10]:
df = pd.merge(disaster_projects, hazard_sum_clean, on='disasterNumber')
df.head()

Unnamed: 0,disasterNumber,numberOfProperties,numberOfFinalProperties,projectAmount,federalShareObligated,benefitCostRatio,netValueBenefits,state,declarationType,fyDeclared,incidentType,obligatedTotalAmount
0,820.0,1,1,292000.0,87769.0,2.0,292000.0,UT,DR,1989,Flood,95048.0
1,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
2,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
3,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
4,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0


In [11]:
len(df)

27501

In [12]:
clean_data = df.dropna(inplace=False)
clean_data.head()

Unnamed: 0,disasterNumber,numberOfProperties,numberOfFinalProperties,projectAmount,federalShareObligated,benefitCostRatio,netValueBenefits,state,declarationType,fyDeclared,incidentType,obligatedTotalAmount
0,820.0,1,1,292000.0,87769.0,2.0,292000.0,UT,DR,1989,Flood,95048.0
1,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
2,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
3,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0
4,821.0,17,16,1092740.0,542043.0,0.0,0.0,KY,DR,1989,Flood,579052.0


In [13]:
len(clean_data)

27501