In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import hvplot.pandas

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ChrisClass")

from scipy.stats import linregress
from citipy import citipy
from api_keys import bea_key

In [2]:
#Upload landfall and damage CSVs to notebook
landfall_df = pd.read_csv("Resources/landfall.csv")
damage_df = pd.read_csv("Resources/costliness.csv")

#Merge the two data frames into one
merged_df = pd.merge(landfall_df,damage_df,on = ['Season','Name'],how='outer')

#Drop rows with empty columns
merged_df = merged_df.dropna()

#Rename Season columns to Year
merged_df = merged_df.rename(columns={'Season':'Year'})

#Show dataframe preview
merged_df.head()


Unnamed: 0,#,Date,Year,Time,Latitude,Longitude,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),States Affected,Name,Category,Adjusted Costs
1,3.0,8/18/83,1983,0700Z,29.1,-95.1,100.0,3.0,10,962,CTX3,Alicia,3.0,$9.4B
5,5.0,9/2/85,1985,1300Z,30.4,-89.2,100.0,3.0,10,959,"AL3,MS3,AFL3",Elena,3.0,$3.8B
8,12.0,10/29/85,1985,1100Z,29.5,-91.4,75.0,1.0,15,971,LA1,Juan,1.0,$4.3B
15,11.0,9/22/89,1989,0400Z,32.8,-79.8,120.0,4.0,20,934,"SC4,INC1",Hugo,4.0,$22.7B
17,3.0,8/19/91,1991,1720Z,41.2,-71.6,90.0,2.0,25,962,"RI2,MA2,NY2,CT2",Bob,2.0,$3.5B


In [3]:
#Create lists to store states, counties, and coordinates.
counties = []
states = []
lat_lngs = []

#Convert Lat and Long to a string
merged_df['Longitude'] = merged_df['Longitude'].astype(str)
merged_df['Latitude'] = merged_df['Latitude'].astype(str)

#Add lattitude and longitude values to list
lat_lngs = merged_df[['Latitude','Longitude']].values.tolist()

#Loop through lat_lng list to pull county and state names with geopy  
for lat_lng in lat_lngs:
    try:
        county = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        county = county.raw['address']['county']
        counties.append(county)
        state = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        state = state.raw['address']['state']
        states.append(state)
    #To handle errors if data isn't found
    except:
        counties.append('')
        states.append('')

#Add county and state names to dataframe and remove 'county/parish' from string
merged_df['County'] = counties
merged_df['State'] = states
merged_df['County'] = merged_df['County'].str.replace(' County','')
merged_df['County'] = merged_df['County'].str.replace(' Parish','')

#Dictionary to store State names and abb.
state_to_abbrev = {"Alabama": "AL","Alaska": "AK","Arizona": "AZ","Arkansas": "AR","California": "CA",
    "Colorado": "CO","Connecticut": "CT","Delaware": "DE","Florida": "FL","Georgia": "GA",
    "Hawaii": "HI","Idaho": "ID","Illinois": "IL","Indiana": "IN","Iowa": "IA",
    "Kansas": "KS","Kentucky": "KY","Louisiana": "LA","Maine": "ME","Maryland": "MD",
    "Massachusetts": "MA","Michigan": "MI","Minnesota": "MN","Mississippi": "MS","Missouri": "MO",
    "Montana": "MT","Nebraska": "NE","Nevada": "NV","New Hampshire": "NH","New Jersey": "NJ",
    "New Mexico": "NM","New York": "NY","North Carolina": "NC","North Dakota": "ND","Ohio": "OH",
    "Oklahoma": "OK","Oregon": "OR","Pennsylvania": "PA","Rhode Island": "RI","South Carolina": "SC",
    "South Dakota": "SD","Tennessee": "TN","Texas": "TX","Utah": "UT","Vermont": "VT",
    "Virginia": "VA","Washington": "WA","West Virginia": "WV","Wisconsin": "WI","Wyoming": "WY","District of Columbia": "DC"}

#Replace State Names with codes
merged_df['State'] = merged_df['State'].replace(state_to_abbrev)
merged_df.head()

Unnamed: 0,#,Date,Year,Time,Latitude,Longitude,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),States Affected,Name,Category,Adjusted Costs,County,State
1,3.0,8/18/83,1983,0700Z,29.1,-95.1,100.0,3.0,10,962,CTX3,Alicia,3.0,$9.4B,Galveston,TX
5,5.0,9/2/85,1985,1300Z,30.4,-89.2,100.0,3.0,10,959,"AL3,MS3,AFL3",Elena,3.0,$3.8B,Harrison,MS
8,12.0,10/29/85,1985,1100Z,29.5,-91.4,75.0,1.0,15,971,LA1,Juan,1.0,$4.3B,St. Mary,LA
15,11.0,9/22/89,1989,0400Z,32.8,-79.8,120.0,4.0,20,934,"SC4,INC1",Hugo,4.0,$22.7B,Charleston,SC
17,3.0,8/19/91,1991,1720Z,41.2,-71.6,90.0,2.0,25,962,"RI2,MA2,NY2,CT2",Bob,2.0,$3.5B,South,RI


In [4]:
#Save new csv to avoid long run time of above code 
merged_df.to_csv('Resources/merged_df.csv',index=False)

In [5]:
#Upload new merged csv to notebook
hurricanes_df = pd.read_csv('Resources/merged_df.csv')
hurricanes_df.head()

Unnamed: 0,#,Date,Year,Time,Latitude,Longitude,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),States Affected,Name,Category,Adjusted Costs,County,State
0,3.0,8/18/83,1983,0700Z,29.1,-95.1,100.0,3.0,10,962,CTX3,Alicia,3.0,$9.4B,Galveston,TX
1,5.0,9/2/85,1985,1300Z,30.4,-89.2,100.0,3.0,10,959,"AL3,MS3,AFL3",Elena,3.0,$3.8B,Harrison,MS
2,12.0,10/29/85,1985,1100Z,29.5,-91.4,75.0,1.0,15,971,LA1,Juan,1.0,$4.3B,St. Mary,LA
3,11.0,9/22/89,1989,0400Z,32.8,-79.8,120.0,4.0,20,934,"SC4,INC1",Hugo,4.0,$22.7B,Charleston,SC
4,3.0,8/19/91,1991,1720Z,41.2,-71.6,90.0,2.0,25,962,"RI2,MA2,NY2,CT2",Bob,2.0,$3.5B,South,RI


In [6]:
#Upload HPI csv to dataframe
hpi_df = pd.read_csv('Resources/hpi_county.csv')

#Format 'year' and 'FIPS code' as string add forward 0 back to the 'FIPS code'
hpi_df['Year'] = hpi_df['Year'].astype(str)
hpi_df['FIPS code'] = hpi_df['FIPS code'].astype(str)
hpi_df['FIPS code'] = hpi_df['FIPS code'].apply(lambda x: x.zfill(5))

#Replace empty '.' with 'NaN
hpi_df.replace('.',"NaN",inplace=True)

#Drop unnecessary columns and format 'Annual Change(%)'
hpi_df = hpi_df.drop(['HPI with 1990 base','HPI with 2000 base'], axis=1)
hpi_df = hpi_df.rename(columns={'Annual Change (%)': 'Annual HPI Change (%)'})
hpi_df

Unnamed: 0,State,County,FIPS code,Year,Annual HPI Change (%),HPI
0,AL,Autauga,01001,1986,,100
1,AL,Autauga,01001,1987,-1.94,98.06
2,AL,Autauga,01001,1988,2.57,100.58
3,AL,Autauga,01001,1989,4.32,104.92
4,AL,Autauga,01001,1990,-0.29,104.62
...,...,...,...,...,...,...
100427,WY,Weston,56045,2019,8.33,223.4
100428,WY,Weston,56045,2020,4.36,233.13
100429,WY,Weston,56045,2021,4.93,244.62
100430,WY,Weston,56045,2022,6.87,261.41


In [7]:
#Create variables for the API call
base_url = 'https://apps.bea.gov/api/data'
year_range = '1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023'

#Create URL to Bureau of Economic Analysis for per capita income by county by year
county_url = f'{base_url}?&UserID={bea_key}&method=GetData&datasetname=Regional&TableName=CAINC1&LineCode=1&Year={year_range}&GeoFips=COUNTY&ResultFormat=json'
#Pull the response into the notebook
county_json = requests.get(county_url).json()

#Display the result as a dataframe with 'json_normalize()' using only relevant columns 
json_df = pd.json_normalize(county_json['BEAAPI']['Results']['Data'])
income_df = json_df[['GeoFips','GeoName','TimePeriod','DataValue']]
#Format columns to match 'hpi_df' dataframe so that it can be merged later.
income_df = income_df.rename(columns={'GeoFips':'FIPS code','TimePeriod':'Year','GeoName':'County & State','DataValue':'PerCapita Income'})
#Convert new 'Year' column to string 
income_df['Year'] = income_df['Year'].astype(str)
#Convert new 'PerCapita Income' column to a float and divide by 100 to get dollar value
income_df['PerCapita Income'] = income_df['PerCapita Income'].astype(float)
income_df['PerCapita Income'] = income_df['PerCapita Income']/100
income_df

Unnamed: 0,FIPS code,County & State,Year,PerCapita Income
0,01001,"Autauga, AL",2005,14407.95
1,01001,"Autauga, AL",1989,5031.59
2,01001,"Autauga, AL",2016,22224.48
3,01001,"Autauga, AL",2012,19535.00
4,01001,"Autauga, AL",2001,11061.94
...,...,...,...,...
135015,56045,"Weston, WY",2009,2762.29
135016,56045,"Weston, WY",1988,1056.31
135017,56045,"Weston, WY",1999,1665.72
135018,56045,"Weston, WY",2010,2796.93


In [8]:
#Merge income and hpi datasets
income_hpi_df = pd.merge(hpi_df,income_df,on = ['FIPS code','Year'],how='outer')
#Drop NaN values from 'HPI' and 'PerCapita Income'
income_hpi_df = income_hpi_df.dropna(subset=['HPI','PerCapita Income'])
income_hpi_df

Unnamed: 0,State,County,FIPS code,Year,Annual HPI Change (%),HPI,County & State,PerCapita Income
6,AL,Autauga,01001,1986,,100,"Autauga, AL",3906.85
7,AL,Autauga,01001,1987,-1.94,98.06,"Autauga, AL",4242.53
8,AL,Autauga,01001,1988,2.57,100.58,"Autauga, AL",4656.42
9,AL,Autauga,01001,1989,4.32,104.92,"Autauga, AL",5031.59
10,AL,Autauga,01001,1990,-0.29,104.62,"Autauga, AL",5318.09
...,...,...,...,...,...,...,...,...
143045,WY,Weston,56045,2018,-0.9,206.21,"Weston, WY",3102.31
143046,WY,Weston,56045,2019,8.33,223.4,"Weston, WY",3160.01
143047,WY,Weston,56045,2020,4.36,233.13,"Weston, WY",3399.07
143048,WY,Weston,56045,2021,4.93,244.62,"Weston, WY",3365.03


In [9]:
#Get the years around eachrow in the dataframe wit the shift function
income_hpi_df['Year -2'] = income_hpi_df.groupby('State')['Year'].shift(2)
income_hpi_df['Year -1'] = income_hpi_df.groupby('State')['Year'].shift(1)
income_hpi_df['Year +1'] = income_hpi_df.groupby('State')['Year'].shift(-1)
income_hpi_df['Year +2'] = income_hpi_df.groupby('State')['Year'].shift(-2)
income_hpi_df['Year +3'] = income_hpi_df.groupby('State')['Year'].shift(-3)


#Put the values for Annual HPI chnages in the surrounding years into the same row
income_hpi_df['Annual HPI Change (%) -1'] = income_hpi_df.groupby('State')['Annual HPI Change (%)'].shift(1)
income_hpi_df['Annual HPI Change (%) +1'] = income_hpi_df.groupby('State')['Annual HPI Change (%)'].shift(-1)
income_hpi_df['Annual HPI Change (%) +2'] = income_hpi_df.groupby('State')['Annual HPI Change (%)'].shift(-2)
income_hpi_df['Annual HPI Change (%) +3'] = income_hpi_df.groupby('State')['Annual HPI Change (%)'].shift(-3)

#Put the values for 'PerCapita Income' in the surrounding years into the same row
income_hpi_df['PerCapita Income -2'] = income_hpi_df.groupby('State')['PerCapita Income'].shift(2)
income_hpi_df['PerCapita Income -1'] = income_hpi_df.groupby('State')['PerCapita Income'].shift(1)
income_hpi_df['PerCapita Income +1'] = income_hpi_df.groupby('State')['PerCapita Income'].shift(-1)
income_hpi_df['PerCapita Income +2'] = income_hpi_df.groupby('State')['PerCapita Income'].shift(-2)
income_hpi_df['PerCapita Income +3'] = income_hpi_df.groupby('State')['PerCapita Income'].shift(-3)

#Caluclate the difference in income between years and convert to percentage
income_hpi_df['Annual Income Change (%) -1'] = (income_hpi_df['PerCapita Income -1']-income_hpi_df['PerCapita Income -2'])/income_hpi_df['PerCapita Income -2']*100
income_hpi_df['Annual Income Change (%)'] = (income_hpi_df['PerCapita Income']-income_hpi_df['PerCapita Income -1'])/income_hpi_df['PerCapita Income -1']*100
income_hpi_df['Annual Income Change (%) +1'] = (income_hpi_df['PerCapita Income +1']-income_hpi_df['PerCapita Income'])/income_hpi_df['PerCapita Income']*100
income_hpi_df['Annual Income Change (%) +2'] = (income_hpi_df['PerCapita Income +2']-income_hpi_df['PerCapita Income +1'])/income_hpi_df['PerCapita Income +1']*100
income_hpi_df['Annual Income Change (%) +3'] = (income_hpi_df['PerCapita Income +3']-income_hpi_df['PerCapita Income +2'])/income_hpi_df['PerCapita Income +2']*100

#Replace all 'inf' calues with 'NaN'
income_hpi_df.replace([np.inf, -np.inf], np.nan, inplace=True)
income_hpi_df

Unnamed: 0,State,County,FIPS code,Year,Annual HPI Change (%),HPI,County & State,PerCapita Income,Year -2,Year -1,...,PerCapita Income -2,PerCapita Income -1,PerCapita Income +1,PerCapita Income +2,PerCapita Income +3,Annual Income Change (%) -1,Annual Income Change (%),Annual Income Change (%) +1,Annual Income Change (%) +2,Annual Income Change (%) +3
6,AL,Autauga,01001,1986,,100,"Autauga, AL",3906.85,,,...,,,4242.53,4656.42,5031.59,,,8.592088,9.755735,8.057048
7,AL,Autauga,01001,1987,-1.94,98.06,"Autauga, AL",4242.53,,1986,...,,3906.85,4656.42,5031.59,5318.09,,8.592088,9.755735,8.057048,5.694025
8,AL,Autauga,01001,1988,2.57,100.58,"Autauga, AL",4656.42,1986,1987,...,3906.85,4242.53,5031.59,5318.09,5750.36,8.592088,9.755735,8.057048,5.694025,8.128294
9,AL,Autauga,01001,1989,4.32,104.92,"Autauga, AL",5031.59,1987,1988,...,4242.53,4656.42,5318.09,5750.36,6138.12,9.755735,8.057048,5.694025,8.128294,6.743230
10,AL,Autauga,01001,1990,-0.29,104.62,"Autauga, AL",5318.09,1988,1989,...,4656.42,5031.59,5750.36,6138.12,6537.96,8.057048,5.694025,8.128294,6.743230,6.514047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143045,WY,Weston,56045,2018,-0.9,206.21,"Weston, WY",3102.31,2016,2017,...,3030.54,2979.32,3160.01,3399.07,3365.03,-1.690128,4.128123,1.859904,7.565166,-1.001450
143046,WY,Weston,56045,2019,8.33,223.4,"Weston, WY",3160.01,2017,2018,...,2979.32,3102.31,3399.07,3365.03,3497.68,4.128123,1.859904,7.565166,-1.001450,3.942015
143047,WY,Weston,56045,2020,4.36,233.13,"Weston, WY",3399.07,2018,2019,...,3102.31,3160.01,3365.03,3497.68,,1.859904,7.565166,-1.001450,3.942015,
143048,WY,Weston,56045,2021,4.93,244.62,"Weston, WY",3365.03,2019,2020,...,3160.01,3399.07,3497.68,,,7.565166,-1.001450,3.942015,,


In [10]:
#Save combined hpi and income df to directory
income_hpi_df.to_csv('Output_CSVs/hpi_income_df.csv',index=False)

In [11]:
#Re-upload hpi csv to notebook
hpi_income_df_csv = pd.read_csv('Output_CSVs/hpi_income_df.csv')
#Convert 'FIPS code' to string again and add leading 0 back
hpi_income_df_csv['FIPS code'] = hpi_income_df_csv['FIPS code'].astype(str)
hpi_income_df_csv['PerCapita Income'] = hpi_income_df_csv['PerCapita Income'].astype(float)
hpi_income_df_csv['FIPS code'] = hpi_income_df_csv['FIPS code'].apply(lambda x: x.zfill(5))
hpi_income_df_csv.head()

Unnamed: 0,State,County,FIPS code,Year,Annual HPI Change (%),HPI,County & State,PerCapita Income,Year -2,Year -1,...,PerCapita Income -2,PerCapita Income -1,PerCapita Income +1,PerCapita Income +2,PerCapita Income +3,Annual Income Change (%) -1,Annual Income Change (%),Annual Income Change (%) +1,Annual Income Change (%) +2,Annual Income Change (%) +3
0,AL,Autauga,1001,1986,,100.0,"Autauga, AL",3906.85,,,...,,,4242.53,4656.42,5031.59,,,8.592088,9.755735,8.057048
1,AL,Autauga,1001,1987,-1.94,98.06,"Autauga, AL",4242.53,,1986.0,...,,3906.85,4656.42,5031.59,5318.09,,8.592088,9.755735,8.057048,5.694025
2,AL,Autauga,1001,1988,2.57,100.58,"Autauga, AL",4656.42,1986.0,1987.0,...,3906.85,4242.53,5031.59,5318.09,5750.36,8.592088,9.755735,8.057048,5.694025,8.128294
3,AL,Autauga,1001,1989,4.32,104.92,"Autauga, AL",5031.59,1987.0,1988.0,...,4242.53,4656.42,5318.09,5750.36,6138.12,9.755735,8.057048,5.694025,8.128294,6.74323
4,AL,Autauga,1001,1990,-0.29,104.62,"Autauga, AL",5318.09,1988.0,1989.0,...,4656.42,5031.59,5750.36,6138.12,6537.96,8.057048,5.694025,8.128294,6.74323,6.514047


In [12]:
#Create an aggregate table to determine mean by year and by nation for HPI
by_nation_hpi = hpi_income_df_csv.groupby(['Year'])
hpi_change_by_nation = by_nation_hpi['Annual HPI Change (%)'].agg(['mean'])
hpi_change_by_nation.head()


Unnamed: 0_level_0,mean
Year,Unnamed: 1_level_1
1980,7.590396
1981,4.567931
1982,0.715065
1983,7.350132
1984,3.630926


In [13]:
#Create lists to store the mean values for the surrounding years
prev_mean_national = []
mean_national_1 = []
mean_national_2 = []
mean_national_3 = []

#Use for loops to get the value for the mean in the year prior and the three years following
for index in hpi_change_by_nation.index:
    try:
        value = hpi_change_by_nation.at[index-1,'mean']
        prev_mean_national.append(value)
    except:
        prev_mean_national.append('NaN')

for index in hpi_change_by_nation.index:
    try:
        value = hpi_change_by_nation.at[index+1,'mean']
        mean_national_1.append(value)
    except:
        mean_national_1.append('NaN')

for index in hpi_change_by_nation.index:
    try:
        value = hpi_change_by_nation.at[index+2,'mean']
        mean_national_2.append(value)
    except:
        mean_national_2.append('NaN')

for index in hpi_change_by_nation.index:
    try:
        value = hpi_change_by_nation.at[index+3,'mean']
        mean_national_3.append(value)
    except:
        mean_national_3.append('NaN')

#Append hpi mean dataframe with the surrounding year values in the same rows
hpi_change_by_nation['National HPI AC Mean -1'] = prev_mean_national
hpi_change_by_nation['National HPI AC Mean +1'] = mean_national_1
hpi_change_by_nation['National HPI AC Mean +2'] = mean_national_2
hpi_change_by_nation['National HPI AC Mean +3'] = mean_national_3
hpi_change_by_nation.head()

Unnamed: 0_level_0,mean,National HPI AC Mean -1,National HPI AC Mean +1,National HPI AC Mean +2,National HPI AC Mean +3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980,7.590396,,4.567931,0.715065,7.350132
1981,4.567931,7.590396,0.715065,7.350132,3.630926
1982,0.715065,4.567931,7.350132,3.630926,3.304653
1983,7.350132,0.715065,3.630926,3.304653,4.337647
1984,3.630926,7.350132,3.304653,4.337647,3.272305


In [14]:
#Create an aggregate table to determine mean by year and by state for HPI
by_state_hpi = hpi_income_df_csv.groupby(['State','Year'])
hpi_change_by_state = by_state_hpi['Annual HPI Change (%)'].agg(['mean'])
hpi_change_by_state = hpi_change_by_state.reset_index(level=1)
hpi_change_by_state.head()

Unnamed: 0_level_0,Year,mean
State,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,1980,4.775
AK,1981,15.595
AK,1982,12.5
AK,1983,8.11
AK,1984,11.97


In [15]:
#Use shift function to get the values of the surrounding years into the same row
hpi_change_by_state['State HPI AC Mean -1'] = hpi_change_by_state.groupby('State')['mean'].shift(1)
hpi_change_by_state['State HPI AC Mean +1'] = hpi_change_by_state.groupby('State')['mean'].shift(-1)
hpi_change_by_state['State HPI AC Mean +2'] = hpi_change_by_state.groupby('State')['mean'].shift(-2)
hpi_change_by_state['State HPI AC Mean +3'] = hpi_change_by_state.groupby('State')['mean'].shift(-3)
hpi_change_by_state.head()

Unnamed: 0_level_0,Year,mean,State HPI AC Mean -1,State HPI AC Mean +1,State HPI AC Mean +2,State HPI AC Mean +3
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,1980,4.775,,15.595,12.5,8.11
AK,1981,15.595,4.775,12.5,8.11,11.97
AK,1982,12.5,15.595,8.11,11.97,-5.578
AK,1983,8.11,12.5,11.97,-5.578,-2.798571
AK,1984,11.97,8.11,-5.578,-2.798571,-13.908571


In [16]:
#Create an aggregate table to determine mean by year by state and by nation for income
by_nation_income = hpi_income_df_csv.groupby(['Year'])
income_change_by_nation = by_nation_hpi['Annual Income Change (%)'].agg(['mean'])
income_change_by_nation.head()


Unnamed: 0_level_0,mean
Year,Unnamed: 1_level_1
1980,51.906652
1981,12.02022
1982,5.586015
1983,5.345213
1984,8.89708


In [17]:
#Create lists to store the mean values for the surrounding years
prev_mean_income_national = []
mean_income_national_1 = []
mean_income_national_2 = []
mean_income_national_3 = []

#Use for loops to get the value for the mean in the year prior and the three years following
for index in income_change_by_nation.index:
    try:
        value = income_change_by_nation.at[index-1,'mean']
        prev_mean_income_national.append(value)
    except:
        prev_mean_income_national.append('NaN')

for index in income_change_by_nation.index:
    try:
        value = income_change_by_nation.at[index+1,'mean']
        mean_income_national_1.append(value)
    except:
        mean_income_national_1.append('NaN')

for index in income_change_by_nation.index:
    try:
        value = income_change_by_nation.at[index+2,'mean']
        mean_income_national_2.append(value)
    except:
        mean_income_national_2.append('NaN')

for index in income_change_by_nation.index:
    try:
        value = income_change_by_nation.at[index+3,'mean']
        mean_income_national_3.append(value)
    except:
        mean_income_national_3.append('NaN')

#Append hpi mean dataframe with the surrounding year values in the same rows
income_change_by_nation['National Income AC Mean -1'] = prev_mean_income_national
income_change_by_nation['National Income AC Mean +1'] = mean_income_national_1
income_change_by_nation['National Income AC Mean +2'] = mean_income_national_2
income_change_by_nation['National Income AC Mean +3'] = mean_income_national_2
income_change_by_nation.head()

Unnamed: 0_level_0,mean,National Income AC Mean -1,National Income AC Mean +1,National Income AC Mean +2,National Income AC Mean +3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980,51.906652,,12.02022,5.586015,5.586015
1981,12.02022,51.906652,5.586015,5.345213,5.345213
1982,5.586015,12.02022,5.345213,8.89708,8.89708
1983,5.345213,5.586015,8.89708,0.077288,0.077288
1984,8.89708,5.345213,0.077288,-11.240121,-11.240121


In [18]:
#Create an aggregate table to determine mean by year and by state for income
by_state_income = hpi_income_df_csv.groupby(['State','Year'])
income_change_by_state = by_state_hpi['Annual Income Change (%)'].agg(['mean'])
income_change_by_state = income_change_by_state.reset_index(level=1)
income_change_by_state.head()


Unnamed: 0_level_0,Year,mean
State,Unnamed: 1_level_1,Unnamed: 2_level_1
AK,1980,-94.987654
AK,1981,14.827293
AK,1982,22.588021
AK,1983,8.174108
AK,1984,-19.523726


In [19]:
#Use shift function to get the values of the surrounding years into the same row
income_change_by_state['State Income AC Mean -1'] = income_change_by_state.groupby('State')['mean'].shift(1)
income_change_by_state['State Income AC Mean +1'] = income_change_by_state.groupby('State')['mean'].shift(-1)
income_change_by_state['State Income AC Mean +2'] = income_change_by_state.groupby('State')['mean'].shift(-2)
income_change_by_state['State Income AC Mean +3'] = income_change_by_state.groupby('State')['mean'].shift(-3)
income_change_by_state.head()

Unnamed: 0_level_0,Year,mean,State Income AC Mean -1,State Income AC Mean +1,State Income AC Mean +2,State Income AC Mean +3
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,1980,-94.987654,,14.827293,22.588021,8.174108
AK,1981,14.827293,-94.987654,22.588021,8.174108,-19.523726
AK,1982,22.588021,14.827293,8.174108,-19.523726,-20.772632
AK,1983,8.174108,22.588021,-19.523726,-20.772632,-7.458003
AK,1984,-19.523726,8.174108,-20.772632,-7.458003,-3.921448


In [20]:
#Add the state and national data for both income and HPI to the main dataframe 
merged_df = pd.merge(hpi_income_df_csv,hpi_change_by_nation,on = ['Year'],how='outer')
merged_df2 = pd.merge(merged_df,hpi_change_by_state,on=['Year','State'],how='outer')
clean_new_hpi_income_df = pd.merge(merged_df2,income_change_by_state,on=['Year','State'],how='outer')
clean_new_hpi_income_df = clean_new_hpi_income_df.rename(columns={'mean_x':'National HPI AC Mean','median_x':'National HPI AC Median',
                                          'mean_y':'State HPI AC Mean','median_y':'State HPI AC Median','mean':'State Income AC Mean'})
#Display dataframe
clean_new_hpi_income_df.head()


Unnamed: 0,State,County,FIPS code,Year,Annual HPI Change (%),HPI,County & State,PerCapita Income,Year -2,Year -1,...,State HPI AC Mean,State HPI AC Mean -1,State HPI AC Mean +1,State HPI AC Mean +2,State HPI AC Mean +3,State Income AC Mean,State Income AC Mean -1,State Income AC Mean +1,State Income AC Mean +2,State Income AC Mean +3
0,AK,Anchorage,2020,1980,0.39,148.12,"Anchorage Municipality, AK",29726.12,,,...,4.775,,15.595,12.5,8.11,-94.987654,,14.827293,22.588021,8.174108
1,AK,Fairbanks North Star,2090,1980,,,"Fairbanks North Star Borough, AK",8266.35,2021.0,2022.0,...,4.775,,15.595,12.5,8.11,-94.987654,,14.827293,22.588021,8.174108
2,AK,Juneau,2110,1980,9.16,109.16,"Juneau City and Borough, AK",4015.59,2021.0,2022.0,...,4.775,,15.595,12.5,8.11,-94.987654,,14.827293,22.588021,8.174108
3,AL,Baldwin,1003,1980,18.11,124.78,"Baldwin, AL",6159.85,2021.0,2022.0,...,6.905833,,2.655,0.711,5.480909,-2.107017,,11.962762,5.941885,8.336215
4,AL,Calhoun,1015,1980,3.95,131.07,"Calhoun, AL",9730.8,2021.0,2022.0,...,6.905833,,2.655,0.711,5.480909,-2.107017,,11.962762,5.941885,8.336215


In [21]:
#Merge HPI data with hurricane data
hurricane_hpi_income_df = pd.merge(hurricanes_df,clean_new_hpi_income_df,on=['Year','County','State'],how='inner')
hurricane_hpi_income_df.head()

Unnamed: 0,#,Date,Year,Time,Latitude,Longitude,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),...,State HPI AC Mean,State HPI AC Mean -1,State HPI AC Mean +1,State HPI AC Mean +2,State HPI AC Mean +3,State Income AC Mean,State Income AC Mean -1,State Income AC Mean +1,State Income AC Mean +2,State Income AC Mean +3
0,3.0,8/18/83,1983,0700Z,29.1,-95.1,100.0,3.0,10,962,...,9.237719,3.585926,1.364219,0.365455,0.445857,3.167452,6.871159,7.170353,5.500886,-10.866751
1,5.0,9/2/85,1985,1300Z,30.4,-89.2,100.0,3.0,10,959,...,6.701818,2.293636,0.5,2.166957,-1.587826,-1.581336,9.094874,-21.538147,5.288352,7.150734
2,11.0,9/22/89,1989,0400Z,32.8,-79.8,120.0,4.0,20,934,...,3.666923,3.210769,3.289259,2.433704,2.361034,8.27955,9.064073,7.272029,-0.987816,-7.645857
3,4.0,8/24/92,1992,0905Z,25.5,-80.3,145.0,5.0,10,922,...,2.35875,1.079,2.001818,0.763191,3.318936,-0.215606,4.983357,-0.944497,6.112222,3.75215
4,17.0,10/4/95,1995,2200Z,30.3,-87.1,100.0,3.0,50,942,...,3.318936,0.763191,3.6388,3.337358,4.533208,3.75215,6.112222,1.154737,6.397696,2.340026


In [25]:
#Make a new dataframe with only relevant columns
#clean_hurricane_hpi_income_df = hurricane_hpi_income_df[['Name','Max Winds (kt)','Adjusted Costs','FIPS code','County','State',
#                        'Year -1','Year','Year +1','Year +2',
#                        'Annual Change (%) -1','Annual Change (%)','Annual Change (%) +1','Annual Change (%) +2',
#                        'HPI -1','HPI','HPI +1','HPI +2',
#                        'National AC Mean -1','National AC Mean','National AC Mean +1','National AC Mean +2',
#                        'State AC Mean -1','State AC Mean','State AC Mean +1','State AC Mean +2',
#                        'PerCapita Income -1','PerCapita Income','PerCapita Income +1','PerCapita Income +2']]
clean_hurricane_hpi_income_df = clean_hurricane_hpi_income_df.dropna()
clean_hurricane_hpi_income_df

Unnamed: 0,#,Date,Year,Time,Latitude,Longitude,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),...,State HPI AC Mean,State HPI AC Mean -1,State HPI AC Mean +1,State HPI AC Mean +2,State HPI AC Mean +3,State Income AC Mean,State Income AC Mean -1,State Income AC Mean +1,State Income AC Mean +2,State Income AC Mean +3
0,3.0,8/18/83,1983,0700Z,29.1,-95.1,100.0,3.0,10,962,...,9.237719,3.585926,1.364219,0.365455,0.445857,3.167452,6.871159,7.170353,5.500886,-10.866751
1,5.0,9/2/85,1985,1300Z,30.4,-89.2,100.0,3.0,10,959,...,6.701818,2.293636,0.5,2.166957,-1.587826,-1.581336,9.094874,-21.538147,5.288352,7.150734
2,11.0,9/22/89,1989,0400Z,32.8,-79.8,120.0,4.0,20,934,...,3.666923,3.210769,3.289259,2.433704,2.361034,8.27955,9.064073,7.272029,-0.987816,-7.645857
3,4.0,8/24/92,1992,0905Z,25.5,-80.3,145.0,5.0,10,922,...,2.35875,1.079,2.001818,0.763191,3.318936,-0.215606,4.983357,-0.944497,6.112222,3.75215
4,17.0,10/4/95,1995,2200Z,30.3,-87.1,100.0,3.0,50,942,...,3.318936,0.763191,3.6388,3.337358,4.533208,3.75215,6.112222,1.154737,6.397696,2.340026
5,6.0,9/6/96,1996,0030Z,33.9,-78.0,100.0,3.0,40,954,...,3.822892,5.911772,4.788235,4.212069,3.728211,5.079539,3.000018,4.977479,-1.332694,2.533678
6,7.0,9/25/98,1998,1530Z,24.5,-81.8,90.0,2.0,25,981,...,4.533208,3.337358,3.443036,5.841429,8.747679,2.340026,6.397696,4.581628,7.039608,2.332215
7,7.0,9/28/98,1998,1130Z,30.4,-88.9,90.0,2.0,80,964,...,4.278913,4.939348,5.001818,3.338909,4.560893,-1.442141,5.786835,3.239284,3.263067,-1.800727
8,8.0,9/16/99,1999,0630Z,33.8,-78.0,90.0,2.0,35,956,...,3.728211,4.212069,5.468351,5.11398,3.062525,2.533678,-1.332694,5.065094,3.979576,0.627918
9,13.0,9/18/03,2003,1700Z,34.9,-76.2,90.0,2.0,40,957,...,2.179596,3.062525,5.103434,7.498889,8.108687,3.708739,0.627918,7.323197,6.03353,5.76207


In [26]:
#Save hurricanes df to directory
hurricanes_df.to_csv('Output_CSVs/hurricanes_df.csv',index=False)

#Save Merged df to directory
clean_new_hpi_income_df.to_csv('Output_CSVs/clean_new_hpi_income.csv',index=False)

#Save clean df as CSV to directory
clean_hurricane_hpi_income_df.to_csv('Output_CSVs/clean_hurricane_hpi_income_data.csv',index=False)



hurricanes_df = Total Data for hurricane strikes in the US

hpi_df = Total HPI data with new columns for every county

clean_hurricane_hpi_df = Comined previous two data frames. All hurricane strikes with HPI data.