In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import hvplot.pandas
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ChrisClass")
from scipy.stats import linregress
from citipy import citipy

In [2]:
#Upload landfall csv to notebook
landfall_df = pd.read_csv("Resources/landfall.csv")
#Upload costliest hurricane csv to notebook
damage_df = pd.read_csv("Resources/costliness.csv")

In [3]:
#Create lists to store states, counties, and coordinates.
counties = []
states = []
lat_lngs = []

#Convert Lat and Long to aa string
landfall_df['Longitude'] = landfall_df['Longitude'].astype(str)
landfall_df['Latitude'] = landfall_df['Latitude'].astype(str)

#Add lattitude and longitude values to list
lat_lngs = landfall_df[['Latitude', 'Longitude']].values.tolist()

#Loop through lat_lng list to pull county and state names with geopy  
for lat_lng in lat_lngs:
    try:
        county = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        county = county.raw['address']['county']
        counties.append(county)
        state = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        state = state.raw['address']['state']
        states.append(state)
    #To handle errors if data isn't found
    except:
        counties.append('')
        states.append('')

#Add county and state names to dataframe
county_column = counties
landfall_df['County'] = county_column
states_column = states
landfall_df['State'] = states_column
landfall_df.head()

Unnamed: 0,#,Date,Season,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,Name,County,State
0,3,8/18/83,1983,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Alicia,Galveston County,Texas
1,10,9/13/84,1984,0700Z,33.9,33.9N,-78.0,78.0W,95,2,15,979,----,---,NC2,Diana,Brunswick County,North Carolina
2,2,7/25/85,1985,0300Z,32.2,32.2N,-80.5,80.5W,65,1,20,1003,----,---,SC1,Bob,,
3,4,8/15/85,1985,1630Z,29.6,29.6N,-92.7,92.7W,80,1,15,987,----,---,LA1,Danny,Cameron Parish,Louisiana
4,5,9/2/85,1985,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Elena,Harrison County,Mississippi


In [4]:
#Dictionary to store State names and abb.
state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

#Replace State Names with codes
landfall_df['State'] = landfall_df['State'].replace(state_to_abbrev)
landfall_df.head()

Unnamed: 0,#,Date,Season,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,Name,County,State
0,3,8/18/83,1983,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Alicia,Galveston County,TX
1,10,9/13/84,1984,0700Z,33.9,33.9N,-78.0,78.0W,95,2,15,979,----,---,NC2,Diana,Brunswick County,NC
2,2,7/25/85,1985,0300Z,32.2,32.2N,-80.5,80.5W,65,1,20,1003,----,---,SC1,Bob,,
3,4,8/15/85,1985,1630Z,29.6,29.6N,-92.7,92.7W,80,1,15,987,----,---,LA1,Danny,Cameron Parish,LA
4,5,9/2/85,1985,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Elena,Harrison County,MS


In [5]:
#Make a list to store cities and coordinates 
cities= []
lat_lngs2= []

#Convert Lat and Long to a float
landfall_df['Longitude'] = landfall_df['Longitude'].astype(float)
landfall_df['Latitude'] = landfall_df['Latitude'].astype(float)

#Add lattitude and longitude values to list
lat_lngs2 = landfall_df[['Latitude','Longitude']].values.tolist()

#Loop through lat_lng list to pull cities names with citipy  
for lat_lng2 in lat_lngs2:
    city = citipy.nearest_city(lat_lng2[0], lat_lng2[1]).city_name
    cities.append(city)

#Add city names to dataframe
landfall_df['City'] =cities
landfall_df.head()

Unnamed: 0,#,Date,Season,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,Name,County,State,City
0,3,8/18/83,1983,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Alicia,Galveston County,TX,hitchcock
1,10,9/13/84,1984,0700Z,33.9,33.9N,-78.0,78.0W,95,2,15,979,----,---,NC2,Diana,Brunswick County,NC,long beach
2,2,7/25/85,1985,0300Z,32.2,32.2N,-80.5,80.5W,65,1,20,1003,----,---,SC1,Bob,,,hilton head island
3,4,8/15/85,1985,1630Z,29.6,29.6N,-92.7,92.7W,80,1,15,987,----,---,LA1,Danny,Cameron Parish,LA,jennings
4,5,9/2/85,1985,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Elena,Harrison County,MS,long beach


In [6]:
#Remove "County" from county column
landfall_df['County'] = landfall_df['County'].str.replace(' County','')
landfall_df['County'] = landfall_df['County'].str.replace(' Parish','')
landfall_df.head()

Unnamed: 0,#,Date,Season,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,Name,County,State,City
0,3,8/18/83,1983,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Alicia,Galveston,TX,hitchcock
1,10,9/13/84,1984,0700Z,33.9,33.9N,-78.0,78.0W,95,2,15,979,----,---,NC2,Diana,Brunswick,NC,long beach
2,2,7/25/85,1985,0300Z,32.2,32.2N,-80.5,80.5W,65,1,20,1003,----,---,SC1,Bob,,,hilton head island
3,4,8/15/85,1985,1630Z,29.6,29.6N,-92.7,92.7W,80,1,15,987,----,---,LA1,Danny,Cameron,LA,jennings
4,5,9/2/85,1985,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Elena,Harrison,MS,long beach


In [7]:
#Merge the two data frames into one
merged_df = pd.merge(damage_df,landfall_df,on = ["Name","Season"])
#Show dataframe preview
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State,City
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,100,3,10,962,----,---,CTX3,Galveston,TX,hitchcock
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,...,75,1,15,971,----,---,LA1,St. Mary,LA,patterson
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison,MS,long beach
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,120,4,20,934,----,---,"SC4,INC1",Charleston,SC,mount pleasant
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,...,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South,RI,charlestown


In [8]:
#Rename season column to year
merged_df = merged_df.rename(columns={'Season':'Year'})
merged_df.head()

Unnamed: 0,Name,Year,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State,City
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,100,3,10,962,----,---,CTX3,Galveston,TX,hitchcock
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,...,75,1,15,971,----,---,LA1,St. Mary,LA,patterson
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison,MS,long beach
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,120,4,20,934,----,---,"SC4,INC1",Charleston,SC,mount pleasant
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,...,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South,RI,charlestown


In [9]:
#Upload HPI csv to dataframe
hpi_df = pd.read_csv('Resources/hpi_county.csv')
hpi_df.head()

Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base
0,AL,Autauga,1001,1986,.,100.0,95.59,70.96
1,AL,Autauga,1001,1987,-1.94,98.06,93.74,69.59
2,AL,Autauga,1001,1988,2.57,100.58,96.14,71.38
3,AL,Autauga,1001,1989,4.32,104.92,100.29,74.46
4,AL,Autauga,1001,1990,-0.29,104.62,100.0,74.24


In [10]:
previous_year = []
year_1 = []
year_2 = []
year_3 = []

for index in hpi_df.index:
    try:
        value = hpi_df.at[index-1, 'Year']
        previous_year.append(value)
    except:
        previous_year.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+1, 'Year']
        year_1.append(value)
    except:
        year_1.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+2, 'Year']
        year_2.append(value)
    except:
        year_2.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+3, 'Year']
        year_3.append(value)
    except:
        year_3.append('')

hpi_df['Year -1'] = previous_year
hpi_df['Year +1'] = year_1
hpi_df['Year +2'] = year_2
hpi_df['Year +3'] = year_3
hpi_df.head()


Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base,Year -1,Year +1,Year +2,Year +3
0,AL,Autauga,1001,1986,.,100.0,95.59,70.96,,1987,1988,1989
1,AL,Autauga,1001,1987,-1.94,98.06,93.74,69.59,1986.0,1988,1989,1990
2,AL,Autauga,1001,1988,2.57,100.58,96.14,71.38,1987.0,1989,1990,1991
3,AL,Autauga,1001,1989,4.32,104.92,100.29,74.46,1988.0,1990,1991,1992
4,AL,Autauga,1001,1990,-0.29,104.62,100.0,74.24,1989.0,1991,1992,1993


In [11]:
prev_ac = []
year_1_ac = []
year_2_ac = []
year_3_ac = []

for index in hpi_df.index:
    try:
        value = hpi_df.at[index-1, 'Annual Change (%)']
        prev_ac.append(value)
    except:
        prev_ac.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+1, 'Annual Change (%)']
        year_1_ac.append(value)
    except:
        year_1_ac.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+2, 'Annual Change (%)']
        year_2_ac.append(value)
    except:
        year_2_ac.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+3, 'Annual Change (%)']
        year_3_ac.append(value)
    except:
        year_3_ac.append('')

hpi_df['Annual Change (%) -1'] = prev_ac
hpi_df['Annual Change (%) +1'] = year_1_ac
hpi_df['Annual Change (%) +2'] = year_2_ac
hpi_df['Annual Change (%) +3'] = year_3_ac
hpi_df.head()

Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base,Year -1,Year +1,Year +2,Year +3,Annual Change (%) -1,Annual Change (%) +1,Annual Change (%) +2,Annual Change (%) +3
0,AL,Autauga,1001,1986,.,100.0,95.59,70.96,,1987,1988,1989,,-1.94,2.57,4.32
1,AL,Autauga,1001,1987,-1.94,98.06,93.74,69.59,1986.0,1988,1989,1990,.,2.57,4.32,-0.29
2,AL,Autauga,1001,1988,2.57,100.58,96.14,71.38,1987.0,1989,1990,1991,-1.94,4.32,-0.29,2.52
3,AL,Autauga,1001,1989,4.32,104.92,100.29,74.46,1988.0,1990,1991,1992,2.57,-0.29,2.52,3.09
4,AL,Autauga,1001,1990,-0.29,104.62,100.0,74.24,1989.0,1991,1992,1993,4.32,2.52,3.09,3.35


In [12]:
prev_hpi = []
year_1_hpi = []
year_2_hpi = []
year_3_hpi = []

for index in hpi_df.index:
    try:
        value = hpi_df.at[index-1, 'HPI']
        prev_hpi.append(value)
    except:
        prev_hpi.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+1, 'HPI']
        year_1_hpi.append(value)
    except:
        year_1_hpi.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+2, 'HPI']
        year_2_hpi.append(value)
    except:
        year_2_hpi.append('')

for index in hpi_df.index:
    try:
        value = hpi_df.at[index+3, 'HPI']
        year_3_hpi.append(value)
    except:
        year_3_hpi.append('')

hpi_df['HPI -1'] = prev_hpi
hpi_df['HPI +1'] = year_1_hpi
hpi_df['HPI +2'] = year_2_hpi
hpi_df['HPI +3'] = year_3_hpi
hpi_df.head()

Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base,Year -1,Year +1,Year +2,Year +3,Annual Change (%) -1,Annual Change (%) +1,Annual Change (%) +2,Annual Change (%) +3,HPI -1,HPI +1,HPI +2,HPI +3
0,AL,Autauga,1001,1986,.,100.0,95.59,70.96,,1987,1988,1989,,-1.94,2.57,4.32,,98.06,100.58,104.92
1,AL,Autauga,1001,1987,-1.94,98.06,93.74,69.59,1986.0,1988,1989,1990,.,2.57,4.32,-0.29,100.0,100.58,104.92,104.62
2,AL,Autauga,1001,1988,2.57,100.58,96.14,71.38,1987.0,1989,1990,1991,-1.94,4.32,-0.29,2.52,98.06,104.92,104.62,107.25
3,AL,Autauga,1001,1989,4.32,104.92,100.29,74.46,1988.0,1990,1991,1992,2.57,-0.29,2.52,3.09,100.58,104.62,107.25,110.57
4,AL,Autauga,1001,1990,-0.29,104.62,100.0,74.24,1989.0,1991,1992,1993,4.32,2.52,3.09,3.35,104.92,107.25,110.57,114.28


In [13]:
#Merge HPI data with hurricane data
complete_df = pd.merge(merged_df,hpi_df,on=['Year','County','State'])
complete_df

Unnamed: 0,Name,Year,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Year +2,Year +3,Annual Change (%) -1,Annual Change (%) +1,Annual Change (%) +2,Annual Change (%) +3,HPI -1,HPI +1,HPI +2,HPI +3
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,1985,1986,-2.03,1.64,-4.40,-1.16,161.13,167.34,159.99,158.13
1,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,1987,1988,4.57,3.03,-5.08,-0.86,177.04,196.94,186.93,185.32
2,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,1991,1992,5.89,4.00,2.84,2.40,263.9,285.8,293.9,300.97
3,Andrew,1992,5,$60.2B,4,8/24/92,0905Z,25.5,25.5N,-80.3,...,1994,1995,2.09,6.14,4.36,4.63,239.18,261.13,272.52,285.14
4,Opal,1995,3,$9.6B,17,10/4/95,2200Z,30.3,30.3N,-87.1,...,1997,1998,3.43,5.35,5.62,3.30,203.44,224.56,237.17,245.00
5,Fran,1996,3,$10.0B,6,9/6/96,0030Z,33.9,33.9N,-78.0,...,1998,1999,8.11,8.86,7.20,6.14,192.77,221.57,237.51,252.09
6,Georges,1998,2,$11.5B,7,9/25/98,1530Z,24.5,24.5N,-81.8,...,2000,2001,6.2,5.86,14.89,11.29,292.04,331.92,381.35,424.39
7,Georges,1998,2,$11.5B,7,9/28/98,1130Z,30.4,30.4N,-88.9,...,2000,2001,4.08,5.74,5.58,4.21,243.21,270.82,285.95,297.98
8,Floyd,1999,2,$12.1B,8,9/16/99,0630Z,33.8,33.8N,-78.0,...,2001,2002,7.2,8.12,5.06,3.59,237.51,272.55,286.34,296.63
9,Isabel,2003,2,$9.3B,13,9/18/03,1700Z,34.9,34.9N,-76.2,...,2005,2006,3.61,8.95,22.29,17.93,205.27,234.23,286.44,337.79


In [16]:
#Make a new dataframe with only relevant columns
clean_df = complete_df[['Name','Category','Max Winds (kt)','Adjusted Costs','City','County','State',
                        'Year -1','Year','Year +1','Year +2','Year +3',
                        'Annual Change (%) -1','Annual Change (%)','Annual Change (%) +1','Annual Change (%) +2','Annual Change (%) +3',
                        'HPI -1','HPI','HPI +2','HPI +3']]
clean_df

Unnamed: 0,Name,Category,Max Winds (kt),Adjusted Costs,City,County,State,Year -1,Year,Year +1,...,Year +3,Annual Change (%) -1,Annual Change (%),Annual Change (%) +1,Annual Change (%) +2,Annual Change (%) +3,HPI -1,HPI,HPI +2,HPI +3
0,Alicia,3,100,$9.4B,hitchcock,Galveston,TX,1982,1983,1984,...,1986,-2.03,2.17,1.64,-4.40,-1.16,161.13,164.63,159.99,158.13
1,Elena,3,100,$3.8B,long beach,Harrison,MS,1984,1985,1986,...,1988,4.57,7.97,3.03,-5.08,-0.86,177.04,191.15,186.93,185.32
2,Hugo,4,120,$22.7B,mount pleasant,Charleston,SC,1988,1989,1990,...,1992,5.89,4.13,4.00,2.84,2.40,263.9,274.79,293.9,300.97
3,Andrew,5,145,$60.2B,lakes by the bay,Miami-Dade,FL,1991,1992,1993,...,1995,2.09,2.86,6.14,4.36,4.63,239.18,246.02,272.52,285.14
4,Opal,3,100,$9.6B,gulf breeze,Escambia,FL,1994,1995,1996,...,1998,3.43,4.78,5.35,5.62,3.30,203.44,213.16,237.17,245.00
5,Fran,3,100,$10.0B,long beach,Brunswick,NC,1995,1996,1997,...,1999,8.11,5.59,8.86,7.20,6.14,192.77,203.54,237.51,252.09
6,Georges,2,90,$11.5B,key west,Monroe,FL,1997,1998,1999,...,2001,6.2,7.37,5.86,14.89,11.29,292.04,313.55,381.35,424.39
7,Georges,2,90,$11.5B,biloxi,Harrison,MS,1997,1998,1999,...,2001,4.08,5.31,5.74,5.58,4.21,243.21,256.11,285.95,297.98
8,Floyd,2,90,$12.1B,long beach,Brunswick,NC,1998,1999,2000,...,2002,7.2,6.14,8.12,5.06,3.59,237.51,252.09,286.34,296.63
9,Isabel,2,90,$9.3B,morehead city,Carteret,NC,2002,2003,2004,...,2006,3.61,4.73,8.95,22.29,17.93,205.27,214.98,286.44,337.79


In [17]:
#Read CSV from PROJECT 1 MAIN notebook
clean_df.to_csv('Output_CSVs/project_data.csv', index=False)