In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import hvplot.pandas
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ChrisClass")
from scipy.stats import linregress
from citipy import citipy

In [3]:
#Upload landfall csv to notebook
landfall_df = pd.read_csv("Resources/landfall.csv")
#Upload costliest hurricane csv to notebook
damage_df = pd.read_csv("Resources/costliness.csv")
#Merge the two data frames into one
merged_df = pd.merge(damage_pd,landfall_pd,on = ["Name","Season"])
#Show dataframe preview
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,91.4W,75,1,15,971,----,---,LA1
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3"
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,79.8W,120,4,20,934,----,---,"SC4,INC1"
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,71.6W,90,2,25,962,----,---,"RI2,MA2,NY2,CT2"


In [4]:
#Create lists to store states, counties, and coordinates.
counties = []
states = []
lat_lngs = []

#Convert Lat and Long to aa string
merged_df['Longitude'] = merged_df['Longitude'].astype(str)
merged_df['Latitude'] = merged_df['Latitude'].astype(str)

#Add lattitude and longitude values to list
lat_lngs = merged_df[['Latitude', 'Longitude']].values.tolist()

#Loop through lat_lng list to pull county and state names with geopy  
for lat_lng in lat_lngs:
    try:
        county = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        county = county.raw['address']['county']
        counties.append(county)
        state = geolocator.reverse(lat_lng[0]+","+lat_lng[1])
        state = state.raw['address']['state']
        states.append(state)
    #To handle errors if data isn't found
    except:
        counties.append('')
        states.append('')

#Add county and state names to dataframe
county_column = counties
merged_df['County'] = county_column
states_column = states
merged_df['State'] = states_column
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Galveston County,Texas
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,91.4W,75,1,15,971,----,---,LA1,St. Mary Parish,Louisiana
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison County,Mississippi
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,79.8W,120,4,20,934,----,---,"SC4,INC1",Charleston County,South Carolina
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,71.6W,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South County,Rhode Island


In [6]:
#Dictionary to store State names and abb.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

#Replace State Names with codes
merged_df['State'] = merged_df['State'].replace(us_state_to_abbrev)
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,Long,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,95.1W,100,3,10,962,----,---,CTX3,Galveston County,TX
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,91.4W,75,1,15,971,----,---,LA1,St. Mary Parish,LA
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,89.2W,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison County,MS
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,79.8W,120,4,20,934,----,---,"SC4,INC1",Charleston County,SC
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,71.6W,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South County,RI


In [7]:
#Make a list to store cities and coordinates 
cities= []
lat_lngs2= []

#Convert Lat and Long to a float
merged_df['Longitude'] = merged_df['Longitude'].astype(float)
merged_df['Latitude'] = merged_df['Latitude'].astype(float)

#Add lattitude and longitude values to list
lat_lngs2 = merged_df[['Latitude','Longitude']].values.tolist()

#Loop through lat_lng list to pull cities names with citipy  
for lat_lng2 in lat_lngs2:
    city = citipy.nearest_city(lat_lng2[0], lat_lng2[1]).city_name
    cities.append(city)

#Add city names to dataframe
city_column = cities
merged_df['City'] =city_column
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State,City
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,100,3,10,962,----,---,CTX3,Galveston County,TX,hitchcock
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,...,75,1,15,971,----,---,LA1,St. Mary Parish,LA,patterson
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison County,MS,long beach
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,120,4,20,934,----,---,"SC4,INC1",Charleston County,SC,mount pleasant
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,...,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South County,RI,charlestown


In [8]:
#Remove "County" from county column
merged_df['County'] = merged_df['County'].str.replace(' County','')
merged_df['County'] = merged_df['County'].str.replace(' Parish','')
merged_df.head()

Unnamed: 0,Name,Season,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State,City
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,100,3,10,962,----,---,CTX3,Galveston,TX,hitchcock
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,...,75,1,15,971,----,---,LA1,St. Mary,LA,patterson
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison,MS,long beach
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,120,4,20,934,----,---,"SC4,INC1",Charleston,SC,mount pleasant
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,...,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South,RI,charlestown


In [9]:
#Rename season column to year
merged_df = merged_df.rename(columns={'Season':'Year'})
merged_df.head()

Unnamed: 0,Name,Year,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Max Winds (kt),SS HWS,RMW nm,Central Prssure (mb),OCI (mb),Size (nm),States Affected,County,State,City
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,100,3,10,962,----,---,CTX3,Galveston,TX,hitchcock
1,Juan,1985,1,$4.3B,12,10/29/85,1100Z,29.5,29.5N,-91.4,...,75,1,15,971,----,---,LA1,St. Mary,LA,patterson
2,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,100,3,10,959,----,---,"AL3,MS3,AFL3",Harrison,MS,long beach
3,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,120,4,20,934,----,---,"SC4,INC1",Charleston,SC,mount pleasant
4,Bob,1991,2,$3.5B,3,8/19/91,1720Z,41.2,41.2N,-71.6,...,90,2,25,962,----,---,"RI2,MA2,NY2,CT2",South,RI,charlestown


In [10]:
#Upload HPI csv to dataframe
hpi_df = pd.read_csv('Resources/hpi_county.csv')
hpi_df.head()

Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base
0,AL,Autauga,1001,1986,.,100.0,95.59,70.96
1,AL,Autauga,1001,1987,-1.94,98.06,93.74,69.59
2,AL,Autauga,1001,1988,2.57,100.58,96.14,71.38
3,AL,Autauga,1001,1989,4.32,104.92,100.29,74.46
4,AL,Autauga,1001,1990,-0.29,104.62,100.0,74.24


In [11]:
#Merge HPI data with hurricane data
complete_df = pd.merge(merged_df, hpi_df, on=['Year', 'County','State'])
complete_df.head()

Unnamed: 0,Name,Year,Category,Adjusted Costs,#,Date,Time,Latitude,Lat,Longitude,...,Size (nm),States Affected,County,State,City,FIPS code,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base
0,Alicia,1983,3,$9.4B,3,8/18/83,0700Z,29.1,29.1N,-95.1,...,---,CTX3,Galveston,TX,hitchcock,48167,2.17,164.63,105.76,77.94
1,Elena,1985,3,$3.8B,5,9/2/85,1300Z,30.4,30.4N,-89.2,...,---,"AL3,MS3,AFL3",Harrison,MS,long beach,28047,7.97,191.15,108.0,66.85
2,Hugo,1989,4,$22.7B,11,9/22/89,0400Z,32.8,32.8N,-79.8,...,---,"SC4,INC1",Charleston,SC,mount pleasant,45019,4.13,274.79,96.15,56.85
3,Andrew,1992,5,$60.2B,4,8/24/92,0905Z,25.5,25.5N,-80.3,...,---,"CFL5,BFL4",Miami-Dade,FL,lakes by the bay,12086,2.86,246.02,105.01,71.68
4,Opal,1995,3,$9.6B,17,10/4/95,2200Z,30.3,30.3N,-87.1,...,---,"AFL3,IAL1",Escambia,FL,gulf breeze,12033,4.78,213.16,116.16,82.71


In [12]:
#Make a new dataframe with only relevant columns
clean_df = complete_df[['Name','Year','Category','Max Winds (kt)','Adjusted Costs','City','County','State', 'Annual Change (%)','HPI']]
clean_df

Unnamed: 0,Name,Year,Category,Max Winds (kt),Adjusted Costs,City,County,State,Annual Change (%),HPI
0,Alicia,1983,3,100,$9.4B,hitchcock,Galveston,TX,2.17,164.63
1,Elena,1985,3,100,$3.8B,long beach,Harrison,MS,7.97,191.15
2,Hugo,1989,4,120,$22.7B,mount pleasant,Charleston,SC,4.13,274.79
3,Andrew,1992,5,145,$60.2B,lakes by the bay,Miami-Dade,FL,2.86,246.02
4,Opal,1995,3,100,$9.6B,gulf breeze,Escambia,FL,4.78,213.16
5,Fran,1996,3,100,$10.0B,long beach,Brunswick,NC,5.59,203.54
6,Georges,1998,2,90,$11.5B,key west,Monroe,FL,7.37,313.55
7,Georges,1998,2,90,$11.5B,biloxi,Harrison,MS,5.31,256.11
8,Floyd,1999,2,90,$12.1B,long beach,Brunswick,NC,6.14,252.09
9,Isabel,2003,2,90,$9.3B,morehead city,Carteret,NC,4.73,214.98


In [14]:
#Read CSV from PROJECT 1 MAIN notebook
complete_df.to_csv('Output_CSVs/project_data.csv', index=False)