## County, Mobile Home Parks & StormReady Data Cleanup
### County Census Data
#### https://www.ers.usda.gov/data-products/county-level-data-sets/

In [1]:
# Import Dependencies
import glob
import os
import pandas as pd

In [2]:
# Read in CSV file
poppath = '../Resources/CountyData/PopulationEstimates.csv'
pop_df = pd.read_csv(poppath, encoding="utf-8")
pop_df.head()

Unnamed: 0,FIPS,State,Area_Name,Rural-urban_Continuum Code_2003,Rural-urban_Continuum Code_2013,Urban_Influence_Code_2003,Urban_Influence_Code_2013,Economic_typology_2015,CENSUS_2010_POP,ESTIMATES_BASE_2010,...,R_DOMESTIC_MIG_2015,R_DOMESTIC_MIG_2016,R_DOMESTIC_MIG_2017,R_NET_MIG_2011,R_NET_MIG_2012,R_NET_MIG_2013,R_NET_MIG_2014,R_NET_MIG_2015,R_NET_MIG_2016,R_NET_MIG_2017
0,0,US,United States,,,,,,308745538,308758105,...,0.0,0.0,0.0,2.7,2.9,2.9,3.2,3.5,3.5,3.4
1,1000,AL,Alabama,,,,,,4779736,4780135,...,-0.3,-0.4,0.8,0.5,0.9,1.4,0.7,0.7,0.6,1.7
2,1001,AL,Autauga County,2.0,2.0,2.0,2.0,0.0,54571,54571,...,-2.0,4.8,1.0,5.9,-6.1,-4.1,2.1,-1.7,5.1,1.3
3,1003,AL,Baldwin County,4.0,3.0,5.0,2.0,5.0,182265,182265,...,17.0,20.5,22.4,16.3,17.2,22.6,20.4,17.9,21.3,23.2
4,1005,AL,Barbour County,6.0,6.0,6.0,6.0,3.0,27457,27457,...,-16.2,-18.8,-19.0,0.3,-6.8,-8.0,-5.5,-16.4,-18.9,-19.2


In [3]:
# Check columns for next step
pop_df.columns

Index(['FIPS', 'State', 'Area_Name', 'Rural-urban_Continuum Code_2003',
       'Rural-urban_Continuum Code_2013', 'Urban_Influence_Code_2003',
       'Urban_Influence_Code_2013', 'Economic_typology_2015',
       'CENSUS_2010_POP', 'ESTIMATES_BASE_2010',
       ...
       'R_DOMESTIC_MIG_2015', 'R_DOMESTIC_MIG_2016', 'R_DOMESTIC_MIG_2017',
       'R_NET_MIG_2011', 'R_NET_MIG_2012', 'R_NET_MIG_2013', 'R_NET_MIG_2014',
       'R_NET_MIG_2015', 'R_NET_MIG_2016', 'R_NET_MIG_2017'],
      dtype='object', length=133)

In [4]:
# Remove unnecessary columns
pop_df = pop_df[['FIPS', 'State', 'Area_Name', 'POP_ESTIMATE_2011', 'POP_ESTIMATE_2011', 'POP_ESTIMATE_2012', 
                 'POP_ESTIMATE_2013', 'POP_ESTIMATE_2014', 'POP_ESTIMATE_2015', 'POP_ESTIMATE_2016', 
                 'POP_ESTIMATE_2017']]

pop_df = pop_df.rename(columns={'FIPS': 'COUNTY_ID'})

pop_df.head()

Unnamed: 0,COUNTY_ID,State,Area_Name,POP_ESTIMATE_2011,POP_ESTIMATE_2011.1,POP_ESTIMATE_2012,POP_ESTIMATE_2013,POP_ESTIMATE_2014,POP_ESTIMATE_2015,POP_ESTIMATE_2016,POP_ESTIMATE_2017
0,0,US,United States,311644280,311644280,313993272,316234505,318622525,321039839,323405935,325719178
1,1000,AL,Alabama,4798649,4798649,4813946,4827660,4840037,4850858,4860545,4874747
2,1001,AL,Autauga County,55199,55199,54927,54695,54864,54838,55278,55504
3,1003,AL,Baldwin County,186534,186534,190048,194736,199064,202863,207509,212628
4,1005,AL,Barbour County,27351,27351,27175,26947,26749,26264,25774,25270


In [5]:
# Export the clean CSV
pop_df.to_csv('../Resources/PopulationEstimatesFINAL.csv')

### Mobile Home Parks Data
#### https://hifld-geoplatform.opendata.arcgis.com/datasets/mobile-home-parks?geometry=-233.525%2C24.207%2C3.955%2C65.072

In [6]:
# Read in CSV file
mobilehomes = '../Resources/MobileHomeData/Mobile_Home_Parks.csv'
mobilehomes_df = pd.read_csv(mobilehomes, encoding="utf-8")
mobilehomes_df.head()

Unnamed: 0,X,Y,OBJECTID,MHPID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,NAICS_CODE,NAICS_DESC,SOURCE,SOURCEDATE,VAL_METHOD,VAL_DATE,WEBSITE,UNITS,SIZE,REVGEOFLAG
0,-81.462908,28.711371,1001,3170,WEKIWA SPRINGS STATE PARK,1800 WEKIWA CIRCLE,APOPKA,FL,32712,NOT AVAILABLE,...,721211,RECREATIONAL VEHICLE PARKS,http://www.floridahealth.gov/%5C/environmental...,2018-05-21T00:00:00.000Z,IMAGERY ONLY,2015-11-03T00:00:00.000Z,NOT AVAILABLE,-999,SMALL (<50),NO
1,-81.375434,28.50921,1002,3178,SABAL PALM MOBILE HOME PARK,164 E PINELOCH,ORLANDO,FL,32806,NOT AVAILABLE,...,531190,RESIDENTIAL TRAILER PARKS,http://www.doh.state.fl.us/Environment/program...,2013-01-14T00:00:00.000Z,IMAGERY ONLY,2018-06-08T00:00:00.000Z,NOT AVAILABLE,-999,SMALL (<50),NO
2,-81.12022,28.549894,1003,3179,K BRANTLEY MHP,17488 MONROE PARTIN ROAD,ORLANDO,FL,32833,NOT AVAILABLE,...,531190,RESIDENTIAL TRAILER PARKS,http://www.floridahealth.gov/%5C/environmental...,2018-05-21T00:00:00.000Z,IMAGERY ONLY,2015-11-03T00:00:00.000Z,NOT AVAILABLE,13,SMALL (<50),NO
3,-81.361677,28.430476,1004,3185,CHISOLM TRAILER ESTATES,9400 8TH AVENUE,ORLANDO,FL,32824,NOT AVAILABLE,...,531190,RESIDENTIAL TRAILER PARKS,http://www.floridahealth.gov/%5C/environmental...,2018-05-21T00:00:00.000Z,IMAGERY ONLY,2015-11-03T00:00:00.000Z,NOT AVAILABLE,10,SMALL (<50),NO
4,-97.162062,26.074849,1005,42410,ISLA BLANCA PARK,33174 STATE PARK RD 100,SOUTH PADRE ISLAND,TX,78597,NOT AVAILABLE,...,721211,RECREATIONAL VEHICLE PARKS,IMAGERY,2015-10-12T00:00:00.000Z,BOTH INTERNET AND IMAGERY,2015-10-12T00:00:00.000Z,HTTP://WWW.CAMERONCOUNTYPARKS.COM/ISLABLANCAPARK,-999,LARGE (>100),NO


In [7]:
# Check columns for next step
mobilehomes_df.columns

Index(['X', 'Y', 'OBJECTID', 'MHPID', 'NAME', 'ADDRESS', 'CITY', 'STATE',
       'ZIP', 'ZIP4', 'TELEPHONE', 'TYPE', 'STATUS', 'COUNTY', 'COUNTYFIPS',
       'COUNTRY', 'LATITUDE', 'LONGITUDE', 'NAICS_CODE', 'NAICS_DESC',
       'SOURCE', 'SOURCEDATE', 'VAL_METHOD', 'VAL_DATE', 'WEBSITE', 'UNITS',
       'SIZE', 'REVGEOFLAG'],
      dtype='object')

In [8]:
# Remove unnecessary columns & change COUNTYFIPS to COUNTY_ID
mobilehomes_df = mobilehomes_df[['NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'TYPE', 'STATUS', 
                                 'COUNTY', 'COUNTYFIPS', 'LATITUDE', 'LONGITUDE', 'SIZE']]

mobilehomes_df = mobilehomes_df.rename(columns={'COUNTYFIPS': 'COUNTY_ID'})

mobilehomes_df.head()

Unnamed: 0,NAME,ADDRESS,CITY,STATE,ZIP,TYPE,STATUS,COUNTY,COUNTY_ID,LATITUDE,LONGITUDE,SIZE
0,WEKIWA SPRINGS STATE PARK,1800 WEKIWA CIRCLE,APOPKA,FL,32712,RECREATIONAL VEHICLE PARK,OPEN,ORANGE,12095,28.711371,-81.462908,SMALL (<50)
1,SABAL PALM MOBILE HOME PARK,164 E PINELOCH,ORLANDO,FL,32806,MOBILE HOME PARK,OPEN,ORANGE,12095,28.50921,-81.375434,SMALL (<50)
2,K BRANTLEY MHP,17488 MONROE PARTIN ROAD,ORLANDO,FL,32833,MOBILE HOME PARK,OPEN,ORANGE,12095,28.549894,-81.12022,SMALL (<50)
3,CHISOLM TRAILER ESTATES,9400 8TH AVENUE,ORLANDO,FL,32824,MOBILE HOME PARK,OPEN,ORANGE,12095,28.430476,-81.361677,SMALL (<50)
4,ISLA BLANCA PARK,33174 STATE PARK RD 100,SOUTH PADRE ISLAND,TX,78597,RECREATIONAL VEHICLE PARK,OPEN,CAMERON,48061,26.074849,-97.162062,LARGE (>100)


In [9]:
# Remove any CLOSED mobile home parks
open_mobilehomes = mobilehomes_df.loc[mobilehomes_df["STATUS"] == "OPEN", :]
open_mobilehomes.head()

Unnamed: 0,NAME,ADDRESS,CITY,STATE,ZIP,TYPE,STATUS,COUNTY,COUNTY_ID,LATITUDE,LONGITUDE,SIZE
0,WEKIWA SPRINGS STATE PARK,1800 WEKIWA CIRCLE,APOPKA,FL,32712,RECREATIONAL VEHICLE PARK,OPEN,ORANGE,12095,28.711371,-81.462908,SMALL (<50)
1,SABAL PALM MOBILE HOME PARK,164 E PINELOCH,ORLANDO,FL,32806,MOBILE HOME PARK,OPEN,ORANGE,12095,28.50921,-81.375434,SMALL (<50)
2,K BRANTLEY MHP,17488 MONROE PARTIN ROAD,ORLANDO,FL,32833,MOBILE HOME PARK,OPEN,ORANGE,12095,28.549894,-81.12022,SMALL (<50)
3,CHISOLM TRAILER ESTATES,9400 8TH AVENUE,ORLANDO,FL,32824,MOBILE HOME PARK,OPEN,ORANGE,12095,28.430476,-81.361677,SMALL (<50)
4,ISLA BLANCA PARK,33174 STATE PARK RD 100,SOUTH PADRE ISLAND,TX,78597,RECREATIONAL VEHICLE PARK,OPEN,CAMERON,48061,26.074849,-97.162062,LARGE (>100)


In [10]:
# Export the clean CSV
open_mobilehomes.to_csv('../Resources/Mobile_Home_ParksFINAL.csv')

### Merging County Census Data + Mobile Home Parks Data

In [11]:
# Get a count of how many mobile home parks there are for each COUNTY_ID
county_mobilehomes = open_mobilehomes["COUNTY_ID"].value_counts()
county_mobilehomes.head()

6037     595
12057    485
12105    447
4013     422
6073     389
Name: COUNTY_ID, dtype: int64

In [12]:
# Create a new DataFrame with the count of mobile home parks by COUNTY_ID
mobilehome_df = pd.DataFrame({"MOBILE_HOME_PARKS": county_mobilehomes})
mobilehome_df = mobilehome_df.reset_index()
mobilehome_df = mobilehome_df.rename(columns={'index': 'COUNTY_ID'})
mobilehome_df.head()

Unnamed: 0,COUNTY_ID,MOBILE_HOME_PARKS
0,6037,595
1,12057,485
2,12105,447
3,4013,422
4,6073,389


In [13]:
# Merge the new DataFrame with the County Census DataFrame
county_df = pd.merge(pop_df, mobilehome_df, on="COUNTY_ID", how="outer")
county_df

Unnamed: 0,COUNTY_ID,State,Area_Name,POP_ESTIMATE_2011,POP_ESTIMATE_2011.1,POP_ESTIMATE_2012,POP_ESTIMATE_2013,POP_ESTIMATE_2014,POP_ESTIMATE_2015,POP_ESTIMATE_2016,POP_ESTIMATE_2017,MOBILE_HOME_PARKS
0,0,US,United States,311644280,311644280,313993272,316234505,318622525,321039839,323405935,325719178,
1,1000,AL,Alabama,4798649,4798649,4813946,4827660,4840037,4850858,4860545,4874747,
2,1001,AL,Autauga County,55199,55199,54927,54695,54864,54838,55278,55504,28.0
3,1003,AL,Baldwin County,186534,186534,190048,194736,199064,202863,207509,212628,102.0
4,1005,AL,Barbour County,27351,27351,27175,26947,26749,26264,25774,25270,7.0
5,1007,AL,Bibb County,22745,22745,22658,22503,22533,22561,22633,22668,4.0
6,1009,AL,Blount County,57562,57562,57595,57623,57546,57590,57562,58013,7.0
7,1011,AL,Bullock County,10675,10675,10612,10549,10673,10419,10441,10309,3.0
8,1013,AL,Butler County,20880,20880,20688,20372,20327,20141,19965,19825,9.0
9,1015,AL,Calhoun County,117785,117785,117219,116482,115941,115505,114980,114728,44.0


### StormReady Data
#### https://www.weather.gov/stormready/communities

In [14]:
# Read in CSV file
stormready = '../Resources/StormReadyDataFINAL.csv'
stormready_df = pd.read_csv(stormready, encoding="utf-8")
stormready_df.head()

Unnamed: 0.1,Unnamed: 0,COUNTY_NAME,Unnamed: 2,STATE,STORM_READY
0,2,Autauga,County,AL,StormReady
1,3,Baldwin,County,AL,StormReady
2,4,Blount,County,AL,StormReady
3,5,Calhoun,County,AL,StormReady
4,6,Cherokee,County,AL,StormReady


In [15]:
# Drop Unnamed: 0 column
stormready_df = stormready_df[["COUNTY_NAME", "Unnamed: 2", "STATE", "STORM_READY"]]
stormready_df.head()

Unnamed: 0,COUNTY_NAME,Unnamed: 2,STATE,STORM_READY
0,Autauga,County,AL,StormReady
1,Baldwin,County,AL,StormReady
2,Blount,County,AL,StormReady
3,Calhoun,County,AL,StormReady
4,Cherokee,County,AL,StormReady


In [16]:
# Create a new COUNTY_NAME column
stormready_df["COUNTY_NAME_NEW"] = stormready_df["COUNTY_NAME"].map(str) + stormready_df["Unnamed: 2"].map(str)
stormready_df.head()

Unnamed: 0,COUNTY_NAME,Unnamed: 2,STATE,STORM_READY,COUNTY_NAME_NEW
0,Autauga,County,AL,StormReady,Autauga County
1,Baldwin,County,AL,StormReady,Baldwin County
2,Blount,County,AL,StormReady,Blount County
3,Calhoun,County,AL,StormReady,Calhoun County
4,Cherokee,County,AL,StormReady,Cherokee County


In [17]:
# Clean up DataFrame
stormready_df = stormready_df[["COUNTY_NAME_NEW", "STATE", "STORM_READY"]]
stormready_df = stormready_df.rename(columns={"COUNTY_NAME_NEW":"Area_Name", "STATE": "State"})
stormready_df.head()

Unnamed: 0,Area_Name,State,STORM_READY
0,Autauga County,AL,StormReady
1,Baldwin County,AL,StormReady
2,Blount County,AL,StormReady
3,Calhoun County,AL,StormReady
4,Cherokee County,AL,StormReady


In [18]:
county_df.head()

Unnamed: 0,COUNTY_ID,State,Area_Name,POP_ESTIMATE_2011,POP_ESTIMATE_2011.1,POP_ESTIMATE_2012,POP_ESTIMATE_2013,POP_ESTIMATE_2014,POP_ESTIMATE_2015,POP_ESTIMATE_2016,POP_ESTIMATE_2017,MOBILE_HOME_PARKS
0,0,US,United States,311644280,311644280,313993272,316234505,318622525,321039839,323405935,325719178,
1,1000,AL,Alabama,4798649,4798649,4813946,4827660,4840037,4850858,4860545,4874747,
2,1001,AL,Autauga County,55199,55199,54927,54695,54864,54838,55278,55504,28.0
3,1003,AL,Baldwin County,186534,186534,190048,194736,199064,202863,207509,212628,102.0
4,1005,AL,Barbour County,27351,27351,27175,26947,26749,26264,25774,25270,7.0


In [19]:
# Merge the new DataFrame with the County Census DataFrame
final_countydf = pd.merge(county_df, stormready_df, on=['Area_Name', 'State'], how="outer")
final_countydf.head()

Unnamed: 0,COUNTY_ID,State,Area_Name,POP_ESTIMATE_2011,POP_ESTIMATE_2011.1,POP_ESTIMATE_2012,POP_ESTIMATE_2013,POP_ESTIMATE_2014,POP_ESTIMATE_2015,POP_ESTIMATE_2016,POP_ESTIMATE_2017,MOBILE_HOME_PARKS,STORM_READY
0,0.0,US,United States,311644280,311644280,313993272,316234505,318622525,321039839,323405935,325719178,,
1,1000.0,AL,Alabama,4798649,4798649,4813946,4827660,4840037,4850858,4860545,4874747,,
2,1001.0,AL,Autauga County,55199,55199,54927,54695,54864,54838,55278,55504,28.0,StormReady
3,1003.0,AL,Baldwin County,186534,186534,190048,194736,199064,202863,207509,212628,102.0,StormReady
4,1005.0,AL,Barbour County,27351,27351,27175,26947,26749,26264,25774,25270,7.0,


In [20]:
# Fill NaN values in MOBILE_HOME_PARKS with 0 + make STORM_READY boolean: 0 - No, 1 - Yes
final_countydf['MOBILE_HOME_PARKS'] = final_countydf['MOBILE_HOME_PARKS'].fillna(0)
final_countydf['STORM_READY'] = (final_countydf['STORM_READY'] == "StormReady").astype(int)
final_countydf.head()

Unnamed: 0,COUNTY_ID,State,Area_Name,POP_ESTIMATE_2011,POP_ESTIMATE_2011.1,POP_ESTIMATE_2012,POP_ESTIMATE_2013,POP_ESTIMATE_2014,POP_ESTIMATE_2015,POP_ESTIMATE_2016,POP_ESTIMATE_2017,MOBILE_HOME_PARKS,STORM_READY
0,0.0,US,United States,311644280,311644280,313993272,316234505,318622525,321039839,323405935,325719178,0.0,0
1,1000.0,AL,Alabama,4798649,4798649,4813946,4827660,4840037,4850858,4860545,4874747,0.0,0
2,1001.0,AL,Autauga County,55199,55199,54927,54695,54864,54838,55278,55504,28.0,1
3,1003.0,AL,Baldwin County,186534,186534,190048,194736,199064,202863,207509,212628,102.0,1
4,1005.0,AL,Barbour County,27351,27351,27175,26947,26749,26264,25774,25270,7.0,0


In [21]:
# Export the clean CSV
final_countydf.to_csv('../Resources/CountyDataFrame.csv')