In [1]:
import numpy as np
import pandas as pd

In [2]:
"""
Looking at population density and distribution of people

Dataset:
Base Zoning: http://opendata.columbus.gov/datasets/96f7642a62f84db997f9e1db4a776995_4
    - Can look at zoning to see which locations are more populated
    
BZA Zoning Variances: http://opendata.columbus.gov/datasets/19786dd084e644a4aea6b33f867dd631_1
    - More Zoning?
    
Recommended Land Use: http://opendata.columbus.gov/datasets/26f0606f94db4c07a63aef3cc8927c9b_21
    - Where we can build charging stations?
    
Population Density Map: https://apps.morpc.org/census2010/


Webscraping data:
    - https://github.com/ikhlaqsidhu/data-x/blob/master/03-tools-webscraping-crawling_api_afo/notebook-webscraping_v4.ipynb

API to convert Lat/Long to census block
    - https://geo.fcc.gov/api/census/#!/area/get_area
    
Conversion for GEOIDs
    - https://www.census.gov/geo/reference/codes/cou.html
    - https://www.census.gov/geo/reference/geoidentifiers.html (General info on how it's generated)
    
CountyCode + Lat/Long?
    - https://www.census.gov/geo/maps-data/data/gazetteer2017.html

# TODO: 
- Find more granularity in terms of where people are located (parse GEOID more)
- Visualize population density
- Predicted population through 2019?
- Visualize population density changes over the years?

""";

In [3]:
# Read the Columbus population workbook and keep only its first two columns
# (GEOID and "2010 Total Population" in the preview this cell displays).
census_data = pd.read_excel("Data/Columbus_Population.xlsx").iloc[:, :2]
census_data.head(n=5)

Unnamed: 0,GEOID,2010 Total Population
0,390410101003,2258
1,390410102002,1002
2,390410102003,2692
3,390410102004,927
4,390410105201,229


In [4]:
# Load the Ohio GEOID lookup workbook; geoidToCountyLatLong later reads its
# GEOID, NAME, ALAND_SQMI, INTPTLAT and INTPTLONG columns.
geoid_conversion = pd.read_excel("Data/Ohio_GEOID_Conversion.xlsx")
geoid_conversion.head(n=5)

Unnamed: 0,USPS,GEOID,ANSICODE,NAME,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,OH,39001,1074014,Adams County,1512208934,6165939,583.867,2.381,38.834468,-83.478082
1,OH,39003,1074015,Allen County,1042470095,11266164,402.5,4.35,40.771627,-84.106103
2,OH,39005,1074016,Ashland County,1095444134,9962880,422.953,3.847,40.843273,-82.270127
3,OH,39007,1074017,Ashtabula County,1818360011,1724498213,702.073,665.833,41.906637,-80.745592
4,OH,39009,1074018,Athens County,1304383737,12463875,503.625,4.812,39.332604,-82.045844


In [5]:
### For each GEOID in census_data, get the county name, latitude, and longitude of that GEOID

def geoidToCountyLatLong(geoid):
    """Return the county record matching a census GEOID.

    The county code is the GEOID with its last seven digits removed
    (e.g. 390410101003 -> 39041). It is matched against the "GEOID" column
    of the notebook-level ``geoid_conversion`` table.

    Parameters
    ----------
    geoid : int or str of digits
        Census GEOID such as the values in ``census_data``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``geoid_conversion`` whose GEOID equals the county code,
        restricted to the columns NAME, ALAND_SQMI, INTPTLAT and INTPTLONG.
        Empty when no county matches.
    """
    # Floor division keeps the arithmetic in exact integers; the earlier true
    # division round-tripped through a float before truncating.
    countyCode = int(geoid) // 10000000
    isCounty = geoid_conversion["GEOID"] == countyCode
    # Single .loc call instead of chained [mask][columns] indexing.
    return geoid_conversion.loc[isCounty, ["NAME", "ALAND_SQMI", "INTPTLAT", "INTPTLONG"]]

# The first column of census_data holds the GEOIDs; look up each one's county.
geoids = census_data[census_data.columns[0]]
# Each element is the DataFrame slice returned by geoidToCountyLatLong.
countyRows = geoids.apply(geoidToCountyLatLong)

In [6]:
### Combine all county data, add it to census_data
# Stack every per-GEOID lookup in one pass. DataFrame.append was removed in
# pandas 2.0, and appending inside a loop re-copied the growing frame each time.
county_frames = list(countyRows)
geoid_data = pd.concat(county_frames, ignore_index=True) if county_frames else pd.DataFrame()

# The join below lines rows up purely by position, so a GEOID that matched zero
# or several counties would silently shift every later row onto the wrong county.
assert all(len(frame) == 1 for frame in county_frames), \
    "each GEOID must match exactly one county row in geoid_conversion"

# NOTE: rename(index=str) also turns the row labels into strings, as before.
census_data = census_data.join(geoid_data).rename(
    index=str,
    columns={
        "NAME": "CountyName",
        "ALAND_SQMI": "Area (sq. mi)",
        "INTPTLAT": "Latitude",
        "INTPTLONG": "Longitude",
    },
)

In [7]:
# List the distinct counties present, then preview the enriched table.
print(census_data["CountyName"].unique())
census_data.head(n=5)

['Delaware County' 'Fairfield County' 'Fayette County' 'Franklin County'
 'Knox County' 'Licking County' 'Madison County' 'Marion County'
 'Morrow County' 'Pickaway County' 'Ross County' 'Union County']


Unnamed: 0,GEOID,2010 Total Population,CountyName,Area (sq. mi),Latitude,Longitude
0,390410101003,2258,Delaware County,443.194,40.278941,-83.007462
1,390410102002,1002,Delaware County,443.194,40.278941,-83.007462
2,390410102003,2692,Delaware County,443.194,40.278941,-83.007462
3,390410102004,927,Delaware County,443.194,40.278941,-83.007462
4,390410105201,229,Delaware County,443.194,40.278941,-83.007462


In [8]:
# Group the census rows by county. Grouping by the bare column name (rather
# than a one-element list) keeps the group keys scalar, so the scalar lookup
# below does not trip the length-1 list-like deprecation in pandas >= 2.2.
county_grouped_data = census_data.groupby("CountyName")
county_grouped_data.get_group("Franklin County")

Unnamed: 0,GEOID,2010 Total Population,CountyName,Area (sq. mi),Latitude,Longitude
181,390490001101,1080,Franklin County,532.378,39.969575,-83.006865
182,390490001102,857,Franklin County,532.378,39.969575,-83.006865
183,390490001103,648,Franklin County,532.378,39.969575,-83.006865
184,390490001104,759,Franklin County,532.378,39.969575,-83.006865
185,390490001201,789,Franklin County,532.378,39.969575,-83.006865
186,390490001202,1668,Franklin County,532.378,39.969575,-83.006865
187,390490001203,705,Franklin County,532.378,39.969575,-83.006865
188,390490002101,798,Franklin County,532.378,39.969575,-83.006865
189,390490002102,640,Franklin County,532.378,39.969575,-83.006865
190,390490002103,611,Franklin County,532.378,39.969575,-83.006865
