In [1]:
import numpy as np
import pandas as pd

In [2]:
"""
Looking at population density and distribution of people

Dataset:
Base Zoning: http://opendata.columbus.gov/datasets/96f7642a62f84db997f9e1db4a776995_4
    - Can look at zoning to see which locations are more populated
    
BZA Zoning Variances: http://opendata.columbus.gov/datasets/19786dd084e644a4aea6b33f867dd631_1
    - More Zoning?
    
Recommended Land Use: http://opendata.columbus.gov/datasets/26f0606f94db4c07a63aef3cc8927c9b_21
    - Where we can build charging stations?
    
Population Density Map: https://apps.morpc.org/census2010/


Webscraping data:
    - https://github.com/ikhlaqsidhu/data-x/blob/master/03-tools-webscraping-crawling_api_afo/notebook-webscraping_v4.ipynb

API to convert Lat/Long to census block
    - https://geo.fcc.gov/api/census/#!/area/get_area
    
Conversion for GEOIDs
    - https://www.census.gov/geo/reference/codes/cou.html
    - https://www.census.gov/geo/reference/geoidentifiers.html (General info on how it's generated)

# TODO: find a way to associate each GEOID with a latitude/longitude, and from there with a city/county.

""";

In [3]:
%ls

[1m[34mData[m[m/                   PT_data.ipynb           README.md
NS_data.ipynb           Points_of_Interest.csv


In [4]:
# Load the census spreadsheet; the path is relative to the notebook's working directory.
census_path = "Data/Columbus_Population.xlsx"
census_data = pd.read_excel(census_path)

In [5]:
# Report how many rows were loaded, then preview the first few of them.
row_count = census_data.shape[0]
print(row_count)
census_data.head()

1489


Unnamed: 0,GEOID,2010 Total Population,2010 White Population,2010 African American Population,2010 Asian Population,2010 Other Races Population,2010 Hispanic Population,2010 Total Households,2010 Occupied Households,2010 Vacant Households,2000 Total Population,2000 White Population,2000 African American Population,2000 Asian Population,2000 Other Races Population,2000 Total Housing Units,2000 Occupied Housing Units,2000 Vacant Housing Units
0,390410101003,2258,1896,156,129,77,63,449,397,52,2579,2305,157,66,51,447,414,33
1,390410102002,1002,953,18,3,28,24,454,423,31,1085,1038,13,1,33,443,425,18
2,390410102003,2692,2446,127,30,89,82,951,932,19,1180,1137,23,9,11,524,439,85
3,390410102004,927,834,43,1,49,33,369,345,24,898,838,18,4,38,391,346,45
4,390410105201,229,217,3,2,7,3,107,98,9,218,211,0,0,7,102,98,4


In [6]:
# Read the county conversion table and keep only its first four columns.
conversion_raw = pd.read_csv("Data/OhioGEOIDConversion.csv")
geoid_conversion = conversion_raw.iloc[:, :4]
geoid_conversion.head()

Unnamed: 0,State,StateCode,CountyCode,CountyName
0,OH,39,1,Adams County
1,OH,39,3,Allen County
2,OH,39,5,Ashland County
3,OH,39,7,Ashtabula County
4,OH,39,9,Athens County


In [7]:
def geoidToCountyCode(geoid):
    """Extract the 3-digit county code from a block-group GEOID.

    The GEOID is treated as an integer whose last 7 digits identify the
    tract and block group, preceded by a 3-digit county code and the state
    code (e.g. 390410101003 -> state 39, county 41).

    Pure integer arithmetic is used so the result is exact and does not
    depend on the state prefix: Ohio GEOIDs give the same codes as before,
    and GEOIDs from other states are handled as well.

    Parameters
    ----------
    geoid : int (or anything ``int()`` accepts, e.g. a numpy integer or a
        digit string)
        Block-group GEOID.

    Returns
    -------
    int
        County code in the range 0-999.
    """
    # Drop the 7 trailing tract/block-group digits, then keep the 3 county digits.
    return (int(geoid) // 10_000_000) % 1000

def geoidToCountyName(geoid):
    """Return the county name for a GEOID, looked up in ``geoid_conversion``.

    The county code is derived with ``geoidToCountyCode`` and matched against
    the ``CountyCode`` column of the module-level ``geoid_conversion`` table.
    ``.item()`` requires exactly one matching row and raises ``ValueError``
    otherwise.
    """
    code = geoidToCountyCode(geoid)
    is_match = geoid_conversion["CountyCode"] == code
    return geoid_conversion.loc[is_match, "CountyName"].item()

# Derive a county code and county name for every row from its GEOID.
# The column is selected by name rather than by position so the lookup does
# not depend on column order.
geoids = census_data["GEOID"]
countyCodes = geoids.apply(geoidToCountyCode)
countyNames = geoids.apply(geoidToCountyName)

# DataFrame.insert raises ValueError when the column already exists, which made
# this cell fail on a second run. Dropping any columns left over from an earlier
# execution first keeps the cell safely re-runnable.
census_data = census_data.drop(columns=["CountyNames", "CountyCode"], errors="ignore")
census_data.insert(1, "CountyNames", countyNames)
census_data.insert(2, "CountyCode", countyCodes)
census_data.head()

Unnamed: 0,GEOID,CountyNames,CountyCode,2010 Total Population,2010 White Population,2010 African American Population,2010 Asian Population,2010 Other Races Population,2010 Hispanic Population,2010 Total Households,2010 Occupied Households,2010 Vacant Households,2000 Total Population,2000 White Population,2000 African American Population,2000 Asian Population,2000 Other Races Population,2000 Total Housing Units,2000 Occupied Housing Units,2000 Vacant Housing Units
0,390410101003,Delaware County,41,2258,1896,156,129,77,63,449,397,52,2579,2305,157,66,51,447,414,33
1,390410102002,Delaware County,41,1002,953,18,3,28,24,454,423,31,1085,1038,13,1,33,443,425,18
2,390410102003,Delaware County,41,2692,2446,127,30,89,82,951,932,19,1180,1137,23,9,11,524,439,85
3,390410102004,Delaware County,41,927,834,43,1,49,33,369,345,24,898,838,18,4,38,391,346,45
4,390410105201,Delaware County,41,229,217,3,2,7,3,107,98,9,218,211,0,0,7,102,98,4


In [8]:
# Aggregate with sum() inside each (county name, GEOID) pair; the result is
# indexed by those two keys.
# NOTE(review): CountyCode is not one of the grouping keys, so it is summed
# along with the population counts — confirm that is intended.
group_keys = ["CountyNames", "GEOID"]
census_by_county = census_data.groupby(group_keys).sum()
census_by_county

Unnamed: 0_level_0,Unnamed: 1_level_0,CountyCode,2010 Total Population,2010 White Population,2010 African American Population,2010 Asian Population,2010 Other Races Population,2010 Hispanic Population,2010 Total Households,2010 Occupied Households,2010 Vacant Households,2000 Total Population,2000 White Population,2000 African American Population,2000 Asian Population,2000 Other Races Population,2000 Total Housing Units,2000 Occupied Housing Units,2000 Vacant Housing Units
CountyNames,GEOID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Delaware County,390410101001,41,1277,1209,25,5,38,18,576,539,37,1381,1316,30,8,27,573,547,26
Delaware County,390410101002,41,1721,1613,50,19,39,32,822,724,98,1716,1638,17,16,45,799,735,64
Delaware County,390410101003,41,2258,1896,156,129,77,63,449,397,52,2579,2305,157,66,51,447,414,33
Delaware County,390410102001,41,859,819,19,1,20,16,414,373,41,800,777,5,2,16,340,320,20
Delaware County,390410102002,41,1002,953,18,3,28,24,454,423,31,1085,1038,13,1,33,443,425,18
Delaware County,390410102003,41,2692,2446,127,30,89,82,951,932,19,1180,1137,23,9,11,524,439,85
Delaware County,390410102004,41,927,834,43,1,49,33,369,345,24,898,838,18,4,38,391,346,45
Delaware County,390410104201,41,556,549,0,0,7,5,261,249,12,534,518,3,3,10,261,256,5
Delaware County,390410104202,41,2370,2181,87,17,85,59,918,868,50,2113,2000,57,22,34,803,776,27
Delaware County,390410104211,41,2182,2059,66,2,55,44,1117,970,147,1203,1154,21,7,21,512,477,35


In [9]:
# Distinct county codes present in the census data.
census_data["CountyCode"].unique()

array([ 41,  45,  47,  49,  83,  89,  97, 101, 117, 129, 141, 159])