In [1]:
import numpy as np
import pandas as pd
import folium
import matplotlib
import matplotlib.cm as cm

In [2]:
"""
Looking at population density and distribution of people

Dataset:
Base Zoning: http://opendata.columbus.gov/datasets/96f7642a62f84db997f9e1db4a776995_4
    - Can look at zoning to see which locations are more populated
    
BZA Zoning Variances: http://opendata.columbus.gov/datasets/19786dd084e644a4aea6b33f867dd631_1
    - More Zoning?
    
Recommended Land Use: http://opendata.columbus.gov/datasets/26f0606f94db4c07a63aef3cc8927c9b_21
    - Where we can build charging stations?
    
Population Density Map: https://apps.morpc.org/census2010/

API to convert Lat/Long to census block
    - https://geo.fcc.gov/api/census/#!/area/get_area
    
Conversion for GEOIDs
    - https://www.census.gov/geo/reference/codes/cou.html
    - https://www.census.gov/geo/reference/geoidentifiers.html (General info on how it's generated)
    - https://geoservices.tamu.edu/Services/CensusIntersection/ (Lat/Long to Census block)
        - Current GeoID = STATE+COUNTY+TRACT+BLOCK GROUP = 2+3+6+1=12
    
CountyCode + Lat/Long?
    - https://www.census.gov/geo/maps-data/data/gazetteer2017.html

# TODO: 
- Have actual block boundaries?
- Find the units of block data
- Predicted population through 2019?
- Visualize population density changes over the years?

""";

In [3]:
# Load the population workbook and keep only its first two columns
# (the preview below shows them as GEOID and 2010 Total Population).
census_data = (
    pd.read_excel("Data/Columbus_Population.xlsx")
    .iloc[:, 0:2]
)
census_data.head(n=5)

Unnamed: 0,GEOID,2010 Total Population
0,390410101003,2258
1,390410102002,1002
2,390410102003,2692
3,390410102004,927
4,390410105201,229


In [4]:
# Load the census block-group workbook; the preview below shows one row per
# GEOID with (among others) the ALAND, INTPTLAT and INTPTLONG columns used later.
block_data = pd.read_excel(io="Data/census_block_data.xlsx")
block_data.head(n=5)

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,GEOID,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLONG
0,39,69,300,1,390690003001,Block Group 1,G5030,S,5311368,0,41.40578,-84.14859
1,39,69,300,3,390690003003,Block Group 3,G5030,S,798699,730232,41.383697,-84.133114
2,39,69,300,4,390690003004,Block Group 4,G5030,S,562170,0,41.38826,-84.136161
3,39,69,200,3,390690002003,Block Group 3,G5030,S,61037940,0,41.456737,-84.285244
4,39,69,400,1,390690004001,Block Group 1,G5030,S,14296034,709595,41.414053,-84.103982


In [5]:
### For each GEOID in census_data, look up the ALAND value and the latitude/longitude recorded for it in block_data

def geoidToBlockLatLong(geoid):
    """Return the rows of ``block_data`` whose GEOID equals ``geoid``.

    ``geoid`` is converted with ``int()`` before the comparison. The result is
    a DataFrame limited to the ALAND, INTPTLAT and INTPTLONG columns; it is
    empty when no row matches.
    """
    is_match = block_data["GEOID"] == int(geoid)
    return block_data.loc[is_match, ["ALAND", "INTPTLAT", "INTPTLONG"]]

# GEOIDs are taken positionally from the first column of census_data.
geoids = census_data.iloc[:, 0]
# One lookup per GEOID; every element of the result is a DataFrame slice of block_data.
blockRows = geoids.apply(geoidToBlockLatLong)

In [6]:
### Attach each GEOID's block-level columns (ALAND, INTPTLAT, INTPTLONG) to census_data
# The columns are looked up by GEOID so that every census row receives exactly
# one, correctly aligned row. The previous approach grew a frame with
# DataFrame.append (removed in pandas 2.0) and then joined by position, which
# silently shifted every later row whenever a GEOID had no match in block_data.
block_lookup = (
    block_data
    .drop_duplicates(subset="GEOID")  # reindex needs unique keys; keep the first row per GEOID
    .set_index("GEOID")[["ALAND", "INTPTLAT", "INTPTLONG"]]
)
# GEOIDs come from the first column of census_data, cast to int as the lookup key.
census_geoids = census_data.iloc[:, 0].astype("int64")
block_df = block_lookup.reindex(census_geoids)  # GEOIDs without a match become NaN rows
block_df.index = census_data.index  # same labels as census_data so join() lines rows up

ohio_population_data = census_data.join(block_df).rename(index=str, columns={"ALAND": "Area (units?)", "INTPTLAT":"Latitude", "INTPTLONG":"Longitude"})

In [7]:
# Preview the combined table: the census columns plus the renamed
# "Area (units?)", "Latitude" and "Longitude" columns.
ohio_population_data.head(n=5)

Unnamed: 0,GEOID,2010 Total Population,Area (units?),Latitude,Longitude
0,390410101003,2258,1152148,40.296735,-83.073917
1,390410102002,1002,956006,40.298514,-83.051478
2,390410102003,2692,3846946,40.309035,-83.035286
3,390410102004,927,2424295,40.290905,-83.055965
4,390410105201,229,5923630,40.280887,-83.107697


In [8]:
# Next step: attach a county name to every GEOID.
# Load the county lookup workbook, keeping the columns at positions 1-3
# (shown in the preview below as GEOID, ANSICODE and NAME).
county_data = (
    pd.read_excel("Data/Ohio_GEOID_Conversion.xlsx")
    .iloc[:, 1:4]
)
county_data.head(n=5)

Unnamed: 0,GEOID,ANSICODE,NAME
0,39001,1074014,Adams County
1,39003,1074015,Allen County
2,39005,1074016,Ashland County
3,39007,1074017,Ashtabula County
4,39009,1074018,Athens County


In [9]:
def geoidToCountyLatLong(geoid):
    """Return the ``county_data`` NAME rows for the county that ``geoid`` falls in.

    Dividing by 10**7 and truncating drops the last seven digits of the GEOID;
    what remains is compared against ``county_data["GEOID"]``. Despite the
    function name, only the NAME column is returned (no latitude/longitude).
    The result is an empty DataFrame when no county matches.
    """
    county_prefix = int(geoid / 10000000)
    name_rows = county_data.loc[county_data["GEOID"] == county_prefix, ["NAME"]]
    return name_rows

# One lookup per GEOID; each element is a DataFrame slice holding the NAME column.
countyRows = geoids.apply(geoidToCountyLatLong)

# Take exactly one county name per GEOID (NaN when the lookup found nothing) so
# the names always line up with ohio_population_data. Concatenating the slices
# with DataFrame.append (removed in pandas 2.0) produced a shorter frame as soon
# as one GEOID had no match, which broke the index assignment below.
county_names = [
    rows["NAME"].iloc[0] if len(rows) else np.nan
    for rows in countyRows
]
county_df = pd.DataFrame({"NAME": county_names})

ohio_population_data["CountyNames"] = pd.Series(county_df.NAME.values, index=ohio_population_data.index)

In [10]:
# Columbus is in Franklin County, so keep only the rows labelled with that county.
# Group by the plain column name rather than a one-element list: with a
# length-1 list key, newer pandas expects get_group to receive a 1-tuple, so
# the scalar lookup below would warn or fail there.
grouped_population_data = ohio_population_data.groupby("CountyNames")
franklin_county_data = grouped_population_data.get_group("Franklin County")
franklin_county_data.head()

Unnamed: 0,GEOID,2010 Total Population,Area (units?),Latitude,Longitude,CountyNames
181,390490001101,1080,652775,40.058622,-83.004629,Franklin County
182,390490001102,857,455974,40.05737,-83.014346,Franklin County
183,390490001103,648,276200,40.060906,-83.014081,Franklin County
184,390490001104,759,576644,40.058761,-83.024413,Franklin County
185,390490001201,789,1294135,40.045515,-83.02484,Franklin County


In [11]:
# Population divided by "Area (units?)" for every Franklin County row.
density_array = franklin_county_data["2010 Total Population"] / franklin_county_data["Area (units?)"]

# Colour-scale bounds. Rows with a zero area produce inf (or NaN for 0/0);
# Python's builtin max()/min() give order-dependent results when NaN is present,
# so reduce over the finite values only using pandas' NaN-skipping reductions.
finite_density = density_array.astype(float).replace([np.inf, -np.inf], np.nan)
fc_max_density = finite_density.max()
fc_min_density = finite_density.min()

def areaToMapArea(area, newRange=1500, newMin=100, areas=None):
    """Linearly rescale ``area`` into ``[newMin, newMin + newRange]``.

    The result is used as the circle radius on the map:

        NewValue = ((OldValue - OldMin) * NewRange) / OldRange + NewMin

    Parameters
    ----------
    area : number
        The value to rescale.
    newRange : number, default 1500
        Width of the target interval (NewMax - NewMin).
    newMin : number, default 100
        Lower bound of the target interval.
    areas : pandas.Series, optional
        Values that define OldMin/OldMax. Defaults to the "Area (units?)"
        column of ``franklin_county_data``.

    Returns
    -------
    number
        The rescaled value, or ``newMin`` when all reference areas are equal
        (the old range is zero and the formula would divide by zero).

    NOTE(review): the mapping is linear in area, not in sqrt(area), so circle
    sizes are only a relative cue. If ALAND originates from Census TIGER/Line
    files it is documented in square meters - confirm for this spreadsheet.
    """
    if areas is None:
        areas = franklin_county_data["Area (units?)"]

    # Reduce only the area column, once each; reducing the whole frame also
    # touches the string columns and was repeated three times per call.
    oldMin = areas.min()
    oldRange = areas.max() - oldMin

    if oldRange == 0:
        return newMin

    return (((area - oldMin) * newRange) / oldRange) + newMin

In [13]:
import folium
from folium import plugins

# Colour scale: normalise densities between the county's minimum and maximum
# (values outside that interval are clipped) and map them onto the "cool" colormap.
colors = matplotlib.colors.Normalize(
    vmin=fc_min_density,
    vmax=fc_max_density,
    clip=True,
)
mapper = cm.ScalarMappable(norm=colors, cmap=cm.cool)

# Base map the circles are drawn on.
# NOTE(review): the name `map` shadows Python's builtin; it is kept because the
# following cells refer to it. The centre point is presumably Columbus - confirm.
map = folium.Map(location=[39.964955, -83.028636], zoom_start=11)

# Returns a tuple for a given row: (density, area, popup text)
def getRowInfo(row):
    """Derive the values needed to draw one map circle from a data row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``iterrows``)
        Must provide "2010 Total Population", "Area (units?)" and "GEOID".

    Returns
    -------
    tuple
        ``(density, area, popup)`` where ``density`` is population divided by
        area, ``area`` is the raw "Area (units?)" value (the caller rescales it
        into a radius) and ``popup`` is the popup text for the circle.
    """
    row_density = row["2010 Total Population"] / row["Area (units?)"]
    row_area = row["Area (units?)"]
    # Popup content is rendered as HTML, where "\n" collapses to a space, so
    # the fields are separated with <br> to put each one on its own line.
    popup = "<br>".join([
        'GEOID: ' + str(row["GEOID"]),
        'Density: ' + str(row_density),
        'Area: ' + str(row_area),
    ])
    return (row_density, row_area, popup)

# Draw one circle per Franklin County row: positioned at the row's
# latitude/longitude, sized by its rescaled area and coloured by its density.
for _, county_row in franklin_county_data.iterrows():
    density, area, popup_text = getRowInfo(county_row)

    circle = folium.Circle(
        location=[county_row["Latitude"], county_row["Longitude"]],
        radius=areaToMapArea(area),
        fill=True,
        color=matplotlib.colors.rgb2hex(mapper.to_rgba(density)),
        popup=popup_text,
    )
    circle.add_to(map)

print("DISREGARD UNITS, still need to find out what they are")
# Last expression of the cell: renders the map.
map

DISREGARD UNITS, still need to find out what they are
