In [2]:
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from folium import plugins
import matplotlib
import matplotlib.cm as cm
import shapefile
from shapely.geometry import shape, mapping, Point, Polygon
from zipfile import ZipFile
from io import BytesIO
import descartes

In [3]:
zipFile = ZipFile("Data/ohio_tigerfiles.zip")
filenames = [y for y in sorted(zipFile.namelist()) for ending in ['dbf', 'prj', 'shp', 'shx'] if y.endswith(ending)] 
dbf, prj, shp, shx = [BytesIO(zipFile.read(filename)) for filename in filenames]

reader = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)
attributes, geometry = [], []
field_names = [field[0] for field in reader.fields[1:]]  
for row in reader.shapeRecords():  
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))
    
gdf = gpd.GeoDataFrame(data = attributes, geometry = geometry)[["ALAND10", "GEOID10", "geometry"]]
gdf = gdf.rename(index=str, columns={"ALAND10": "Area", "GEOID10": "GEOID"})
gdf.GEOID = gdf.GEOID.astype(int)
    
gdf_original = gdf.copy(deep=True)
ohio_population_data = pd.read_csv("./Data/Ohio_Population_Data.csv")
test_data = pd.read_csv("./Data/Training.csv")

In [4]:
ohio_population_data.head()

Unnamed: 0,GEOID,2010 Total Population,Area (square miles),geometry,CountyNames
0,390410101003,2258,0.444847,"[[40.303889, -83.082549], [40.303813, -83.0823...",Delaware County
1,390410102002,1002,0.369116,"[[40.297744, -83.046934], [40.296802, -83.0467...",Delaware County
2,390410102003,2692,1.485314,"[[40.297655999999996, -83.046442], [40.2977, -...",Delaware County
3,390410102004,927,0.936026,"[[40.2826, -83.061833], [40.282747, -83.061826...",Delaware County
4,390410105201,229,2.287126,"[[40.267752, -83.11309299999999], [40.267989, ...",Delaware County


In [5]:
test_data.head()

Unnamed: 0,Latitude (X),Longitude (Y),POI_TYPE,Pop_Den,Cam_Present,Disasters,Classification (Output)
0,40.057068,-82.885518,,,,,1
1,39.974166,-83.02872,,,,,1
2,39.963572,-83.0025,,,,,1
3,39.975215,-83.00678,,,,,1
4,40.073967,-83.131485,,,,,1


In [6]:
gdf_original.head()

Unnamed: 0,Area,GEOID,geometry
0,5311368,390690003001,"POLYGON ((-84.13203 41.403125, -84.132031 41.4..."
1,798699,390690003003,"POLYGON ((-84.151071 41.363811, -84.1518729999..."
2,562170,390690003004,"POLYGON ((-84.14128699999999 41.38823, -84.141..."
3,61037940,390690002003,"POLYGON ((-84.228799 41.47878499999999, -84.22..."
4,14296034,390690004001,"POLYGON ((-84.11472499999999 41.395795, -84.11..."


In [7]:
def pointToGeoid(long, lat):
    type1 = type(long)
    type2 = type(lat)
    assert(type1 == type2), "Parameters must be the same type"
    _pnts = []
    
    if (type1 == list):
        assert(len(long) == len(lat)), "Parameters must have same length"
        for i in range(len(long)):
            _pnts.append(Point(long[i], lat[i]))
    else:
        _pnts.append(Point(long, lat))
        
    pnts = gpd.GeoDataFrame(geometry=_pnts)
    for _, row in gdf_original.iterrows():
        if pnts.within(row.geometry)[0]:
            return row.GEOID
    
    return 0.0

def getGeoidPopulation(geoid):
    population = ohio_population_data[ohio_population_data['GEOID'] == int(geoid)]["2010 Total Population"]
    return population if population.size == 1 else 0.0

def getGeoidArea(geoid):
    area = ohio_population_data[ohio_population_data['GEOID'] == int(geoid)]["Area (square miles)"]
    return area if area.size == 1 else 0.0

def getGeoidCountyName(geoid):
    county = ohio_population_data[ohio_population_data['GEOID'] == int(geoid)]["CountyNames"]
    return county if county.size == 1 else ""

# Returns copy of df with GEOID and data relevant to GEOID to df
# If lat long not associated with a GEOID, population and area = 0.0 and CountyName = "" 
# Precondition: df is a dataframe
def addGeoidColumns(df):
    assert('Longitude (Y)' in df.columns), "Cannot find longitude column"
    assert('Latitude (X)' in df.columns), "Cannot find latitude column"
    df_copy = df.copy(deep=True)
    
    df_copy['GEOID'] = df_copy.apply(lambda x: pointToGeoid(x['Longitude (Y)'], x['Latitude (X)']), axis=1)
    df_copy['2010 Total Population'] = df_copy.apply(lambda x: getGeoidPopulation(x['GEOID']), axis=1)
    df_copy['Area (square miles)'] = df_copy.apply(lambda x: getGeoidArea(x['GEOID']), axis=1)
    df_copy['CountyNames'] = df_copy.apply(lambda x: getGeoidCountyName(x['GEOID']), axis=1)
    return df_copy

In [8]:
#Long, Lat = (-82.885518, 40.057068)

modified_data = addGeoidColumns(test_data)

ValueError: Wrong number of items passed 51, placement implies 1

In [None]:
modified_data.head()

In [None]:
# Credit to http://andrewgaidus.com/Reading_Zipped_Shapefiles/

zipFile = ZipFile("Data/ohio_tigerfiles.zip")
filenames = [y for y in sorted(zipFile.namelist()) for ending in ['dbf', 'prj', 'shp', 'shx'] if y.endswith(ending)] 
dbf, prj, shp, shx = [BytesIO(zipFile.read(filename)) for filename in filenames]

reader = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)
attributes, geometry = [], []
field_names = [field[0] for field in reader.fields[1:]]  
for row in reader.shapeRecords():  
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))  
    

In [None]:
gdf = gpd.GeoDataFrame(data = attributes, geometry = geometry)[["ALAND10", "GEOID10", "geometry"]]
gdf = gdf.rename(index=str, columns={"ALAND10": "Area", "GEOID10": "GEOID"})
block_coord_array = []
for _, row in gdf.iterrows():
    row_coord_array = []
    for coord in mapping(row['geometry'])['coordinates'][0]:
        correct_coord = reversed(coord)
        row_coord_array.append(list(correct_coord))
    
    block_coord_array.append(row_coord_array)

In [None]:
gdf.head(3)

In [None]:
coord_array = []
for coord in mapping(a)['coordinates'][0]:
    correct_coord = reversed(coord)
    coord_array.append(list(correct_coord))

In [None]:
secondmap = folium.Map(
    location=[39.964955, -83.028636],
    zoom_start=12
)

# Limiting to first 470 census block groups b/c laptop can't load all 877
for block in block_coord_array[:470]:
    folium.Polygon(
        locations=block,
        fill=True
    ).add_to(secondmap)

secondmap

In [None]:
ohio_population_data.head()
#export_ohio = ohio_population_data.to_csv(r'./Data/Ohio_Population_Data.csv', index = None)