In [2]:
# Imports first
import csv
import time
from math import sqrt
import geopandas as gpd
import pandas as pd
import shapely
from shapely.ops import nearest_points
import numpy as np
from scipy import ndimage
from scipy.spatial import cKDTree  
import pyproj

import matplotlib
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from matplotlib.colors import ListedColormap
from matplotlib.ticker import MaxNLocator

In [3]:
# Set notebook display options
pd.set_option('display.max_rows', 500)

In [4]:
# Do housekeeping between runs
objects = ['neighborhoods',
           'schools',
           'crimes_extract',
           'crimes_header',
           'crimes',
           'police_stations_df',
           'police_stations',
           'census2k_df'
          ]

for thing in objects:
    try:
        del thing
    except NameError as e:
        print("Couldn't delete {0}".format(thing))
        print(e)
        continue

In [5]:
def distance_to_nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
#def distance_to_nearest(row, dtn_args):
     """Find the nearest point and return the corresponding value from specified column."""
     # Find the geometry that is closest
     nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]
     # Get the corresponding value from df2 (matching is based on the geometry)
     nearest_id = df2[nearest][src_column].get_values()[0]
     start_point: shapely.geometry.Point = row[geom1_col]
     end_point_series: pd.Series = df2[nearest][geom2_col]
     end_point: shapely.geometry.Point = end_point_series.iloc[0]
     dist_to_point = start_point.distance(end_point)
     return nearest_id, dist_to_point

In [6]:
# Define utility functions
def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
    """Find the nearest point and return the corresponding value from specified column."""
    # Find the geometry that is closest
    nearest = df2[geom2_col] == nearest_points(row[geom1_col], geom_union)[1]
    #nearest = df2[geom2_col] == nearest_points(df1, df2)[1]
    # Get the corresponding value from df2 (matching is based on the geometry)
    value = df2[nearest][src_column].get_values()[0]
    return value

In [7]:
def geod2utm(row):
    '''   Convert geodetic coordinates to UTM   '''
    #if zn == None :
    #    zn = lon2zone (lon)
    zn = '16T'
    datum = 'WGS84'
    lat = row['lat']
    lon = row['lon']
        
    p = pyproj.Proj(proj='utm', zone=zn, ellps=datum)
    
    X, Y = p(lon, lat)
    
    #   Return Y, X, Z
    # return Y, X, elev
    return pd.Series({'UTMx': X, 'UTMy': Y})

In [8]:
def make_utm_points(row):
    UTMx = row['UTMx']
    UTMy = row['UTMy']
    UTMPoint = shapely.geometry.Point(UTMx, UTMy)
    return UTMPoint

In [7]:
# Fix column type in neighborhoods dataset
neighborhoods = pd.read_pickle("neighborhoods.pkl")

In [8]:
neighborhoods['area_numbe'] = neighborhoods['area_numbe'].astype('float64')

In [9]:
neighborhoods.to_pickle('neighborhoods-transformed.pkl')

In [194]:
# Enhance school data with UTM coordinates and associated shapely Points
schools_df = pd.read_pickle("schools-transformed.pkl")

In [195]:
schools_df.head()

Unnamed: 0_level_0,geodesic geometry,SCHOOL_NM,SCHOOL_ID,SCH_ADDR,GRADE_CAT,SCH_TYPE,lon,lat,UNIT_ID,UTMx,UTMy,geometry
UNIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4610,POINT (-87.5748539059 41.7483660139),MANN,610052,8050 S CHAPPEL AV,ES,Elementary School,-87.574854,41.748366,4610,452204.771983,4621998.0,POINT (452204.7719826915 4621997.860158128)
5180,POINT (-87.57284397069999 41.768650177),OKEEFFE,610103,6940 S MERRILL AV,ES,Elementary School,-87.572844,41.76865,5180,452386.881166,4624249.0,POINT (452386.881166386 4624248.790945017)
5300,POINT (-87.5829920307 41.768594283),PARKSIDE,610116,6938 S EAST END AV,ES,Elementary School,-87.582992,41.768594,5300,451543.358669,4624248.0,POINT (451543.3586693273 4624248.25277694)
5440,POINT (-87.60800008530001 41.740365734),PIRIE,610130,650 E 85TH ST,ES,Elementary School,-87.608,41.740366,5440,449442.603124,4621129.0,POINT (449442.6031235668 4621128.574748854)
5610,POINT (-87.60015764629999 41.764223475),REVERE,610146,1010 E 72ND ST,ES,Elementary School,-87.600158,41.764223,5610,450113.209645,4623773.0,POINT (450113.2096448886 4623772.796898597)


In [215]:
# Enhance crimes data with community name from the neighborhoods dataset and UTM coordinates/Points
crimes_header = ['ID', 'case number', 'date', 'block', 'iucr', 'primary type', 'desc', 'locdesc',
                 'arrest', 'domestic', 'beat', 'district', 'ward', 'community area', 'fbi code',
                 'x coord', 'y coord', 'year', 'updated on', 'lat', 'lon', 'location'
                ]

In [None]:
ids = []
date = []
ptype = []
category = []
location = []
arrest = []
community_area = []
geometry = []

In [None]:
categories: dict = {"THEFT": "property",
                    "BURGLARY": "property",
                    "MOTOR VEHICLE THEFT": "property",
                    "ARSON": "property",
                    "CRIMINAL DAMAGE": "property",
                    "ROBBERY": "property",
                    "ASSAULT": "person",
                    "BATTERY": "person",
                    "CRIM SEXUAL ASSAULT": "person",
                    "HOMICIDE": "person",
                    "INTIMIDATION": "person",
                    "KIDNAPPING": "person",
                    "OFFENSE INVOLVING CHILDREN": "person",
                    "SEX OFFENSE": "person",
                    "STALKING": "person",
                    "GAMBLING": "vice",
                    "NARCOTICS": "vice",
                    "PROSTITUTION": "vice",
                    "LIQUOR LAW VIOLATION": "vice",
                    "OBSCENITY": "vice",
                    "OTHER NARCOTIC VIOLATION": "vice",
                    "PUBLIC INDECENCY": "vice",
                    "OTHER OFFENSE": "other",
                    "DECEPTIVE PRACTICE": "other",
                    "WEAPONS VIOLATION": "other",
                    "PUBLIC PEACE VIOLATION": "other",
                    "CRIMINAL TRESPASS": "other",
                    "INTERFERENCE WITH PUBLIC OFFICER": "other",
                    "NON-CRIMINAL": "other"
                   }

In [216]:
crimes1 = pd.read_csv('crimes-2010-2011-0.csv', names=crimes_header, header=None)
crimes2 = pd.read_csv('crimes-2010-2011-1.csv', names=crimes_header, header=None)

In [217]:
crimes = pd.concat([crimes1, crimes2], names=crimes_header, ignore_index=True)
crimes.dropna(subset=['lat', 'lon'], inplace=True)

In [218]:
crimes_df = (crimes.merge(neighborhoods[['community', 'area_numbe']], left_on='community area', right_on='area_numbe'))
del crimes_df['area_numbe']

In [219]:
crimes_df['UTMx'] = np.zeros(len(crimes_df))
crimes_df['UTMy'] = np.zeros(len(crimes_df))
crimes_df.loc[:, ('UTMx', 'UTMy')] = crimes_df.apply(geod2utm, axis=1)
crimes_df.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,x coord,y coord,year,updated on,lat,lon,location,community,UTMx,UTMy
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,1136960.0,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,1138173.0,1922746.0,2001,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,1139616.0,1926816.0,2010,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,1138176.0,1921375.0,2010,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,1143808.0,1920986.0,2010,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0


In [220]:
crimes_df['UTMPoint'] = crimes_df.apply(make_utm_points, axis=1)

In [221]:
crimes_df.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,y coord,year,updated on,lat,lon,location,community,UTMx,UTMy,UTMPoint
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981)
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,1922746.0,2001,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577)
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,1926816.0,2010,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0,POINT (436836.8507415869 4645093.21940705)
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,1921375.0,2010,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0,POINT (436372.3575166143 4643442.41025281)
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,1920986.0,2010,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0,POINT (438086.3348712226 4643297.20981963)


In [185]:
crimes = gpd.GeoDataFrame(crimes_df, geometry='UTMPoint')

In [186]:
crimes.crs = {'init' :'epsg:2966'}
crimes.crs
crimes.geometry.name

'UTMPoint'

In [19]:
# Create a small extract of data to test performance of algorithms
crimes_extract = crimes.iloc[0:100]

In [196]:
schools_df.set_index("UNIT_ID")
schools_df['UNIT_ID'] = schools_df.index
schools_df.head()

Unnamed: 0_level_0,geodesic geometry,SCHOOL_NM,SCHOOL_ID,SCH_ADDR,GRADE_CAT,SCH_TYPE,lon,lat,UNIT_ID,UTMx,UTMy,geometry
UNIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4610,POINT (-87.5748539059 41.7483660139),MANN,610052,8050 S CHAPPEL AV,ES,Elementary School,-87.574854,41.748366,4610,452204.771983,4621998.0,POINT (452204.7719826915 4621997.860158128)
5180,POINT (-87.57284397069999 41.768650177),OKEEFFE,610103,6940 S MERRILL AV,ES,Elementary School,-87.572844,41.76865,5180,452386.881166,4624249.0,POINT (452386.881166386 4624248.790945017)
5300,POINT (-87.5829920307 41.768594283),PARKSIDE,610116,6938 S EAST END AV,ES,Elementary School,-87.582992,41.768594,5300,451543.358669,4624248.0,POINT (451543.3586693273 4624248.25277694)
5440,POINT (-87.60800008530001 41.740365734),PIRIE,610130,650 E 85TH ST,ES,Elementary School,-87.608,41.740366,5440,449442.603124,4621129.0,POINT (449442.6031235668 4621128.574748854)
5610,POINT (-87.60015764629999 41.764223475),REVERE,610146,1010 E 72ND ST,ES,Elementary School,-87.600158,41.764223,5610,450113.209645,4623773.0,POINT (450113.2096448886 4623772.796898597)


In [197]:
schools = gpd.GeoDataFrame(schools_df)

In [198]:
schools_unary_union = schools.unary_union

In [204]:
start_time = time.time()
crimes['nearest_id'] = crimes.apply(distance_to_nearest,
                                    geom_union=schools_unary_union,
                                    df1=crimes,
                                    df2=schools,
                                    src_column='UNIT_ID',
                                    geom1_col='UTMPoint',
                                    geom2_col='geometry',
                                    axis=1)
end_time = time.time()
print("That took {0} seconds".format(end_time - start_time))


That took 8873.31572008133 seconds


In [207]:
start_time = time.time()
#unpackdf = pd.DataFrame(crimes.apply(distance_to_nearest,
crimes['nearest_school_id'], crimes['nearest_school_distance'] = pd.DataFrame(crimes.apply(distance_to_nearest,
                                     geom_union=schools_unary_union,
                                     df1=crimes,
                                     df2=schools,
                                     geom1_col='UTMPoint',
                                     geom2_col='geometry',
                                     src_column='UNIT_ID',
                                     axis=1
                                    ).tolist(), columns = ['nearest_school_id', 'nearest_school_distance'], index=crimes.index)
end_time = time.time()
print("That took {0} seconds".format(end_time - start_time))

That took 8934.497382879257 seconds


In [222]:
crimes.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,ward,community area,fbi code,x coord,y coord,year,updated on,lat,lon,location
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,38.0,15.0,5,1136960.0,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)"
1,4991465,HM602607,01/01/2001 12:00:00 PM,065XX N SHERIDAN RD,1140,DECEPTIVE PRACTICE,EMBEZZLEMENT,"SCHOOL, PRIVATE, GROUNDS",True,False,...,49.0,1.0,12,1167102.0,1943920.0,2001,06/02/2010 10:34:17 AM,42.001679,-87.660604,"(42.001678618, -87.660604177)"
2,5147738,HM740459,08/01/2001 09:00:00 PM,050XX S JUSTINE ST,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,...,16.0,61.0,2,1166899.0,1871231.0,2001,06/02/2010 10:34:17 AM,41.80222,-87.663439,"(41.802219519, -87.663439163)"
4,5281401,G114422,02/26/2001 07:46:31 PM,005XX E 51ST ST,5000,OTHER OFFENSE,OTHER CRIME AGAINST PERSON,HOSPITAL BUILDING/GROUNDS,False,False,...,3.0,38.0,26,1180401.0,1871347.0,2001,02/18/2010 01:12:55 AM,41.802238,-87.613919,"(41.802238398, -87.613918511)"
6,5622147,HN422454,12/01/2001 12:01:00 AM,027XX N LAWNDALE AVE,1753,OFFENSE INVOLVING CHILDREN,SEX ASSLT OF CHILD BY FAM MBR,RESIDENCE,False,True,...,35.0,22.0,2,1151205.0,1918060.0,2001,03/11/2010 03:22:37 PM,41.931045,-87.719769,"(41.931044901, -87.719768573)"


In [213]:
#del crimes['community_y']
crimes.rename(columns={'community_x': 'community name'}, inplace=True)
del crimes['nearest_school_id']
crimes.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,y coord,year,updated on,lat,lon,location,community name,UTMx,UTMy,UTMPoint
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981)
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,1922746.0,2001,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577)
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,1926816.0,2010,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0,POINT (436836.8507415869 4645093.21940705)
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,1921375.0,2010,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0,POINT (436372.3575166143 4643442.41025281)
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,1920986.0,2010,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0,POINT (438086.3348712226 4643297.20981963)


In [214]:
crimes.to_pickle('crimes-transformed.pkl')

In [5]:
police_stations_df = pd.read_csv('data/Police_Stations_-_Map.csv')
police_stations_df['geodesic geometry'] = police_stations_df.apply(lambda z: shapely.geometry.Point(z.LAT, z.LON), axis=1)

In [9]:
police_stations_df = police_stations_df.rename(columns={'LAT': 'lat', 'LON': 'lon'})

In [10]:
police_stations_df['UTMx'] = np.zeros(len(police_stations_df))
police_stations_df['UTMy'] = np.zeros(len(police_stations_df))
police_stations_df.loc[:, ('UTMx', 'UTMy')] = police_stations_df.apply(geod2utm, axis=1)
police_stations_df.head()

Unnamed: 0,DISTRICT,ADDRESS,CITY,STATE,ZIP,WEBSITE,lat,lon,geodesic geometry,UTMx,UTMy
0,1,1718 S State St,Chicago,IL,60616,http://home.chicagopolice.org/community/distri...,41.858373,-87.627356,POINT (41.8583725929 -87.627356171),447928.689104,4634242.0
1,2,5101 S Wentworth Ave,Chicago,IL,60609,http://home.chicagopolice.org/community/distri...,41.801811,-87.63056,POINT (41.8018110912 -87.63056018010001),447616.657302,4627964.0
2,3,7040 S Cottage Grove Ave,Chicago,IL,60637,http://home.chicagopolice.org/community/distri...,41.766431,-87.605748,POINT (41.7664308925 -87.60574786059999),449650.259252,4624021.0
3,4,2255 E 103rd St,Chicago,IL,60617,http://home.chicagopolice.org/community/distri...,41.707933,-87.568349,POINT (41.7079332906 -87.5683491228),452715.963543,4617505.0
4,5,727 E 111th St,Chicago,IL,60628,http://home.chicagopolice.org/community/distri...,41.692723,-87.604506,POINT (41.6927233639 -87.60450586669999),449696.023331,4615837.0


In [11]:
police_stations_df['UTMPoint'] = police_stations_df.apply(make_utm_points, axis=1)

In [14]:
police_stations = gpd.GeoDataFrame(police_stations_df)

In [15]:
police_stations.to_pickle('data/police-stations-transformed.pkl')

In [29]:
libraries_df = None

In [30]:
libraries_df = pd.read_csv('data/Libraries_-_Locations__Hours_and_Contact_Information.csv')
libraries_df['geodesic geometry'] = libraries_df.apply(lambda z: shapely.geometry.Point(z.lat, z.lon), axis=1)

In [31]:
libraries_df[['lat', 'lon']] = libraries_df[['lat', 'lon']].apply(pd.to_numeric)
libraries_df.head()

Unnamed: 0,NAME,HOURS OF OPERATION,CYBERNAVIGATOR,TEACHER IN THE LIBRARY,ADDRESS,CITY,STATE,ZIP,PHONE,WEBSITE,lat,lon,geodesic geometry
0,Albany Park,"M, W: 10AM-6PM; TU, TH: 12PM-8PM; F, SA: 9AM-...",Yes,Yes,3401 W. Foster Avenue,CHICAGO,IL,60625,(773) 539-5450,https://www.chipublib.org/locations/3,41.975456,-87.71409,POINT (41.975456 -87.71409)
1,Altgeld,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",Yes,Yes,13281 S. Corliss Avenue,CHICAGO,IL,60827,(312) 747-3270,https://www.chipublib.org/locations/4,41.65473,-87.60223,POINT (41.65473021837776 -87.6022302609835)
2,Archer Heights,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",No,Yes,5055 S. Archer Avenue,CHICAGO,IL,60632,(312) 747-9241,https://www.chipublib.org/locations/5,41.801214,-87.726491,POINT (41.8012136599335 -87.72649071431441)
3,Austin,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",Yes,Yes,5615 W. Race Avenue,CHICAGO,IL,60644,(312) 746-5038,https://www.chipublib.org/locations/6,41.889272,-87.765712,POINT (41.88927215351453 -87.76571186722818)
4,Austin-Irving,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",No,Yes,6100 W. Irving Park Road,CHICAGO,IL,60634,(312) 744-6222,https://www.chipublib.org/locations/7,41.953174,-87.779387,POINT (41.95317390064158 -87.7793868207354)


In [35]:
libraries_df['UTMx'] = np.zeros(len(libraries_df))
libraries_df['UTMy'] = np.zeros(len(libraries_df))
libraries_df.loc[:, ('UTMx', 'UTMy')] = libraries_df.apply(geod2utm, axis=1)
libraries_df['UTMPoint'] = libraries_df.apply(make_utm_points, axis=1)
libraries_df.head()

Unnamed: 0,NAME,HOURS OF OPERATION,CYBERNAVIGATOR,TEACHER IN THE LIBRARY,ADDRESS,CITY,STATE,ZIP,PHONE,WEBSITE,lat,lon,geodesic geometry,UTMx,UTMy,UTMPoint
0,Albany Park,"M, W: 10AM-6PM; TU, TH: 12PM-8PM; F, SA: 9AM-...",Yes,Yes,3401 W. Foster Avenue,CHICAGO,IL,60625,(773) 539-5450,https://www.chipublib.org/locations/3,41.975456,-87.71409,POINT (41.975456 -87.71409),440837.879782,4647298.0,POINT (440837.8797823585 4647297.719777162)
1,Altgeld,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",Yes,Yes,13281 S. Corliss Avenue,CHICAGO,IL,60827,(312) 747-3270,https://www.chipublib.org/locations/4,41.65473,-87.60223,POINT (41.65473021837776 -87.6022302609835),449855.909019,4611618.0,POINT (449855.909019377 4611617.62890495)
2,Archer Heights,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",No,Yes,5055 S. Archer Avenue,CHICAGO,IL,60632,(312) 747-9241,https://www.chipublib.org/locations/5,41.801214,-87.726491,POINT (41.8012136599335 -87.72649071431441),439646.689875,4627961.0,POINT (439646.6898747591 4627960.636984769)
3,Austin,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",Yes,Yes,5615 W. Race Avenue,CHICAGO,IL,60644,(312) 746-5038,https://www.chipublib.org/locations/6,41.889272,-87.765712,POINT (41.88927215351453 -87.76571186722818),436475.537266,4637766.0,POINT (436475.5372664328 4637765.786946087)
4,Austin-Irving,"M, W: 12PM-8PM; TU, TH: 10AM-6PM; F, SA: 9AM-5...",No,Yes,6100 W. Irving Park Road,CHICAGO,IL,60634,(312) 744-6222,https://www.chipublib.org/locations/7,41.953174,-87.779387,POINT (41.95317390064158 -87.7793868207354),435405.521377,4644871.0,POINT (435405.5213774398 4644870.913123414)


In [36]:
libraries = gpd.GeoDataFrame(libraries_df)

In [37]:
libraries.to_pickle('data/libraries.pkl')

# Census data

In [4]:
census2k_df = pd.read_pickle('with_incomes.pkl')
census2k_df[['tract_ce_3', 'tract_ce_2']] = census2k_df[['tract_ce_3', 'tract_ce_2']].apply(pd.to_numeric)
census2k_df.rename(columns={'tract_ce_3': 'lat', 'tract_ce_2': 'lon'}, inplace=True)

In [5]:
census2k_df['geodesic geometry'] = census2k_df.apply(lambda z: shapely.geometry.Point(z.lat,
                                                                                      z.lon), axis=1)

In [9]:
census2k_df['UTMx'] = np.zeros(len(census2k_df))
census2k_df['UTMy'] = np.zeros(len(census2k_df))
census2k_df.loc[:, ('UTMx', 'UTMy')] = census2k_df.apply(geod2utm, axis=1)
census2k_df['UTMPoint'] = census2k_df.apply(make_utm_points, axis=1)
census2k_df['income_range'] = census2k_df['income_range'].astype('int64')
census2k_df.head()

Unnamed: 0,tract_cens,tract_fips,shape_area,perimeter,tract_cent,census_t_1,tract_numa,tract_comm,objectid,tract_cr_1,...,lat,tract_crea,lon,shape_len,geometry,income_range,geodesic geometry,UTMx,UTMy,UTMPoint
0,2000,17031,11228197.5722,0.0,1160171.70454335,17031720500,36,72,1,,...,41.702834,,-87.689107,14047.0507089,(POLYGON ((-87.69148523416723 41.7063622944737...,4,POINT (41.70283387 -87.68910701999999),442664.87913,4617012.0,POINT (442664.8791301676 4617012.450029014)
1,2000,17031,20199335.5566,0.0,1169816.30242459,17031730200,113,73,2,,...,41.726636,,-87.653538,19738.525071,(POLYGON ((-87.64651275537268 41.7325265907278...,3,POINT (41.72663578 -87.65353786999999),445644.379235,4619632.0,POINT (445644.3792354412 4619631.967828829)
2,2000,17031,3170252.61205,0.0,1166814.1830406,17031730300,22,73,3,,...,41.727436,,-87.664527,9848.21406776,(POLYGON ((-87.66332792156787 41.7212030772242...,2,POINT (41.72743573 -87.66452735),444731.050369,4619728.0,POINT (444731.0503693434 4619727.779515934)
3,2000,17031,9690785.77117,0.0,1157599.24855614,17031740100,45,74,4,,...,41.697269,,-87.698582,18058.3722532,(POLYGON ((-87.69645961375069 41.7071449131191...,2,POINT (41.69726919 -87.69858205),441871.521369,4616401.0,POINT (441871.5213687398 4616400.988459446)
4,2000,17031,8038189.85397,0.0,1165053.80574048,17031710500,33,71,5,,...,41.747534,,-87.67077,12094.2889266,(POLYGON ((-87.66338805460082 41.7484781942969...,2,POINT (41.74753426 -87.67076974),444229.257909,4621963.0,POINT (444229.2579089529 4621963.238518982)


In [10]:
census2k_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 878 entries, 0 to 877
Data columns (total 23 columns):
tract_cens           878 non-null object
tract_fips           878 non-null object
shape_area           878 non-null object
perimeter            878 non-null object
tract_cent           878 non-null object
census_t_1           878 non-null object
tract_numa           878 non-null object
tract_comm           878 non-null object
objectid             878 non-null object
tract_cr_1           0 non-null object
data_admin           878 non-null object
tract_ce_1           878 non-null object
census_tra           878 non-null object
lat                  878 non-null float64
tract_crea           878 non-null object
lon                  878 non-null float64
shape_len            878 non-null object
geometry             878 non-null object
income_range         878 non-null int64
geodesic geometry    878 non-null object
UTMx                 878 non-null float64
UTMy                 878 n

In [11]:
census2k_df.to_pickle('data/census2k-transformed.pkl')