# Police stations: Police_Stations_-_Map.csv
# Libraries: Libraries_-Locations_Hours_and_Contact_Information.csv
# Schools: Chicago Public Schools - School Loactions SY1011.geojson
# Todo: for each police report, find distance to nearest station, to nearest library, and to nearest school
# Done: Identify which neighborhood each report occurred in

In [3]:
import csv
from math import sqrt
import geopandas as gpd
import pandas as pd
import shapely
from shapely.ops import nearest_points
import numpy as np
from scipy import ndimage
from scipy.spatial import cKDTree  
import pyproj

import matplotlib
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from matplotlib.colors import ListedColormap
from matplotlib.ticker import MaxNLocator

In [None]:
# Do housekeeping between runs
objects = ['neighborhoods',
           'schools',
           'crimes_extract',
           'crimes_header',
           'crimes']


def discard_names(names, namespace):
    """Remove each name in ``names`` from the ``namespace`` mapping.

    Parameters
    ----------
    names : iterable of str
        Variable names to remove.
    namespace : dict
        Mapping of name -> object to remove from (e.g. ``globals()``).

    Returns
    -------
    list of str
        The names that were not present in ``namespace``, in input order.
    """
    missing = []
    for name in names:
        try:
            del namespace[name]
        except KeyError:
            missing.append(name)
    return missing


# ``del thing`` inside a loop would only unbind the loop variable itself, not
# the object whose *name* it holds, so delete by name from the global
# namespace instead and report whatever was not there to delete.
for thing in discard_names(objects, globals()):
    print("Couldn't delete {0}".format(thing))

In [None]:
# Let pandas print up to 500 rows before truncating displayed frames.
pd.options.display.max_rows = 500

In [2]:
def nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
    """Return the ``src_column`` value of the ``df2`` row whose geometry is nearest to ``row``.

    Parameters
    ----------
    row : mapping / pandas.Series
        Record holding the query geometry under ``geom1_col``.
    geom_union : shapely geometry
        Geometry searched for the closest point.
        NOTE(review): presumably the union of ``df2[geom2_col]`` (e.g.
        ``df2.unary_union``) -- confirm against the caller.
    df1 : unused
        Accepted for interface compatibility only; never read.
    df2 : DataFrame
        Table whose ``geom2_col`` geometries are compared against the nearest
        point and from which the result is read.
    geom1_col, geom2_col : str
        Geometry column names in ``row`` and ``df2`` respectively.
    src_column : str
        Column of ``df2`` to return; must be supplied by the caller.

    Returns
    -------
    The first matching value of ``df2[src_column]``.

    Raises
    ------
    IndexError
        If no geometry in ``df2`` equals the nearest point.
    """
    # nearest_points returns a pair of points; element [1] lies on geom_union.
    closest_geom = nearest_points(row[geom1_col], geom_union)[1]
    # Boolean mask of df2 rows whose geometry equals that closest point.
    is_closest = df2[geom2_col] == closest_geom
    # Series.get_values() was removed in pandas 1.0; .values works everywhere.
    value = df2[is_closest][src_column].values[0]
    return value

In [30]:
def geod2utm(row):
    """Project a row's geodetic 'lat'/'lon' values to UTM x/y.

    The projection is fixed to UTM zone '16T' on the WGS84 ellipsoid.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide 'lat' and 'lon' entries.

    Returns
    -------
    pandas.Series
        Two entries labelled 'UTMx' and 'UTMy'.
    """
    utm_zone = '16T'
    ellipsoid = 'WGS84'
    latitude = row['lat']
    longitude = row['lon']

    projection = pyproj.Proj(proj='utm', zone=utm_zone, ellps=ellipsoid)

    # The projection is called with longitude first, then latitude.
    utm_x, utm_y = projection(longitude, latitude)

    return pd.Series({'UTMx': utm_x, 'UTMy': utm_y})

In [46]:
def make_utm_points(row):
    """Build a shapely Point from the row's 'UTMx' and 'UTMy' entries."""
    return shapely.geometry.Point(row['UTMx'], row['UTMy'])

In [4]:
# Load the pickled crime reports and keep only the rows that have both a
# latitude and a longitude.
# Alternative load path from the raw CSV chunk, kept for reference:
#crimes_header = ['ID', 'case number', 'date', 'block', 'iucr', 'primary type', 'desc', 'locdesc',
#                 'arrest', 'domestic', 'beat', 'district', 'ward', 'community area', 'fbi code',
#                 'x coord', 'y coord', 'year', 'updated on', 'lat', 'lon', 'location'
#                ]
#crimes = pd.read_csv("parallel/xaa", names=crimes_header, header=None)
crimes = pd.read_pickle("crimes.pkl").dropna(subset=['lat', 'lon'])

In [None]:
# with_incomes = pd.read_pickle("with_incomes.pkl")

In [None]:
# Load the pickled neighborhoods table; its 'community' and 'area_numbe'
# columns are merged onto the crime reports further down.
neighborhoods = pd.read_pickle("neighborhoods.pkl")

In [62]:
# Load the pickled schools table (one row per school).
schools = pd.read_pickle("schools.pkl")

In [63]:
# Use the same coordinate column names as the crimes table ('lon'/'lat')
# so that geod2utm can be applied to both.
coordinate_names = {'X': 'lon', 'Y': 'lat'}
schools = schools.rename(columns=coordinate_names)
schools.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 688 entries, 4610 to 4210
Data columns (total 9 columns):
geometry     688 non-null object
SCHOOL_NM    688 non-null object
SCHOOL_ID    688 non-null int64
SCH_ADDR     688 non-null object
GRADE_CAT    688 non-null object
SCH_TYPE     688 non-null object
lon          688 non-null float64
lat          688 non-null float64
UNIT_ID      688 non-null int64
dtypes: float64(2), int64(2), object(5)
memory usage: 53.8+ KB


In [64]:
# Create zero-filled UTM columns first so the .loc assignment has targets.
for utm_col in ('UTMx', 'UTMy'):
    schools[utm_col] = np.zeros(schools.shape[0])
# Fill both columns from the per-row projection, then build point geometries.
schools.loc[:, ('UTMx', 'UTMy')] = schools.apply(geod2utm, axis=1)
schools['UTMPoint'] = schools.apply(make_utm_points, axis=1)
schools.head()

Unnamed: 0_level_0,geometry,SCHOOL_NM,SCHOOL_ID,SCH_ADDR,GRADE_CAT,SCH_TYPE,lon,lat,UNIT_ID,UTMx,UTMy,UTMPoint
UNIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4610,POINT (-87.5748539059 41.7483660139),MANN,610052,8050 S CHAPPEL AV,ES,Elementary School,-87.574854,41.748366,4610,452204.771983,4621998.0,POINT (452204.7719826915 4621997.860158128)
5180,POINT (-87.57284397069999 41.768650177),OKEEFFE,610103,6940 S MERRILL AV,ES,Elementary School,-87.572844,41.76865,5180,452386.881166,4624249.0,POINT (452386.881166386 4624248.790945017)
5300,POINT (-87.5829920307 41.768594283),PARKSIDE,610116,6938 S EAST END AV,ES,Elementary School,-87.582992,41.768594,5300,451543.358669,4624248.0,POINT (451543.3586693273 4624248.25277694)
5440,POINT (-87.60800008530001 41.740365734),PIRIE,610130,650 E 85TH ST,ES,Elementary School,-87.608,41.740366,5440,449442.603124,4621129.0,POINT (449442.6031235668 4621128.574748854)
5610,POINT (-87.60015764629999 41.764223475),REVERE,610146,1010 E 72ND ST,ES,Elementary School,-87.600158,41.764223,5610,450113.209645,4623773.0,POINT (450113.2096448886 4623772.796898597)


In [7]:
# Cast the area number to float64 so it has the same dtype as the
# 'community area' column of crimes that it is merged against.
area_number = neighborhoods['area_numbe']
neighborhoods['area_numbe'] = area_number.astype('float64')

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 10 columns):
community     77 non-null object
area          77 non-null object
shape_area    77 non-null object
perimeter     77 non-null object
area_num_1    77 non-null object
area_numbe    77 non-null float64
comarea_id    77 non-null object
comarea       77 non-null object
shape_len     77 non-null object
geometry      77 non-null object
dtypes: float64(1), object(9)
memory usage: 6.1+ KB


In [19]:
# Attach the community name to every report by matching the report's
# 'community area' number to the neighborhood's 'area_numbe'. merge() uses its
# default join type here; the join key copied from neighborhoods is dropped.
community_lookup = neighborhoods[['community', 'area_numbe']]
crimes = crimes.merge(community_lookup, left_on='community area', right_on='area_numbe')
crimes = crimes.drop('area_numbe', axis=1)

In [31]:
# Create zero-filled UTM columns first so the .loc assignment has targets.
for utm_col in ('UTMx', 'UTMy'):
    crimes[utm_col] = np.zeros(crimes.shape[0])
# Fill both columns from the per-row projection of lat/lon.
crimes.loc[:, ('UTMx', 'UTMy')] = crimes.apply(geod2utm, axis=1)
crimes.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,x coord,y coord,year,updated on,lat,lon,location,community,UTMx,UTMy
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,1136960.0,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,1138173.0,1922746.0,2001,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,1139616.0,1926816.0,2010,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,1138176.0,1921375.0,2010,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,1143808.0,1920986.0,2010,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0


In [47]:
# Build one shapely Point per report from its UTMx/UTMy columns.
crimes['UTMPoint'] = crimes.apply(make_utm_points, axis=1)

In [48]:
# Preview the first rows to check the new UTMPoint column.
crimes.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,y coord,year,updated on,lat,lon,location,community,UTMx,UTMy,UTMPoint
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,1926315.0,2011,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981)
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,1922746.0,2001,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577)
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,1926816.0,2010,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0,POINT (436836.8507415869 4645093.21940705)
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,1921375.0,2010,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0,POINT (436372.3575166143 4643442.41025281)
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,1920986.0,2010,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0,POINT (438086.3348712226 4643297.20981963)


In [49]:
# Take the first 100 reports as a small working sample.
crimes_extract = crimes.head(100)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 99
Data columns (total 26 columns):
ID                100 non-null int64
case number       100 non-null object
date              100 non-null object
block             100 non-null object
iucr              100 non-null object
primary type      100 non-null object
desc              100 non-null object
locdesc           100 non-null object
arrest            100 non-null bool
domestic          100 non-null bool
beat              100 non-null int64
district          100 non-null int64
ward              100 non-null int64
community area    100 non-null float64
fbi code          100 non-null object
x coord           100 non-null float64
y coord           100 non-null float64
year              100 non-null int64
updated on        100 non-null object
lat               100 non-null float64
lon               100 non-null float64
location          100 non-null object
community         100 non-null object
UTMx              100 non-

In [50]:
# Persist the full crimes frame (not the 100-row extract), including the
# community, UTMx/UTMy and UTMPoint columns added above.
crimes.to_pickle('crimes-transformed.pkl')