In [23]:
import csv
import time
import sys
import pandas as pd
import geopandas as gpd
import shapely
import numpy as np
from shapely.ops import nearest_points
from scipy import spatial
from pyproj import Proj

In [3]:
 prefix = 'parallel'
 filename = 'xaa'
 colnames = ['record id', 'case no', 'date', 'block', 'iucr','primary type', 'description',
             'location description', 'arrest', 'domestic', 'beat', 'district', 'ward', 'community area',
             'fbi code', 'x coordinate', 'y coordinate', 'year', 'updated on', 'latitude',
             'longitude', 'location']
 crimesdf = pd.read_csv("{0}/{1}".format(prefix, filename), names=colnames, header=None)
 crimesdf.dropna(inplace=True)
 print(crimesdf.shape)
 del crimesdf['block']
 del crimesdf['iucr']
 del crimesdf['domestic']
 del crimesdf['beat']
 del crimesdf['district']
 del crimesdf['fbi code']
 del crimesdf['x coordinate']
 del crimesdf['y coordinate']
 del crimesdf['year']
 del crimesdf['updated on']
 del crimesdf['location']
 
 crimesdf['geometry'] = list(zip(crimesdf['latitude'], crimesdf['longitude']))
 crimesdf['geometry'] = crimesdf['geometry'].apply(shapely.geometry.Point)
 crimesgpd = gpd.GeoDataFrame(crimesdf)
 print(crimesgpd.iloc[0])

(1147, 22)
record id                                         8265244
case no                                          HT498458
date                               09/15/2011 08:00:00 AM
primary type                                     BURGLARY
description                                UNLAWFUL ENTRY
location description                            RESIDENCE
arrest                                               True
ward                                                   38
community area                                         15
latitude                                           41.954
longitude                                        -87.7719
geometry                POINT (41.95396528 -87.771918163)
Name: 0, dtype: object


In [4]:
# Read in the school locations data, fix the type of the UNIT_ID column, create shapely points from X/Y
# and create the geopandas DataFrame
schools = pd.read_csv("school-locations-2010-2011.csv", index_col=2)
schools['UNIT_ID'] = schools.index
schools = schools.astype({'UNIT_ID': int})
schools['geometry'] = list(zip(schools['X'], schools['Y']))
schools['geometry'] = schools['geometry'].apply(shapely.geometry.Point)
schools = gpd.GeoDataFrame(schools)
schools.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 688 entries, 4610 to 4210
Data columns (total 10 columns):
geometry     688 non-null object
SCHOOL_NM    688 non-null object
SCHOOL_ID    688 non-null int64
SCH_ADDR     688 non-null object
GRADE_CAT    688 non-null object
GRADES       688 non-null object
SCH_TYPE     688 non-null object
X            688 non-null float64
Y            688 non-null float64
UNIT_ID      688 non-null int64
dtypes: float64(2), int64(2), object(6)
memory usage: 59.1+ KB


In [34]:
crimes_df = pd.read_pickle('crimes-transformed.pkl')
schools_df = pd.read_pickle('schools-transformed.pkl')

In [33]:
crimesgpd = gpd.GeoDataFrame(crimes_df)
crimesgpd.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 721011 entries, 0 to 721010
Data columns (total 26 columns):
ID                721011 non-null int64
case number       721010 non-null object
date              721011 non-null object
block             721011 non-null object
iucr              721011 non-null object
primary type      721011 non-null object
desc              721011 non-null object
locdesc           720782 non-null object
arrest            721011 non-null bool
domestic          721011 non-null bool
beat              721011 non-null int64
district          721011 non-null int64
ward              720979 non-null float64
community area    721011 non-null float64
fbi code          721011 non-null object
x coord           721011 non-null float64
y coord           721011 non-null float64
year              721011 non-null int64
updated on        721011 non-null object
lat               721011 non-null float64
lon               721011 non-null float64
location          7210

In [35]:
schools = gpd.GeoDataFrame(schools_df)

In [36]:
schools.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 688 entries, 4610 to 4210
Data columns (total 12 columns):
geodesic geometry    688 non-null object
SCHOOL_NM            688 non-null object
SCHOOL_ID            688 non-null int64
SCH_ADDR             688 non-null object
GRADE_CAT            688 non-null object
SCH_TYPE             688 non-null object
lon                  688 non-null float64
lat                  688 non-null float64
UNIT_ID              688 non-null int64
UTMx                 688 non-null float64
UTMy                 688 non-null float64
geometry             688 non-null object
dtypes: float64(4), int64(2), object(6)
memory usage: 69.9+ KB


In [97]:
 start_time = time.time()
 D = spatial.distance_matrix([[pt.x, pt.y] for pt in crimesgpd['UTMPoint']], [[pt.x, pt.y] for pt in schools['geometry']])
 nn = np.array([[np.min(D[i,]), np.argmin(D[i,])] for i in range(crimesgpd['UTMPoint'].shape[0])])
 crimesgpd['school distance'] = nn[:,0]
 crimesgpd['nearest school'] = nn[:,1]
 end_time = time.time()
 print("That took {0} seconds".format(end_time - start_time))


That took 52.9865837097168 seconds


In [98]:
crimesgpd.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,lon,location,community name,UTMx,UTMy,UTMPoint,scipy distance,scipy distance2,school distance,nearest school
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981),762.689082,0.0,762.689082,573.0
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577),175.3059,0.0,175.3059,571.0
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0,POINT (436836.8507415869 4645093.21940705),213.404085,0.0,213.404085,569.0
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0,POINT (436372.3575166143 4643442.41025281),308.147116,0.0,308.147116,571.0
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0,POINT (438086.3348712226 4643297.20981963),436.027666,0.0,436.027666,559.0


In [90]:
np.argmin(D[1,])

571

In [91]:
schools.iloc[571]

geodesic geometry       POINT (-87.76894425340001 41.9429654734)
SCHOOL_NM                                               REINBERG
SCHOOL_ID                                                 610145
SCH_ADDR                                         3425 N MAJOR AV
GRADE_CAT                                                     ES
SCH_TYPE                                       Elementary School
lon                                                     -87.7689
lat                                                       41.943
UNIT_ID                                                     5600
UTMx                                                      436261
UTMy                                                 4.64373e+06
geometry             POINT (436260.8235323379 4643729.664174551)
Name: 5600, dtype: object

In [96]:
crimes_extract_df[crimes_extract_df['ID'] == 7357772]

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,updated on,lat,lon,location,community name,UTMx,UTMy,UTMPoint,nearest_school_id,nearest_school_distance
22,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577),5600,175.3059


In [92]:
crimes_extract_df[crimes_extract_df['ID'] == 7357772]

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,updated on,lat,lon,location,community name,UTMx,UTMy,UTMPoint,scipy distance,scipy distance2
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981),762.689082,0.0
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,02/18/2010 01:12:55 AM,41.94415,-87.767546,"(41.9441497, -87.767545572)",PORTAGE PARK,436377.940882,4643860.0,POINT (436377.940881997 4643860.108357577),175.3059,0.0
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,02/04/2016 06:33:39 AM,41.955292,-87.762142,"(41.95529191, -87.762141971)",PORTAGE PARK,436836.850742,4645093.0,POINT (436836.8507415869 4645093.21940705),213.404085,0.0
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,02/04/2016 06:33:39 AM,41.940387,-87.767568,"(41.940387482, -87.767567805)",PORTAGE PARK,436372.357517,4643442.0,POINT (436372.3575166143 4643442.41025281),308.147116,0.0
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,02/04/2016 06:33:39 AM,41.939216,-87.746878,"(41.939216164, -87.746877814)",PORTAGE PARK,438086.334871,4643297.0,POINT (438086.3348712226 4643297.20981963),436.027666,0.0


In [41]:
crimes_extract_df = pd.read_pickle('crimes_extract.pkl')

In [93]:
crimes_extract_df.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,updated on,lat,lon,location,community name,UTMx,UTMy,UTMPoint,nearest_school_id,nearest_school_distance
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981),6190,762.689082
1,4991465,HM602607,01/01/2001 12:00:00 PM,065XX N SHERIDAN RD,1140,DECEPTIVE PRACTICE,EMBEZZLEMENT,"SCHOOL, PRIVATE, GROUNDS",True,False,...,06/02/2010 10:34:17 AM,42.001679,-87.660604,"(42.001678618, -87.660604177)",ROGERS PARK,445291.642241,4650174.0,POINT (445291.6422413067 4650173.646103563),4300,675.350954
2,5147738,HM740459,08/01/2001 09:00:00 PM,050XX S JUSTINE ST,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,...,06/02/2010 10:34:17 AM,41.80222,-87.663439,"(41.802219519, -87.663439163)",NEW CITY,444885.595462,4628030.0,POINT (444885.5954621887 4628029.962462749),1110,118.108366
3,5281401,G114422,02/26/2001 07:46:31 PM,005XX E 51ST ST,5000,OTHER OFFENSE,OTHER CRIME AGAINST PERSON,HOSPITAL BUILDING/GROUNDS,False,False,...,02/18/2010 01:12:55 AM,41.802238,-87.613919,"(41.802238398, -87.613918511)",GRAND BOULEVARD,448999.497171,4628001.0,POINT (448999.4971705653 4628001.490274356),1600,166.988105
4,5622147,HN422454,12/01/2001 12:01:00 AM,027XX N LAWNDALE AVE,1753,OFFENSE INVOLVING CHILDREN,SEX ASSLT OF CHILD BY FAM MBR,RESIDENCE,False,True,...,03/11/2010 03:22:37 PM,41.931045,-87.719769,"(41.931044901, -87.719768573)",LOGAN SQUARE,440325.996137,4642371.0,POINT (440325.9961369195 4642370.741977295),4850,129.590417


In [42]:
crimesgpd_extract = crimesgpd.iloc[0:100,]

In [82]:
crimesgpd_extract.join(crimes_extract_df,
                       rsuffix='_r',
                       lsuffix='_l',
                       how='left',
                       on='ID'
                      )#[['case number_l', 'case number_r', 'nearest_school_id', 'nearest_school_distance', 'scipy distance']]

Unnamed: 0,ID_l,case number_l,date_l,block_l,iucr_l,primary type_l,desc_l,locdesc_l,arrest_l,domestic_l,...,updated on_r,lat_r,lon_r,location_r,community name_r,UTMx_r,UTMy_r,UTMPoint_r,nearest_school_id,nearest_school_distance
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,0620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,,,,,,,,,,
1,7357772,HS159408,07/01/2001 10:00:00 AM,056XX W CORNELIA AVE,0840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,,,,,,,,,,
2,7609828,HS413731,07/13/2010 12:00:00 PM,041XX N LONG AVE,0820,THEFT,$500 AND UNDER,PARK PROPERTY,False,False,...,,,,,,,,,,
3,7610439,HS413034,07/16/2010 03:00:00 AM,056XX W SCHOOL ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,...,,,,,,,,,,
4,7610466,HS414753,07/17/2010 12:25:00 AM,032XX N CICERO AVE,1506,PROSTITUTION,SOLICIT ON PUBLIC WAY,ALLEY,True,False,...,,,,,,,,,,
5,7614778,HS419653,07/19/2010 11:02:00 PM,032XX N CENTRAL AVE,2023,NARCOTICS,POSS: HEROIN(BRN/TAN),STREET,True,False,...,,,,,,,,,,
6,7615365,HS408946,07/13/2010 06:00:00 AM,048XX W BELLE PLAINE AVE,0910,MOTOR VEHICLE THEFT,AUTOMOBILE,STREET,False,False,...,,,,,,,,,,
7,7616241,HS420347,07/16/2010 02:30:00 PM,054XX W DAKIN ST,2825,OTHER OFFENSE,HARASSMENT BY TELEPHONE,RESIDENCE,False,False,...,,,,,,,,,,
8,7616531,HS420343,07/20/2010 01:30:00 PM,047XX N CENTRAL AVE,0860,THEFT,RETAIL THEFT,GROCERY FOOD STORE,True,False,...,,,,,,,,,,
9,7616562,HS420657,07/20/2010 04:25:00 PM,039XX N CENTRAL AVE,1200,DECEPTIVE PRACTICE,STOLEN PROP: BUY/RECEIVE/POS.,SIDEWALK,True,False,...,,,,,,,,,,


In [69]:
crimes_extract_df.head()

Unnamed: 0,ID,case number,date,block,iucr,primary type,desc,locdesc,arrest,domestic,...,updated on,lat,lon,location,community name,UTMx,UTMy,UTMPoint,nearest_school_id,nearest_school_distance
0,8265244,HT498458,09/15/2011 08:00:00 AM,040XX N MENARD AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE,True,False,...,02/12/2018 03:46:59 PM,41.953965,-87.771918,"(41.95396528, -87.771918163)",PORTAGE PARK,436025.308686,4644953.0,POINT (436025.3086861699 4644953.17679981),6190,762.689082
1,4991465,HM602607,01/01/2001 12:00:00 PM,065XX N SHERIDAN RD,1140,DECEPTIVE PRACTICE,EMBEZZLEMENT,"SCHOOL, PRIVATE, GROUNDS",True,False,...,06/02/2010 10:34:17 AM,42.001679,-87.660604,"(42.001678618, -87.660604177)",ROGERS PARK,445291.642241,4650174.0,POINT (445291.6422413067 4650173.646103563),4300,675.350954
2,5147738,HM740459,08/01/2001 09:00:00 PM,050XX S JUSTINE ST,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,True,False,...,06/02/2010 10:34:17 AM,41.80222,-87.663439,"(41.802219519, -87.663439163)",NEW CITY,444885.595462,4628030.0,POINT (444885.5954621887 4628029.962462749),1110,118.108366
3,5281401,G114422,02/26/2001 07:46:31 PM,005XX E 51ST ST,5000,OTHER OFFENSE,OTHER CRIME AGAINST PERSON,HOSPITAL BUILDING/GROUNDS,False,False,...,02/18/2010 01:12:55 AM,41.802238,-87.613919,"(41.802238398, -87.613918511)",GRAND BOULEVARD,448999.497171,4628001.0,POINT (448999.4971705653 4628001.490274356),1600,166.988105
4,5622147,HN422454,12/01/2001 12:01:00 AM,027XX N LAWNDALE AVE,1753,OFFENSE INVOLVING CHILDREN,SEX ASSLT OF CHILD BY FAM MBR,RESIDENCE,False,True,...,03/11/2010 03:22:37 PM,41.931045,-87.719769,"(41.931044901, -87.719768573)",LOGAN SQUARE,440325.996137,4642371.0,POINT (440325.9961369195 4642370.741977295),4850,129.590417


In [61]:
schools[schools['UNIT_ID'] == 4300]

Unnamed: 0_level_0,geodesic geometry,SCHOOL_NM,SCHOOL_ID,SCH_ADDR,GRADE_CAT,SCH_TYPE,lon,lat,UNIT_ID,UTMx,UTMy,geometry
UNIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4300,POINT (-87.6682628983 42.0037675389),KILMER,610022,6700 N GREENVIEW AV,ES,Elementary School,-87.668263,42.003768,4300,444659.187544,4650411.0,POINT (444659.1875437051 4650410.500417422)


In [60]:
from math import sqrt
sqrt((436025.3086861699 - 435543.9217422368)**2 + (4644953.17679981 - 4644361.601575511)**2)

762.689081994512

In [68]:
sqrt(abs((436377.940881997 - 444659.1875437051))**2 + abs((4643860.108357577 - 4650410.500417422))**2)

10558.725415964327

In [95]:
nn[:,1]

array([0., 0., 0., ..., 0., 0., 0.])