In [1]:
# Imports first
import csv
import time
from math import sqrt
import geopandas as gpd
import pandas as pd
import shapely
from shapely.ops import nearest_points
import numpy as np
from scipy import ndimage
from scipy.spatial import cKDTree  
import pyproj

In [2]:
# Notebook display configuration: let pandas render up to 500 rows before
# truncating a displayed frame.
pd.options.display.max_rows = 500

In [3]:
# Do housekeeping between runs: drop the objects a previous run may have left
# in the kernel so that the cells below rebuild them from scratch.
objects = ['neighborhoods',
           'schools',
           'schools_df',
           'schools_unary_union',
           'crimes_extract',
           'crimes_header',
           'crimes_df',
           'crimes']

# `del thing` would only unbind the loop variable itself, never the object whose
# name it holds, so remove each name from the global namespace instead.
# Names that are not defined (e.g. on a fresh kernel) are skipped silently.
for thing in objects:
    globals().pop(thing, None)

In [4]:
def distance_to_nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
    """Find the geometry in ``df2`` nearest to ``row`` and report its id and distance.

    Intended for use with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record holding the source geometry under ``geom1_col``.
    geom_union : geometry
        Union of all candidate geometries; passed to ``nearest_points`` as the
        search target.
    df1 : DataFrame
        Not used by this function; kept so existing callers keep working.
    df2 : DataFrame
        Candidate table holding the geometries (``geom2_col``) and the value to
        report (``src_column``).
    geom1_col, geom2_col : str
        Geometry column names in ``row`` and ``df2`` respectively.
    src_column : str
        Column of ``df2`` whose value is returned for the nearest geometry.
        Must be supplied; the ``None`` default is not a usable column name.

    Returns
    -------
    tuple
        ``(nearest_id, distance)``: the ``src_column`` value of the first row of
        ``df2`` whose geometry equals the nearest point, and the distance from
        the source geometry to it.

    Raises
    ------
    IndexError
        If no row of ``df2`` has a geometry equal to the nearest point.
    """
    start_point = row[geom1_col]
    # nearest_points returns one point per input; index 1 is the point on geom_union.
    nearest_geom = nearest_points(start_point, geom_union)[1]
    # Filter df2 once and reuse the result for both the id and the geometry.
    matches = df2[df2[geom2_col] == nearest_geom]
    # Series.get_values() was removed in pandas 1.0; .iloc[0] gives the same first value.
    nearest_id = matches[src_column].iloc[0]
    end_point = matches[geom2_col].iloc[0]
    dist_to_point = start_point.distance(end_point)
    return nearest_id, dist_to_point

In [5]:
def geod2utm(row):
    """Convert a row's geodetic coordinates to UTM.

    Reads ``row['lat']`` and ``row['lon']``, projects them with a pyproj UTM
    projection (zone and ellipsoid are hard-coded to '16T' and 'WGS84') and
    returns a two-element Series labelled 'UTMx' and 'UTMy'.
    """
    latitude, longitude = row['lat'], row['lon']

    # A new projection object is built on every call, as in the original.
    projection = pyproj.Proj(proj='utm', zone='16T', ellps='WGS84')

    # pyproj projections are called as (lon, lat) and return (x, y).
    easting, northing = projection(longitude, latitude)
    return pd.Series({'UTMx': easting, 'UTMy': northing})

In [6]:
def make_utm_points(row):
    """Build a shapely Point from the row's 'UTMx' and 'UTMy' values."""
    return shapely.geometry.Point(row['UTMx'], row['UTMy'])

# Enhance crimes data with nearest school ID and distance to it

In [7]:
# Load the schools table from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
schools_df = pd.read_pickle("schools-transformed.pkl")

In [9]:
# Load the crimes table from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
crimes_df = pd.read_pickle('crimes-transformed.pkl')

In [10]:
# Wrap the crimes table in a GeoDataFrame, using its 'UTMPoint' column as the
# geometry column.
crimes = gpd.GeoDataFrame(crimes_df, geometry='UTMPoint')

In [13]:
# Label the crime geometries with their coordinate reference system. The
# "EPSG:<code>" string replaces the {'init': 'epsg:<code>'} dict form, which
# pyproj/geopandas have deprecated.
# NOTE(review): geod2utm in this notebook projects with UTM zone 16 / WGS84,
# which would be EPSG:32616; confirm that EPSG:2966 really describes the
# 'UTMPoint' coordinates.
crimes.crs = "EPSG:2966"

In [14]:
# Index the schools table by UNIT_ID while keeping UNIT_ID available as a
# regular column (drop=False). set_index returns a new frame, so the result
# must be assigned back; copying the index into the column afterwards would
# overwrite the real ids with whatever index the frame happened to have.
schools_df = schools_df.set_index("UNIT_ID", drop=False)
schools_df.head()

Unnamed: 0_level_0,geodesic geometry,SCHOOL_NM,SCHOOL_ID,SCH_ADDR,GRADE_CAT,SCH_TYPE,lon,lat,UNIT_ID,UTMx,UTMy,geometry
UNIT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4610,POINT (-87.5748539059 41.7483660139),MANN,610052,8050 S CHAPPEL AV,ES,Elementary School,-87.574854,41.748366,4610,452204.771983,4621998.0,POINT (452204.7719826915 4621997.860158128)
5180,POINT (-87.57284397069999 41.768650177),OKEEFFE,610103,6940 S MERRILL AV,ES,Elementary School,-87.572844,41.76865,5180,452386.881166,4624249.0,POINT (452386.881166386 4624248.790945017)
5300,POINT (-87.5829920307 41.768594283),PARKSIDE,610116,6938 S EAST END AV,ES,Elementary School,-87.582992,41.768594,5300,451543.358669,4624248.0,POINT (451543.3586693273 4624248.25277694)
5440,POINT (-87.60800008530001 41.740365734),PIRIE,610130,650 E 85TH ST,ES,Elementary School,-87.608,41.740366,5440,449442.603124,4621129.0,POINT (449442.6031235668 4621128.574748854)
5610,POINT (-87.60015764629999 41.764223475),REVERE,610146,1010 E 72ND ST,ES,Elementary School,-87.600158,41.764223,5610,450113.209645,4623773.0,POINT (450113.2096448886 4623772.796898597)


In [15]:
# Wrap the schools table in a GeoDataFrame. No geometry column is named here,
# so geopandas' default applies (presumably the 'geometry' column shown in the
# preview above; confirm).
schools = gpd.GeoDataFrame(schools_df)

In [16]:
# Combine all school geometries into one geometry; it is passed as `geom_union`
# to distance_to_nearest, which hands it to shapely's nearest_points.
# NOTE(review): newer geopandas releases deprecate `unary_union` in favour of
# `union_all()`; confirm against the installed version.
schools_unary_union = schools.unary_union

In [None]:
# Add the nearest school's id and the distance to it to every crime, and
# report how long the lookup took.
start_time = time.time()

# distance_to_nearest returns an (id, distance) tuple per row, so apply yields
# a Series of tuples.
# NOTE(review): this scans the schools table once per crime row; if it proves
# too slow, a bulk nearest-neighbour query (e.g. scipy's cKDTree, already
# imported) is a possible alternative.
nearest_school_pairs = crimes.apply(distance_to_nearest,
                                    geom_union=schools_unary_union,
                                    df1=crimes,
                                    df2=schools,
                                    geom1_col='UTMPoint',
                                    geom2_col='geometry',
                                    src_column='UNIT_ID',
                                    axis=1)
nearest_school_df = pd.DataFrame(nearest_school_pairs.tolist(),
                                 columns=['nearest_school_id', 'nearest_school_distance'],
                                 index=crimes.index)

# Assign column by column: tuple-unpacking a DataFrame iterates over its column
# labels, which would fill both new columns with the label strings instead of
# the computed values.
crimes['nearest_school_id'] = nearest_school_df['nearest_school_id']
crimes['nearest_school_distance'] = nearest_school_df['nearest_school_distance']

end_time = time.time()
print("That took {0} seconds".format(end_time - start_time))