# Police stations: Police_Stations_-_Map.csv
# Libraries: Libraries_-Locations_Hours_and_Contact_Information.csv
# Schools: Chicago Public Schools - School Loactions SY1011.geojson
# TODO: for each reported crime, find the distance to the nearest police station, the nearest library, and the nearest school

In [1]:
import csv
import time
#from collections import namedtuple
import geopandas as gpd
import pandas as pd
import shapely
from shapely.ops import nearest_points
import numpy as np
from scipy import ndimage

#import matplotlib
#from matplotlib.pyplot import figure
#import matplotlib.pyplot as plt
#from matplotlib import cm
#from matplotlib.ticker import LinearLocator, FormatStrFormatter
#from matplotlib.colors import ListedColormap
#from matplotlib.ticker import MaxNLocator

In [2]:
# Do housekeeping between runs: remove the frames left over from a previous
# run so they can be rebuilt from scratch by the cells below.
objects = ['neighborhoods',
           'schools',
           'crimes_extract',
           'crimes_header',
           'crimes',
           'crimes2'
          ]


def discard_names(names, namespace):
    """Delete each name in *names* from the *namespace* mapping.

    ``del thing`` inside a loop over strings only unbinds the loop variable;
    to remove the object a string *refers to*, the entry has to be deleted
    from the namespace dictionary itself.

    Names that are not present are reported with a printed message and
    otherwise ignored, so the cell is safe to run on a fresh kernel.
    """
    for name in names:
        try:
            del namespace[name]
        except KeyError as e:
            print("Couldn't delete {0}".format(name))
            print(e)


discard_names(objects, globals())

In [3]:
def distance_to_nearest(row, geom_union, df1, df2, geom1_col='geometry', geom2_col='geometry', src_column=None):
    """Find the df2 geometry nearest to a row and return its ID and distance.

    Intended to be used with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: A row whose ``geom1_col`` entry is the starting geometry.
        geom_union: A single geometry holding every candidate geometry of
            ``df2`` (e.g. its unary union); searched with shapely's
            ``nearest_points``.
        df1: Unused; kept so existing callers that pass it keep working.
        df2: Frame holding the candidate geometries and the ``src_column``.
        geom1_col: Name of the geometry entry in ``row``.
        geom2_col: Name of the geometry column in ``df2``.
        src_column: Name of the ``df2`` column whose value is returned for
            the nearest geometry. Must be supplied; the ``None`` default
            is not a usable column name.

    Returns:
        A ``(nearest_id, distance)`` tuple: the ``src_column`` value of the
        first ``df2`` row whose geometry equals the nearest point, and the
        distance from the row's geometry to it (in the units of the
        geometries' coordinate system -- presumably metres for UTM; confirm).

    Raises:
        IndexError: If no ``df2`` geometry compares equal to the nearest point.
    """
    start_point = row[geom1_col]
    # nearest_points returns one point per input geometry, in input order;
    # index 1 is therefore the closest point that lies on geom_union.
    nearest_geom = nearest_points(start_point, geom_union)[1]
    # Select the matching df2 row(s) once and reuse them for both lookups
    # (matching is based on geometry equality).
    matches = df2[df2[geom2_col] == nearest_geom]
    # .iloc[0] replaces Series.get_values()[0]; get_values() was removed in pandas 1.0.
    nearest_id = matches[src_column].iloc[0]
    end_point = matches[geom2_col].iloc[0]
    dist_to_point = start_point.distance(end_point)
    return nearest_id, dist_to_point

In [4]:
# Load the crime reports and neighborhood frames from local pickle files
# (presumably written by an earlier preprocessing step -- confirm).
# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files that you created yourself or otherwise trust.
crimes = pd.read_pickle("crimes-transformed.pkl")
neighborhoods = pd.read_pickle("neighborhoods-transformed.pkl")

In [5]:
# Load the schools frame from a local pickle file (presumably a GeoDataFrame,
# since `.unary_union` is read from it later -- confirm).
# NOTE(review): only unpickle files from a trusted source.
schools = pd.read_pickle('schools-transformed.pkl')

In [6]:
# Merge all school geometries into one geometry object; it is later passed
# to distance_to_nearest as `geom_union`, where shapely's nearest_points
# searches it for the closest school.
# NOTE(review): newer geopandas releases deprecate `unary_union` in favour
# of `union_all()` -- confirm against the installed version.
schools_unary_union = schools.unary_union

In [7]:
# Locate the nearest school to each reported crime's location.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# distance_to_nearest returns an (id, distance) tuple per row; collect the
# tuples in a list so they can be unpacked into two columns.
nearest_school_pairs = crimes.apply(
    distance_to_nearest,
    axis=1,
    geom_union=schools_unary_union,
    df1=crimes,
    df2=schools,
    geom1_col='UTMPoint',
    geom2_col='geometry',
    src_column='UNIT_ID',
).tolist()
unpackdf = pd.DataFrame(nearest_school_pairs,
                        columns=['nearest_school_id', 'nearest_school_distance'],
                        index=crimes.index)

# Drop any results left over from an earlier run of this cell, so that
# re-running it does not add a second pair of identically named columns.
crimes = pd.concat([unpackdf,
                    crimes.drop(columns=list(unpackdf.columns), errors='ignore')],
                   axis=1)
end_time = time.perf_counter()
print("That took {0} seconds".format(end_time - start_time))

That took 14.41885495185852 seconds


In [9]:
# Print the column names, non-null counts and dtypes of `crimes` to check
# that the two nearest-school columns were added.
crimes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1147 entries, 0 to 1146
Data columns (total 28 columns):
nearest_school_id          1147 non-null int64
nearest_school_distance    1147 non-null float64
ID                         1147 non-null int64
case number                1147 non-null object
date                       1147 non-null object
block                      1147 non-null object
iucr                       1147 non-null object
primary type               1147 non-null object
desc                       1147 non-null object
locdesc                    1147 non-null object
arrest                     1147 non-null bool
domestic                   1147 non-null bool
beat                       1147 non-null int64
district                   1147 non-null int64
ward                       1147 non-null int64
community area             1147 non-null float64
fbi code                   1147 non-null object
x coord                    1147 non-null float64
y coord                    1147 non