## Use `rtree` to speed intersections and nearest-neighbor search
more info at: http://toblerity.org/rtree/

In [1]:
import time
from shapely.geometry import Point
from rtree import index
import pandas as pd
import matplotlib.pyplot as plt
import mplleaflet
from GISio import shp2df

% matplotlib inline

In [2]:
df = pd.read_csv('data/MSNcrime16.csv')
df.dropna(axis=0, subset=['longitude', 'latitude'], inplace=True)

msn = shp2df('data/ZillowNeighborhoods-WI/ZillowNeighborhoods-WI.shp')


reading data/ZillowNeighborhoods-WI/ZillowNeighborhoods-WI.shp...
--> building dataframe... (may take a while for large shapefiles)


  interactivity=interactivity, compiler=compiler, result=result)


### Set up the spatial index
* the `Index` object works with bounding boxes, regardless if the feature is a point  ,
(accepts tuples of (minx, miny, maxx, maxy))

In [3]:
points = zip(df.longitude, df.latitude, df.longitude, df.latitude)

# build spatial index
ta = time.time()
idx = index.Index()
for i, p in enumerate(points):
    idx.insert(i, p)
print("finished in {:.2f}".format(time.time() - ta))

finished in 5.84


### Query the index
get the indices of the lat, lon values that intersect one of the polygons

In [4]:
print(list(idx.intersection(msn.geometry[0].bounds)))

[87005, 49350, 32291, 51062, 15138, 46997, 53741, 56744, 60758, 61134, 70411, 74495, 81634, 82883, 85155, 85156, 85164, 95158, 96751, 78657, 37002, 36054, 40780, 37954, 34233, 18891, 34583, 51027, 49322, 40656, 16124, 15173, 4097, 14554, 70119, 76726, 78422, 82743, 83858, 88995, 96993, 10713, 41741, 85150, 67810, 67825, 62981, 1458, 6530, 81761, 41199, 73456, 69831, 6778, 7429, 19041, 96994, 97424, 23978, 21452, 20673, 25401, 28334, 28343, 28984, 29379, 29454, 29505, 30594, 31758]


#### get point intersections for all polygons

In [5]:
points = [Point(p) for p in zip(df.longitude, df.latitude)]
polygons = msn.geometry.tolist()
intersections = []

ta = time.time()
for pind, p in enumerate(polygons):
    print('\r{}'.format(pind + 1), end='')
    
    # test for intersection with bounding box of each polygon feature in geom2 using spatial index
    inds = [i for i in idx.intersection(p.bounds)]
    
    # test each feature inside the bounding box for intersection with the polygon geometry
    inds = [i for i in inds if points[i].intersects(p)]
    intersections.append(inds)
print("\nfinished in {:.2f}s\n".format(time.time() - ta))

314
finished in 5.99s



### Find the nearest neighbors to a point

get the 100 crimes committed closest to the Wisconsin State Capitol in 2016

In [18]:
nearest = list(idx.nearest((-89.384100, 43.074686, -89.384100, 43.074686), 100))
print(nearest)

[83691, 1486, 3102, 88928, 58439, 75557, 83925, 94971, 99635, 72877, 25754, 96937, 47488, 53765, 4772, 15574, 8127, 20160, 45803, 67109, 54077, 68431, 61309, 62899, 64362, 49919, 9134, 52322, 54076, 36725, 48212, 41337, 41887, 37042, 35466, 37353, 48266, 38882, 45772, 44259, 37982, 44729, 34551, 4775, 84708, 85286, 88833, 28772, 12280, 35367, 32402, 38623, 69005, 35279, 74059, 77039, 48887, 10788, 15257, 97083, 100354, 98710, 95186, 51436, 59906, 62392, 71322, 92500, 67736, 88420, 75592, 68546, 50111, 55211, 23830, 52665, 10641, 53379, 38138, 3022, 23883, 83268, 87971, 34731, 52835, 34732, 29260, 57749, 91405, 21205, 5437, 1872, 91849, 90119, 83033, 86367, 66286, 68328, 56657, 35297, 54259, 56645, 50389, 45441, 35296, 18697]


In [19]:
dfn = df.iloc[nearest]
dfn.head()

Unnamed: 0,incident_id,case_number,incident_datetime,incident_type_primary,incident_description,clearance_type,address_1,address_2,city,state,...,created_at,updated_at,location,hour_of_day,day_of_week,parent_incident_type,City of Madison Sector Shapes - 54pa-kaun,City of Madison Aldermanic District Shapes - w9zi-qe9k,City of Madison Police District Shapes - ygny-ea98,geometry
83706,776882650,-7666752,10/12/2016 09:29:23 AM,Stolen Auto,Stolen Auto,,1 Block W MIFFLIN ST,,MADISON,WI,...,10/13/2016 09:02:50 AM,10/26/2016 11:09:31 AM,POINT (-89.3841659 43.074698),9,Wednesday,Theft of Vehicle,190315.0,190376.0,190262.0,POINT (305902.2853413947 4771868.762619643)
1486,764368083,-7534539,07/07/2016 12:12:31 PM,Foot Patrol,Foot Patrol,,1 Block E MAIN ST,,MADISON,WI,...,07/08/2016 08:54:07 AM,07/21/2016 11:07:35 AM,POINT (-89.3841678 43.074691),12,Thursday,Community Policing,190315.0,190376.0,190262.0,POINT (305902.1085487101 4771867.989623718)
3102,764514856,-7537639,07/09/2016 02:41:38 PM,Check Property,Check Property,,1 Block E MAIN ST,,MADISON,WI,...,07/10/2016 08:56:33 AM,07/23/2016 11:14:14 AM,POINT (-89.3841678 43.074691),14,Saturday,Community Policing,190315.0,190376.0,190262.0,POINT (305902.1085487101 4771867.989623718)
88945,772711673,-7637406,09/19/2016 12:18:20 PM,Check Person,Check Person,,1 Block N PINCKNEY ST,,MADISON,WI,...,09/20/2016 09:04:05 AM,10/03/2016 11:07:12 AM,POINT (-89.3831815 43.0755258),12,Monday,Community Policing,190315.0,190376.0,190262.0,POINT (305985.0427126487 4771958.416625443)
58448,754605973,-7400330,03/28/2016 10:22:40 AM,Check Person,Check Person,,100 Block MARTIN LUTHER KING JR BLVD,,MADISON,WI,...,03/29/2016 08:38:10 AM,04/11/2016 11:09:38 AM,POINT (-89.3831282 43.0738716),10,Monday,Community Policing,190320.0,190376.0,190262.0,POINT (305984.1605091513 4771774.583873359)


### Use `mplleaflet` to display the incidents closest to the capitol
can be installed with the conda package manager
```
>conda install mplleaflet
```

In [20]:
fig, ax = plt.subplots()
ax.scatter(dfn.longitude, dfn.latitude)
mplleaflet.display()