In [1]:
import csv
import json
import os

import matplotlib as plot
import numpy as np
import pymongo as pm
from geopy import distance

Connect to Databases

In [2]:
# Create a client with pymongo's default connection settings and select the
# "buffalo" database that every later cell reads from and writes to.
connection = pm.MongoClient()
buffalo = connection["buffalo"]

Read in data

In [3]:
def readGeoJson(FILE_PATH):
    """Read the file at FILE_PATH and return its contents decoded from JSON."""
    with open(FILE_PATH, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [4]:
def ingestGeoJsonFeatureToMongoDB(geojson, collection):
    """Insert every entry of geojson['features'] into collection, one document at a time.

    collection only needs to provide an insert_one(document) method.
    """
    documents = geojson['features']
    store = collection.insert_one
    for document in documents:
        store(document)

In [5]:
def readCSV(FILE_PATH):
    """Parse the comma-delimited, double-quote-quoted file at FILE_PATH.

    Returns a numpy array built from the list of parsed rows; every cell is
    kept as the string the csv reader produced (the header row is included).
    """
    with open(FILE_PATH, 'r') as handle:
        rows = [row for row in csv.reader(handle, delimiter=',', quotechar='"')]
    return np.array(rows)

In [6]:
# Folder holding the raw Buffalo data files. Set the BUFFALO_DATA_DIR
# environment variable to run the notebook on another machine; when it is
# unset, the original author's local path is used as before.
directory = os.environ.get(
    "BUFFALO_DATA_DIR",
    "/home/apjansing/Documents/Hack Upstate/Buffalo/data/",
)
# os.path.join tolerates a directory given with or without a trailing slash.
camerasData = readGeoJson(os.path.join(directory, "Buffalo Police Department Camera Locations.geojson"))
policeDistricts = readGeoJson(os.path.join(directory, "Police Districts.geojson"))
streetsData = readGeoJson(os.path.join(directory, "Streets.geojson"))
crimeData = readCSV(os.path.join(directory, "Crime_Incidents.csv"))

Ingest data to Collections

In [7]:
# Rebuild the cameras collection from scratch so re-running this cell does not
# duplicate documents: drop it, index "geometry" as 2dsphere, then load features.
buffalo["cameras"].drop()
buffalo["cameras"].create_index([("geometry", "2dsphere")])
ingestGeoJsonFeatureToMongoDB(camerasData, buffalo["cameras"])

In [8]:
# Rebuild the districts collection from scratch so re-running this cell does not
# duplicate documents: drop it, index "geometry" as 2dsphere, then load features.
buffalo["districts"].drop()
buffalo["districts"].create_index([("geometry", "2dsphere")])
ingestGeoJsonFeatureToMongoDB(policeDistricts, buffalo["districts"])

In [9]:
# Rebuild the streets collection from scratch so re-running this cell does not
# duplicate documents: drop it, index "geometry" as 2dsphere, then load features.
buffalo["streets"].drop()
buffalo["streets"].create_index([("geometry", "2dsphere")])
ingestGeoJsonFeatureToMongoDB(streetsData, buffalo["streets"])

In [11]:
def getJson(keys, values):
    """Pair keys with values into a dict, converting values to float where possible.

    keys and values are zipped together, so extra entries in the longer of the
    two are ignored. Each value that float() accepts is stored as a float
    (this includes strings such as 'nan' or 'inf'); any other value is stored
    unchanged.

    Returns the resulting dict.
    """
    payload = {}
    for key, value in zip(keys, values):
        try:
            payload[key] = float(value)
        except (TypeError, ValueError, OverflowError):
            # Not convertible to a number: keep the raw value. Only conversion
            # failures are caught, so KeyboardInterrupt/SystemExit still
            # propagate instead of being silently swallowed.
            payload[key] = value
    return payload

In [13]:
# Header row of Crime_Incidents.csv:
#   incident_id, case_number, incident_datetime, incident_type_primary,
#   incident_description, clearance_type, address_1, address_2, city,
#   state, zip, country, latitude, longitude, created_at, updated_at,
#   location, hour_of_day, day_of_week, parent_incident_type
#
# Rebuild the crimes collection: drop it, create the index, then insert one
# document per CSV data row.
# NOTE(review): `location` is stored below as a "longitude,latitude" string;
# confirm whether the 2d index on that field is actually usable for geo queries.
buffalo.crimes.drop()
buffalo.crimes.create_index([("location", pm.GEO2D)])
keys = crimeData[0]
for row in crimeData[1:]:
    payload = getJson(keys, row)
    # Replace the CSV's own location column with "longitude,latitude".
    payload['location'] = ','.join([str(payload['longitude']), str(payload['latitude'])])
    buffalo.crimes.insert_one(payload)

Find some metrics about the streets.

In [48]:
def getLength(loc1, loc2):
    """Return the distance in miles between two points.

    Both points arrive as (longitude, latitude) and are swapped to
    (latitude, longitude) before being passed to geopy's distance.
    """
    lon1, lat1 = loc1[0], loc1[1]
    lon2, lat2 = loc2[0], loc2[1]
    return distance.distance((lat1, lon1), (lat2, lon2)).miles

In [74]:
# Store each street's length in miles as "streetLength".
#
# The length of a line is the sum of the distances between CONSECUTIVE
# vertices; summing over every pair of vertices (as this cell used to do)
# overestimates any line with more than two points.
#
# The cursor is iterated directly rather than with `while cursor.alive:
# cursor.next()`, because `alive` can be True when no documents remain, in
# which case `next()` raises StopIteration.
cursor = buffalo.streets.find()
for token in cursor:
    geometry = token['geometry']
    coords = geometry['coordinates']
    # A LineString holds a single list of points; wrap it so it is processed
    # the same way as the list of lines found in a MultiLineString.
    if geometry.get('type') == 'LineString':
        coords = [coords]
    length = 0
    for line in coords:
        for j in range(len(line) - 1):
            length += getLength(line[j], line[j + 1])
    buffalo.streets.update_one({"_id": token['_id']}, {"$set": {"streetLength": length}})


Find where and how close the closest police camera is

In [77]:
# For every crime, look up the nearest camera with a $near query and store the
# distance to it (in miles) on the crime document as "closestCamera".
#
# The cursor is iterated directly rather than with `while cursor.alive:
# cursor.next()`, because `alive` can be True when no documents remain, in
# which case `next()` raises StopIteration.
cursor = buffalo.crimes.find()
for token in cursor:
    # "location" was stored as "longitude,latitude"; rebuild the numeric pair.
    parts = token['location'].split(',')
    loc = [float(parts[0]), float(parts[1])]
    nearQuery = {"geometry": {"$near": {"$geometry": {"type": "Point", "coordinates": loc}}}}
    # $near returns results ordered nearest-first, so the first hit is the closest camera.
    near = buffalo.cameras.find(nearQuery).limit(1).next()
    nearLoc = near['geometry']['coordinates']
    # getLength takes (longitude, latitude) points and returns miles.
    closestCamera = getLength(nearLoc, loc)
    buffalo.crimes.update_one({"_id": token['_id']}, {"$set": {"closestCamera": closestCamera}})

Sanity check: print any of the first 100 crimes whose closest police camera is more than 50 miles away

In [190]:
# Sanity check on a sample of crimes: print any whose nearest camera is
# implausibly far away.
MAX_CRIMES_TO_CHECK = 100  # only inspect the first crimes returned
OUTLIER_MILES = 50         # distance beyond which a crime is printed

# limit() bounds the sample, and iterating the cursor directly avoids the
# `while cursor.alive: cursor.next()` pattern, which can raise StopIteration
# because `alive` may be True when no documents remain.
cursor = buffalo.crimes.find().limit(MAX_CRIMES_TO_CHECK)
for token in cursor:
    # "location" was stored as "longitude,latitude"; rebuild the numeric pair.
    parts = token['location'].split(',')
    loc = [float(parts[0]), float(parts[1])]
    nearQuery = {"geometry": {"$near": {"$geometry": {"type": "Point", "coordinates": loc}}}}
    near = buffalo.cameras.find(nearQuery).limit(1).next()
    nearLoc = near['geometry']['coordinates']
    # getLength takes (longitude, latitude) points and returns miles.
    if getLength(nearLoc, loc) > OUTLIER_MILES:
        print(token)

{u'incident_id': 765848097.0, u'incident_datetime': u'07/20/2016 11:10:00 AM', u'longitude': -77.2971053, u'location': u'-77.2971053,42.89514', u'zip': 14424.0, u'city': u'BUFFALO', u'incident_description': u'THEFT OF SERVICES', u'created_at': u'07/23/2016 03:58:48 AM', u'case_number': u'16-2020414', u'parent_incident_type': u'Theft', u'updated_at': u'07/27/2016 06:08:06 AM', u'day_of_week': u'Wednesday', u'clearance_type': u'', u'state': u'NY', u'hour_of_day': 11.0, u'address_2': u'', u'country': u'', u'latitude': 42.89514, u'incident_type_primary': u'THEFT OF SERVICES', u'_id': ObjectId('5ad2d0c66c06a0136a3d6c98'), u'address_1': u'PEARL & ST'}
{u'incident_id': 68952074.0, u'incident_datetime': u'05/28/2011 10:00:00 AM', u'longitude': 0.0, u'location': u'0.0,0.0', u'zip': u'null', u'city': u'BUFFALO', u'incident_description': u'Buffalo Police are investigating this report of a crime. It is important to note that this is very preliminary information and further investigation as to the 