In [2]:
#import packages
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Point
from shapely import wkt
import requests
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import urllib.request, json
import requests
import urllib.parse
import datetime
import pickle

import warnings
warnings.filterwarnings('ignore')

# Use the SODA API for NYPD Complaint Data Historic

This dataset includes all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD) from 2006 to the present day. We will extract the 2019 complaints and aggregate them to the Neighborhood Tabulation Area (NTA) level.

Important Column Names:

* cmplnt_num: unique id for each complaint

* cmplnt_fr_dt: Date of occurrence

* lat_lon: geospatial location point of incident

In [3]:
# Load the 2020 NTA boundaries first. The source is a CSV (not a shapefile)
# whose 'geometry' column stores each polygon as WKT text.
nta_raw = pd.read_csv('../Data/2020NTA.csv')

# Parse the WKT into shapely geometries on a copy so the raw frame stays
# untouched and this cell can be re-run safely. The CRS is declared with the
# canonical "EPSG:4326" authority string (WGS84 longitude/latitude).
nta_shp = gpd.GeoDataFrame(
    nta_raw.assign(geometry=nta_raw['geometry'].apply(wkt.loads)),
    geometry='geometry',
    crs='EPSG:4326',
)
nta_shp.head()

Unnamed: 0,OBJECTID,BoroCode,BoroName,CountyFIPS,NTA2020,NTAName,NTAAbbrev,NTAType,CDTA2020,CDTAName,Shape__Area,Shape__Length,geometry
0,1,3,Brooklyn,47,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),35321790.0,28914.13164,"POLYGON ((-73.93214 40.72817, -73.93238 40.728..."
1,2,3,Brooklyn,47,BK0102,Williamsburg,Wllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28862140.0,28158.316197,"POLYGON ((-73.96176 40.72524, -73.96154 40.725..."
2,3,3,Brooklyn,47,BK0103,South Williamsburg,SWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),15208960.0,18250.280262,"POLYGON ((-73.95024 40.70548, -73.94984 40.705..."
3,4,3,Brooklyn,47,BK0104,East Williamsburg,EWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),52266280.0,43184.732279,"POLYGON ((-73.92406 40.71412, -73.92404 40.714..."
4,5,3,Brooklyn,47,BK0201,Brooklyn Heights,BkHts,0,BK02,BK02 Downtown Brooklyn-Fort Greene (CD 2 Appro...,9982387.0,14312.482065,"POLYGON ((-73.99237 40.68970, -73.99436 40.690..."


In [10]:
# Pull the 2019 complaints from the NYPD Complaint Data Historic dataset via
# the SODA API, requesting only the columns used below.
url = 'https://data.cityofnewyork.us/resource/qgea-i56i.json'

soql_params = {
    '$select': 'cmplnt_num,cmplnt_fr_dt,lat_lon',
    '$where': "cmplnt_fr_dt between '2019-01-01' and '2019-12-31'",
    # Large explicit limit so the whole year is returned in a single response.
    '$limit': 5000000,
}

# Percent-encode the parameter values (spaces -> %20) instead of patching the
# string by hand; "$", "," and "'" are left literal, as in the original URL.
query = url + '?' + urllib.parse.urlencode(
    soql_params, safe="$,'", quote_via=urllib.parse.quote
)

# Use a context manager so the HTTP connection is closed once the payload has
# been read.
with urllib.request.urlopen(query) as response:
    data = json.loads(response.read())

# Attach a shapely Point (x = longitude, y = latitude) to every record.
# NOTE(review): records with no recorded location may come back without a
# usable 'lat_lon' entry; they get geometry=None instead of raising KeyError,
# and therefore simply will not match any polygon in a later spatial join.
for d in data:
    lat_lon_dict = d.get('lat_lon')
    has_coords = (
        isinstance(lat_lon_dict, dict)
        and 'latitude' in lat_lon_dict
        and 'longitude' in lat_lon_dict
    )
    if not has_coords:
        d['geometry'] = None
        continue
    d['geometry'] = Point(
        float(lat_lon_dict['longitude']),
        float(lat_lon_dict['latitude']),
    )

In [11]:
# Peek at the first record to check the returned fields and the attached geometry.
data[0]

{'cmplnt_num': '745872483',
 'cmplnt_fr_dt': '2019-01-01T00:00:00.000',
 'lat_lon': {'latitude': '40.86503877900003',
  'longitude': '-73.92694500099998'},
 'geometry': <POINT (-73.927 40.865)>}

In [13]:
# Store the complaint records in a GeoDataFrame. The points were built from
# longitude/latitude, so the CRS is declared with the canonical "EPSG:4326"
# authority string (no embedded whitespace). The raw 'lat_lon' dict column is
# dropped because the 'geometry' column now carries the location.
crime_comps = (
    gpd.GeoDataFrame(data, geometry='geometry', crs='EPSG:4326')
    .drop(columns=['lat_lon'])
)

In [15]:
# Tag each complaint with the neighborhood polygon that contains it.
# how='inner' keeps only complaints that fall within some NTA polygon.
nta_crime = gpd.sjoin(
    crime_comps,
    nta_shp,
    how='inner',
    predicate='within',
)

In [16]:
# Preview the joined rows: each complaint now carries the columns of the NTA it matched.
nta_crime.head()

Unnamed: 0,cmplnt_num,cmplnt_fr_dt,geometry,index_right,OBJECTID,BoroCode,BoroName,CountyFIPS,NTA2020,NTAName,NTAAbbrev,NTAType,CDTA2020,CDTAName,Shape__Area,Shape__Length
1,394332203,2019-01-01T00:00:00.000,POINT (-73.89635 40.67378),15,16,3,Brooklyn,47,BK0502,East New York (North),ENY_N,0,BK05,BK05 East New York-Cypress Hills (CD 5 Approxi...,27907170.0,25635.338533
120,215788290,2019-01-01T00:00:00.000,POINT (-73.88143 40.67111),15,16,3,Brooklyn,47,BK0502,East New York (North),ENY_N,0,BK05,BK05 East New York-Cypress Hills (CD 5 Approxi...,27907170.0,25635.338533
266,829603381,2019-01-01T00:00:00.000,POINT (-73.88143 40.67111),15,16,3,Brooklyn,47,BK0502,East New York (North),ENY_N,0,BK05,BK05 East New York-Cypress Hills (CD 5 Approxi...,27907170.0,25635.338533
460,782154177,2019-01-01T00:00:00.000,POINT (-73.89756 40.66903),15,16,3,Brooklyn,47,BK0502,East New York (North),ENY_N,0,BK05,BK05 East New York-Cypress Hills (CD 5 Approxi...,27907170.0,25635.338533
482,697507528,2019-01-01T00:00:00.000,POINT (-73.89620 40.66924),15,16,3,Brooklyn,47,BK0502,East New York (North),ENY_N,0,BK05,BK05 East New York-Cypress Hills (CD 5 Approxi...,27907170.0,25635.338533


In [24]:
# Count the distinct complaint ids in each neighborhood and expose the result
# as a two-column frame: NTAName, Total_NYPD_Complaints.
complaints_per_nta = nta_crime.groupby('NTAName')['cmplnt_num'].nunique()
nta_crime_agg = (
    complaints_per_nta
    .rename('Total_NYPD_Complaints')
    .reset_index()
)

# Show the neighborhoods with the most complaints.
nta_crime_agg.sort_values(by='Total_NYPD_Complaints', ascending=False).head(15)

Unnamed: 0,NTAName,Total_NYPD_Complaints
136,Midtown-Times Square,8111
59,East Harlem (North),7679
135,Midtown South-Flatiron-Union Square,7126
16,Bedford-Stuyvesant (East),6727
143,Mott Haven-Port Morris,6711
97,Harlem (North),6090
17,Bedford-Stuyvesant (West),5960
26,Brownsville,5763
132,Melrose,5738
46,Crown Heights (North),5358


In [25]:
# Persist the per-neighborhood totals. The row index is written as well (the
# pandas default), so the file gets an unnamed leading column.
output_path = '../Data/NTA_Aggregated_Crime_Complaints2019.csv'
nta_crime_agg.to_csv(output_path, index=True)