## Imports & Installs

### Installs

In [1]:
!pip install geopandas
!pip install geopy
!pip install shapely
!pip install pyproj



### Imports

In [2]:
import pandas as pd
import numpy as np
import math
import random

import matplotlib.pyplot as plt

import geopandas as gpd
import geopy
from geopy.distance import distance, Distance, great_circle
import shapely
from shapely.geometry import Polygon
import pyproj

## NYPD Complaints Indexing

#### Shapefile

In [3]:
# Borough-boundary shapefile used to determine the indexing grid variables.
shp = gpd.read_file('/Users/ernestvmo/Downloads/Borough Boundaries/geo_export_c4a7f77b-811d-4998-b675-a83cf42208e0.shp')

# Reproject the whole frame to EPSG:2263 through GeoDataFrame.to_crs, so the
# geometries and the frame's CRS metadata are updated together instead of
# swapping a reprojected geometry array into the existing frame.
shp = shp.to_crs(epsg=2263)

In [4]:
# Overall bounding box of every geometry in the (reprojected) shapefile.
# `total_bounds` yields (minx, miny, maxx, maxy) in a single pass, instead of
# rebuilding the per-row `bounds` table once for each of the four extremes.
min_x, min_y, max_x, max_y = shp.total_bounds

# Corners of the bounding box, named grid_<row><col>:
# row 0 uses max_y, row 1 uses min_y; col 0 uses min_x, col 1 uses max_x.
grid_00 = (min_x, max_y)
grid_01 = (max_x, max_y)
grid_10 = (min_x, min_y)
grid_11 = (max_x, min_y)

# Size of one grid cell along x and y, in the same units as the coordinates
# above. NOTE(review): presumably derived so that one cell is ~2 km per side;
# the derivation is not shown in this notebook -- confirm before changing.
cell_x_diff = 6579.3
cell_y_diff = -6574.5

# calculated using great_circle distance
height = 46.63508344453982 # height of NYC in km
width = 46.68408524309399 # width of NYC in km

In [8]:
# Inspect the smallest `miny` bound found across the shapefile geometries.
min_y

120128.36999505223

#### Load data

In [38]:
# Location of the raw NYPD complaint export (CSV).
nypd_complaints_file = "/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/NYPDcomplaintData.csv"

# Read the complete file; column selection and cleaning happen in later cells.
nypd_complaints_df = pd.read_csv(filepath_or_buffer=nypd_complaints_file)

### Assign Grid Index (floor division by cell size)

In [39]:
# Reduce the raw complaints to the three columns needed for indexing and clean
# them in one chain. Building the result with `dropna` + `assign` (rather than
# assigning columns onto a boolean-filtered slice) avoids pandas'
# SettingWithCopyWarning and keeps every step visible in order.
nypd_complaints_df = (
    nypd_complaints_df[['CMPLNT_FR_DT', 'X_COORD_CD', 'Y_COORD_CD']]
    .rename(columns={'X_COORD_CD': 'xcoord', 'Y_COORD_CD': 'ycoord'})
    # Rows without both coordinates cannot be placed on the grid.
    .dropna(subset=['xcoord', 'ycoord'])
    .assign(
        CMPLNT_FR_DT=lambda df: pd.to_datetime(df['CMPLNT_FR_DT']),
        # Coordinates are handled as text here: strip the literal ','
        # separators (regex=False -> plain substring match) before casting.
        xcoord=lambda df: df['xcoord'].str.replace(',', '', regex=False).astype(float),
        ycoord=lambda df: df['ycoord'].str.replace(',', '', regex=False).astype(float),
    )
)

In [40]:
# Column index: number of whole cells between the grid's minimum x and the point.
grid_col = np.floor((nypd_complaints_df['xcoord'].values - min_x) / cell_x_diff)

# Row index: distance from min_y expressed in cells, subtracted from
# `height // 2` so that the index grows as y decreases.
# The vertical axis uses `height` (north-south extent in km); the previous
# version used `width` here. Both floor-divide to 23 with the constants defined
# in this notebook, so the resulting indices are unchanged.
# NOTE(review): `// 2` presumably reflects ~2 km cells -- confirm.
cells_above_min_y = (nypd_complaints_df['ycoord'].values - min_y) / abs(cell_y_diff)
grid_row = np.floor((height // 2) - cells_above_min_y)

# Attach both indices and store them as integers.
nypd_complaints_df = nypd_complaints_df.assign(
    nyc_index_x=grid_col,
    nyc_index_y=grid_row,
).astype({'nyc_index_x': 'int64', 'nyc_index_y': 'int64'})

### Grouping

In [52]:
# Rename the complaint date column to `datestop` and add a `coordinates`
# column holding each row's (nyc_index_x, nyc_index_y) pair as a tuple.
nypd_complaints_df = (
    nypd_complaints_df
    .rename(columns={'CMPLNT_FR_DT': 'datestop'})
    .assign(
        coordinates=lambda df: [
            (col, row) for col, row in zip(df['nyc_index_x'], df['nyc_index_y'])
        ]
    )
)

In [69]:
# Count complaints per (date, grid cell). Two layouts are produced:
#   _1 keys the cell by the `coordinates` tuple column,
#   _2 keys the cell by the separate x / y index columns.
# `reset_index(name=...)` names the count column directly.
grouped_nypd_complaints_1 = (
    nypd_complaints_df
    .groupby(['datestop', 'coordinates'])
    .size()
    .reset_index(name='nypd_compaints_COUNT')
)

grouped_nypd_complaints_2 = (
    nypd_complaints_df
    .groupby(['datestop', 'nyc_index_x', 'nyc_index_y'])
    .size()
    .reset_index(name='nypd_compaints_COUNT')
)

In [70]:
# Directory that receives the grouped CSV exports (note the trailing slash:
# file names are appended by plain string concatenation).
save_location = '/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/grouped_csv/'

# Persist the tuple-keyed counts. The frame's index is written as well,
# because `index` is left at its default.
grouped_csv_path = save_location + 'grouped_nypd_complaints.csv'
grouped_nypd_complaints_1.to_csv(path_or_buf=grouped_csv_path)

# Alternative export of the x / y keyed counts, currently disabled:
# grouped_nypd_complaints_2.to_csv('grouped_nypd_complaints.csv')