## Imports & Installs

#### Installs

In [None]:
!pip install geopandas
!pip install geopy
!pip install shapely
!pip install pyproj

### Imports

In [1]:
import pandas as pd
import numpy as np
import math
import random

import matplotlib.pyplot as plt

import geopandas as gpd
import geopy
from geopy.distance import distance, Distance, great_circle
import shapely
from shapely.geometry import Polygon
import pyproj

## Stop and Frisk Indexing

#### shapefile

In [3]:
# shapefile used to determine indexing grid variables
shp = gpd.read_file('/Users/ernestvmo/Downloads/Borough Boundaries/geo_export_c4a7f77b-811d-4998-b675-a83cf42208e0.shp')
shp['geometry'] = shp['geometry']

In [4]:
min_x = min(shp.bounds['minx'].values)
max_x = max(shp.bounds['maxx'].values)

min_y = min(shp.bounds['miny'].values)
max_y = max(shp.bounds['maxy'].values)

grid_00 = (min_x, max_y)
grid_01 = (max_x, max_y)
grid_10 = (min_x, min_y)
grid_11 = (max_x, min_y)

cell_x_diff = 6579.3
cell_y_diff = -6574.5

# calculated using great_circle distance
height = 46.63508344453982 # height of NYC 
width = 46.68408524309399 # width of NYC

In [7]:
min_y, max_y

(40.496133987611806, 40.91553277650264)

#### Load data

In [37]:
stop_frisk_file = "/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/NYPDstopAndFrisk12-13.csv"
saf_df = pd.read_csv(stop_frisk_file)

### Assign Index using Modulo

In [38]:
saf_df = saf_df[['datestop', 'xcoord', 'ycoord']]

In [39]:
saf_df['nyc_index_x'], saf_df['nyc_index_y'] = np.floor((saf_df['xcoord'].values - min_x) / cell_x_diff), np.floor(((width // 2) - (saf_df['ycoord'].values - min_y) / abs(cell_y_diff)))
saf_df = saf_df.astype({'nyc_index_x':'int64', 'nyc_index_y':'int64'})
saf_df['coordinates'] = list(zip(saf_df.nyc_index_x, saf_df.nyc_index_y))

In [40]:
saf_df

Unnamed: 0,datestop,xcoord,ycoord,nyc_index_x,nyc_index_y,coordinates
0,2012-07-02,1019585,184371,16,13,"(16, 13)"
1,2012-07-03,987078,215157,11,8,"(11, 8)"
2,2012-07-05,1002416,231297,13,6,"(13, 6)"
3,2012-07-06,988511,164316,11,16,"(11, 16)"
4,2012-07-08,995824,230943,12,6,"(12, 6)"
...,...,...,...,...,...,...
343455,2013-06-30,966801,156143,8,17,"(8, 17)"
343456,2013-06-30,954749,149274,6,18,"(6, 18)"
343457,2013-06-30,954749,149274,6,18,"(6, 18)"
343458,2013-06-30,964020,154103,7,17,"(7, 17)"


### Grouping

In [128]:
saf_df.dtypes

datestop       object
xcoord          int64
ycoord          int64
nyc_index_x     int64
nyc_index_y     int64
coordinates    object
dtype: object

In [121]:
grouped_saf_1 = saf_df.groupby(['datestop', 'coordinates']).size().reset_index()
grouped_saf_2 = saf_df.groupby(['datestop', 'nyc_index_x', 'nyc_index_y']).size().reset_index()

grouped_saf_1 = grouped_saf_1.rename(columns={0:'SAF_COUNT'})
grouped_saf_2 = grouped_saf_2.rename(columns={0:'SAF_COUNT'})

In [125]:
save_location = '/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/grouped_csv/'

grouped_saf_1.to_csv(save_location + 'saf_grouped.csv')
# grouped_nypd_complaints_2.to_csv('grouped_nypd_complaints.csv')