## Imports & Installs

### Installs

In [1]:
!pip install geopandas
!pip install geopy
!pip install shapely
!pip install pyproj

### Imports

In [1]:
import pandas as pd
import numpy as np
import math
import random

import matplotlib.pyplot as plt

import geopandas as gpd
import geopy
from geopy.distance import distance, Distance, great_circle
import shapely
from shapely.geometry import Polygon
import pyproj

## 311 data Indexing

#### Shapefile

In [2]:
# Borough-boundary shapefile; its extent is used to derive the indexing grid variables.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
BOROUGH_SHAPEFILE = '/Users/ernestvmo/Downloads/Borough Boundaries/geo_export_c4a7f77b-811d-4998-b675-a83cf42208e0.shp'

# Reproject the whole GeoDataFrame (not just the raw geometry array) to EPSG:2263 so the
# frame-level CRS metadata and the geometry column stay consistent. The bounds derived
# from this frame are later compared against the 311 "State Plane" coordinate columns.
shp = gpd.read_file(BOROUGH_SHAPEFILE).to_crs(epsg=2263)

In [3]:
# Per-geometry bounding boxes in the projected CRS, computed once and reused below.
borough_bounds = shp.bounds

# Overall extent of all geometries in the shapefile.
min_x, max_x = borough_bounds['minx'].min(), borough_bounds['maxx'].max()
min_y, max_y = borough_bounds['miny'].min(), borough_bounds['maxy'].max()

# Corner points of the extent: first digit 0 -> max_y, 1 -> min_y;
# second digit 0 -> min_x, 1 -> max_x.
grid_00, grid_01 = (min_x, max_y), (max_x, max_y)
grid_10, grid_11 = (min_x, min_y), (max_x, min_y)

# Grid cell step along x and y, in the units of the projected coordinates.
# NOTE(review): presumably the cell size of the shared indexing grid; the derivation
# of these two constants is not shown in this notebook.
cell_x_diff = 6579.3
cell_y_diff = -6574.5

# calculated using great_circle distance
height = 46.63508344453982  # height of NYC in km
width = 46.68408524309399  # width of NYC in km

In [4]:
# Show the x extent and then the y extent of the borough boundaries, one pair per line.
for lower, upper in ((min_x, max_x), (min_y, max_y)):
    print(lower, upper)

913175.1090087884 1067382.508422852
120128.36999505223 272844.2938231766


#### Load data

In [56]:
# Pre-filtered 311 complaints export; the first CSV column holds the row index.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
# NOTE(review): the saved output of this cell contains warning fragments; presumably a
# mixed-dtype warning from the CSV parser. Confirm, and consider passing explicit dtypes.
_311_filtered_file = "/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/311filtered.csv"
_311_df = pd.read_csv(
    _311_filtered_file,
    index_col=[0],
)

  exec(code_obj, self.user_global_ns, self.user_ns)
  mask |= (ar1 == a)


### Assign Grid Index (floor of scaled coordinate offsets)

In [58]:
# Parse the creation timestamps, then keep only the calendar date so that the
# complaints can later be grouped per day.
created_at = pd.to_datetime(_311_df['Created Date'])
_311_df['Created Date'] = created_at.dt.date
# 'Closed Date' is intentionally left unparsed here.

In [59]:
# Preview the frame after 'Created Date' has been reduced to plain dates
# (pandas truncates the display to the first and last rows).
_311_df

Unnamed: 0,Created Date,Closed Date,Complaint Type,Descriptor,Incident Zip,X Coordinate (State Plane),Y Coordinate (State Plane)
0,2012-07-01,07/05/2012 12:00:00 AM,NONCONST,RUBBISH,10454,1005484.0,233852.0
1,2012-07-01,07/09/2012 12:00:00 AM,PLUMBING,TOILET,10467,1021274.0,257092.0
2,2012-07-01,07/17/2012 12:00:00 AM,NONCONST,VERMIN,11221,1004475.0,189909.0
3,2012-07-01,07/09/2012 12:00:00 AM,PAINT - PLASTER,CEILING,10454,1005205.0,234836.0
4,2012-07-01,07/10/2012 12:00:00 AM,PAINT - PLASTER,CEILING,11361,1046844.0,217017.0
...,...,...,...,...,...,...,...
1853020,2013-06-30,07/08/2013 12:00:00 AM,Rodent,Condition Attracting Rodents,11221.0,1003028.0,190880.0
1853021,2013-06-30,06/30/2013 12:00:00 AM,Rodent,Mouse Sighting,10040.0,1003928.0,251236.0
1853022,2013-06-30,07/09/2013 04:16:44 PM,Rodent,Condition Attracting Rodents,11369.0,1018302.0,218807.0
1853023,2013-06-30,07/22/2013 12:00:00 AM,Rodent,Rat Sighting,10314.0,949019.0,165690.0


In [60]:
# Keep only the creation date and the two coordinates (under shorter names),
# and discard every complaint that is missing either coordinate.
_311_df = (
    _311_df
    .rename(columns={
        'X Coordinate (State Plane)': 'xcoord',
        'Y Coordinate (State Plane)': 'ycoord',
    })
    [['Created Date', 'xcoord', 'ycoord']]
    .dropna(subset=['xcoord', 'ycoord'])
)

In [61]:
# Offsets of every complaint from the lower-left corner of the borough extent.
x_offset = _311_df['xcoord'].values - min_x
y_offset = _311_df['ycoord'].values - min_y

# Column index grows with x; row index is flipped so that it grows as y decreases.
# NOTE(review): `width // 2` evaluates to 23.0 and acts as the row count here, while
# (max_y - min_y) / abs(cell_y_diff) is about 23.23, so points in the top ~0.23 of a cell
# get row index -1. Confirm whether floor((max_y - y) / abs(cell_y_diff)) was intended
# before changing it; other datasets may be indexed with this same formula.
column_index = np.floor(x_offset / cell_x_diff)
row_index = np.floor((width // 2) - y_offset / abs(cell_y_diff))

_311_df['nyc_index_x'], _311_df['nyc_index_y'] = column_index, row_index
# np.floor returns floats; store the indices as integers.
_311_df = _311_df.astype({'nyc_index_x': 'int64', 'nyc_index_y': 'int64'})

In [62]:
# Sanity check: both grid index columns should now be int64.
_311_df.dtypes

Created Date     object
xcoord          float64
ycoord          float64
nyc_index_x       int64
nyc_index_y       int64
dtype: object

### Grouping

In [63]:
# Rename the date column to 'datestop' and pair the two grid indices into a single
# (x, y) tuple per complaint so a grid cell can be used as one grouping key.
_311_df = _311_df.rename(columns={'Created Date': 'datestop'})
cell_pairs = zip(_311_df['nyc_index_x'], _311_df['nyc_index_y'])
_311_df['coordinates'] = list(cell_pairs)

In [64]:
# Inspect the first 'datestop' entry to confirm it is a plain date object.
_311_df['datestop'].iloc[0]

datetime.date(2012, 7, 1)

In [65]:
# Count the 311 complaints per day and grid cell, in two layouts:
#   grouped_311_df_1 keys the cell by the (x, y) tuple in 'coordinates';
#   grouped_311_df_2 keys the cell by the separate 'nyc_index_x' / 'nyc_index_y' columns.
# Both frames use the same count column name. The second one was previously
# misspelled as '311_compaints_COUNT'.
grouped_311_df_1 = (
    _311_df
    .groupby(['datestop', 'coordinates'])
    .size()
    .reset_index(name='311_complaints_COUNT')
)
grouped_311_df_2 = (
    _311_df
    .groupby(['datestop', 'nyc_index_x', 'nyc_index_y'])
    .size()
    .reset_index(name='311_complaints_COUNT')
)

In [66]:
# Output directory for the grouped CSV files.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
save_location = '/Users/ernestvmo/OneDrive - Universiteit Leiden/Q1/Urban Computing/Project/data/grouped_csv/'

# Persist the per-day, per-cell complaint counts (tuple-keyed layout); the row index is
# written as the first CSV column.
grouped_311_df_1.to_csv(f"{save_location}grouped_311.csv")