# Needle Collection in NYC Parks: A Data Exploration

By Aaron Potts

### Setup

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import shapely as sh
from shapely import wkt
import matplotlib.pyplot as plt
import plotly.express as px


Our data comes from the [NYC Parks Syringe Litter Data Collection](https://data.cityofnewyork.us/browse?Data-Collection_Data-Collection=NYC+Parks+Syringe+Litter&q=). Most of it is taken from the Summary of Syringe Data in NYC Parks, with location data from the Parks Properties sheets. The data mostly covers the portions of Manhattan and the Bronx that are the responsibility of the NYC Parks Department, the Parks Department Bloodborne Pathogen Crew, the Washington Heights Corner Project, and New York Harm Reduction Educators; the latter two have since merged to form [OnPointNYC](https://onpointnyc.org/). It does not contain personally identifying information, and in fact lacks any staffing information beyond broad attributions to different groups.

In [2]:
# Raw inputs: the syringe collection summary and the park property geometries.
syringe_csv = "Data/Summary_of_Syringe_Data_in_NYC_Parks.csv"
parks_csv = "Data/Parks_Properties.csv"

sy = pd.read_csv(syringe_csv)
pp = pd.read_csv(parks_csv)

In [3]:
# Keep only the property ID and its WKT geometry; nothing else from the parks table is used.
park_columns = ['GISPROPNUM', 'multipolygon']
pp = pp.loc[:, park_columns]

In [4]:
# Preview the first ten parks: property ID plus the WKT multipolygon text.
pp.head(10)

Unnamed: 0,GISPROPNUM,multipolygon
0,Q084A,MULTIPOLYGON (((-73.8587476480729 40.767414466...
1,Q498,MULTIPOLYGON (((-73.82218300936414 40.59892072...
2,Q346,MULTIPOLYGON (((-73.72738293199147 40.75605209...
3,B510,MULTIPOLYGON (((-73.91598050092 40.66927372409...
4,B058,MULTIPOLYGON (((-73.9529286105069 40.720436278...
5,B502,MULTIPOLYGON (((-73.94519019107632 40.68617686...
6,X195E,MULTIPOLYGON (((-73.84374321952677 40.82901804...
7,B426,MULTIPOLYGON (((-73.9396631946347 40.712601061...
8,B425,MULTIPOLYGON (((-73.93893539617332 40.71323418...
9,Q454,MULTIPOLYGON (((-73.73905463825886 40.65284962...


In [5]:
# Count missing values per column before parsing the geometries.
pp.isna().sum()

GISPROPNUM      0
multipolygon    0
dtype: int64

In [6]:
# Parse each park's WKT multipolygon with Shapely and store the geometry's
# centroid in a new 'centroid' column.
# Only the 'multipolygon' column is read, so apply over that Series instead of
# using DataFrame.apply(axis=1), which builds a full row object for every park.
pp['centroid'] = pp['multipolygon'].apply(lambda text: wkt.loads(text).centroid)

In [7]:
# Split each centroid into separate coordinate columns for later mapping.
# The centroid exposes its coordinates as the .x and .y attributes.
# The WKT text lists longitude first and latitude second, so y is the latitude
# and x is the longitude.
pp['latitude'] = pp['centroid'].map(lambda point: point.y)
pp['longitude'] = pp['centroid'].map(lambda point: point.x)

In [8]:
# The raw geometry and the intermediate centroid object are no longer needed
# once latitude and longitude have been extracted.
pp = pp.drop(["multipolygon", "centroid"], axis="columns")

In [9]:
# Preview the result: one row per property ID with its centroid latitude and longitude.
pp.head(10)

Unnamed: 0,GISPROPNUM,latitude,longitude
0,Q084A,40.753159,-73.853168
1,Q498,40.59853,-73.822995
2,Q346,40.756623,-73.727964
3,B510,40.66938,-73.916197
4,B058,40.720857,-73.951842
5,B502,40.68629,-73.945212
6,X195E,40.829058,-73.843833
7,B426,40.712737,-73.939715
8,B425,40.7133,-73.939052
9,Q454,40.648779,-73.742196


In [12]:
# Column names, non-null counts and dtypes of the syringe table.
# NOTE(review): this cell's execution count (12) is out of sequence with the
# cleaning cell that follows it (10), and the same call appears again after
# cleaning; confirm the notebook still reproduces under Restart & Run All.
sy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25575 entries, 0 to 25574
Data columns (total 23 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   response_id      25575 non-null  object 
 1   gispropnum       25452 non-null  object 
 2   omppropid        25452 non-null  object 
 3   collected_date   25575 non-null  object 
 4   time_of_day      12082 non-null  object 
 5   year             25575 non-null  int64  
 6   month            25575 non-null  int64  
 7   month_text       25575 non-null  object 
 8   week             25575 non-null  int64  
 9   group            25575 non-null  object 
 10  location         25452 non-null  object 
 11  ground_syringes  19300 non-null  float64
 12  kiosk_syringes   3921 non-null   float64
 13  total_syringes   23627 non-null  float64
 14  kiosk_number     6625 non-null   float64
 15  kiosk_type       6291 non-null   object 
 16  precinct         25450 non-null  float64
 17  borough     

In [10]:
# Data cleaning
# - Drop the columns this analysis does not use (the original note records
#   "weird errors" with the week column, among others).
# - Discard rows that have no park property ID (gispropnum).
# - Parse collected_date into datetimes.
# - Two particular entries were fixed: the precinct is set explicitly for
#   McNally Plaza (34) and Bronx Park (52).
# - Label a missing time_of_day as "Daily".

sy = sy.drop(columns = ['omppropid', 'month', 'month_text', 'kiosk_number', 'kiosk_type', 'source', 'created_date', 'week', 'year'])
sy.dropna(subset=['gispropnum'], inplace=True)
sy['collected_date'] = pd.to_datetime(sy['collected_date'])
sy.loc[sy['location'] == 'McNally Plaza', 'precinct'] = 34
sy.loc[sy['location'] == 'Bronx Park', 'precinct'] = 52
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form is deprecated in pandas 2.x and
# leaves `sy` unchanged when Copy-on-Write is enabled.
sy['time_of_day'] = sy['time_of_day'].fillna("Daily")

In [11]:
# Preview the first ten rows of the cleaned syringe table.
sy.head(10)

Unnamed: 0,response_id,gispropnum,collected_date,time_of_day,group,location,ground_syringes,kiosk_syringes,total_syringes,precinct,borough,district,property_type,kiosk_site
34,M-001244,M037,2021-11-04,AM,Parks,Highbridge Park,22.0,,22.0,34.0,Manhattan,M-12A,PLGD,True
35,M-001235,M037,2021-11-03,AM,Parks,Highbridge Park,50.0,,50.0,34.0,Manhattan,M-12A,PLGD,True
36,M-001227,M037,2021-11-02,AM,Parks,Highbridge Park,54.0,,54.0,34.0,Manhattan,M-12A,PLGD,True
37,M-001219,M037,2021-11-01,AM,Parks,Highbridge Park,10.0,,10.0,34.0,Manhattan,M-12A,PLGD,True
38,M-001212,M037,2021-10-31,AM,Parks,Highbridge Park,7.0,,7.0,34.0,Manhattan,M-12A,PLGD,True
39,M-001205,M037,2021-10-30,AM,Parks,Highbridge Park,10.0,,10.0,34.0,Manhattan,M-12A,PLGD,True
40,M-001199,M037,2021-10-29,AM,Parks,Highbridge Park,22.0,,22.0,34.0,Manhattan,M-12A,PLGD,True
41,M-000881,M037,2021-09-17,AM,Parks,Highbridge Park,6.0,,6.0,34.0,Manhattan,M-12A,PLGD,True
42,M-000872,M037,2021-09-15,AM,Parks,Highbridge Park,2.0,,2.0,34.0,Manhattan,M-12A,PLGD,True
43,M-000868,M037,2021-09-14,AM,Parks,Highbridge Park,4.0,,4.0,34.0,Manhattan,M-12A,PLGD,True


In [12]:
# Re-check columns, non-null counts and dtypes after cleaning.
sy.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25452 entries, 34 to 25574
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   response_id      25452 non-null  object        
 1   gispropnum       25452 non-null  object        
 2   collected_date   25452 non-null  datetime64[ns]
 3   time_of_day      25452 non-null  object        
 4   group            25452 non-null  object        
 5   location         25452 non-null  object        
 6   ground_syringes  19177 non-null  float64       
 7   kiosk_syringes   3921 non-null   float64       
 8   total_syringes   23504 non-null  float64       
 9   precinct         25452 non-null  float64       
 10  borough          25452 non-null  object        
 11  district         25452 non-null  object        
 12  property_type    25452 non-null  object        
 13  kiosk_site       25055 non-null  object        
dtypes: datetime64[ns](1), float64(4), obj

In [13]:
# Summary statistics for the numeric columns (syringe counts and precinct).
sy.describe()

Unnamed: 0,ground_syringes,kiosk_syringes,total_syringes,precinct
count,19177.0,3921.0,23504.0,25452.0
mean,30.596079,33.0,30.468601,38.03206
std,86.428795,71.884566,84.521003,10.912661
min,1.0,1.0,0.0,1.0
25%,5.0,3.0,4.0,34.0
50%,11.0,10.0,10.0,40.0
75%,27.0,30.0,27.0,44.0
max,7000.0,1000.0,7000.0,122.0


In [17]:
# Summary statistics for every column, including the non-numeric ones.
# NOTE(review): the execution count (17) is out of sequence, and the saved
# output lists year, month_text and week, which the cleaning cell drops.
# The output appears to come from a different state of `sy`; re-run to confirm.
sy.describe(include='all')

  sy.describe(include='all')


Unnamed: 0,response_id,gispropnum,collected_date,time_of_day,year,month_text,week,group,location,ground_syringes,kiosk_syringes,total_syringes,precinct,borough,property_type,kiosk_site
count,23954,23954,23954,23954,23954.0,23954,23954.0,23954,23954,17679.0,3921.0,22006.0,23954.0,23954,23954,23557
unique,23954,280,1832,3,7.0,12,52.0,3,271,,,,,4,4,2
top,M-000698,M037,2019-01-22 00:00:00,Daily,2022.0,Jan,43.0,Parks,Highbridge Park,,,,,Bronx,PARK,True
freq,1,6632,104,13474,7574.0,2267,581.0,12056,6632,,,,,15952,17256,19089
first,,,2017-01-01 00:00:00,,,,,,,,,,,,,
last,,,2023-02-13 00:00:00,,,,,,,,,,,,,
mean,,,,,,,,,,31.685672,33.0,31.335272,38.060908,,,
std,,,,,,,,,,89.581992,71.884566,86.9936,10.755392,,,
min,,,,,,,,,,1.0,1.0,0.0,1.0,,,
25%,,,,,,,,,,5.0,3.0,4.0,34.0,,,


In [14]:
# Count the missing values remaining in each column after cleaning.
sy.isna().sum()

response_id            0
gispropnum             0
collected_date         0
time_of_day            0
group                  0
location               0
ground_syringes     6275
kiosk_syringes     21531
total_syringes      1948
precinct               0
borough                0
district               0
property_type          0
kiosk_site           397
dtype: int64

In [15]:

# Attach each park's centroid coordinates to the syringe records by property ID.
# A left join keeps every syringe record even when no matching park is found.
pp_by_property = pp.set_index('GISPROPNUM')
sy1 = sy.join(pp_by_property, on='gispropnum', how='left')

In [16]:
# Build a location -> coordinates lookup for later mapping.
# Coordinates are averaged per location name so that each location gets exactly
# one point; this loses some location granularity.

location_map = (
    sy1.groupby('location')[['longitude', 'latitude']]
    .mean()
    .reset_index()
)

In [17]:
# Write the joined records and the per-location coordinate key to CSV
# (the index is written as well, matching DataFrame.to_csv's default).
for frame, filename in (
    (sy1, "Syringe Location Data.csv"),
    (location_map, "Syringe Location Key.csv"),
):
    frame.to_csv(filename)