In [None]:
"""
This script parses citywide 311 noise complaints. The goal is to see which areas of the city have seen
the biggest change in noise complaints.

TO DO:
- DONE - Download 311 data from NYC Open Data Portal (filter: 'Complaint Type' contains 'Noise')
- Create grid for data aggregation
    - What should grid size be?
    - Consider aggregating by Community District
    
- Create new DF for July 1 2019 - July 1 2020
- Create new DF for July 1 2020 - July 1 2021
- Create a complaint total for each of the two years
- Create the percent change between the two yearly totals
"""

In [50]:
import pandas as pd
import datetime as dt
import openpyxl


In [3]:
# Input file: CSV export of the 311 noise complaints, read by pd.read_csv in the next cell.
# This is an absolute path on a local Windows machine, so it has to be edited before the
# notebook can run anywhere else.
noise_data = r"C:\Users\MN03\Desktop\Calvin Docs\Text Amendments\Text_Amendment_Open_Restaurants\311_Noise_Jan2019_to_present\311_Noise_Jan2019_to_present_citywide.csv"

In [4]:
# Load the full 311 noise-complaint export into a DataFrame.
# low_memory=False makes pandas infer each column's dtype from the whole file instead of
# chunk by chunk, which avoids mixed-type columns and the DtypeWarning that chunked
# inference emits for them.
# NOTE(review): the truncated warning in this cell's saved output looks like that
# DtypeWarning -- confirm it is gone on the next run.
noise_df = pd.read_csv(noise_data, low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [14]:
noise_df.dtypes

unique_key                                 int64
created_date                              object
closed_date                               object
agency                                    object
agency_name                               object
complaint_type                            object
descriptor                                object
location_type                             object
incident_zip                             float64
incident_address                          object
street_name                               object
cross_street_1                            object
cross_street_2                            object
intersection_street_1                     object
intersection_street_2                     object
address_type                              object
city                                      object
landmark                                  object
facility_type                             object
status                                    object
due_date            

In [6]:
# Normalize the column labels: swap every space for an underscore.
noise_df.columns = [label.replace(' ', '_') for label in noise_df.columns]

In [7]:
# Lowercase every column label.
noise_df.columns = [label.lower() for label in noise_df.columns]

In [8]:
noise_df.columns

Index(['unique_key', 'created_date', 'closed_date', 'agency', 'agency_name',
       'complaint_type', 'descriptor', 'location_type', 'incident_zip',
       'incident_address', 'street_name', 'cross_street_1', 'cross_street_2',
       'intersection_street_1', 'intersection_street_2', 'address_type',
       'city', 'landmark', 'facility_type', 'status', 'due_date',
       'resolution_description', 'resolution_action_updated_date',
       'community_board', 'bbl', 'borough', 'x_coordinate_(state_plane)',
       'y_coordinate_(state_plane)', 'open_data_channel_type',
       'park_facility_name', 'park_borough', 'vehicle_type',
       'taxi_company_borough', 'taxi_pick_up_location', 'bridge_highway_name',
       'bridge_highway_direction', 'road_ramp', 'bridge_highway_segment',
       'latitude', 'longitude', 'location'],
      dtype='object')

In [9]:
# Seed the "date_only" column with a straight copy of created_date. The next cell
# overwrites the column with only the first 10 characters of created_date, so this
# assignment is effectively a placeholder.
noise_df["date_only"] = noise_df.created_date

In [10]:
# Store the leading 10 characters of created_date (the date portion of the text) as a
# separate string column.
noise_df["date_only"] = noise_df["created_date"].str[:10]


In [11]:
noise_df["date_only"]

0          01/01/2019
1          01/01/2019
2          01/01/2019
3          01/01/2019
4          01/01/2019
              ...    
1748840    08/08/2021
1748841    08/08/2021
1748842    08/08/2021
1748843    08/08/2021
1748844    08/08/2021
Name: date_only, Length: 1748845, dtype: object

In [12]:
# Parse created_date into real timestamps. The sample rows displayed in the next cell show
# values such as "01/01/2019 12:00:29 AM" (month/day/year, 12-hour clock with AM/PM), so the
# format is spelled out instead of being inferred: the month/day order is unambiguous, the
# result does not depend on the pandas version's inference rules, and a row that does not
# match the format raises instead of being silently misread.
noise_df["date_test"] = pd.to_datetime(noise_df["created_date"], format="%m/%d/%Y %I:%M:%S %p")

In [13]:
noise_df[["date_test","date_only", "created_date"]]

Unnamed: 0,date_test,date_only,created_date
0,2019-01-01 00:00:29,01/01/2019,01/01/2019 12:00:29 AM
1,2019-01-01 00:00:43,01/01/2019,01/01/2019 12:00:43 AM
2,2019-01-01 00:02:13,01/01/2019,01/01/2019 12:02:13 AM
3,2019-01-01 00:03:08,01/01/2019,01/01/2019 12:03:08 AM
4,2019-01-01 00:04:26,01/01/2019,01/01/2019 12:04:26 AM
...,...,...,...
1748840,2021-08-08 01:07:51,08/08/2021,08/08/2021 01:07:51 AM
1748841,2021-08-08 01:07:54,08/08/2021,08/08/2021 01:07:54 AM
1748842,2021-08-08 01:07:59,08/08/2021,08/08/2021 01:07:59 AM
1748843,2021-08-08 01:08:04,08/08/2021,08/08/2021 01:08:04 AM


In [None]:
# Build a boolean mask for complaints created at hour 13 (1 PM) or later, then show the
# matching rows.
#
# Reference pattern for filtering rows to a range of hours:
#     df["messageDate"] = pd.to_datetime(df["messageDate"])
#     time_mask = (df['messageDate'].dt.hour >= 13) & \
#                 (df['messageDate'].dt.hour <= 15)
#     df[time_mask]
test = noise_df["date_test"].dt.hour >= 13
noise_df.loc[test]

In [None]:
# Build a boolean mask selecting the complaints created during the first comparison year.
# The following cells build the same kind of mask for other windows so that the years can
# be compared against each other.

date1 = '7/1/2019'  # window start (inclusive)
date2 = '7/1/2020'  # window end (exclusive)

# Half-open window [date1, date2): on or after the start date and strictly before the end
# date. A bare date string compares as midnight of that day, so an inclusive end would also
# select a complaint stamped exactly 00:00:00 on the end date -- a moment that belongs to
# the next window, which starts on that same date.
year1 = (noise_df["date_test"] >= date1) & \
       (noise_df["date_test"] < date2)

In [None]:

# Build a boolean mask selecting the complaints created during the second comparison year.
date3 = '7/1/2020'  # window start (inclusive)
date4 = '7/1/2021'  # window end (exclusive)

# Half-open window [date3, date4): on or after the start date and strictly before the end
# date. A bare date string compares as midnight of that day, so an inclusive end would add
# complaints stamped exactly 00:00:00 on the end date and make the window slightly longer
# than one year.
year2 = (noise_df["date_test"] >= date3) & \
       (noise_df["date_test"] < date4)

In [None]:
# Rows of noise_df selected by the year1 mask.
year1noise = noise_df.loc[year1]

In [None]:
# Rows of noise_df selected by the year2 mask.
year2noise = noise_df.loc[year2]

In [None]:
"""Susan requested a check of commercial noise complaints
from August 1, 2020 to August 1, 2021.
1 - Create a time filter for the requested dates.
2 - Create a filter for commercial noise complaints only.
3 - Count complaints by Community Board.
4 - Save the results to an Excel file.

"""

In [43]:
# Output file: destination .xlsx path passed to to_excel at the end of the notebook.
# This is an absolute path on a local Windows machine, so it has to be edited before the
# notebook can run anywhere else.
excel_output = r"C:\Users\MN03\Desktop\Calvin Docs\Text Amendments\Text_Amendment_Open_Restaurants\Commercial_Noise_Complaints_Research_Outputs\commercial_noise_by_cb.xlsx"

In [15]:
# Build a boolean mask selecting the complaints created in the requested one-year window.

date5 = '8/1/2020'  # window start (inclusive)
date6 = '8/1/2021'  # window end (exclusive)

# Half-open window [date5, date6): on or after the start date and strictly before the end
# date. A bare date string compares as midnight of that day, so an inclusive end would add
# complaints stamped exactly 00:00:00 on the end date and make the window slightly longer
# than one year.
year3 = (noise_df["date_test"] >= date5) & \
       (noise_df["date_test"] < date6)

In [16]:
# Select the rows of noise_df flagged by the year3 mask built in the previous cell.
year3noise = noise_df.loc[year3]

In [17]:
year3noise

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,...,taxi_pick_up_location,bridge_highway_name,bridge_highway_direction,road_ramp,bridge_highway_segment,latitude,longitude,location,date_only,date_test
891467,47051764,08/01/2020 12:00:03 AM,08/01/2020 12:23:34 AM,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,10468.0,15 EAST 205 STREET,...,,,,,,40.875714,-73.887514,"(40.875714122683576, -73.88751398013322)",08/01/2020,2020-08-01 00:00:03
891468,47047802,08/01/2020 12:00:11 AM,08/01/2020 07:12:58 PM,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,10466.0,674 EAST 231 STREET,...,,,,,,40.892368,-73.859166,"(40.89236798554616, -73.85916553107027)",08/01/2020,2020-08-01 00:00:11
891469,47043407,08/01/2020 12:00:15 AM,08/01/2020 02:49:51 AM,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,10456.0,1169 TINTON AVENUE,...,,,,,,40.828259,-73.900808,"(40.82825938943283, -73.90080843221006)",08/01/2020,2020-08-01 00:00:15
891470,47048666,08/01/2020 12:00:21 AM,08/01/2020 12:22:24 AM,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Music/Party,Street/Sidewalk,10027.0,55 TIEMANN PLACE,...,,,,,,40.815483,-73.959581,"(40.81548262382948, -73.95958099861804)",08/01/2020,2020-08-01 00:00:21
891471,47042667,08/01/2020 12:00:37 AM,08/01/2020 12:46:12 AM,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Music/Party,Street/Sidewalk,11101.0,40-01 10 STREET,...,,,,,,40.756843,-73.944374,"(40.756843296478166, -73.94437353500923)",08/01/2020,2020-08-01 00:00:37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1729761,51394726,07/31/2021 11:59:34 PM,08/01/2021 05:57:29 AM,NYPD,New York City Police Department,Noise - Commercial,Loud Music/Party,Club/Bar/Restaurant,11106.0,35-55 31 STREET,...,,,,,,40.757293,-73.929162,"(40.75729331959201, -73.92916249455044)",07/31/2021,2021-07-31 23:59:34
1729762,51392339,07/31/2021 11:59:44 PM,08/01/2021 05:13:06 AM,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,11411.0,117-28 223 STREET,...,,,,,,40.693783,-73.737810,"(40.6937827298276, -73.73780975329866)",07/31/2021,2021-07-31 23:59:44
1729763,51395344,07/31/2021 11:59:50 PM,08/01/2021 12:57:12 AM,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,11355.0,42-24 149 PLACE,...,,,,,,40.760030,-73.813714,"(40.76002999657409, -73.81371395146566)",07/31/2021,2021-07-31 23:59:50
1729764,51394224,07/31/2021 11:59:53 PM,08/01/2021 12:04:12 AM,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Music/Party,Street/Sidewalk,10034.0,107 POST AVENUE,...,,,,,,40.864337,-73.920614,"(40.864337428793434, -73.92061382485133)",07/31/2021,2021-07-31 23:59:53


In [19]:
# Keep only the commercial noise complaints: rows whose complaint_type contains "Commercial".
# na=False treats a missing complaint_type as "no match"; without it a NaN in the column
# produces a NaN in the mask and the boolean indexing raises.
cnoise = year3noise[year3noise['complaint_type'].str.contains("Commercial", na=False)]

In [33]:
# Count the commercial noise complaints per community board.
# groupby(...).size() gives the number of rows in each group; reset_index(name='counts')
# turns the result into a two-column frame: 'community_board' and 'counts'.
cnoise_counts = (
    cnoise
    .groupby('community_board')
    .size()
    .reset_index(name='counts')
)

In [34]:
cnoise_counts.columns

Index(['community_board', 'counts'], dtype='object')

In [32]:
# NOTE(review): the saved output below lists community-board labels as the index, but
# cnoise_counts is built with reset_index(name='counts'), which leaves a default integer
# index. The output appears to be left over from an earlier, out-of-order run -- confirm
# by re-running the notebook top to bottom.
cnoise_counts.index

Index(['0 Unspecified', '01 BRONX', '01 BROOKLYN', '01 MANHATTAN', '01 QUEENS',
       '01 STATEN ISLAND', '02 BRONX', '02 BROOKLYN', '02 MANHATTAN',
       '02 QUEENS', '02 STATEN ISLAND', '03 BRONX', '03 BROOKLYN',
       '03 MANHATTAN', '03 QUEENS', '03 STATEN ISLAND', '04 BRONX',
       '04 BROOKLYN', '04 MANHATTAN', '04 QUEENS', '05 BRONX', '05 BROOKLYN',
       '05 MANHATTAN', '05 QUEENS', '06 BRONX', '06 BROOKLYN', '06 MANHATTAN',
       '06 QUEENS', '07 BRONX', '07 BROOKLYN', '07 MANHATTAN', '07 QUEENS',
       '08 BRONX', '08 BROOKLYN', '08 MANHATTAN', '08 QUEENS', '09 BRONX',
       '09 BROOKLYN', '09 MANHATTAN', '09 QUEENS', '10 BRONX', '10 BROOKLYN',
       '10 MANHATTAN', '10 QUEENS', '11 BRONX', '11 BROOKLYN', '11 MANHATTAN',
       '11 QUEENS', '12 BRONX', '12 BROOKLYN', '12 MANHATTAN', '12 QUEENS',
       '13 BROOKLYN', '13 QUEENS', '14 BROOKLYN', '14 QUEENS', '15 BROOKLYN',
       '16 BROOKLYN', '17 BROOKLYN', '18 BROOKLYN', '26 BRONX', '27 BRONX',
       '28 BRONX', '

In [35]:
cnoise_counts

Unnamed: 0,community_board,counts
0,0 Unspecified,5
1,01 BRONX,561
2,01 BROOKLYN,2901
3,01 MANHATTAN,629
4,01 QUEENS,2711
...,...,...
70,Unspecified BRONX,17
71,Unspecified BROOKLYN,24
72,Unspecified MANHATTAN,47
73,Unspecified QUEENS,19


In [36]:
# Raise the pandas display limit to one more than the number of rows in cnoise_counts so
# the whole table is shown instead of a truncated preview.
pd.set_option('display.max_rows', len(cnoise_counts) + 1)

In [37]:
cnoise_counts

Unnamed: 0,community_board,counts
0,0 Unspecified,5
1,01 BRONX,561
2,01 BROOKLYN,2901
3,01 MANHATTAN,629
4,01 QUEENS,2711
5,01 STATEN ISLAND,485
6,02 BRONX,413
7,02 BROOKLYN,981
8,02 MANHATTAN,2078
9,02 QUEENS,924


In [47]:
# Rank the community boards from most to fewest commercial noise complaints.
cnoise_sorted = (
    cnoise_counts
    .sort_values(by='counts', ascending=False)
)

In [49]:
cnoise_sorted

Unnamed: 0,community_board,counts
2,01 BROOKLYN,2901
13,03 MANHATTAN,2797
4,01 QUEENS,2711
8,02 MANHATTAN,2078
50,12 MANHATTAN,2050
42,10 MANHATTAN,1666
36,09 BRONX,1629
18,04 MANHATTAN,1588
17,04 BROOKLYN,1450
45,11 BROOKLYN,1306


In [51]:
# Write the ranked counts to the Excel workbook at excel_output.
# index=False leaves out the DataFrame index: after sorting it is only the shuffled
# pre-sort row numbers, which would otherwise appear as an unlabeled first column.
cnoise_sorted.to_excel(excel_output, index=False)