In [46]:
# importing libraries
import pandas as pd
import numpy as np
from sodapy import Socrata
import os

In [30]:
# Socrata host and dataset identifier used by the queries in this notebook.
socrata_domain = 'data.cityofnewyork.us'
socrata_dataset_identifier = 'erm2-nwe9'

# Optional application token, read from the environment so it is never stored
# in the notebook. To set it, run in a terminal (or add to your .bashrc):
#   $ export SODAPY_APPTOKEN=<token>
# When the variable is unset, socrata_token is None.
socrata_token = os.getenv("SODAPY_APPTOKEN")

In [31]:
# Create the Socrata client for the configured domain. socrata_token is None
# when SODAPY_APPTOKEN is not set in the environment.
client = Socrata(socrata_domain, socrata_token)

In [32]:
# Fetch the dataset's metadata, then list the display name of every column.
metadata = client.get_metadata(socrata_dataset_identifier)
[column['name'] for column in metadata['columns']]

['Unique Key',
 'Created Date',
 'Closed Date',
 'Agency',
 'Agency Name',
 'Complaint Type',
 'Descriptor',
 'Location Type',
 'Incident Zip',
 'Incident Address',
 'Street Name',
 'Cross Street 1',
 'Cross Street 2',
 'Intersection Street 1',
 'Intersection Street 2',
 'Address Type',
 'City',
 'Landmark',
 'Facility Type',
 'Status',
 'Due Date',
 'Resolution Description',
 'Resolution Action Updated Date',
 'Community Board',
 'BBL',
 'Borough',
 'X Coordinate (State Plane)',
 'Y Coordinate (State Plane)',
 'Open Data Channel Type',
 'Park Facility Name',
 'Park Borough',
 'Vehicle Type',
 'Taxi Company Borough',
 'Taxi Pick Up Location',
 'Bridge Highway Name',
 'Bridge Highway Direction',
 'Road Ramp',
 'Bridge Highway Segment',
 'Latitude',
 'Longitude',
 'Location',
 'Zip Codes',
 'Community Districts',
 'Borough Boundaries',
 'City Council Districts',
 'Police Precincts']

In [33]:
# Collect the metadata entries whose display name is 'Complaint Type' and keep
# the first match; displaying it shows the column's full metadata record.
complaint_type_matches = [
    column for column in metadata['columns'] if column['name'] == 'Complaint Type'
]
meta_amount = complaint_type_matches[0]
meta_amount

{'id': 354922035,
 'name': 'Complaint Type',
 'dataTypeName': 'text',
 'description': 'This is the first level of a hierarchy identifying the topic of the incident or condition. Complaint Type may have a corresponding Descriptor (below) or may stand alone.',
 'fieldName': 'complaint_type',
 'position': 6,
 'renderTypeName': 'text',
 'tableColumnId': 1567792,
 'width': 268,
 'cachedContents': {'largest': 'ZTESTINT',
  'non_null': '24649252',
  'null': '0',
  'top': [{'item': 'Noise - Residential', 'count': '2210345'},
   {'item': 'HEAT/HOT WATER', 'count': '1390948'},
   {'item': 'Illegal Parking', 'count': '1107011'},
   {'item': 'Blocked Driveway', 'count': '1035276'},
   {'item': 'Street Condition', 'count': '1016156'},
   {'item': 'Street Light Condition', 'count': '978513'},
   {'item': 'HEATING', 'count': '887869'},
   {'item': 'PLUMBING', 'count': '742600'},
   {'item': 'Water System', 'count': '697035'},
   {'item': 'Noise - Street/Sidewalk', 'count': '678530'},
   {'item': 'Gen

In [34]:
# List each column's API field name (the identifier used in the queries).
[column['fieldName'] for column in metadata['columns']]

['unique_key',
 'created_date',
 'closed_date',
 'agency',
 'agency_name',
 'complaint_type',
 'descriptor',
 'location_type',
 'incident_zip',
 'incident_address',
 'street_name',
 'cross_street_1',
 'cross_street_2',
 'intersection_street_1',
 'intersection_street_2',
 'address_type',
 'city',
 'landmark',
 'facility_type',
 'status',
 'due_date',
 'resolution_description',
 'resolution_action_updated_date',
 'community_board',
 'bbl',
 'borough',
 'x_coordinate_state_plane',
 'y_coordinate_state_plane',
 'open_data_channel_type',
 'park_facility_name',
 'park_borough',
 'vehicle_type',
 'taxi_company_borough',
 'taxi_pick_up_location',
 'bridge_highway_name',
 'bridge_highway_direction',
 'road_ramp',
 'bridge_highway_segment',
 'latitude',
 'longitude',
 'location',
 ':@computed_region_efsh_h5xi',
 ':@computed_region_f5dn_yrer',
 ':@computed_region_yeji_bk3q',
 ':@computed_region_92fq_4b7q',
 ':@computed_region_sbqj_enih']

In [35]:
# Re-create the client with a long timeout so large aggregate queries can finish.
# The application token comes from SODAPY_APPTOKEN; when that variable is unset
# the token is None and the client is unauthenticated, which only works with
# public datasets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Count the records for every complaint type, most frequent first.
query = """
SELECT 
    complaint_type, count(complaint_type)
GROUP BY 
    complaint_type
ORDER BY 
    count(complaint_type) DESC
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier,
                     query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# The JSON response encodes numbers as strings, so convert the count column to
# a numeric dtype; skipped when the query returned no rows (no columns exist).
if not results_df.empty:
    results_df['count_complaint_type'] = pd.to_numeric(
        results_df['count_complaint_type']
    )

print(results_df.shape)
results_df.head(10)

(446, 2)


Unnamed: 0,complaint_type,count_complaint_type
0,Noise - Residential,2210345
1,HEAT/HOT WATER,1390948
2,Illegal Parking,1107011
3,Blocked Driveway,1035276
4,Street Condition,1016156
5,Street Light Condition,978513
6,HEATING,887869
7,PLUMBING,742600
8,Water System,697035
9,Noise - Street/Sidewalk,678530


In [36]:
# Shape of the current result frame as (rows, columns).
results_df.shape

(446, 2)

In [37]:
# Re-create the client with a long timeout so large aggregate queries can finish.
# The application token comes from SODAPY_APPTOKEN; when that variable is unset
# the token is None and the client is unauthenticated, which only works with
# public datasets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Count the records for every descriptor containing "flood" (case-insensitive),
# most frequent first.
query = """
SELECT 
    descriptor, count(descriptor)
WHERE 
    LOWER(descriptor) LIKE '%flood%'
GROUP BY 
    descriptor
ORDER BY 
    count(descriptor) DESC
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier,
                     query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# The JSON response encodes numbers as strings, so convert the count column to
# a numeric dtype; skipped when the query returned no rows (no columns exist).
if not results_df.empty:
    results_df['count_descriptor'] = pd.to_numeric(results_df['count_descriptor'])

print(results_df.shape)
results_df

(11, 2)


Unnamed: 0,descriptor,count_descriptor
0,Catch Basin Clogged/Flooding (Use Comments) (SC),90500
1,Street Flooding (SJ),27817
2,Flood Light Lamp Out,5951
3,Highway Flooding (SH),2839
4,Flood Light Lamp Cycling,2507
5,Ready NY - Flooding,271
6,Flood Light Lamp Dayburning,205
7,Flood Light Lamp Missing,190
8,Flood Light Lamp Dim,177
9,RAIN GARDEN FLOODING (SRGFLD),80


In [38]:
# Re-create the client with a long timeout because this query downloads every
# matching row with all of its columns. The application token comes from
# SODAPY_APPTOKEN; when that variable is unset the token is None and the client
# is unauthenticated, which only works with public datasets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Fetch the full records whose descriptor contains "flood" (case-insensitive).
query = """
SELECT 
    *
WHERE 
    LOWER(descriptor) LIKE '%flood%'
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier,
                     query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

print(results_df.shape)
results_df.head()

(130585, 34)


Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,...,park_facility_name,park_borough,latitude,longitude,location,intersection_street_1,intersection_street_2,location_type,due_date,landmark
0,40139218,2018-08-29T13:43:00.000,2018-09-03T13:25:00.000,DEP,Department of Environmental Protection,Sewer,Catch Basin Clogged/Flooding (Use Comments) (SC),11413,140-19 184 STREET,184 STREET,...,Unspecified,QUEENS,40.671777,-73.7601099,"{'latitude': '40.671777', 'longitude': '-73.76...",,,,,
1,40139247,2018-08-29T08:44:00.000,2018-09-03T13:55:00.000,DEP,Department of Environmental Protection,Sewer,Catch Basin Clogged/Flooding (Use Comments) (SC),11236,10101 AVENUE D,AVENUE D,...,Unspecified,BROOKLYN,40.6521589,-73.9041227,"{'latitude': '40.6521589', 'longitude': '-73.9...",,,,,
2,40140533,2018-08-29T08:35:00.000,2018-09-08T12:40:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),10034,,,...,Unspecified,MANHATTAN,40.8630205,-73.9175569,"{'latitude': '40.8630205', 'longitude': '-73.9...",WEST 206 STREET,9 AVENUE,,,
3,40141770,2018-08-29T14:04:00.000,2018-08-30T14:10:00.000,DEP,Department of Environmental Protection,Sewer,Catch Basin Clogged/Flooding (Use Comments) (SC),11203,,,...,Unspecified,BROOKLYN,40.6415384,-73.9389818,"{'latitude': '40.6415384', 'longitude': '-73.9...",AVENUE D,EAST 40 STREET,,,
4,40141782,2018-08-29T10:29:00.000,2018-08-30T09:40:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11235,2468 EAST 27 STREET,EAST 27 STREET,...,Unspecified,BROOKLYN,40.5921398,-73.9432397,"{'latitude': '40.5921398', 'longitude': '-73.9...",,,,,


In [39]:
# Count the rows per descriptor value in the downloaded records.
results_df['descriptor'].value_counts()

Catch Basin Clogged/Flooding (Use Comments) (SC)    90500
Street Flooding (SJ)                                27817
Flood Light Lamp Out                                 5951
Highway Flooding (SH)                                2839
Flood Light Lamp Cycling                             2507
Ready NY - Flooding                                   271
Flood Light Lamp Dayburning                           205
Flood Light Lamp Missing                              190
Flood Light Lamp Dim                                  177
RAIN GARDEN FLOODING (SRGFLD)                          80
Flooded                                                48
Name: descriptor, dtype: int64

In [40]:
# Keep only the rows whose descriptor is exactly 'Street Flooding (SJ)'.
is_street_flooding = results_df['descriptor'] == 'Street Flooding (SJ)'
results_df = results_df[is_street_flooding]

In [41]:
# Check which descriptor values are present in results_df and how often.
results_df['descriptor'].value_counts()

Street Flooding (SJ)    27817
Name: descriptor, dtype: int64

In [42]:
# Count the rows per complaint type in results_df.
results_df['complaint_type'].value_counts()

Sewer    27817
Name: complaint_type, dtype: int64

In [43]:
# Shape of results_df as (rows, columns).
results_df.shape

(27817, 34)

In [44]:
# writing output file as a csv
results_df.to_csv('data/311-flooding-data.csv', index=False)

# listing items in data folder
%ls data/

311-flooding-data.csv  streets_clipped.json


# Briefly reviewing which descriptors fall under complaint_type='Sewer'

In [45]:
# Re-create the client with a long timeout so large aggregate queries can finish.
# The application token comes from SODAPY_APPTOKEN; when that variable is unset
# the token is None and the client is unauthenticated, which only works with
# public datasets. Non-public datasets additionally need the username= and
# password= arguments of Socrata(...).
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Count the records for every descriptor under the 'Sewer' complaint type,
# most frequent first.
query = """
SELECT 
    descriptor, count(descriptor)
WHERE 
    complaint_type='Sewer'
GROUP BY 
    descriptor
ORDER BY 
    count(descriptor) DESC
"""

# Returned as JSON from API / converted to Python list of dictionaries by
# sodapy. The query sets no LIMIT, so the API's default row limit applies.
results = client.get(socrata_dataset_identifier,
                     query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# The JSON response encodes numbers as strings, so convert the count column to
# a numeric dtype; skipped when the query returned no rows (no columns exist).
if not results_df.empty:
    results_df['count_descriptor'] = pd.to_numeric(results_df['count_descriptor'])

print(results_df.shape)
# Display the whole (small) result frame.
results_df

(27, 2)


Unnamed: 0,descriptor,count_descriptor
0,Sewer Backup (Use Comments) (SA),149180
1,Catch Basin Clogged/Flooding (Use Comments) (SC),90500
2,Catch Basin Sunken/Damaged/Raised (SC1),28700
3,Street Flooding (SJ),27817
4,Manhole Cover Broken/Making Noise (SB),19777
5,Manhole Cover Missing (Emergency) (SA3),17507
6,Sewer Odor (SA2),15406
7,Defective/Missing Curb Piece (SC4),8508
8,Manhole Overflow (Use Comments) (SA1),6830
9,Catch Basin Search (SC2),4153
