# NYC 311 Requests - Data Gathering
### Danielle Medellin

## Loading Libraries

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import time
import requests
from sodapy import Socrata
import json
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Credentials live in a JSON file outside the notebook so they are never hardcoded here.
ENV = pd.read_json("../env.json", typ="series")

# Pull each credential out of the loaded series by its key.
API_KEY, API_SECRET = ENV["APIKEY"], ENV["APISECRET"]
APP_TOKEN, APP_SECRET = ENV["APPTOKEN"], ENV["APPSECRET"]

In [12]:
# function to convert a wait time (timedelta) into fractional days
def conv_to_days(waittime): # must be a timedelta type
    """
    Converts a timedelta into a signed number of days (float).

    Arguments:
    - waittime: a pandas Timedelta or datetime.timedelta, or a missing value (NaT / None / NaN).

    Returns:
    - the duration in days, negative when the duration itself is negative,
      or None when waittime is missing.
    """
    # Missing durations (NaT, None, NaN) have no length; keep returning None for them.
    if pd.isna(waittime):
        return None

    # total_seconds() already carries the sign. Adding up `.components` is wrong for
    # negative durations: pandas normalizes them to a negative day count plus positive
    # hours/minutes/seconds (e.g. -1 hour is days=-1, hours=23).
    return waittime.total_seconds() / 86400


def get_311_data(limit=2000, app_token=None, date_max='2020-02-01T00:00:00', n_pages=25):
    """
    Collects closed NYPD 311 service requests and returns them as a data frame.

    Arguments:
    - limit: number of rows requested per page. Default 2000.
    - app_token: not necessary, but will help with the collection process.
    - date_max: only requests created strictly before this timestamp are collected,
      in the form 'YYYY-MM-DDTHH:MM:SS'.
    - n_pages: maximum number of pages to request. Default 25, so up to
      limit x n_pages rows are collected (50000 with the defaults).

    Returns:
    - a DataFrame ordered from most recent to oldest, containing the selected columns,
      an 'index' column (row position within its page) and 'waittime' (closed_date
      minus created_date, in days; NaN when either date is missing).

    Raises:
    - ValueError: if limit or n_pages is not positive.
    """
    if limit <= 0 or n_pages <= 0:
        raise ValueError("limit and n_pages must be positive")

    # columns we want to grab from dataframe
    SUBFIELDS = ['unique_key', 'created_date','closed_date', 'agency', 'agency_name', 'complaint_type',
       'descriptor', 'location_type', 'status', 'community_board', 'borough',
       'open_data_channel_type', 'park_facility_name', 'park_borough',
       'incident_zip', 'incident_address', 'street_name', 'cross_street_1',
       'cross_street_2', 'intersection_street_1', 'intersection_street_2',
       'city', 'landmark', 'bbl', 'x_coordinate_state_plane',
       'y_coordinate_state_plane', 'latitude', 'longitude', 'location', 'resolution_description',
       'resolution_action_updated_date']

    # establish client source
    client = Socrata("data.cityofnewyork.us", app_token, timeout=300)

    data = []

    try:
        for page in range(n_pages):
            # pause between requests (not before the first one)
            if page:
                time.sleep(3)

            # The offset advances by `limit`, so pages neither overlap nor leave gaps
            # when a limit other than 2000 is used.
            results = client.get("erm2-nwe9", limit=limit,
                                 offset=page * limit,
                                 # only NYPD requests that are closed and were created before date_max
                                 where=f"agency = 'NYPD' and status = 'Closed' and created_date < '{date_max}'",
                                 # most recent first; unique_key breaks ties so that paging
                                 # is stable across rows with the same timestamp
                                 order="created_date desc, unique_key desc")

            # Convert to pandas DataFrame and add it to the list
            data.append(pd.DataFrame.from_records(results, columns=SUBFIELDS))

            # a short page means there is nothing left to fetch
            if len(results) < limit:
                break
    finally:
        # release the underlying HTTP session even if a request fails
        client.close()

    # reset_index() keeps the per-page row position as an 'index' column;
    # the notebook drops that column in a later cell.
    full = pd.concat(data).reset_index()

    # cleaning & engineering features
    full['created_date'] = pd.to_datetime(full['created_date']) # change type to datetime
    full['closed_date'] = pd.to_datetime(full['closed_date']) # change type to datetime
    # wait time in days, computed for the whole column at once; the sign is preserved
    full['waittime'] = (full['closed_date'] - full['created_date']).dt.total_seconds() / 86400

    return full

In [5]:
def get_earliest_date(df): # will get earliest date from data frame so we know where to start for next one
    """
    Returns the earliest 'created_date' in df as a 'YYYY-MM-DDTHH:MM:SS' string.

    Arguments:
    - df: data frame with a 'created_date' column.

    Raises:
    - ValueError: if df has no non-missing 'created_date' value.
    """
    # min() works for any number of rows and any index, unlike a fixed row label.
    earliest = df['created_date'].min()
    if pd.isna(earliest):
        raise ValueError("df has no created_date values")
    return str(earliest).replace(" ", "T")

In [9]:
def get_all_data(app_token, n_batches=12):
    """
    Collects several consecutive batches of 311 data, walking backwards in time.

    The first batch uses the default cut-off date of get_311_data; every later batch
    only asks for requests created before the earliest request of the previous batch.

    Arguments:
    - app_token: passed on to get_311_data.
    - n_batches: maximum number of batches to collect. Default 12.

    Returns:
    - one data frame with all batches concatenated and a fresh 0..n-1 index.

    Raises:
    - ValueError: if n_batches is smaller than 1.
    """
    if n_batches < 1:
        raise ValueError("n_batches must be at least 1")

    dfs = []
    cutoff = None # earliest created_date seen so far; None until the first batch is in

    for _ in range(n_batches):
        if cutoff is None:
            df = get_311_data(app_token = app_token)
        else:
            df = get_311_data(app_token = app_token, date_max = cutoff)
        dfs.append(df)

        # nothing older is left, so further requests would only return empty frames
        if df.empty:
            break

        # NOTE(review): the next batch starts strictly before this timestamp, so requests
        # sharing it that did not fit in this batch would be skipped -- confirm acceptable.
        cutoff = get_earliest_date(df)

    full_data = pd.concat(dfs, ignore_index = True)

    return full_data

In [13]:
# Collect every batch of service requests; this issues many API calls with pauses in between.
sr_df = get_all_data(APP_TOKEN)

In [15]:
# Remove the leftover 'index' column; errors='ignore' keeps this cell safe to re-run.
sr_df = sr_df.drop(columns='index', errors='ignore')

In [16]:
# Preview the first rows of the collected requests.
sr_df.head()

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,status,community_board,...,landmark,bbl,x_coordinate_state_plane,y_coordinate_state_plane,latitude,longitude,location,resolution_description,resolution_action_updated_date,waittime
0,45514479,2020-01-31 23:58:46,2020-02-01 11:05:48,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Talking,Street/Sidewalk,Closed,06 BRONX,...,HOFFMAN STREET,2030540026,1015007,251021,40.85561595630976,-73.88881679868403,"{'latitude': '40.85561595630976', 'longitude':...",The Police Department responded to the complai...,2020-02-01T16:05:49.000,0.463218
1,45517361,2020-01-31 23:58:28,2020-02-01 02:57:57,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,,Closed,11 MANHATTAN,...,EAST 100 STREET,1016280006,998065,226692,40.78888251589769,-73.95011034864612,"{'latitude': '40.78888251589769', 'longitude':...",The Police Department responded to the complai...,2020-02-01T07:58:02.000,0.124641
2,45516851,2020-01-31 23:57:05,2020-02-01 01:36:02,NYPD,New York City Police Department,Noise - Residential,Loud Talking,Residential Building/House,Closed,14 BROOKLYN,...,FOSTER AVENUE,3052200064,995846,171145,40.63642248430778,-73.95821942468432,"{'latitude': '40.63642248430778', 'longitude':...",The Police Department responded to the complai...,2020-02-01T06:36:05.000,0.068715
3,45515360,2020-01-31 23:56:48,2020-02-01 02:16:21,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Talking,Street/Sidewalk,Closed,02 MANHATTAN,...,BROOME STREET,1004777502,982629,203186,40.72437519160271,-74.00584819694839,"{'latitude': '40.72437519160271', 'longitude':...",The Police Department responded to the complai...,2020-02-01T07:16:25.000,0.09691
4,45518349,2020-01-31 23:56:42,2020-02-01 08:52:48,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,Closed,09 BRONX,...,TAYLOR AVENUE,2039000033,1021265,244157,40.8367522977154,-73.86623273386101,"{'latitude': '40.8367522977154', 'longitude': ...",The Police Department responded to the complai...,2020-02-01T13:52:50.000,0.372292


In [17]:
# Number of rows and columns collected.
sr_df.shape

(600000, 32)

In [20]:
# Count distinct request keys; this equals the row count when no request was collected twice.
len(sr_df['unique_key'].unique())

600000

In [21]:
# Creation timestamps in ascending order, to see the time span the data covers.
sr_df['created_date'].sort_values()

599999   2019-05-28 04:53:17
599998   2019-05-28 04:53:50
599997   2019-05-28 04:54:00
599996   2019-05-28 04:58:44
599995   2019-05-28 05:00:34
                 ...        
4        2020-01-31 23:56:42
3        2020-01-31 23:56:48
2        2020-01-31 23:57:05
1        2020-01-31 23:58:28
0        2020-01-31 23:58:46
Name: created_date, Length: 600000, dtype: datetime64[ns]

In [22]:
# Save the collected requests to CSV, leaving out the row index.
sr_df.to_csv('../data/service_request.csv', index = False)

In [23]:
# Read the saved file back and display it, as a check on what was written.
pd.read_csv('../data/service_request.csv')

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,status,community_board,...,landmark,bbl,x_coordinate_state_plane,y_coordinate_state_plane,latitude,longitude,location,resolution_description,resolution_action_updated_date,waittime
0,45514479,2020-01-31 23:58:46,2020-02-01 11:05:48,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Talking,Street/Sidewalk,Closed,06 BRONX,...,HOFFMAN STREET,2.030540e+09,1015007.0,251021.0,40.855616,-73.888817,"{'latitude': '40.85561595630976', 'longitude':...",The Police Department responded to the complai...,2020-02-01T16:05:49.000,0.463218
1,45517361,2020-01-31 23:58:28,2020-02-01 02:57:57,NYPD,New York City Police Department,Noise - Residential,Banging/Pounding,,Closed,11 MANHATTAN,...,EAST 100 STREET,1.016280e+09,998065.0,226692.0,40.788883,-73.950110,"{'latitude': '40.78888251589769', 'longitude':...",The Police Department responded to the complai...,2020-02-01T07:58:02.000,0.124641
2,45516851,2020-01-31 23:57:05,2020-02-01 01:36:02,NYPD,New York City Police Department,Noise - Residential,Loud Talking,Residential Building/House,Closed,14 BROOKLYN,...,FOSTER AVENUE,3.052200e+09,995846.0,171145.0,40.636422,-73.958219,"{'latitude': '40.63642248430778', 'longitude':...",The Police Department responded to the complai...,2020-02-01T06:36:05.000,0.068715
3,45515360,2020-01-31 23:56:48,2020-02-01 02:16:21,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Talking,Street/Sidewalk,Closed,02 MANHATTAN,...,BROOME STREET,1.004778e+09,982629.0,203186.0,40.724375,-74.005848,"{'latitude': '40.72437519160271', 'longitude':...",The Police Department responded to the complai...,2020-02-01T07:16:25.000,0.096910
4,45518349,2020-01-31 23:56:42,2020-02-01 08:52:48,NYPD,New York City Police Department,Noise - Residential,Loud Music/Party,Residential Building/House,Closed,09 BRONX,...,TAYLOR AVENUE,2.039000e+09,1021265.0,244157.0,40.836752,-73.866233,"{'latitude': '40.8367522977154', 'longitude': ...",The Police Department responded to the complai...,2020-02-01T13:52:50.000,0.372292
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,42797027,2019-05-28 05:00:34,2019-05-28 06:39:30,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Music/Party,Street/Sidewalk,Closed,11 MANHATTAN,...,,,999049.0,226734.0,40.788996,-73.946557,"{'latitude': '40.78899620161923', 'longitude':...",The Police Department responded to the complai...,2019-05-28T06:39:30.000,0.068704
599996,42799149,2019-05-28 04:58:44,2019-05-28 05:14:26,NYPD,New York City Police Department,Noise - Park,Loud Music/Party,Park/Playground,Closed,03 MANHATTAN,...,,1.004698e+09,987700.0,206242.0,40.732763,-73.987552,"{'latitude': '40.73276264138802', 'longitude':...",Your request can not be processed at this time...,2019-05-28T05:14:26.000,0.010903
599997,42803136,2019-05-28 04:54:00,2019-05-28 08:09:41,NYPD,New York City Police Department,Blocked Driveway,Partial Access,Street/Sidewalk,Closed,05 BROOKLYN,...,,3.038860e+09,1014314.0,188120.0,40.682972,-73.891603,"{'latitude': '40.6829716549788', 'longitude': ...",The Police Department responded to the complai...,2019-05-28T08:09:41.000,0.135891
599998,42801604,2019-05-28 04:53:50,2019-05-28 07:10:05,NYPD,New York City Police Department,Illegal Parking,Blocked Hydrant,Street/Sidewalk,Closed,11 BROOKLYN,...,,3.064600e+09,980450.0,159740.0,40.605125,-74.013685,"{'latitude': '40.605124912147716', 'longitude'...",The Police Department responded to the complai...,2019-05-28T07:10:05.000,0.094618


## Resources:

[NYC Open Data](https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9)  

[Socrata](https://dev.socrata.com/foundry/data.cityofnewyork.us/erm2-nwe9)

[More Socrata](http://holowczak.com/getting-started-with-nyc-opendata-and-the-socrata-api/5/)

[SoQL Queries](https://github.com/xmunoz/sodapy/blob/master/examples/soql_queries.ipynb)