## Setup

In [1]:
import os
import pandas as pd
import requests as re
from IPython.display import JSON

In [3]:
#and a dictionary of the parameters to define
def API(root, search_term, param, header=None):
    """search_term must be a string of valid end point queries 
            - as specified by the relevent documentation
            
            returns JSON of results
   """
 
    #define endpoint
    if search_term is not None:
        endpoint = root + search_term
    else:
        endpoint = root
    
    #GET
    response = re.get(endpoint, params = param, headers=header)
   
    #return status code and results
    status_code, results = response.status_code, response.json()
    
    #Let's make sure it worked
    if status_code != 200:
        print('Something went wrong!')
        print(status_code)
        
    return results

In [12]:
test_param = {'latitude': 45.6387, 'longitude':-122.6615, 'radius': 1000}
test_param2 = {'ll': '45.6387,-122.6615', 'radius': 1000}
yelp_head = {"Authorization": f"Bearer {os.environ['YELP_API']}"}
four_head = {"Accept": "application/json",
                  "Authorization": os.environ['FOURSQUARE_API_KEY']}
             

In [13]:
#test it's working
print(bool(API("https://api.yelp.com/", 'v3/businesses/search', test_param, yelp_head)))
bool(API("https://api.foursquare.com/", 'v3/places/search', test_param2, four_head))

True


True

## Exploration

See MiniProject 4 Google Doc for more details

### OpenData

In [36]:
#function to retrieve data from SODA datasets within inputted time frame
def range_SODA(root, column, time_range, params=None):
  
    range_endpoint = f"?$where=project_start_date between '{time_range[0]}' and '{time_range[1]}'"
    
    endpoint = root + range_endpoint
    
    results = API(endpoint, None, params)
    
    return results

In [4]:
#set standard variables
range_start = '2021-02-01T00:00:00'
range_end = '2021-02-28T00:00:00'
range_21 = [range_start, range_end]

#roots of the different datasets
#Housing
housing_root = 'https://data.cityofnewyork.us/resource/hg8x-zxpr.json'

#Areas of Interest
facilities_root = 'https://data.cityofnewyork.us/resource/ji82-xba5.json'
places_root = 'https://data.cityofnewyork.us/resource/ssdk-4qjy.json'
parks_root = 'https://data.cityofnewyork.us/resource/enfh-gkve.json'
hotels_root = 'https://data.cityofnewyork.us/resource/tjus-cn27.json'
library_root = 'https://data.cityofnewyork.us/resource/feuq-due4.json'

#Business
business_root = 'https://data.cityofnewyork.us/resource/w7w3-xahh.json'

In [28]:
def JSON_to_DF(JSON, desired_features):
 
    columns = {}
    
    #for each desired column
    for feat in desired_features: 
        #empty value list
        values = []
        #for each project
        for element in JSON:
            #try to append value
            try:
                values.append(element[feat])
            #if error means no value append NaaN
            except:
                values.append('NaaN')
            
        columns[feat] = values

    #make dict into pandas dataframe        
    return pd.DataFrame(columns)                    

#### Housing

In [81]:
#-- API call -- 

#call housing data within range
housing_start_JSON = range_SODA(housing_root, 'project_start_date', range_21)
housing_complete_JSON = range_SODA(housing_root, 'project_completion_date', ['2020-11-01T00:00:00', '2021-03-01T00:00:00'])

In [98]:
#id desired features
desired_feat = ['project_id', 'project_name', 'latitude', 'longitude', 'all_counted_units', 'total_units']

In [97]:
house_start_df = JSON_to_DF(housing_start_JSON, desired_feat)
house_start_df.head()

Unnamed: 0,project_id,project_name,latitude,longitude,all_counted_units,total_units
0,70849,1893 CROTONA AVENUE APARTMENTS,40.844171,-73.892197,9,28
1,70851,1634 CHURCH AVENUE APARTMENTS,40.648856,-73.964536,15,48
2,70853,770 & 774 LEXINGTON AVENUE APARTMENTS,40.690236,-73.928624,3,10
3,70853,770 & 774 LEXINGTON AVENUE APARTMENTS,40.690252,-73.928495,3,10
4,70856,142 LENOX ROAD,40.653856,-73.954709,5,16


In [99]:
house_end_df = JSON_to_DF(housing_complete_JSON, desired_feat)
house_end_df.head()

Unnamed: 0,project_id,project_name,latitude,longitude,all_counted_units,total_units
0,70507,25-22 30TH DRIVE,40.766618,-73.925719,10,31
1,70508,27 BUCHANAN PLACE,40.858099,-73.905388,8,25
2,70509,834 PACIFIC STREET,40.680762,-73.966791,34,113
3,70510,3915 14TH AVENUE,40.639152,-73.984225,3,8
4,70511,ROOSEVELT PARC APARTMENTS,40.747525,-73.893776,47,154


#### Other

In [57]:
#-- API call-- 
#for the smaller datasets, no filter required
places_JSON = API(places_root, None, None)
library_JSON = API(library_root, None, None)
#only pull active parks
parks_JSON = API(parks_root, None, {'retired':'false'})

In [60]:
#pull out desired feats from places
desired_feat = ['objectid', 'name', 'annoline2a', 'the_geom']
places_df = JSON_to_DF(places_JSON, desired_feat)
print(places_df.shape)
places_df.head()

(96, 4)


Unnamed: 0,objectid,name,annoline2a,the_geom
0,1,Van Cortlandt Park,Cortlandt,"{'type': 'Point', 'coordinates': [-73.88895811..."
1,2,Woodlawn Cemetery,Cemetery,"{'type': 'Point', 'coordinates': [-73.87165102..."
2,3,Seton Falls Park,Falls,"{'type': 'Point', 'coordinates': [-73.83864188..."
3,4,Pelham Bay Park,Bay,"{'type': 'Point', 'coordinates': [-73.80980242..."
4,5,Botanical Garden,Garden,"{'type': 'Point', 'coordinates': [-73.87830775..."


In [61]:
#pull out desired feats from library
desired_feat = ['bin', 'name', 'the_geom']
library_df = JSON_to_DF(library_JSON, desired_feat)
print(library_df.shape)
library_df.head()

(216, 3)


Unnamed: 0,bin,name,the_geom
0,1055236,115th Street,"{'type': 'Point', 'coordinates': [-73.95353074..."
1,1054674,125th Street,"{'type': 'Point', 'coordinates': [-73.93484756..."
2,1089376,53rd Street,"{'type': 'Point', 'coordinates': [-73.97736329..."
3,1037165,58th Street,"{'type': 'Point', 'coordinates': [-73.96938170..."
4,1044749,67th Street,"{'type': 'Point', 'coordinates': [-73.95954994..."


In [59]:
#pull out desired feats from parks
desired_feat = ['objectid', 'address', 'location', 'acres']
parks_df = JSON_to_DF(parks_JSON, desired_feat)
print(parks_df.shape)
parks_df.head()

(1000, 4)


Unnamed: 0,objectid,address,location,acres
0,5548,NaaN,East River,21.10936646
1,5170,538 MACON STREET,Macdonough St. between Malcolm X Blvd. and Stu...,0.89
2,6349,50 COLUMBIA STREET,"Columbia St., Hicks St. bet. Congress St. and ...",5.739
3,6063,NaaN,LIE Srv. Rd. S. at 86 St.,0.001
4,4884,8302 FT HAMILTON PARKWAY,"Ft Hamilton Pkwy., 83 St., 6 Ave.",0.032


Bigger sets, want to pull just based on specific categories so we're getting a clearer picture

See `facilities_datadictionary.xlsx` for possible facility category, subcatory, and factype

#hotels_JSON = API(hotels_root, None, None)

In [160]:
facility_JSONs = {}

In [48]:
#-- API call--

#make dict to store JSON returns
facility_JSONs = {}

#define subgroups we want
facility_subgroup = ['PARKING LOTS AND GARAGES', 'STREETSCAPES, PLAZAS, AND MALLS', 'MUSEUMS',
                     'NON-PUBLIC K-12 SCHOOLS', 'PUBLIC K-12 SCHOOLS', 'COLLEGES OR UNIVERSITIES', 
                    'HOSPITALS AND CLINICS', 'BUS DEPOTS AND TERMINALS'] 

#loop through each subgroup and call JSON
for subgroup in facility_subgroup:
    #define endpoint with higher limit
    endpoint = facilities_root + f"?facsubgrp={subgroup}&$limit=3000"
    #GET
    response = re.get(endpoint, 
                      data={'app_token': os.environ['NYC_TOKEN']})
   
    #return status code and results
    status_code, facility_JSONs[subgroup] = response.status_code, response.json()

In [49]:
#number of return values for each JSON
for JSON in facility_JSONs:
    print(JSON, len(facility_JSONs[JSON]))

PARKING LOTS AND GARAGES 2559
STREETSCAPES, PLAZAS, AND MALLS 560
MUSEUMS 138
NON-PUBLIC K-12 SCHOOLS 1113
PUBLIC K-12 SCHOOLS 1515
COLLEGES OR UNIVERSITIES 132
HOSPITALS AND CLINICS 1191
BUS DEPOTS AND TERMINALS 144


In [53]:
facility_features = ['factype', 'facgroup', 'latitude', 'longitude']
facility_df = {}

for JSON in facility_JSONs:
    facility_df[JSON] = JSON_to_DF(facility_JSONs[JSON], facility_features)

In [54]:
facility_df[facility_subgroup[0]].head()

Unnamed: 0,factype,facgroup,latitude,longitude
0,PUBLIC PARKING,TRANSPORTATION,40.6033903449,-73.9967793904
1,COMMERCIAL GARAGE,TRANSPORTATION,40.7125522199,-73.9533410191
2,COMMERCIAL GARAGE,TRANSPORTATION,40.7018273113,-73.8173146273
3,COMMERCIAL GARAGE,TRANSPORTATION,40.7687161237,-73.9572008687
4,TOW TRUCK COMPANY,TRANSPORTATION,40.8245918496,-73.8433819236


#### Business (discontinued)

In [159]:
#within range
business_started = range_SODA(business_root, 'license_creation_date', [range_start,range_end])
business_ended = range_SODA(business_root, 'lic_expir_dd', [range_start,range_end])
#all businesses with active license [may have limit?]
business_active = API('https://data.cityofnewyork.us/resource/w7w3-xahh.json', None, {'license_status': 'Active'})

Something went wrong!
400
Something went wrong!
400
