In [1]:
import os
import requests
import json
import pandas as pd
from pandas import json_normalize

In [2]:
# Load the cleaned neighbourhood coordinates and derive the string columns used for the api pulls:
#   LatLon    -> "lat,lon" in a single string
#   Latitude  -> latitude as a string
#   Longitude -> longitude as a string
nyc_geo_df = pd.read_csv('clean_data/nyc_geo_cleaned.csv').assign(
    # built first, while Latitude / Longitude still hold their values as read from the csv
    LatLon=lambda geo: geo['Latitude'].astype(str) + ',' + geo['Longitude'].astype(str),
    Latitude=lambda geo: geo['Latitude'].astype(str),
    Longitude=lambda geo: geo['Longitude'].astype(str),
)
nyc_geo_df.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,LatLon
0,Bronx,Wakefield,40.89470517661,-73.84720052054902,"40.89470517661,-73.84720052054902"
1,Bronx,Co-op City,40.87429419303012,-73.82993910812398,"40.87429419303012,-73.82993910812398"
2,Bronx,Eastchester,40.887555677350775,-73.82780644716412,"40.887555677350775,-73.82780644716412"
3,Bronx,Fieldston,40.89543742690383,-73.90564259591682,"40.89543742690383,-73.90564259591682"
4,Bronx,Riverdale,40.890834493891305,-73.9125854610857,"40.890834493891305,-73.9125854610857"


# Restaurant Info

## Foursquare API Requests

In [70]:
# fsq api info
# API key is read from the FSQ_API environment variable (raises KeyError if it is not set);
# fsq_locations sends it as the Authorization header.
foursquare_secret = os.environ["FSQ_API"]

In [90]:
# restaurant cat = 13065
# function for accessing desired information from the FSQ API

def fsq_locations (lat, lon, cat):
    """
    Query the Foursquare places search endpoint around one point and keep the raw JSON.

    Parameters:
        lat (str): The latitude of the search area
        lon (str): The longitude of the search area
        cat (str): The poi category id to return information on

    Returns:
        None. The decoded JSON response is appended to the module-level
        ``fsq_data`` list, which must exist before this function is called.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    url = 'https://api.foursquare.com/v3/places/search'

    # let requests build and escape the query string instead of concatenating it by hand
    params = {
        'll': f'{lat},{lon}',
        'radius': 1000,
        'categories': cat,
        'limit': 25,
        'fields': 'name,distance,price,geocodes,rating,location',
    }

    # create dictionary for headers
    headers = {"Accept": "application/json",
               'Authorization': foursquare_secret}

    # perform get request; the timeout keeps one stalled call from hanging the whole loop
    response = requests.get(url, headers=headers, params=params, timeout=30)

    # stop on an error status instead of storing an error payload alongside real results
    response.raise_for_status()

    # store the decoded JSON for later normalization
    fsq_data.append(response.json())

    return


In [91]:
# raw JSON responses collected by fsq_locations, one entry per searched location
fsq_data = []
def fsq_api_loop(cat):
    """
    Calls fsq_locations once for every row of nyc_geo_df.

    Parameters:
        cat (str): desired POI category
    Returns:
        None. The responses are collected in the module-level fsq_data list,
        which is emptied at the start of every run.

    """
    # start from an empty list so re-running the loop does not duplicate earlier responses
    fsq_data.clear()

    # columns are addressed by name so a change in column order cannot silently swap the inputs
    for lat, lon in zip(nyc_geo_df['Latitude'], nyc_geo_df['Longitude']):
        fsq_locations(lat, lon, cat)

In [92]:
# 13065 is the Foursquare restaurant category (see the note above fsq_locations)
fsq_api_loop('13065')

In [125]:
# making a dataframe of the unparsed json: one row per API response,
# with that response's places still nested as a list in the 'results' column
fsq_raw_df = pd.json_normalize(fsq_data)


In [126]:
# inspect the raw responses (one row per searched location)
fsq_raw_df

Unnamed: 0,results,context.geo_bounds.circle.center.latitude,context.geo_bounds.circle.center.longitude,context.geo_bounds.circle.radius
0,"[{'distance': 127, 'geocodes': {'main': {'lati...",40.894705,-73.847201,1000
1,"[{'distance': 734, 'geocodes': {'main': {'lati...",40.874294,-73.829939,1000
2,"[{'distance': 136, 'geocodes': {'main': {'lati...",40.887556,-73.827806,1000
3,"[{'distance': 905, 'geocodes': {'main': {'lati...",40.895437,-73.905643,1000
4,"[{'distance': 619, 'geocodes': {'main': {'lati...",40.890834,-73.912585,1000
...,...,...,...,...
301,"[{'distance': 204, 'geocodes': {'main': {'lati...",40.756658,-74.000111,1000
302,"[{'distance': 582, 'geocodes': {'main': {'lati...",40.587338,-73.805530,1000
303,"[{'distance': 1230, 'geocodes': {'main': {'lat...",40.611322,-73.765968,1000
304,"[{'distance': 606, 'geocodes': {'main': {'lati...",40.756091,-73.945631,1000


In [127]:
# exploding the 'results' lists so there is one row per returned place;
# the search-centre columns and the original row index are repeated for each place
fsq_raw_df = fsq_raw_df.explode('results')

In [116]:
# flatten each place dict into its own columns (distance, name, geocodes.main.latitude, ...)
# NOTE(review): this cell's execution count (116) is lower than the explode cell's (127) —
# confirm the notebook still produces the same result when run top to bottom on a fresh kernel
poi_df = pd.json_normalize(fsq_raw_df['results'])

In [136]:
# inspect the exploded frame (index values repeat, one row per place)
fsq_raw_df

Unnamed: 0,results,context.geo_bounds.circle.center.latitude,context.geo_bounds.circle.center.longitude,context.geo_bounds.circle.radius
0,"{'distance': 127, 'geocodes': {'main': {'latit...",40.894705,-73.847201,1000
0,"{'distance': 797, 'geocodes': {'main': {'latit...",40.894705,-73.847201,1000
0,"{'distance': 821, 'geocodes': {'main': {'latit...",40.894705,-73.847201,1000
0,"{'distance': 983, 'geocodes': {'main': {'latit...",40.894705,-73.847201,1000
0,"{'distance': 454, 'geocodes': {'main': {'latit...",40.894705,-73.847201,1000
...,...,...,...,...
305,"{'distance': 410, 'geocodes': {'main': {'latit...",40.617311,-74.081740,1000
305,"{'distance': 821, 'geocodes': {'main': {'latit...",40.617311,-74.081740,1000
305,"{'distance': 826, 'geocodes': {'main': {'latit...",40.617311,-74.081740,1000
305,"{'distance': 836, 'geocodes': {'main': {'latit...",40.617311,-74.081740,1000


In [137]:
# resetting index so rows are numbered 0..n-1 again after the explode
# (rebinding instead of inplace=True, so frames displayed by earlier cells are not mutated)
fsq_raw_df = fsq_raw_df.reset_index(drop=True)

In [155]:
# merging to get one final dataframe: rows are paired on their index, so each flattened
# place in poi_df is joined to the search-centre columns of the fsq_raw_df row it came from
# (relies on both frames sharing the same 0..n-1 index — fsq_raw_df must already be reset)
merged_fsq_df = poi_df.merge(fsq_raw_df, left_index=True, right_index=True)

In [156]:
# checking for correct # of rows and the non-null count of each column
merged_fsq_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7001 entries, 0 to 7000
Data columns (total 24 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   distance                                    6999 non-null   float64
 1   name                                        6999 non-null   object 
 2   price                                       6269 non-null   float64
 3   rating                                      4685 non-null   float64
 4   geocodes.main.latitude                      6999 non-null   float64
 5   geocodes.main.longitude                     6999 non-null   float64
 6   geocodes.roof.latitude                      6419 non-null   float64
 7   geocodes.roof.longitude                     6419 non-null   float64
 8   location.address                            6986 non-null   object 
 9   location.census_block                       6999 non-null   object 
 10  location.cou

In [None]:
# preparing dataframe for cleaning

In [157]:
# keep only the columns needed downstream: place details, place coordinates,
# locality information and the centre of the search that returned the place
merged_fsq_df = merged_fsq_df[[
    'distance',
    'name',
    'price',
    'rating',
    'geocodes.main.latitude',
    'geocodes.main.longitude',
    'geocodes.roof.latitude',
    'geocodes.roof.longitude',
    'location.locality',
    'location.neighborhood',
    'location.postcode',
    'context.geo_bounds.circle.center.latitude',
    'context.geo_bounds.circle.center.longitude',
]]

In [158]:
# adding a column with the poi category
# (assign returns a new frame, so nothing is written into the column slice taken from the
# merged frame — this avoids SettingWithCopyWarning and is safe to re-run)
merged_fsq_df = merged_fsq_df.assign(category="restaurant")
merged_fsq_df

Unnamed: 0,distance,name,price,rating,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,geocodes.roof.longitude,location.locality,location.neighborhood,location.postcode,context.geo_bounds.circle.center.latitude,context.geo_bounds.circle.center.longitude,category
0,127.0,Lollipops Gelato,1.0,8.6,40.893585,-73.843692,40.893585,-73.843692,Bronx,[Edenwald],10466,40.894705,-73.847201,restaurant
1,797.0,Ripe Kitchen and Bar,2.0,8.4,40.898196,-73.838821,40.898196,-73.838821,Mount Vernon,[Wakefield],10550,40.894705,-73.847201,restaurant
2,821.0,Ali's Roti Shop,1.0,8.1,40.893950,-73.856803,40.893950,-73.856803,Bronx,[Wakefield],10466,40.894705,-73.847201,restaurant
3,983.0,Jimbo's Hamburger Palace,1.0,8.0,40.891853,-73.858478,40.891853,-73.858478,Bronx,[Williambridge],10466,40.894705,-73.847201,restaurant
4,454.0,Cooler Runnings Jamaican Restaurant,2.0,6.4,40.898173,-73.850254,40.898173,-73.850254,Bronx,[Wakefield],10466,40.894705,-73.847201,restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6996,410.0,My Way Deli,,,40.614490,-74.084890,,,Staten Island,[Stapleton],10304,40.617311,-74.081740,restaurant
6997,821.0,Campo Bello,,,40.624474,-74.079350,,,Staten Island,[Stapleton],10304,40.617311,-74.081740,restaurant
6998,826.0,Al Baraka Restaurant,,,40.611661,-74.088107,,,Staten Island,[Stapleton],10304,40.617311,-74.081740,restaurant
6999,836.0,Chicken R Us,,,40.611571,-74.088152,40.611571,-74.088152,Staten Island,[Stapleton],10304,40.617311,-74.081740,restaurant


In [159]:
# exporting as csv (index=False: the row index is not written to the file)
merged_fsq_df.to_csv('fsq_poi.csv', index=False)

## Yelp API Requests

In [62]:
# yelp api info
# API key is read from the YELP_API environment variable (raises KeyError if it is not set);
# yelp_locations sends it as a Bearer token in the Authorization header.
yelp_secret = os.environ["YELP_API"]

In [63]:
# restaurant category = restaurants
# function for accessing desired information from the yelp API

def yelp_locations (lat, lon, cat):
    """
    Query the Yelp business search endpoint around one point and refresh yelp_df.

    Parameters:
        lat (str): The latitude of the search area
        lon (str): The longitude of the search area
        cat (str): The poi category to return information on

    Returns:
        None. Side effects:
          * the decoded JSON response is appended to the module-level
            ``yelp_data_resto`` list (which must already exist);
          * the global ``yelp_df`` is rebuilt from every response stored in
            ``yelp_data_resto`` so far, with a ``req_cat`` column set to ``cat``.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    # the dataframe is published as a global so it can be accessed outside of the function
    global yelp_df

    url = 'https://api.yelp.com/v3/businesses/search'

    # let requests build and escape the query string instead of concatenating it by hand
    params = {
        'latitude': lat,
        'longitude': lon,
        'radius': 1000,
        'limit': 25,
        'categories': cat,
    }

    # create dictionary for headers
    headers = {"Accept": "application/json",
               'Authorization': 'Bearer ' + yelp_secret}

    # perform get request; the timeout keeps one stalled call from hanging the whole loop
    response = requests.get(url, headers=headers, params=params, timeout=30)

    # fail BEFORE storing the payload: an error response has no 'businesses' key, and once it
    # is in the list every later json_normalize call would raise KeyError
    response.raise_for_status()
    yelp_data_resto.append(response.json())

    # rebuild the dataframe from everything collected so far
    # NOTE: this re-normalizes the whole list on every call, so cost grows with each request
    yelp_df = pd.json_normalize(yelp_data_resto, ['businesses'])
    yelp_df['req_cat'] = cat

    return


In [64]:
# raw JSON responses collected by yelp_locations, one entry per searched location
yelp_data_resto=[]
def yelp_api_loop(cat):
    """
    Calls yelp_locations once for every row of nyc_geo_df.

    Parameters:
        cat (str): desired POI category
    Returns:
        None. yelp_locations stores each response in yelp_data_resto and
        rebuilds the global yelp_df after every request.

    """
    # start from an empty list: yelp_locations labels every stored row with the current
    # category, so leftovers from an earlier run would be duplicated or mislabelled
    yelp_data_resto.clear()

    # columns are addressed by name so a change in column order cannot silently swap the inputs
    for lat, lon in zip(nyc_geo_df['Latitude'], nyc_geo_df['Longitude']):
        yelp_locations(lat, lon, cat)

In [None]:
# 'restaurants' is the Yelp category noted above yelp_locations;
# the loop leaves its results in the global yelp_df
yelp_api_loop('restaurants')
# exporting as csv
yelp_df.to_csv('yelp_poi.csv', index=False)

In [None]:
# inspect the Yelp results
yelp_df

## Google Places

In [None]:
# google places api info
# API key is read from the gp_api environment variable (raises KeyError if it is not set);
# gp_locations passes it as the `key` query parameter.
gp_secret = os.environ["gp_api"]

In [None]:
# restaurant category in google places = restaurant
# function for accessing desired information from the Google Places API

def gp_locations (lat_lon, cat):
    """
    Query the Google Places nearby-search endpoint around one point and refresh gp_df.

    Parameters:
        lat_lon (str): The latitude & longitude of the search area, formatted as "lat,lon"
        cat (str): The poi category (Google place type) to return information on

    Returns:
        None. Side effects:
          * the decoded JSON response is appended to the module-level
            ``gp_data_resto`` list (which must already exist);
          * the global ``gp_df`` is rebuilt from every response stored in
            ``gp_data_resto`` so far, with a ``req_cat`` column set to ``cat``.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        RuntimeError: if the response body reports a status other than
            OK or ZERO_RESULTS.
    """
    # the dataframe is published as a global so it can be accessed outside of the function
    global gp_df

    url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

    # let requests build and escape the query string instead of concatenating it by hand
    params = {
        'location': lat_lon,
        'radius': 1000,
        'type': cat,
        'key': gp_secret,
    }

    # perform get request; the timeout keeps one stalled call from hanging the whole loop
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    gp_response = response.json()

    # This API can answer HTTP 200 while reporting a failure (e.g. denied key, quota exceeded)
    # in the body's "status" field; without this check such a response would be stored as
    # "no places found" and the gap would go unnoticed.
    status = gp_response.get('status', 'OK')
    if status not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError(
            'Google Places request for ' + str(lat_lon) + ' failed with status ' + str(status)
            + ': ' + str(gp_response.get('error_message', ''))
        )

    gp_data_resto.append(gp_response)

    # rebuild the dataframe from everything collected so far
    # NOTE: this re-normalizes the whole list on every call, so cost grows with each request
    gp_df = pd.json_normalize(gp_data_resto, ['results'])
    gp_df['req_cat'] = cat

    return


In [None]:
# raw JSON responses collected by gp_locations, one entry per searched location
gp_data_resto=[]
def gp_api_loop(cat):
    """
    Calls gp_locations once for every row of nyc_geo_df.

    Parameters:
        cat (str): desired POI category
    Returns:
        None. gp_locations stores each response in gp_data_resto and
        rebuilds the global gp_df after every request.

    """
    # This loop is run once per category (restaurant, supermarket, ...). gp_locations builds
    # gp_df from the WHOLE list and stamps every row with the current category, so the list
    # must be emptied here; otherwise each later export would also contain all earlier
    # categories' places, mislabelled with the newest req_cat.
    gp_data_resto.clear()

    # the column is addressed by name so a change in column order cannot silently break the input
    for lat_lon in nyc_geo_df['LatLon']:
        gp_locations(lat_lon, cat)

In [None]:
# pull Google Places results of type 'restaurant' around every location in nyc_geo_df
gp_api_loop('restaurant')

In [12]:
# exporting as csv (gp_df is the global dataframe set by gp_locations)
gp_df.to_csv('google_places_poi.csv', index=False)

-----

**Note:** Only Google Places is used for the remaining API requests, as Yelp and Foursquare mainly have data on venues.

# Grocery Store Info

In [None]:
# pull Google Places results of type 'supermarket' around every location in nyc_geo_df
gp_api_loop('supermarket')

In [None]:
# preview the first rows of the latest Google Places pull
gp_df.head()

In [None]:
# exporting to csv (index=False: the row index is not written to the file)
gp_df.to_csv('google_places_supermarket.csv', index=False)

# Transit Info

In [None]:
# pull Google Places results of type 'transit_station' around every location in nyc_geo_df
gp_api_loop('transit_station')

In [None]:
# exporting to csv
gp_df.to_csv('google_places_transit.csv', index=False)

# Parks Info

In [None]:
# pull Google Places results of type 'park' around every location in nyc_geo_df
gp_api_loop('park')

In [None]:
# exporting to csv
gp_df.to_csv('google_places_park.csv', index=False)

# Schools Info

In [None]:
# pull Google Places results of type 'school' around every location in nyc_geo_df
gp_api_loop('school')

In [None]:
# exporting to csv
gp_df.to_csv('google_places_school.csv', index=False)