In [None]:
# imports

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [412]:
import requests 
from IPython.display import JSON
import json
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [413]:
f = open("FOURSQUARE_API_KEY.txt", "r")
FOURSQUARE_API_KEY = f.read()
headers = {'Authorization': FOURSQUARE_API_KEY,
           'accept': 'application/JSON'}

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [421]:
#collection of business outlets information for first 100 bike stations in london using foursquare data API.

df_citybikes = pd.read_csv('london_city_bikes_stns.csv')

url = 'https://api.foursquare.com/v3/places/search'

df_fsquare = pd.DataFrame()

df_station = pd.DataFrame()

i = 0

for index, row in df_citybikes.iterrows():
    ll = str(row['station_latitude']) + ',' + str(row['station_longitude'])
    params = {'ll': ll, 'radius': '1000'}
    res_station = requests.get(url, headers=headers, params=params)
    
    df_station = pd.json_normalize(res_station.json(), 
                                               record_path = ['results', 'categories'], 
                                               meta = [
                                                       ['results', 'fsq_id'],
                                                       ['results', 'name'],
                                                       ['results', 'distance'],
                                                       ['results', 'geocodes', 'main', 'latitude'],
                                                       ['results', 'geocodes', 'main', 'longitude']
                                                      ]
                              )
     
    df_station['bike_station_id'] = row['station_id']
    
    df_fsquare = df_fsquare.append(df_station, ignore_index = True)
    
    i += 1
    
    if i == 100:
        break

print("parsing completed for FSquare POIs for 100 bike stations")

def label_bsns_category (row):
    if row['id'] in range(10000,11000):
        return 'Arts and Entertainment'
    if row['id'] in range(11000,12000):
        return 'Business and Professional Services'
    if row['id'] in range(12000,13000):
        return 'Community and Government'
    if row['id'] in range(13000,14000):
        return 'Dining and Drinking'
    if row['id'] in range(14000,15000):
        return 'Event'
    if row['id'] in range(15000,16000):
        return 'Health and Medicine'
    if row['id'] in range(16000,17000):
        return 'Landmarks and Outdoors'
    if row['id'] in range(17000,18000):
        return 'Retail'
    if row['id'] in range(18000,19000):
        return 'Sports and Recreation'
    if row['id'] in range(19000,20000):
        return 'Travel and Transportation'

df_fsquare['poi_bsns_category'] = df_fsquare.apply (lambda row: label_bsns_category(row), axis=1)

df_fsquare.drop(['icon.prefix', 'icon.suffix', 'id', 'name'], 
                inplace = True, axis = 1)
    
df_fsquare.rename(columns = {'results.fsq_id':'poi_id', 'results.name':'poi_name',  
                                   'results.distance':'poi_distance_from_bike_stn', 
                                   'results.geocodes.main.latitude':'poi_latitude', 
                                   'results.geocodes.main.longitude':'poi_longitude'}, inplace = True)

df_fsquare.drop_duplicates(subset="poi_id", keep='first', inplace=True)

print(df_fsquare.info())

df_fsquare.to_csv("fsquare_POIs_for_100_BikeStns.csv", index=False)

print("foursquare POI file is written to csv")

#total 790 business outlets information received with characteristics such as 'average distance from nearest 
#bike station', 'nature of business', 'latitude' and 'longitude'. Majority of business outlets belonged to 
#'Dining and Drinking' category (553 out of 790). All business outlet information written to 
#'fsquare_POIs_for_100_BikeStns.csv'.

parsing completed for FSquare POIs for 100 bike stations
<class 'pandas.core.frame.DataFrame'>
Int64Index: 790 entries, 0 to 1549
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   poi_id                      790 non-null    object
 1   poi_name                    790 non-null    object
 2   poi_distance_from_bike_stn  790 non-null    object
 3   poi_latitude                790 non-null    object
 4   poi_longitude               790 non-null    object
 5   bike_station_id             790 non-null    object
 6   poi_bsns_category           790 non-null    object
dtypes: object(7)
memory usage: 49.4+ KB
None
foursquare POI file is written to csv


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [422]:
import requests 
from IPython.display import JSON
import json
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [423]:
y = open("YELP_API_KEY.txt", "r")
YELP_API_KEY = y.read()
headers = {'Authorization': "Bearer " + YELP_API_KEY,
           'accept': 'application/JSON'}

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [426]:
#collection of business outlets information for the first 100 bike stations in london city using yelp API.

df_citybikes = pd.read_csv('london_city_bikes_stns.csv')

url = 'https://api.yelp.com/v3/businesses/search'

df_yelp = pd.DataFrame()

df_stn = pd.DataFrame()

i = 0

for index, row in df_citybikes.iterrows():
    latitude = row['station_latitude']
    longitude = row['station_longitude']
    
    params = {'latitude':latitude, 'longitude':longitude, 'radius': 1000}
    
    res_stn = requests.get(url, headers=headers, params=params)
          
    df_stn = pd.json_normalize(res_stn.json(), record_path = ['businesses'])
    
    df_stn['bike_station_id'] = row['station_id']
    
    df_stn['distance'] = df_stn['distance'].astype(int)
    
    df_yelp = df_yelp.append(df_stn, ignore_index = True)
    
    i += 1
    
    if i == 100:
        break


print("parsing completed for yelp POIs for 100 bike stations")

df_yelp.drop(['alias', 'image_url', 'is_closed', 'url', 'transactions',
              'location.address1', 'location.address2', 'location.address3',
              'location.city', 'location.state', 'location.zip_code', 'location.country',
              'location.display_address', 'price', 'phone', 'display_phone', 'categories'
             ], inplace = True, axis = 1)
    
df_yelp.rename(columns = {'id':'poi_id', 'name': 'poi_name', 'distance':'poi_distance_from_bike_stn', 
                          'coordinates.latitude':'poi_latitude', 'coordinates.longitude':'poi_longitude'}, 
                           inplace = True)

df_yelp['poi_bsns_category'] = 'Dining and Drinking'

df_yelp.to_csv("yelp_POIs_for_100_BikeStns.csv", index=False)

print("yelp csv file is also written")

print(df_yelp.info())

#total of 2000 business outlets identified with POI characteristics such as 'ratings', 'reviews',  
#'average distance from nearest bike station(s)', 'latitude' and 'longitude'. Almost all outlets are 'food'
#outlets.

parsing completed for yelp POIs for 100 bike stations
yelp csv file is also written
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   poi_id                      2000 non-null   object 
 1   poi_name                    2000 non-null   object 
 2   review_count                2000 non-null   int64  
 3   rating                      2000 non-null   float64
 4   poi_distance_from_bike_stn  2000 non-null   int64  
 5   poi_latitude                2000 non-null   float64
 6   poi_longitude               2000 non-null   float64
 7   bike_station_id             2000 non-null   object 
 8   poi_bsns_category           2000 non-null   object 
dtypes: float64(3), int64(2), object(4)
memory usage: 140.8+ KB
None


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

#Ans: yelp provides more detailed data for businesses but the nature of data is 
restricted to food outlets. 
On the other hand, data provided by foursquare is varied and covers different businesses categories. However, the quality of data in four square is limited to just their averg

Get the top 10 restaurants according to their rating

In [None]:
df_yelp.sort_values(by='rating', ascending=False).head(10)