# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [13]:
import os
import requests
import pandas as pd

#Import CitiBike.csv.
df = pd.read_csv("../data/vancouver_citibike_stations.csv")

citibike_ids = [] #Stored and used as a variable to join DataFrames.

#Loop to pass CitiBike coordinates to Foursquare URL.
for index, row in df.iterrows():
    lat = row['latitude']
    long = row['longitude']
    bikeid = row['citibike_id']
    citibike_ids.append(bikeid)

df_citibike_id = pd.DataFrame({'citibike_id': citibike_ids})

#Foursquare API key in an environment variable.
fsq_api_key = os.getenv("FOURSQUARE_API_KEY")

#Function to pass information to Foursquare API:
def get_venues_fsq(latitude, longitude, radius, api_key):
    url = f"https://api.foursquare.com/v3/places/search?ll={latitude},{longitude}&radius={radius}"
    headers = {"Accept": "application/json", "Authorization": fsq_api_key}
    response = requests.get(url, headers=headers)
    return response

#Get results for locations in Vancouver.
vancouver_location = get_venues_fsq(
    latitude = lat,
    longitude = long,
    radius = 1000,
    api_key = fsq_api_key
)

data = vancouver_location.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
#Columns were cleaned by dropping irrelevant columns and renaming column names.
vancouver_fsq = pd.json_normalize(data['results'])

vancouver_fsq_col = vancouver_fsq[['fsq_id', 'distance', 'name', 'geocodes.main.latitude', 'geocodes.main.longitude', 'location.formatted_address', 'location.locality','location.region','location.postcode']]

vancouver_fsq_rename_col = vancouver_fsq_col.rename(columns={
    'name': 'fsq_name',
    'distance': 'fsq_distance',
    'geocodes.main.latitude': 'latitude',
    'geocodes.main.longitude': 'longitude',
    'location.formatted_address': 'fsq_address',
    'location.locality': 'fsq_city',
    'location.region': 'fsq_province',
    'location.postcode': 'fsq_postalcode'
    }
)

vancouver_fsq_rename_col['citibike_id'] = df_citibike_id['citibike_id']


Put your parsed results into a DataFrame

In [None]:
vancouver_fsq_rename_col

#Save As .csv file - vancouver_fsq_rename_col.to_csv("../data/vancouver_fsq_locations.csv")

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [14]:
import requests
import os
import pandas as pd

#Import CitiBike.csv.
df = pd.read_csv("../data/vancouver_citibike_stations.csv")

citibike_ids = [] #Stored and used as a variable to join DataFrames.

#Loop to pass CitiBike coordinates to Yelp URL.
for index, row in df.iterrows():
    lat = row['latitude']
    long = row['longitude']
    bikeid = row['citibike_id']
    citibike_ids.append(bikeid)

df_citibike_id = pd.DataFrame({'citibike_id': citibike_ids})

#Yelp API key in an environment variable.
yelp_api_key = os.getenv("YELP_API_KEY")

#Function to pass information to Foursquare API:
def get_venues_yelp(latitude, longitude, radius, api_key):
    url = f"https://api.yelp.com/v3/businesses/search?latitude={latitude}&longitude={longitude}&radius={radius}"
    headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"}
    response = requests.get(url, headers=headers)
    return response

vancouver_location = get_venues_yelp(
    latitude = lat, 
    longitude = long, 
    radius = 1000,
    api_key = yelp_api_key
)

data = vancouver_location.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [15]:
#Columns were cleaned by dropping irrelevant columns and renaming column names.
vancouver_yelp = pd.json_normalize(data['businesses'])

vancouver_yelp_col = vancouver_yelp[['id', 'name', 'review_count', 'rating', 'location.display_address']]

vancouver_yelp_rename_col = vancouver_yelp_col.rename(columns={
    'id': 'yelp_id',
    'name': 'yelp_name',
    'review_count': 'yelp_review_count',
    'rating': 'yelp_rating',
    'location.display_address': 'yelp_address'
    }
)

Put your parsed results into a DataFrame

In [17]:
vancouver_yelp_rename_col_add = vancouver_yelp_rename_col

vancouver_yelp_rename_col_add.head(5)

#Save As .csv file - vancouver_yelp_rename_col_add.to_csv("..data/vancouver_yelp_locations.csv")

Unnamed: 0,yelp_id,yelp_name,yelp_review_count,yelp_rating,yelp_address
0,FPP0VvrPrV8RrF6Yye31ig,Chef Hung Taiwanese Beef Noodle,67,2.7,"[3313 Shrum Lane, Unit 102, Vancouver, BC V6S ..."
1,oSRxJSXssqOAAVlF1DS0Zg,Doughgirls,63,4.4,"[3322 Shrum Lane, Vancouver, BC V6S 0B9, Canada]"
2,uW6NtwN8hxrOLMyPi9IVcA,Neptune Chinese Kitchen,37,2.9,"[3337 Shrum Lane, Vancouver, BC V6S 0C8, Canada]"
3,estK0IAUPgS0bftyYHTXNA,Blenz Coffee,29,3.3,"[3381 Wesbrook Mall, Vancouver, BC V6S 0B1, Ca..."
4,W3fcG0C3w-6ARo_5yO4FYQ,Togo Sushi,30,2.3,"[3380 Shrum Lane, Vancouver, BC V6T 1W5, Canada]"


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Yelp API provided more completed data.  Not only did it generate location details, but also, there were additional data on ratings and review counts, which could be variables used later in the statistical modeling portion of the project.

Get the top 10 restaurants according to their rating

In [6]:
import pandas as pd

df = pd.read_csv("../data/vancouver_yelp_locations.csv")

top_10_yelp = df.sort_values(by='yelp_rating', ascending=False)

top_10_yelp[['yelp_name', 'yelp_rating']].head(10)

Unnamed: 0,yelp_name,yelp_rating
19,Sharetea,5.0
13,Spanish Banks Beach Park,4.7
9,Bombay Masala Indian Restaurant,4.6
6,Museum of Anthropology,4.5
2,Doughgirls,4.4
5,Rice Burger,4.4
12,UBC Botanical Garden,4.3
18,Great Dane Coffee,4.1
7,Jamjar Canteen,4.0
3,Burgoo Bistro,3.9
