In [11]:
import requests
import pandas as pd
import pickle
import json
import os
import keyring

import sys

# Make the sibling src/ directory importable for the local module imported below.
# os.path.join uses the host OS path separator; a hard-coded '..\\src' only
# resolves on Windows, so the import would fail on Linux/macOS.
sys.path.append(os.path.join('..', 'src'))
import fsq_yelp_parsing

# Foursquare & Yelp APIs

Since a typical 5-minute bike ride covers about 1,000–1,100 meters, I'll search within a 1,000-meter radius around bike stations and sort the results by distance.

Imagine a scenario where someone is planning a day out with friends, biking between places. They'll start from a meeting spot reached by public transport. They're looking for group-friendly places: artsy spots, outdoor fun, places to eat, and other activities that are not too strenuous.

In [2]:
# Load the saved CityBikes station table from its pickle file.
# pickle.load can execute arbitrary code, so only open files you trust.
with open('../data/citybikes_filled_df.pickle', 'rb') as pickle_file:
    citybikes_df = pickle.load(pickle_file)

# Preview the first five rows.
citybikes_df.head(5)

Unnamed: 0,name,id,timestamp,longitude,latitude,slots,free_bikes,empty_slots,has_ebikes,ebikes,payment,renting,returning
0,Harmon St at Adeline St,d0e8f4f1834b7b33a3faf8882f567ab8,2024-04-02T18:29:27.066000Z,-122.270582,37.849735,15,9,6,True,0,"[key, creditcard]",1,1
1,Fountain Alley at S 2nd St,983514094dd808b1604da2dcfc2d09af,2024-04-02T18:29:26.582000Z,-121.889277,37.336188,11,8,3,True,6,"[key, creditcard]",1,1
2,Oak St at 1st St,da17603652106fda93da4e255a5b0a22,2024-04-02T18:29:27.044000Z,-121.88109,37.322125,23,14,9,True,7,"[key, creditcard]",1,1
3,Bestor Art Park,7a21c92b3b4cd2f7759107b4fdebf869,2024-04-02T18:29:26.797000Z,-121.874119,37.323678,15,12,3,True,3,"[key, creditcard]",1,1
4,5th St at Virginia St,ce34d38fb230a23c1ced12d1e16df294,2024-04-02T18:29:26.971000Z,-121.87712,37.325998,27,19,7,True,12,"[key, creditcard]",1,1


In [3]:
# Credentials are read from the OS keyring so they never appear in the notebook.

# Foursquare: $200 free credit for API usage granted on the first of every month.
fsq_api_key = keyring.get_password("foursquare", "fsq_api_key")

# Yelp: 5000 API calls daily limit, resets at midnight.
yelp_api_key = keyring.get_password("yelp", "yelp_api_key")
yelp_client_id = keyring.get_password("yelp", "yelp_client_id")

In [4]:
# Probe the Foursquare Places Search endpoint with a single bike station's
# coordinates (Harmon St at Adeline St) to inspect the response structure.
url = 'https://api.foursquare.com/v3/places/search'

# Foursquare category ids requested:
#   13000 = dining & drinking, 10000 = arts & entertainment,
#   16000 = landmarks & outdoors, 19042 = bus station, 19043 = bus stop,
#   19046 = metro station, 19050 = tram station, 19054 = public transportation
params = {
    'll': '37.849735,-122.270582',
    'radius': '1000',
    'limit': '1',  # the max limit for results per query is 50, the default is 20
    'categories': '13000,10000,16000,19042,19043,19046,19050,19054',
    'sort': 'distance',
}
headers = {'Accept': 'application/json', 'Authorization': fsq_api_key}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, params=params, headers=headers, timeout=10)
print('status:', response.status_code)
# Stop here on a 4xx/5xx reply instead of parsing an error payload that the
# following cells would try to index as a normal result.
response.raise_for_status()
res1 = response.json()
print(json.dumps(res1, indent=4))

status: 200
{
    "results": [
        {
            "fsq_id": "4bcb7c1a511f95216599b1c7",
            "categories": [
                {
                    "id": 13068,
                    "name": "American Restaurant",
                    "short_name": "American",
                    "plural_name": "American Restaurants",
                    "icon": {
                        "prefix": "https://ss3.4sqi.net/img/categories_v2/food/default_",
                        "suffix": ".png"
                    }
                }
            ],
            "chains": [],
            "closed_bucket": "LikelyOpen",
            "distance": 66,
            "geocodes": {
                "main": {
                    "latitude": 37.849484,
                    "longitude": -122.271567
                },
                "roof": {
                    "latitude": 37.849484,
                    "longitude": -122.271567
                }
            },
            "link": "/v3/places/4bcb7c1a511f95216599b1c

In [66]:
# Walk through the fields of the first Foursquare result, in the order they will
# be stored. Only the final expression (the distance) is echoed as cell output.
fsq_sample = res1['results'][0]

fsq_sample.get('fsq_id')            # fsq_id

fsq_sample_cat = fsq_sample['categories'][0]    # first listed category
fsq_sample_cat.get('id')            # cat_id
fsq_sample_cat.get('name')          # cat_name

fsq_sample_geo = fsq_sample['geocodes']['main']
fsq_sample_geo.get('latitude')      # lat
fsq_sample_geo.get('longitude')     # long

fsq_sample.get('name')              # name

fsq_sample_loc = fsq_sample['location']
fsq_sample_loc.get('address')       # street address
fsq_sample_loc.get('postcode')      # zip
fsq_sample_loc.get('locality')      # locality

fsq_sample.get('distance')          # distance from bike station

66

In [5]:
# Column layout for the table of parsed Foursquare places.
cols = [
    "fsq_id",
    "cat_id",
    "cat_name",
    "lat",
    "long",
    "name",
    "street_address",
    "zip",
    "locality",
    "distance",
]

# Start from an empty frame that already carries those column names.
fsq_df = pd.DataFrame(columns=cols)
print(fsq_df)

Empty DataFrame
Columns: [fsq_id, cat_id, cat_name, lat, long, name, street_address, zip, locality, distance]
Index: []


In [10]:
# Run the project's Foursquare parser on the sample response together with the
# fsq_df frame; the call's return value is shown as the cell output.
# NOTE(review): whether fsq_parsing fills fsq_df in place or returns a new
# frame is not visible from this notebook — confirm in src/fsq_yelp_parsing.
fsq_yelp_parsing.fsq_parsing(res1, fsq_df)

Unnamed: 0,fsq_id,cat_id,cat_name,lat,long,name,street_address,zip,locality,distance
0,4bcb7c1a511f95216599b1c7,13068,American Restaurant,37.849484,-122.271567,Vault Cafe,3250 Adeline St,94703,Berkeley,66


In [22]:
# Probe the Yelp Business Search endpoint with a single bike station's
# coordinates (Harmon St at Adeline St) to inspect the response structure.
# The Yelp API does not return businesses without any reviews.
url = 'https://api.yelp.com/v3/businesses/search'
params = {
    'latitude': '37.849735',
    'longitude': '-122.270582',
    'radius': '1000',
    'limit': '1',
    # Adjacent string literals are joined into one comma-separated value.
    'categories': (
        'food,galleries,cafes,restaurants,bars,barcrawl,beergardens,coffeeshops,'
        'publicart,trainstations,arts,beaches,bowling,climbing,escapegames,hiking,'
        'rock_climbing,zoos,volleyball,arcades,gardens,outdoormovies,jazzandblues,'
        'museums,streetart'
    ),
    'sort_by': 'distance',
}
headers = {'Accept': 'application/json', 'Authorization': f'Bearer {yelp_api_key}'}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, params=params, headers=headers, timeout=10)
print('status:', response.status_code)
# Stop here on a 4xx/5xx reply instead of parsing an error payload that the
# following cells would try to index as a normal result.
response.raise_for_status()
res = response.json()
print(json.dumps(res, indent=4))

status: 200
{
    "businesses": [
        {
            "id": "LP6NBMIa3dxinjgtOxMEZg",
            "alias": "the-fat-fish-berkeley",
            "name": "The Fat Fish",
            "image_url": "https://s3-media4.fl.yelpcdn.com/bphoto/EfBBn_RHZhdxo892dp9rLQ/o.jpg",
            "is_closed": false,
            "url": "https://www.yelp.com/biz/the-fat-fish-berkeley?adjust_creative=YDZxEhaxjqKRlaRTv_Z18A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=YDZxEhaxjqKRlaRTv_Z18A",
            "review_count": 197,
            "categories": [
                {
                    "alias": "fishnchips",
                    "title": "Fish & Chips"
                }
            ],
            "rating": 3.9,
            "coordinates": {
                "latitude": 37.85004,
                "longitude": -122.27053
            },
            "transactions": [
                "pickup",
                "delivery"
            ],
            "price": "$$",
            "location": {
 

In [50]:
# Walk through the fields of the first Yelp business, in the order they will be
# stored. Only the final expression (the distance) is echoed as cell output.
yelp_sample = res['businesses'][0]

yelp_sample.get('id')                   # yelp_id

yelp_sample_cat = yelp_sample['categories'][0]  # first listed category
yelp_sample_cat.get('alias')            # cat_alias

yelp_sample_coords = yelp_sample['coordinates']
yelp_sample_coords.get('latitude')      # latitude
yelp_sample_coords.get('longitude')     # longitude

yelp_sample.get('name')                 # name

yelp_sample_loc = yelp_sample['location']
yelp_sample_loc.get('address1')         # street address
yelp_sample_loc.get('zip_code')         # zip
yelp_sample_loc.get('city')             # city

yelp_sample.get('price')                # price
yelp_sample.get('rating')               # rating
yelp_sample.get('review_count')         # review_count
yelp_sample.get('distance')             # distance

29.9305557971102

In [52]:
# Column layout for the table of parsed Yelp businesses.
cols = [
    "yelp_id",
    "cat_alias",
    "latitude",
    "longitude",
    "name",
    "street_address",
    "zip",
    "city",
    "price",
    "rating",
    "review_count",
    "distance_from_bike_stn",
]

# Start from an empty frame that already carries those column names.
yelp_df = pd.DataFrame(columns=cols)
print(yelp_df)

Empty DataFrame
Columns: [yelp_id, cat_alias, latitude, longitude, name, street_address, zip, city, price, rating, review_count, distance_from_bike_stn]
Index: []
