In [7]:
import requests
import pandas as pd
import pickle
import json
import os
import keyring

import sys

# Make the project's local `src` package directory importable.
# os.path.join picks the right separator for the host OS; the previous
# hard-coded '..\\src' only resolved on Windows.
sys.path.append(os.path.join('..', 'src'))
import fsq_yelp_parsing

# Foursquare & Yelp APIs

Since a typical 5-minute bike ride covers about 1000-1100 meters, both API queries below search within a 1000-meter radius of each bike station and sort the results by distance.

Imagine a scenario where someone is planning a day out with friends, biking between places. They'll start from a meeting spot reached by public transport. They're looking for places with activities suitable for groups, like artsy spots, outdoor fun, places to eat, and activities that are not too strenuous.

In [8]:
# Restore the CityBikes station DataFrame from its pickle file.
# NOTE(review): pickle.load can run arbitrary code; only load files produced by this project.
with open('../data/citybikes_filled_df.pickle', 'rb') as pickle_file:
    citybikes_df = pickle.load(pickle_file)

# Preview the first five rows (the default for head()).
citybikes_df.head()

Unnamed: 0,name,id,timestamp,longitude,latitude,slots,free_bikes,empty_slots,has_ebikes,ebikes,payment,renting,returning
0,Harmon St at Adeline St,d0e8f4f1834b7b33a3faf8882f567ab8,2024-04-02T18:29:27.066000Z,-122.270582,37.849735,15,9,6,True,0,"[key, creditcard]",1,1
1,Fountain Alley at S 2nd St,983514094dd808b1604da2dcfc2d09af,2024-04-02T18:29:26.582000Z,-121.889277,37.336188,11,8,3,True,6,"[key, creditcard]",1,1
2,Oak St at 1st St,da17603652106fda93da4e255a5b0a22,2024-04-02T18:29:27.044000Z,-121.88109,37.322125,23,14,9,True,7,"[key, creditcard]",1,1
3,Bestor Art Park,7a21c92b3b4cd2f7759107b4fdebf869,2024-04-02T18:29:26.797000Z,-121.874119,37.323678,15,12,3,True,3,"[key, creditcard]",1,1
4,5th St at Virginia St,ce34d38fb230a23c1ced12d1e16df294,2024-04-02T18:29:26.971000Z,-121.87712,37.325998,27,19,7,True,12,"[key, creditcard]",1,1


In [9]:
# Credentials are read from the system keyring so no secret appears in the notebook.

# Foursquare: $200 free credit for API usage granted on the first of every month.
fsq_api_key = keyring.get_password(service_name="foursquare", username="fsq_api_key")

# Yelp: 5000 API calls daily limit, resets at midnight.
yelp_api_key = keyring.get_password(service_name="yelp", username="yelp_api_key")
yelp_client_id = keyring.get_password(service_name="yelp", username="yelp_client_id")

In [8]:
# Probe request: fetch the three nearest Foursquare places around one bike
# station (the coordinates of "Harmon St at Adeline St" from citybikes_df)
# so the response schema can be inspected before querying every station.
url = 'https://api.foursquare.com/v3/places/search'

# Foursquare category ids used in the filter:
#   13000 = dining & drinking, 10000 = arts & entertainment,
#   16000 = landmarks & outdoors, 19042 = bus station, 19043 = bus stop,
#   19046 = metro station, 19050 = tram station, 19054 = public transportation
params = {
    'll': '37.849735,-122.270582',
    'radius': '1000',
    'limit': '3',
    'categories': '13000,10000,16000,19042,19043,19046,19050,19054',
    'sort': 'distance',
}
headers = {'Accept': 'application/json', 'Authorization': f'{fsq_api_key}'}

# The max limit for results per query is 50, the default is 20.
# requests has no default timeout, so set one to keep the cell from hanging
# forever if the API stops responding.
response = requests.get(url, params=params, headers=headers, timeout=10)
print('status:', response.status_code)
res1 = response.json()
print(json.dumps(res1, indent=4))

status: 200
{
    "results": [
        {
            "fsq_id": "4b3fc82cf964a520a1ae25e3",
            "categories": [
                {
                    "id": 13303,
                    "name": "Mexican Restaurant",
                    "short_name": "Mexican",
                    "plural_name": "Mexican Restaurants",
                    "icon": {
                        "prefix": "https://ss3.4sqi.net/img/categories_v2/food/mexican_",
                        "suffix": ".png"
                    }
                }
            ],
            "chains": [],
            "closed_bucket": "Unsure",
            "distance": 33,
            "geocodes": {
                "main": {
                    "latitude": 37.850052,
                    "longitude": -122.270523
                },
                "roof": {
                    "latitude": 37.850052,
                    "longitude": -122.270523
                }
            },
            "link": "/v3/places/4b3fc82cf964a520a1ae25e3",
   

In [11]:
# Display just the list of place records held under the response's 'results' key.
res1['results']

[{'fsq_id': '4b3fc82cf964a520a1ae25e3',
  'categories': [{'id': 13303,
    'name': 'Mexican Restaurant',
    'short_name': 'Mexican',
    'plural_name': 'Mexican Restaurants',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/mexican_',
     'suffix': '.png'}}],
  'chains': [],
  'closed_bucket': 'Unsure',
  'distance': 33,
  'geocodes': {'main': {'latitude': 37.850052, 'longitude': -122.270523},
   'roof': {'latitude': 37.850052, 'longitude': -122.270523}},
  'link': '/v3/places/4b3fc82cf964a520a1ae25e3',
  'location': {'address': '3221 Adeline St',
   'census_block': '060014239011019',
   'country': 'US',
   'cross_street': 'Alactraz St.',
   'dma': 'San Francisco-Oakland-San Jose',
   'formatted_address': '3221 Adeline St (Alactraz St.), Berkeley, CA 94703',
   'locality': 'Berkeley',
   'postcode': '94703',
   'region': 'CA'},
  'name': 'Las Palmas Super Burritos and Heros',
  'related_places': {},
  'timezone': 'America/Los_Angeles'},
 {'fsq_id': '57da76a603a44d3

In [11]:
# Try out the lookup path for every field wanted in the Foursquare table,
# using the first result as the sample. Only the last expression (distance)
# is echoed as the cell output; the others just confirm the path resolves.
first_place = res1['results'][0]

first_place.get('fsq_id')                           # fsq_id
first_place['categories'][0].get('id')              # cat_id
first_place['categories'][0].get('name')            # cat_name
first_place['geocodes']['main'].get('latitude')     # lat
first_place['geocodes']['main'].get('longitude')    # long
first_place.get('name')                             # name
first_place['location'].get('address')              # street address
first_place['location'].get('postcode')             # zip
first_place['location'].get('locality')             # locality
first_place.get('distance')                         # distance from bike station

33

In [12]:
# Column layout for the Foursquare places table: one name per field
# extracted from a search result.
cols = [
    "fsq_id",
    "cat_id",
    "cat_name",
    "lat",
    "long",
    "name",
    "street_address",
    "zip",
    "locality",
    "distance",
]

# Create a frame that holds only the headers and echo it to confirm the layout.
fsq_df = pd.DataFrame(columns=cols)
print(fsq_df)

Empty DataFrame
Columns: [fsq_id, cat_id, cat_name, lat, long, name, street_address, zip, locality, distance]
Index: []


In [16]:
# Request template for the full Foursquare run. Unlike the probe request it
# carries no 'll' entry (presumably the parsing helper supplies each
# station's coordinates; verify in src/fsq_yelp_parsing) and asks for 50
# results per query.
url = 'https://api.foursquare.com/v3/places/search'

# Foursquare category ids used in the filter:
#   13000 = dining & drinking, 10000 = arts & entertainment,
#   16000 = landmarks & outdoors, 19042 = bus station, 19043 = bus stop,
#   19046 = metro station, 19050 = tram station, 19054 = public transportation
params = {
    'radius': '1000',
    'limit': '50',
    'categories': '13000,10000,16000,19042,19043,19046,19050,19054',
    'sort': 'distance',
}
headers = {
    'Accept': 'application/json',
    'Authorization': f'{fsq_api_key}',
}

In [17]:
# Hand the station frame and the request template to the project helper.
# Presumably it issues one search per station and returns the combined
# table; confirm in src/fsq_yelp_parsing.
fsq_places = fsq_yelp_parsing.get_fsq_places(citybikes_df, url, params, headers)

In [18]:
# Summarise the collected Foursquare table: row count, dtypes and non-null counts per column.
fsq_places.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25677 entries, 0 to 25676
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   fsq_id          25677 non-null  object 
 1   cat_id          25677 non-null  int64  
 2   cat_name        25677 non-null  object 
 3   lat             25677 non-null  float64
 4   long            25677 non-null  float64
 5   name            25677 non-null  object 
 6   street_address  24519 non-null  object 
 7   zip             25657 non-null  object 
 8   locality        25677 non-null  object 
 9   distance        25677 non-null  int64  
dtypes: float64(2), int64(2), object(6)
memory usage: 2.2+ MB


In [19]:
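# PICKLE THE COMPLETED FSQ DF
# Currently disabled: uncomment the two lines below to write fsq_places to
# ../data/fsq_places.pickle, the file the next cell loads.
# with open('../data/fsq_places.pickle', 'wb') as df:
#     pickle.dump(fsq_places, df)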

In [21]:
# Reload the saved Foursquare places table from disk, replacing any
# in-memory fsq_places.
# NOTE(review): pickle.load can run arbitrary code; only load files produced by this project.
with open('../data/fsq_places.pickle', 'rb') as pickle_file:
    fsq_places = pickle.load(pickle_file)

# Confirm the reloaded frame's shape and dtypes.
fsq_places.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25677 entries, 0 to 25676
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   fsq_id          25677 non-null  object 
 1   cat_id          25677 non-null  int64  
 2   cat_name        25677 non-null  object 
 3   lat             25677 non-null  float64
 4   long            25677 non-null  float64
 5   name            25677 non-null  object 
 6   street_address  24519 non-null  object 
 7   zip             25657 non-null  object 
 8   locality        25677 non-null  object 
 9   distance        25677 non-null  int64  
dtypes: float64(2), int64(2), object(6)
memory usage: 2.2+ MB


In [24]:
# Probe request: fetch the three nearest Yelp businesses around the same
# bike-station coordinates used for the Foursquare probe, to inspect the
# response schema before querying every station.
url = 'https://api.yelp.com/v3/businesses/search'

# Comma-separated Yelp category aliases to filter on.
categories = 'beaches,bicyclepaths,bowling,rock_climbing,escapegames,hiking,tennis,volleyball,zoos,arcades,galleries,gardens,movietheaters,festivals,museums,trainstations,busstations,metrostations,publictransport,trains,bars,barcrawl,beergardens,karaoke,restaurants'
params = {
    'latitude': '37.849735',
    'longitude': '-122.270582',
    'radius': '1000',
    'limit': '3',
    'categories': categories,  # already a str, so no f-string wrapper is needed
    'sort': 'distance',
}
headers = {'Accept': 'application/json', 'Authorization': f'Bearer {yelp_api_key}'}

# The max limit for results per query is 50, the default is 20.
# requests has no default timeout, so set one to keep the cell from hanging
# forever if the API stops responding.
response = requests.get(url, params=params, headers=headers, timeout=10)
print('status:', response.status_code)
res2 = response.json()

status: 200


In [25]:
# Pretty-print the whole Yelp response to inspect its structure.
print(json.dumps(res2, indent=4))

{
    "businesses": [
        {
            "id": "OjMumhyL26n5fDZ2CQLnyg",
            "alias": "lemat-berkeley",
            "name": "Lemat",
            "image_url": "https://s3-media2.fl.yelpcdn.com/bphoto/olmjVKwiGGVBHd771fDnaQ/o.jpg",
            "is_closed": false,
            "url": "https://www.yelp.com/biz/lemat-berkeley?adjust_creative=YDZxEhaxjqKRlaRTv_Z18A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=YDZxEhaxjqKRlaRTv_Z18A",
            "review_count": 239,
            "categories": [
                {
                    "alias": "ethiopian",
                    "title": "Ethiopian"
                },
                {
                    "alias": "vegetarian",
                    "title": "Vegetarian"
                },
                {
                    "alias": "gluten_free",
                    "title": "Gluten-Free"
                }
            ],
            "rating": 4.4,
            "coordinates": {
                "latitude": 37.85030

In [34]:
# Look at a single business record (the second one returned) in full.
res2['businesses'][1]

{'id': '-EpmBwODsvQ6gHx2IuVO5w',
 'alias': 'el-tiny-cafe-berkeley',
 'name': 'El Tiny Cafe',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/tRqsfRFwSLuen9aRsNuXaA/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/el-tiny-cafe-berkeley?adjust_creative=YDZxEhaxjqKRlaRTv_Z18A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=YDZxEhaxjqKRlaRTv_Z18A',
 'review_count': 60,
 'categories': [{'alias': 'cafes', 'title': 'Cafes'}],
 'rating': 4.8,
 'coordinates': {'latitude': 37.85001740687401,
  'longitude': -122.27053198326358},
 'transactions': [],
 'location': {'address1': '3219 Adeline St',
  'address2': '',
  'address3': None,
  'city': 'Berkeley',
  'zip_code': '94703',
  'country': 'US',
  'state': 'CA',
  'display_address': ['3219 Adeline St', 'Berkeley, CA 94703']},
 'phone': '+15108796945',
 'display_phone': '(510) 879-6945',
 'distance': 31.707767038858425}

In [29]:
# Check which container type holds the business records.
type(res2['businesses'])

list

In [26]:
# Try out the lookup path for every field wanted in the Yelp table, using
# the first business as the sample. Only the last expression (distance) is
# echoed as the cell output; the others just confirm the path resolves.
first_business = res2['businesses'][0]

first_business.get('id')                             # yelp_id
first_business['categories'][0].get('alias')         # cat_alias
first_business['coordinates'].get('latitude')        # latitude
first_business['coordinates'].get('longitude')       # longitude
first_business.get('name')                           # name
first_business['location'].get('address1')           # street address
first_business['location'].get('zip_code')           # zip
first_business['location'].get('city')               # city
first_business.get('price')                          # price
first_business.get('rating')                         # rating
first_business.get('review_count')                   # review_count
first_business.get('distance')                       # distance

86.99355863981393

In [41]:
# Column layout for the Yelp places table: the originating bike station
# followed by one name per field extracted from a business record.
cols = [
    "reference_bike_stn",
    "yelp_id",
    "cat_alias",
    "lat",
    "long",
    "name",
    "street_address",
    "zip",
    "city",
    "price",
    "rating",
    "review_count",
    "distance_from_bike_stn",
]

# Create a frame that holds only the headers and echo it to confirm the layout.
yelp_df = pd.DataFrame(columns=cols)
print(yelp_df)

Empty DataFrame
Columns: [reference_bike_stn, yelp_id, cat_alias, lat, long, name, street_address, zip, city, price, rating, review_count, distance_from_bike_stn]
Index: []


In [10]:
# Request template for the full Yelp run. Unlike the probe request it has
# no latitude/longitude entries (presumably the parsing helper supplies each
# station's coordinates; verify in src/fsq_yelp_parsing) and asks for 50
# results per query.
url = 'https://api.yelp.com/v3/businesses/search'

# Comma-separated Yelp category aliases to filter on.
categories = 'beaches,bicyclepaths,bowling,rock_climbing,escapegames,hiking,tennis,volleyball,zoos,arcades,galleries,gardens,movietheaters,festivals,museums,trainstations,busstations,metrostations,publictransport,trains,bars,barcrawl,beergardens,karaoke,restaurants'
params = {
    'radius': '1000',
    'limit': '50',
    'categories': f'{categories}',
    'sort': 'distance',
}
headers = {
    'Accept': 'application/json',
    'Authorization': f'Bearer {yelp_api_key}',
}

In [11]:
# Hand the station frame and the Yelp request template to the project helper;
# the result replaces the empty yelp_df placeholder created earlier.
yelp_df = fsq_yelp_parsing.get_yelp_places(citybikes_df, url, params, headers)

In [12]:
# Summarise the collected Yelp table: row count, dtypes and non-null counts per column.
# NOTE(review): the recorded output shows only 50 rows (exactly one page at
# limit=50), whereas the Foursquare run produced 25677. Presumably only one
# station's results were kept, or the run stopped early; verify get_yelp_places.
yelp_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 0 to 49
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   reference_bike_stn      50 non-null     object 
 1   yelp_id                 50 non-null     object 
 2   cat_alias               50 non-null     object 
 3   lat                     50 non-null     float64
 4   long                    50 non-null     float64
 5   name                    50 non-null     object 
 6   street_address          50 non-null     object 
 7   zip                     50 non-null     object 
 8   city                    50 non-null     object 
 9   price                   37 non-null     object 
 10  rating                  50 non-null     float64
 11  review_count            50 non-null     int64  
 12  distance_from_bike_stn  50 non-null     float64
dtypes: float64(4), int64(1), object(8)
memory usage: 5.5+ KB
