# Part 2: Connecting to Foursquare and Yelp APIs

## Imports

In [1]:
from datetime import datetime
import os
import pandas as pd
import requests

Global variables:

In [2]:
foursquare_api_key: str = os.getenv('APIKEY_FOURSQUARE')
yelp_api_key: str = os.getenv('APIKEY_YELP')

## Read bike stations data

In [3]:
dirname = '../data/'
basename = 'stations.csv'
filename = dirname + basename

stations_df = pd.read_csv(filename, sep=',', index_col=False)
stations_df.head()

Unnamed: 0,station_id,name,latitude,longitude,altitude,free_bikes,empty_slots,has_ebikes,timestamp
0,e1593acef03a0fd770595370586bc358,P31 - Estación Canal 13,-33.428334,-70.627312,0.0,3,10,True,2024-05-03T00:15:40.367000Z
1,3983dd515589a80338dd44a28f5ec414,V34 - Mestizo,-33.394,-70.6,0.0,5,18,True,2024-05-03T00:15:40.004000Z
2,405a3a5ca08c7536d3eb286cf8553025,V10 - Casa Costanera,-33.398,-70.598,0.0,0,11,True,2024-05-03T00:15:40.005000Z
3,0e8dfc3f137cb3911bd32f5fd45e0f93,V35 - Municipalidad de Vitacura,-33.398,-70.601,0.0,1,11,True,2024-05-03T00:15:40.009000Z
4,bdf4fc889476008ea8c644640f306bbc,V07 - Bicentenario,-33.401,-70.602,0.0,8,8,True,2024-05-03T00:15:40.018000Z


## Foursquare

### Send a request to Foursquare for each bike station in your city of choice 

In [13]:
def foursquare_place_search(
    latitude: float,
    longitude: float,
    categories: str | None,
    radius: int,
    limit: int,
) -> requests.Response | None:
    """
    Return a get response from Foursquare's Place Search.

    Make a get request to Foursquare's Place Search using
    latitude, longitude, and radius. Accept a JSON response.
    Return a response object or None if there is an HTTPError.

    :param api_key: Foursquare API key
    :type api_key: str

    :param latitude: latitude of a place
    :type latitude: float

    :param longitude: longitude of a place
    :type longitude: float

    :param radius: metres around a place considered nearby 
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :return: a Place Search response from Foursquare
    :rtype: requests.Response | None    
    """
    url = 'https://api.foursquare.com/v3/places/search'

    ll = f'{latitude},{longitude}'
    params = {
        'll': ll,
        'radius': radius,
        'limit': limit,        
        'categories': categories
    }

    headers = {
        'accept': 'application/json',
        'authorization': foursquare_api_key
    }

    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
    except requests.HTTPError as err:
        print(f'get request failed, {err}')
        return None
    else:
        return response

In [14]:
def row_to_foursquare_results(
    stations_df: pd.DataFrame, 
    row: int,
    categories: (str | None) = None,
    radius: int = 1000,
    limit: int = 50,
) -> list[dict] | None:
    """
    Return the results list from a Foursquare Place Search.

    Given a particular row, *i.e.,* a bike station, from the 
    stations_df DataFrame previously assembled, return a list 
    of places nearby the bike station in question. Return None
    if the Foursquare get request fails or if the request 
    succeeds, but the corresponding response does not contain
    results.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the number of a row in stations_df
    :type row: int

    :return: the results list from a Foursqaure Place Search
    :rtype: list[dict]
    """
    station_id, lat, long =\
        stations_df.loc[row, ['station_id', 'latitude', 'longitude']].to_list()
    
    response = foursquare_place_search(lat, long,
                                       categories, radius, limit)

    if response is None:
        return None

    data = response.json()

    try:
        results = data['results']
    except KeyError as err:
        print(f'response does not contain results, {err}')
        return None
    else:
        return results

In [15]:
def result_to_places(
    stations_df: pd.DataFrame,
    result: dict, 
    row: int
) -> dict:
    """
    Return a dictionary of place information.

    Given a particular result, *i.e.,* a place, extract the
    information that will be added to the foursquare_places_df
    DataFrame.

    :param stations_df: bike stations data
    :type stations_df: DataFrame
    
    :param: a single result from a Foursquare Place Search
    :type: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    return {
        'fsq_id': result['fsq_id'],
        'name': result['name'],
        'closed_bucket': result['closed_bucket'],
        'distance': result['distance'],
        'station_id': stations_df.loc[row, 'station_id']
    }

In [16]:
def result_to_categories(result: dict, row: int) -> list[dict]:
    """
    Return a dictionary of place category information.

    Given a particular result, *i.e.,* a place, extract all of
    its categories as a list that will be added to the 
    foursquare_categories_df DataFrame.

    :param: a single result from a Foursquare Place Search
    :type: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: dict
    """
    return_list = []
    categories = result['categories']
    for category in categories:
        new_category = {
            'fsq_id': result['fsq_id'],
            'category_id': category['id'],
            'category_name': category['name']
        }
        return_list.append(new_category)
    return return_list

Initialize two lists intended to store dictionaries related to Foursquare places and categories.

In [191]:
foursquare_places = []
foursquare_categories = []

Loop through the rows of stations_df, parsing Foursquare Place Searches and extending foursquare_places and foursquare_categories on each iteration.

In [192]:
print(f'foursquare_places empty: {len(foursquare_places) == 0}')
print(f'foursquare_categories empty: {len(foursquare_categories) == 0}')
print('running...')
num_stations = stations_df.shape[0]
start = datetime.now()

for row in range(num_stations):
    results = row_to_foursquare_results(stations_df, row)
    for result in results:
        new_places = result_to_places(stations_df, result, row)
        new_categories = result_to_categories(result, row)
        foursquare_categories += new_categories
        foursquare_places.append(new_places)

end = datetime.now()
runtime = end - start
print(f'...done after {runtime.seconds} s')

foursquare_places empty: True
foursquare_categories empty: True
running...
...done after 115 s


### Put the results into DataFrames

In [193]:
foursquare_places_df = pd.DataFrame(foursquare_places)
foursquare_places_df.head()

Unnamed: 0,fsq_id,name,closed_bucket,distance,station_id
0,58dbe5d5126ae82bf8f7ff16,Lima 17,Unsure,163,e1593acef03a0fd770595370586bc358
1,4bd9e4db3904a593501d449e,Plaza de la Aviación,VeryLikelyOpen,435,e1593acef03a0fd770595370586bc358
2,54e39d82498e2bf4dbe8df34,Starbucks,VeryLikelyOpen,417,e1593acef03a0fd770595370586bc358
3,50c5137fe4b02085d85e4047,Café del Negro,Unsure,410,e1593acef03a0fd770595370586bc358
4,59ad8e4228122f2d257f8772,Restaurant 47 Ronin,VeryLikelyOpen,498,e1593acef03a0fd770595370586bc358


In [194]:
foursquare_places_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9171 entries, 0 to 9170
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         9171 non-null   object
 1   name           9171 non-null   object
 2   closed_bucket  9171 non-null   object
 3   distance       9171 non-null   int64 
 4   station_id     9171 non-null   object
dtypes: int64(1), object(4)
memory usage: 358.4+ KB


In [195]:
foursquare_categories_df = pd.DataFrame(foursquare_categories)
foursquare_categories_df.head()

Unnamed: 0,fsq_id,category_id,category_name
0,58dbe5d5126ae82bf8f7ff16,13322,Peruvian Restaurant
1,4bd9e4db3904a593501d449e,13065,Restaurant
2,4bd9e4db3904a593501d449e,16017,Garden
3,4bd9e4db3904a593501d449e,16032,Park
4,4bd9e4db3904a593501d449e,16041,Plaza


In [196]:
foursquare_categories_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11909 entries, 0 to 11908
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         11909 non-null  object
 1   category_id    11909 non-null  int64 
 2   category_name  11909 non-null  object
dtypes: int64(1), object(2)
memory usage: 279.2+ KB


### Export the DataFrames to CSV

In [197]:
dirname = '../data/'

basename = 'foursquare_places.csv'
filename = dirname + basename
foursquare_places_df.to_csv(filename, sep=',', index=False)

basename = 'foursquare_categories.csv'
filename = dirname + basename
foursquare_categories_df.to_csv(filename, sep=',', index=False)

## Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [60]:
def yelp_business_search(
    latitude: float,
    longitude: float,
    # categories: str,
    radius: int,
    limit: int
) -> requests.Response | None:
    """
    Return a Yelp business search response.
    """
    url = 'https://api.yelp.com/v3/businesses/search'

    params = {
        'latitude': latitude,
        'longitude': longitude,
        # 'categories': categories,
        'radius': radius,
        'limit': limit
    }

    headers = {
        'accept': 'application/json',
        'authorization': f'Bearer {yelp_api_key}' 
    }

    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
    except requests.HTTPError as err:
        print(f'get request failed, {err}')
        return None
    else:
        return response

In [61]:
def row_to_businesses(
    stations_df: pd.DataFrame, 
    row: int,
    radius: int = 1000,
    limit: int = 50,
) -> list[dict] | None:
    """
    Return the results list from a Yelp Businesses Search.

    Given a particular row, *i.e.,* a bike station, from the 
    stations_df DataFrame previously assembled, return a list 
    of places nearby the bike station in question. Return None
    if the Yelp get request fails or if the request 
    succeeds but the corresponding response does not contain
    results.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the number of a row in stations_df
    :type row: int

    :return: the results list from a Foursqaure Place Search
    :rtype: list[dict]
    """
    station_id, lat, long =\
        stations_df.loc[row, ['station_id', 'latitude', 'longitude']].to_list()
    
    response = yelp_business_search(lat, long, radius, limit)

    if response is None:
        return None

    data = response.json()

    try:
        results = data['businesses']
    except KeyError as err:
        print(f'response does not contain results, {err}')
        return None
    else:
        return results

In [65]:
def result_to_businesses(
    stations_df: pd.DataFrame,
    result: dict, 
    row: int
) -> dict:
    """
    Return a dictionary of place information.

    Given a particular result, *i.e.,* a place, extract the
    information that will be added to the foursquare_places_df
    DataFrame.

    :param stations_df: bike stations data
    :type stations_df: DataFrame
    
    :param: a single result from a Foursquare Place Search
    :type: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    return {
        'yelp_id': result['id'],
        'name': result['name'],
        'is_closed': result['is_closed'],
        'review_count': result['review_count'],
        'price': result.get('price'),
        'rating': result['rating'],
        'distance': result['distance'],
        'station_id': stations_df.loc[row, 'station_id']
    }

In [70]:
def result_to_bus_categories(result: dict, row: int) -> list[dict]:
    """
    Return a dictionary of place category information.

    Given a particular result, *i.e.,* a place, extract all of
    its categories as a list that will be added to the 
    foursquare_categories_df DataFrame.

    :param: a single result from a Foursquare Place Search
    :type: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: dict
    """
    return_list = []
    categories = result['categories']
    for category in categories:
        new_category = {
            'yelp_id': result['id'],
            'category_name': category['title']
        }
        return_list.append(new_category)
    return return_list

In [72]:
results = row_to_businesses(stations_df, 0)
results[0]

{'id': 'U15KdCy6HH05WISNtN9plQ',
 'alias': 'restaurant-peumayén-santiago',
 'name': 'Restaurant Peumayén',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/wkJAtF3Z4X5FUw5CR4c7tg/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/restaurant-peumay%C3%A9n-santiago?adjust_creative=63YPOQR85PnJzp_H-e7TYQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=63YPOQR85PnJzp_H-e7TYQ',
 'review_count': 73,
 'categories': [{'alias': 'latin', 'title': 'Latin American'}],
 'rating': 4.5,
 'coordinates': {'latitude': -33.4328073, 'longitude': -70.6348213},
 'transactions': [],
 'price': '$$$$',
 'location': {'address1': 'Constitución 136',
  'address2': '',
  'address3': '',
  'city': 'Santiago',
  'zip_code': '7500000',
  'country': 'CL',
  'state': 'RM',
  'display_address': ['Constitución 136', 'RM 7500000 Santiago', 'Chile']},
 'phone': '',
 'display_phone': '',
 'distance': 856.1614007191765,
 'attributes': {'business_temp_closed': None,
  'menu_url': None,


In [73]:
result_to_businesses(stations_df, results[1], 1)

{'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ',
 'name': 'Barrica 94',
 'is_closed': False,
 'review_count': 78,
 'price': '$$',
 'rating': 4.3,
 'distance': 988.6312698678695,
 'station_id': '3983dd515589a80338dd44a28f5ec414'}

In [74]:
result_to_bus_categories(results[1], 1)

[{'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Wine Bars'},
 {'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Chilean'},
 {'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Cocktail Bars'}]

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [79]:
yelp_places = []
yelp_categories = []

In [80]:
print(f'yelp_places empty: {len(yelp_places) == 0}')
print(f'yelp_categories empty: {len(yelp_categories) == 0}')
print('running...')
num_stations = stations_df.shape[0]
start = datetime.now()

num_stations = 3

for row in range(num_stations):
    results = row_to_businesses(stations_df, row)
    for result in results:
        new_places = result_to_businesses(stations_df, result, row)
        new_categories = result_to_bus_categories(result, row)
        yelp_categories += new_categories
        yelp_places.append(new_places)

end = datetime.now()
runtime = end - start
print(f'...done after {runtime.seconds} s')

yelp_places empty: True
yelp_categories empty: True
running...
...done after 2 s


In [81]:
yelp_places

[{'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ',
  'name': 'Barrica 94',
  'is_closed': False,
  'review_count': 78,
  'price': '$$',
  'rating': 4.3,
  'distance': 988.6312698678695,
  'station_id': 'e1593acef03a0fd770595370586bc358'},
 {'yelp_id': 'U15KdCy6HH05WISNtN9plQ',
  'name': 'Restaurant Peumayén',
  'is_closed': False,
  'review_count': 73,
  'price': '$$$$',
  'rating': 4.5,
  'distance': 856.1614007191765,
  'station_id': 'e1593acef03a0fd770595370586bc358'},
 {'yelp_id': 'y2FR-JBGie4wiW6RjtKLhg',
  'name': 'Aquí Está Coco',
  'is_closed': False,
  'review_count': 58,
  'price': '$$$$',
  'rating': 4.3,
  'distance': 1099.6913151374479,
  'station_id': 'e1593acef03a0fd770595370586bc358'},
 {'yelp_id': '25PGm8dLZuar_Q0CZWK-dQ',
  'name': 'Ciudad Vieja',
  'is_closed': False,
  'review_count': 27,
  'price': '$$',
  'rating': 4.4,
  'distance': 880.5152120850115,
  'station_id': 'e1593acef03a0fd770595370586bc358'},
 {'yelp_id': 'u-WjtuV5-d_Ffi-19pj8Kg',
  'name': 'Cerro San Cristóbal',


In [82]:
yelp_categories

[{'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Wine Bars'},
 {'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Chilean'},
 {'yelp_id': '8N6Y3HsLXHBeFwhlO0YxwQ', 'category_name': 'Cocktail Bars'},
 {'yelp_id': 'U15KdCy6HH05WISNtN9plQ', 'category_name': 'Latin American'},
 {'yelp_id': 'y2FR-JBGie4wiW6RjtKLhg', 'category_name': 'Seafood'},
 {'yelp_id': '25PGm8dLZuar_Q0CZWK-dQ', 'category_name': 'Sandwiches'},
 {'yelp_id': 'u-WjtuV5-d_Ffi-19pj8Kg', 'category_name': 'Parks'},
 {'yelp_id': 'x72iXJX0J7kibMpisppZ9g', 'category_name': 'Bars'},
 {'yelp_id': 'x72iXJX0J7kibMpisppZ9g', 'category_name': 'Italian'},
 {'yelp_id': 'N-j6cssa5SGs9DptgFaKMg', 'category_name': 'Seafood'},
 {'yelp_id': 'b5duAmqZb0KsiXn2beMlyw', 'category_name': 'Brasseries'},
 {'yelp_id': 'lmCk52sv-Vb5ncRXF_gHvw', 'category_name': 'Mexican'},
 {'yelp_id': 'FTnehJpfn5t3iq_eGcE6pg', 'category_name': 'Chilean'},
 {'yelp_id': 'FTnehJpfn5t3iq_eGcE6pg', 'category_name': 'Bars'},
 {'yelp_id': 'O86DNpypfjb9drywlNy5t

Put your parsed results into a DataFrame

In [84]:
yelp_places_df = pd.DataFrame(yelp_places)
yelp_places_df.head()

Unnamed: 0,yelp_id,name,is_closed,review_count,price,rating,distance,station_id
0,8N6Y3HsLXHBeFwhlO0YxwQ,Barrica 94,False,78,$$,4.3,988.63127,e1593acef03a0fd770595370586bc358
1,U15KdCy6HH05WISNtN9plQ,Restaurant Peumayén,False,73,$$$$,4.5,856.161401,e1593acef03a0fd770595370586bc358
2,y2FR-JBGie4wiW6RjtKLhg,Aquí Está Coco,False,58,$$$$,4.3,1099.691315,e1593acef03a0fd770595370586bc358
3,25PGm8dLZuar_Q0CZWK-dQ,Ciudad Vieja,False,27,$$,4.4,880.515212,e1593acef03a0fd770595370586bc358
4,u-WjtuV5-d_Ffi-19pj8Kg,Cerro San Cristóbal,False,49,,4.6,650.204483,e1593acef03a0fd770595370586bc358


In [85]:
yelp_categories_df = pd.DataFrame(yelp_categories)
yelp_categories_df.head()

Unnamed: 0,yelp_id,category_name
0,8N6Y3HsLXHBeFwhlO0YxwQ,Wine Bars
1,8N6Y3HsLXHBeFwhlO0YxwQ,Chilean
2,8N6Y3HsLXHBeFwhlO0YxwQ,Cocktail Bars
3,U15KdCy6HH05WISNtN9plQ,Latin American
4,y2FR-JBGie4wiW6RjtKLhg,Seafood


## Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating