# Part 2: Connecting to Foursquare and Yelp APIs

## Imports

In [1]:
from datetime import datetime
import os
import pandas as pd

from utils import export, get

### Get API keys

In [2]:
# Read both API keys from environment variables so that no secret is
# hardcoded in the notebook. os.getenv returns None when a variable is
# not set, hence the optional annotations.
foursquare_api_key: str | None = os.getenv('APIKEY_FOURSQUARE')
yelp_api_key: str | None = os.getenv('APIKEY_YELP')

## Read bike stations data

In [3]:
# Path of the bike stations CSV, relative to the notebook.
dirname, basename = '../data/', 'stations.csv'
filename = f'{dirname}{basename}'

# index_col=False tells pandas not to use the first column as the index.
stations_df = pd.read_csv(filename, index_col=False, sep=',')
stations_df.head()

Unnamed: 0,timestamp,station_id,name,latitude,longitude,altitude,slots,free_bikes,empty_slots,has_ebikes,ebikes,normal_bikes
0,2024-05-03T15:29:29.800000Z,e1593acef03a0fd770595370586bc358,P31 - Estación Canal 13,-33.428334,-70.627312,0.0,13,6,7,True,0,6
1,2024-05-03T15:29:29.533000Z,3983dd515589a80338dd44a28f5ec414,V34 - Mestizo,-33.394,-70.6,0.0,23,8,15,True,0,8
2,2024-05-03T15:29:29.537000Z,405a3a5ca08c7536d3eb286cf8553025,V10 - Casa Costanera,-33.398,-70.598,0.0,11,8,3,True,0,8
3,2024-05-03T15:29:29.538000Z,0e8dfc3f137cb3911bd32f5fd45e0f93,V35 - Municipalidad de Vitacura,-33.398,-70.601,0.0,15,9,3,True,0,9
4,2024-05-03T15:29:29.539000Z,bdf4fc889476008ea8c644640f306bbc,V07 - Bicentenario,-33.401,-70.602,0.0,17,8,8,True,0,8


## Foursquare

### Send a request to Foursquare for each bike station in your city of choice 

In [4]:
def foursquare_search(
    latitude: float,
    longitude: float,
    radius: int,
    limit: int,
    api_key: str = foursquare_api_key
):
    """
    Return a get response from Foursquare's Place Search.

    Make a get request to Foursquare's Place Search using
    latitude, longitude, and radius. Accept a JSON response.

    :param latitude: latitude of a place
    :type latitude: float

    :param longitude: longitude of a place
    :type longitude: float

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :param api_key: Foursquare API key sent in the authorization
        header. The default is the value of foursquare_api_key at the
        time this function is defined, so re-run this cell after
        changing that variable.
    :type api_key: str

    :return: whatever utils.get returns for the request; presumably a
        Place Search response from Foursquare, or None on failure
    :rtype: requests.Response | None
    """
    # Foursquare Place Search endpoint
    url = 'https://api.foursquare.com/v3/places/search'

    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius,
        'limit': limit
    }

    headers = {
        'accept': 'application/json',
        # Use the key passed by the caller; previously the module-level
        # key was always sent and the api_key argument was ignored.
        'authorization': api_key
    }

    return get(url, params, headers)

In [5]:
def foursquare_results(
    stations_df: pd.DataFrame,
    row: int,
    radius: int = 500,
    limit: int = 10,
) -> list[dict] | None:
    """
    Return the results list from a Foursquare Place Search.

    Given a particular row, *i.e.,* a bike station, from the
    stations_df DataFrame previously assembled, return a list
    of places nearby the bike station in question. Return None
    if the Foursquare get request fails, if the response body
    cannot be decoded as JSON, or if the decoded response does
    not contain results.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the index label of a row in stations_df (looked up
        with .loc; equal to the row number for a default index)
    :type row: int

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :return: the results list from a Foursquare Place Search
    :rtype: list[dict] | None
    """
    # Only the coordinates are needed to build the search.
    lat, long = stations_df.loc[row, ['latitude', 'longitude']].to_list()

    response = foursquare_search(lat, long, radius, limit)

    if response is None:
        return None

    # Assuming response is a requests.Response: its JSON decoding
    # errors are ValueError subclasses, so a non-JSON body is reported
    # the same way as a response without results.
    try:
        data = response.json()
    except ValueError as err:
        print(f'response is not valid JSON, {err}')
        return None

    try:
        return data['results']
    except KeyError as err:
        print(f'response does not contain results, {err}')
        return None

Print out a single Foursquare result to get an idea of its structure.

In [6]:
# Request a single nearby place for the first station and display it.
# NOTE(review): foursquare_results returns None when the request fails,
# in which case the indexing below raises a TypeError.
tmp = foursquare_results(stations_df, row=0, radius=500, limit=1)
tmp[0]

{'fsq_id': '58dbe5d5126ae82bf8f7ff16',
 'categories': [{'id': 13322,
   'name': 'Peruvian Restaurant',
   'short_name': 'Peruvian',
   'plural_name': 'Peruvian Restaurants',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/peruvian_',
    'suffix': '.png'}}],
 'chains': [],
 'closed_bucket': 'Unsure',
 'distance': 163,
 'geocodes': {'main': {'latitude': -33.42865, 'longitude': -70.625766},
  'roof': {'latitude': -33.42865, 'longitude': -70.625766}},
 'link': '/v3/places/58dbe5d5126ae82bf8f7ff16',
 'location': {'admin_region': 'Santiago',
  'country': 'CL',
  'cross_street': '',
  'formatted_address': 'Providencia, Metropolitana de Santiago de Chile',
  'locality': 'Providencia',
  'region': 'Metropolitana de Santiago de Chile'},
 'name': 'Lima 17',
 'related_places': {},
 'timezone': 'America/Santiago'}

In [7]:
def result_to_place(
    stations_df: pd.DataFrame,
    result: dict,
    row: int
) -> dict:
    """
    Build one foursquare_places_df row from a Foursquare place.

    Copy the identifying fields of a single search result and tag
    them with the id of the bike station the search was made for.
    Fields absent from the result are stored as None.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param result: a single result from a Foursquare Place Search
    :type result: dict

    :param row: the index label of the station's row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    copied_fields = ('fsq_id', 'name', 'closed_bucket', 'distance')
    place = {field: result.get(field) for field in copied_fields}
    place['station_id'] = stations_df.loc[row, 'station_id']
    return place

In [8]:
def result_to_categories(result: dict, row: int) -> list[dict]:
    """
    Return a list of place category dictionaries.

    Given a particular result, *i.e.,* a place, extract all of
    its categories as a list that will be added to the
    foursquare_categories_df DataFrame. A place with a missing or
    null 'categories' entry yields an empty list.

    :param result: a single result from a Foursquare Place Search
    :type result: dict

    :param row: the number of a row in stations_df (not used here;
        kept so the call signature stays unchanged)
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: list[dict]
    """
    # Tolerate a missing or null 'categories' entry, in line with the
    # .get() lookups used for every other field.
    categories = result.get('categories') or []
    fsq_id = result.get('fsq_id')
    return [
        {
            'fsq_id': fsq_id,
            'category_id': category.get('id'),
            'category_name': category.get('name')
        }
        for category in categories
    ]

Initialize two lists intended to store dictionaries related to Foursquare places and categories.

In [9]:
# Accumulators for the request loop: each list collects one dictionary
# per future DataFrame row.
foursquare_places: list[dict] = []
foursquare_categories: list[dict] = []

Loop through the rows of stations_df, parsing Foursquare Place Searches and extending foursquare_places and foursquare_categories on each iteration.

In [10]:
# Report whether the accumulators are empty before starting; re-running
# this cell without resetting them appends duplicate rows.
print(f'foursquare_places empty: {len(foursquare_places) == 0}')
print(f'foursquare_categories empty: {len(foursquare_categories) == 0}')
print('running...')
num_stations = stations_df.shape[0]
start = datetime.now()

for row in range(num_stations):
    results = foursquare_results(stations_df, row, radius=1000, limit=50)
    if results is None:
        # foursquare_results returns None for a failed request or a
        # response without results; skip that station instead of
        # aborting the whole run with a TypeError.
        print(f'no results for row {row}, skipping')
        continue
    for result in results:
        new_places = result_to_place(stations_df, result, row)
        new_categories = result_to_categories(result, row)
        foursquare_categories += new_categories
        foursquare_places.append(new_places)

end = datetime.now()
runtime = end - start
print(f'...done after {runtime.seconds} s')

foursquare_places empty: True
foursquare_categories empty: True
running...
...done after 103 s


### Put the results into DataFrames

In [11]:
# One row per place returned for a station (a place found near several
# stations appears once per station).
foursquare_places_df = pd.DataFrame(foursquare_places)
foursquare_places_df.head()

Unnamed: 0,fsq_id,name,closed_bucket,distance,station_id
0,58dbe5d5126ae82bf8f7ff16,Lima 17,Unsure,163,e1593acef03a0fd770595370586bc358
1,4bd9e4db3904a593501d449e,Plaza de la Aviación,VeryLikelyOpen,435,e1593acef03a0fd770595370586bc358
2,54e39d82498e2bf4dbe8df34,Starbucks,VeryLikelyOpen,417,e1593acef03a0fd770595370586bc358
3,50c5137fe4b02085d85e4047,Café del Negro,Unsure,410,e1593acef03a0fd770595370586bc358
4,59ad8e4228122f2d257f8772,Restaurant 47 Ronin,VeryLikelyOpen,498,e1593acef03a0fd770595370586bc358


In [12]:
# Inspect column dtypes and non-null counts.
foursquare_places_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9217 entries, 0 to 9216
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         9217 non-null   object
 1   name           9217 non-null   object
 2   closed_bucket  9217 non-null   object
 3   distance       9217 non-null   int64 
 4   station_id     9217 non-null   object
dtypes: int64(1), object(4)
memory usage: 360.2+ KB


In [13]:
# One row per (place, category) pair; fsq_id links back to the places.
foursquare_categories_df = pd.DataFrame(foursquare_categories)
foursquare_categories_df.head()

Unnamed: 0,fsq_id,category_id,category_name
0,58dbe5d5126ae82bf8f7ff16,13322,Peruvian Restaurant
1,4bd9e4db3904a593501d449e,13065,Restaurant
2,4bd9e4db3904a593501d449e,16017,Garden
3,4bd9e4db3904a593501d449e,16032,Park
4,4bd9e4db3904a593501d449e,16041,Plaza


In [14]:
# Inspect column dtypes and non-null counts.
foursquare_categories_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11974 entries, 0 to 11973
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         11974 non-null  object
 1   category_id    11974 non-null  int64 
 2   category_name  11974 non-null  object
dtypes: int64(1), object(2)
memory usage: 280.8+ KB


### Export the DataFrames to CSV

In [15]:
# Write both DataFrames to CSV through the project's utils.export helper.
# NOTE(review): the output directory is decided inside export — confirm.
export(foursquare_places_df, 'foursquare_places.csv')
export(foursquare_categories_df, 'foursquare_categories.csv')

## Yelp

### Send a request to Yelp for each bike station in your city of choice

In [16]:
def yelp_search(
    latitude: float,
    longitude: float,
    radius: int,
    limit: int,
    api_key: str = yelp_api_key,
):
    """
    Return a get response from Yelp's Businesses Search.

    Make a get request to Yelp's Businesses Search using
    latitude, longitude, and radius. Accept a JSON response.

    :param latitude: latitude of a place
    :type latitude: float

    :param longitude: longitude of a place
    :type longitude: float

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :param api_key: Yelp API key sent as a Bearer token. The default
        is the value of yelp_api_key at the time this function is
        defined, so re-run this cell after changing that variable.
    :type api_key: str

    :return: whatever utils.get returns for the request; presumably a
        Businesses Search response from Yelp, or None on failure
    :rtype: requests.Response | None
    """
    # Yelp Businesses Search endpoint
    url = 'https://api.yelp.com/v3/businesses/search'

    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'limit': limit
    }

    headers = {
        'accept': 'application/json',
        # Use the key passed by the caller; previously the module-level
        # key was always sent and the api_key argument was ignored.
        'authorization': f'Bearer {api_key}'
    }

    return get(url, params=params, headers=headers)

In [17]:
def yelp_businesses(
    stations_df: pd.DataFrame,
    row: int,
    radius: int = 500,
    limit: int = 10,
) -> list[dict] | None:
    """
    Return the businesses list from a Yelp Businesses Search.

    Given a particular row, *i.e.,* a bike station, from the
    stations_df DataFrame previously assembled, return a list
    of businesses nearby the bike station in question. Return None
    if the Yelp get request fails, if the response body cannot be
    decoded as JSON, or if the decoded response does not contain
    businesses.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the index label of a row in stations_df (looked up
        with .loc; equal to the row number for a default index)
    :type row: int

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :return: the businesses list from a Yelp Businesses Search
    :rtype: list[dict] | None
    """
    # Only the coordinates are needed to build the search.
    lat, long = stations_df.loc[row, ['latitude', 'longitude']].to_list()

    response = yelp_search(lat, long, radius, limit)

    if response is None:
        return None

    # Assuming response is a requests.Response: its JSON decoding
    # errors are ValueError subclasses, so a non-JSON body is reported
    # the same way as a response without businesses.
    try:
        data = response.json()
    except ValueError as err:
        print(f'response is not valid JSON, {err}')
        return None

    try:
        return data['businesses']
    except KeyError as err:
        print(f'response does not contain businesses, {err}')
        return None

Print out a single Yelp result to get an idea of its structure.

In [19]:
# Request a single nearby business for the first station and display it.
# NOTE(review): yelp_businesses returns None when the request fails,
# in which case the indexing below raises a TypeError.
tmp = yelp_businesses(stations_df, row=0, radius=500, limit=1)
tmp[0]

{'id': 'kWQxKQrcn3PNHzxsdPALcw',
 'alias': 'jimbos-santiago-3',
 'name': "Jimbo's",
 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/Xy6cPUzV-mMm_UUnQ-QsfA/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/jimbos-santiago-3?adjust_creative=63YPOQR85PnJzp_H-e7TYQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=63YPOQR85PnJzp_H-e7TYQ',
 'review_count': 3,
 'categories': [{'alias': 'australian', 'title': 'Australian'}],
 'rating': 4.7,
 'coordinates': {'latitude': -33.4286, 'longitude': -70.6258},
 'transactions': [],
 'location': {'address1': 'Los Pinones 29',
  'address2': None,
  'address3': '',
  'city': 'Santiago',
  'zip_code': '',
  'country': 'CL',
  'state': 'RM',
  'display_address': ['Los Pinones 29', 'RM Santiago', 'Chile']},
 'phone': '+56951266916',
 'display_phone': '+56 9 5126 6916',
 'distance': 151.5245960834386,
 'attributes': {'business_temp_closed': None,
  'menu_url': None,
  'open24_hours': None,
  'waitlist_reservation': Non

In [20]:
def result_to_business(
    stations_df: pd.DataFrame,
    result: dict,
    row: int
) -> dict:
    """
    Build one yelp_places_df row from a Yelp business.

    Copy the descriptive fields of a single search result and tag
    them with the id of the bike station the search was made for.
    Fields absent from the result (e.g. 'price') are stored as None.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param result: a single result from a Yelp Businesses Search
    :type result: dict

    :param row: the index label of the station's row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    # Yelp's 'id' is stored under a source-specific column name.
    business = {'yelp_id': result.get('id')}

    same_name_fields = (
        'name', 'is_closed', 'review_count', 'price', 'rating', 'distance'
    )
    for field in same_name_fields:
        business[field] = result.get(field)

    business['station_id'] = stations_df.loc[row, 'station_id']
    return business

In [21]:
def result_to_categories(result: dict, row: int) -> list[dict]:
    """
    Return a list of business category dictionaries.

    Given a particular result, *i.e.,* a business, extract all of
    its categories as a list that will be added to the
    yelp_categories_df DataFrame. A business with a missing or
    null 'categories' entry yields an empty list.

    NOTE: this definition replaces the Foursquare function of the
    same name defined earlier in the notebook; re-run that earlier
    cell before re-running the Foursquare request loop.

    :param result: a single result from a Yelp Businesses Search
    :type result: dict

    :param row: the number of a row in stations_df (not used here;
        kept so the call signature stays unchanged)
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: list[dict]
    """
    # Tolerate a missing or null 'categories' entry and use .get()
    # lookups, in line with result_to_business.
    categories = result.get('categories') or []
    yelp_id = result.get('id')
    return [
        {
            'yelp_id': yelp_id,
            'category_name': category.get('title')
        }
        for category in categories
    ]

Initialize two lists to store information from Yelp on businesses and business categories.

In [22]:
# Accumulators for the request loop: each list collects one dictionary
# per future DataFrame row.
yelp_places: list[dict] = []
yelp_categories: list[dict] = []

In [23]:
# Report whether the accumulators are empty before starting; re-running
# this cell without resetting them appends duplicate rows.
print(f'yelp_places empty: {len(yelp_places) == 0}')
print(f'yelp_categories empty: {len(yelp_categories) == 0}')
print('running...')
num_stations = stations_df.shape[0]
start = datetime.now()

for row in range(num_stations):
    results = yelp_businesses(stations_df, row, radius=1000, limit=50)
    if results is None:
        # yelp_businesses returns None for a failed request or a
        # response without businesses; skip that station instead of
        # aborting the whole run with a TypeError.
        print(f'no businesses for row {row}, skipping')
        continue
    for result in results:
        new_places = result_to_business(stations_df, result, row)
        new_categories = result_to_categories(result, row)
        yelp_categories += new_categories
        yelp_places.append(new_places)

end = datetime.now()
runtime = end - start
print(f'...done after {runtime.seconds} s')

yelp_places empty: True
yelp_categories empty: True
running...
...done after 194 s


### Put the results into DataFrames

In [24]:
# One row per business returned for a station (a business found near
# several stations appears once per station).
yelp_places_df = pd.DataFrame(yelp_places)
yelp_places_df.head()

Unnamed: 0,yelp_id,name,is_closed,review_count,price,rating,distance,station_id
0,8N6Y3HsLXHBeFwhlO0YxwQ,Barrica 94,False,78,$$,4.3,988.63127,e1593acef03a0fd770595370586bc358
1,U15KdCy6HH05WISNtN9plQ,Restaurant Peumayén,False,73,$$$$,4.5,856.161401,e1593acef03a0fd770595370586bc358
2,y2FR-JBGie4wiW6RjtKLhg,Aquí Está Coco,False,58,$$$$,4.3,1099.691315,e1593acef03a0fd770595370586bc358
3,25PGm8dLZuar_Q0CZWK-dQ,Ciudad Vieja,False,27,$$,4.4,880.515212,e1593acef03a0fd770595370586bc358
4,x72iXJX0J7kibMpisppZ9g,Liguria Manuel Montt,False,27,$$$,3.8,775.05052,e1593acef03a0fd770595370586bc358


In [26]:
# Inspect column dtypes and non-null counts.
yelp_places_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10366 entries, 0 to 10365
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   yelp_id       10366 non-null  object 
 1   name          10366 non-null  object 
 2   is_closed     10366 non-null  bool   
 3   review_count  10366 non-null  int64  
 4   price         7007 non-null   object 
 5   rating        10366 non-null  float64
 6   distance      10366 non-null  float64
 7   station_id    10366 non-null  object 
dtypes: bool(1), float64(2), int64(1), object(4)
memory usage: 577.1+ KB


In [27]:
# One row per (business, category) pair; yelp_id links back to the
# businesses.
yelp_categories_df = pd.DataFrame(yelp_categories)
yelp_categories_df.head()

Unnamed: 0,yelp_id,category_name
0,8N6Y3HsLXHBeFwhlO0YxwQ,Wine Bars
1,8N6Y3HsLXHBeFwhlO0YxwQ,Chilean
2,8N6Y3HsLXHBeFwhlO0YxwQ,Cocktail Bars
3,U15KdCy6HH05WISNtN9plQ,Latin American
4,y2FR-JBGie4wiW6RjtKLhg,Seafood


In [28]:
# Inspect column dtypes and non-null counts.
yelp_categories_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15181 entries, 0 to 15180
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   yelp_id        15181 non-null  object
 1   category_name  15181 non-null  object
dtypes: object(2)
memory usage: 237.3+ KB


### Export the DataFrames to CSV

In [29]:
# Write both DataFrames to CSV through the project's utils.export helper.
# NOTE(review): the output directory is decided inside export — confirm.
export(yelp_places_df, 'yelp_places.csv')
export(yelp_categories_df, 'yelp_categories.csv')

## Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating