# Part 2: Connecting to Foursquare and Yelp APIs

## Imports

In [1]:
from datetime import datetime
import os
import pandas as pd
# import requests

from utils import export, get

Get API keys:

In [2]:
# os.getenv returns None when the variable is not set, so both keys are
# optional strings until the environment is confirmed to provide them.
foursquare_api_key: str | None = os.getenv('APIKEY_FOURSQUARE')
yelp_api_key: str | None = os.getenv('APIKEY_YELP')

## Read bike stations data

In [3]:
# Path pieces for the bike-station table that is read below.
dirname = '../data/'
basename = 'stations.csv'
filename = f'{dirname}{basename}'

# index_col=False keeps pandas from using the first column as the index.
stations_df = pd.read_csv(filename, index_col=False, sep=',')
stations_df.head()

Unnamed: 0,station_id,name,latitude,longitude,altitude,free_bikes,empty_slots,has_ebikes,timestamp
0,e1593acef03a0fd770595370586bc358,P31 - Estación Canal 13,-33.428334,-70.627312,0.0,3,10,True,2024-05-03T00:15:40.367000Z
1,3983dd515589a80338dd44a28f5ec414,V34 - Mestizo,-33.394,-70.6,0.0,5,18,True,2024-05-03T00:15:40.004000Z
2,405a3a5ca08c7536d3eb286cf8553025,V10 - Casa Costanera,-33.398,-70.598,0.0,0,11,True,2024-05-03T00:15:40.005000Z
3,0e8dfc3f137cb3911bd32f5fd45e0f93,V35 - Municipalidad de Vitacura,-33.398,-70.601,0.0,1,11,True,2024-05-03T00:15:40.009000Z
4,bdf4fc889476008ea8c644640f306bbc,V07 - Bicentenario,-33.401,-70.602,0.0,8,8,True,2024-05-03T00:15:40.018000Z


## Foursquare

### Send a request to Foursquare for each bike station in your city of choice 

In [4]:
def foursquare_search(
    latitude: float,
    longitude: float,
    radius: int,
    limit: int,
    api_key: str = foursquare_api_key
):
    """
    Return a get response from Foursquare's Place Search.

    Make a get request to Foursquare's Place Search using
    latitude, longitude, and radius, asking for a JSON response.
    The request itself is delegated to the project's ``get`` helper.

    :param latitude: latitude of a place
    :type latitude: float

    :param longitude: longitude of a place
    :type longitude: float

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :param api_key: Foursquare API key sent in the authorization
        header; defaults to the key read from the environment
    :type api_key: str

    :return: whatever ``get`` returns -- presumably a Place Search
        response, or None when the request fails (callers check for
        None); confirm against utils.get
    :rtype: requests.Response | None
    """
    url = 'https://api.foursquare.com/v3/places/search'

    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius,
        'limit': limit,
    }

    headers = {
        'accept': 'application/json',
        # Honour the caller-supplied key instead of always reading the
        # module-level one, so the api_key parameter actually has an effect.
        'authorization': api_key,
    }

    return get(url, params, headers)

In [5]:
def foursquare_results(
    stations_df: pd.DataFrame,
    row: int,
    radius: int = 500,
    limit: int = 10,
) -> list[dict] | None:
    """
    Return the results list from a Foursquare Place Search.

    Given a particular row, *i.e.,* a bike station, from the
    stations_df DataFrame previously assembled, return a list
    of places nearby the bike station in question. Return None
    if the Foursquare get request fails or if the request
    succeeds, but the corresponding response does not contain
    results.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the number of a row in stations_df
    :type row: int

    :param radius: metres around the station considered nearby
    :type radius: int

    :param limit: max number of results to request
    :type limit: int

    :return: the results list from a Foursquare Place Search, or None
    :rtype: list[dict] | None
    """
    # Only the coordinates are needed for the search; the station id is
    # attached later, when individual results are converted to rows.
    lat, long = stations_df.loc[row, ['latitude', 'longitude']].to_list()

    response = foursquare_search(lat, long, radius, limit)
    if response is None:
        return None

    data = response.json()

    try:
        return data['results']
    except KeyError as err:
        print(f'response does not contain results, {err}')
        return None

In [6]:
def result_to_place(
    stations_df: pd.DataFrame,
    result: dict, 
    row: int
) -> dict:
    """
    Build one row of Foursquare place information.

    Copy the place fields of interest out of a single search result
    (missing fields become None) and tag the row with the id of the
    bike station the search was run for.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param result: a single result from a Foursquare Place Search
    :type result: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    copied_fields = ('fsq_id', 'name', 'closed_bucket', 'distance')
    place = {field: result.get(field) for field in copied_fields}
    # Link the place back to the station it was found near.
    place['station_id'] = stations_df.loc[row, 'station_id']
    return place

In [7]:
def result_to_categories(result: dict, row: int) -> list[dict]:
    """
    Return a list of category dictionaries for one place.

    Given a particular result, *i.e.,* a place, extract all of
    its categories as a list that will be added to the
    foursquare_categories_df DataFrame. A result with no categories
    (missing key, None, or empty list) yields an empty list.

    :param result: a single result from a Foursquare Place Search
    :type result: dict

    :param row: the number of a row in stations_df (currently unused)
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: list[dict]
    """
    fsq_id = result.get('fsq_id')
    # Every other field is read leniently with .get; do the same here so a
    # place without categories contributes no rows instead of raising.
    categories = result.get('categories') or []
    return [
        {
            'fsq_id': fsq_id,
            'category_id': category.get('id'),
            'category_name': category.get('name'),
        }
        for category in categories
    ]

Initialize two lists intended to store dictionaries related to Foursquare places and categories.

In [8]:
# Accumulators for the station loop: one dict per place, and one dict per
# (place, category) pair.
foursquare_places, foursquare_categories = [], []

Loop through the rows of stations_df, parsing Foursquare Place Searches and extending foursquare_places and foursquare_categories on each iteration.

In [9]:
# Report whether the accumulators start empty: re-running this cell without
# re-initializing them would append duplicate rows.
print(f'foursquare_places empty: {not foursquare_places}')
print(f'foursquare_categories empty: {not foursquare_categories}')
print('running...')
num_stations = stations_df.shape[0]
start = datetime.now()

for row in range(num_stations):
    results = foursquare_results(stations_df, row, radius=100, limit=2)
    if results is None:
        # The request failed or the response carried no results; skip this
        # station instead of failing on iteration over None.
        continue
    for result in results:
        # Convert both views of the result before storing either, so a
        # failure leaves the two lists consistent with each other.
        new_place = result_to_place(stations_df, result, row)
        new_categories = result_to_categories(result, row)
        foursquare_places.append(new_place)
        foursquare_categories.extend(new_categories)

end = datetime.now()
runtime = end - start
# total_seconds() covers the whole elapsed time; .seconds is only the
# seconds component of the timedelta.
print(f'...done after {int(runtime.total_seconds())} s')

foursquare_places empty: True
foursquare_categories empty: True
running...
...done after 113 s


### Put the results into DataFrames

In [10]:
# Each dict collected in foursquare_places becomes one row of the frame.
foursquare_places_df = pd.DataFrame(foursquare_places)
foursquare_places_df.head()

Unnamed: 0,fsq_id,name,closed_bucket,distance,station_id
0,4b574bbef964a520dc2f28e3,Mestizo,VeryLikelyOpen,33,3983dd515589a80338dd44a28f5ec414
1,4e911fa3be7b7962e9d1e3e4,Papito's,Unsure,31,3983dd515589a80338dd44a28f5ec414
2,516c733ee4b01f5270ed60b3,La Vinoteca,VeryLikelyOpen,52,405a3a5ca08c7536d3eb286cf8553025
3,58fd1218603d2a48552f0c84,Brunapoli,VeryLikelyOpen,50,405a3a5ca08c7536d3eb286cf8553025
4,4dc723c1e4cd169dc6772033,Teatro Mori Vitacura,Unsure,75,0e8dfc3f137cb3911bd32f5fd45e0f93


In [13]:
# Check column dtypes and non-null counts for the places table.
foursquare_places_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 372 entries, 0 to 371
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         372 non-null    object
 1   name           372 non-null    object
 2   closed_bucket  372 non-null    object
 3   distance       372 non-null    int64 
 4   station_id     372 non-null    object
dtypes: int64(1), object(4)
memory usage: 14.7+ KB


In [11]:
# Each dict collected in foursquare_categories becomes one row of the frame.
foursquare_categories_df = pd.DataFrame(foursquare_categories)
foursquare_categories_df.head()

Unnamed: 0,fsq_id,category_id,category_name
0,4b574bbef964a520dc2f28e3,13343,South American Restaurant
1,4b574bbef964a520dc2f28e3,13345,Spanish Restaurant
2,4e911fa3be7b7962e9d1e3e4,13007,Beer Garden
3,516c733ee4b01f5270ed60b3,17069,Grocery Store
4,516c733ee4b01f5270ed60b3,17076,Liquor Store


In [12]:
# Check column dtypes and non-null counts for the categories table.
foursquare_categories_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 446 entries, 0 to 445
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   fsq_id         446 non-null    object
 1   category_id    446 non-null    int64 
 2   category_name  446 non-null    object
dtypes: int64(1), object(2)
memory usage: 10.6+ KB


### Export the DataFrames to CSV

In [None]:
# Write both Foursquare tables next to the stations data, without the
# DataFrame index column.
dirname = '../data/'

for frame, basename in (
    (foursquare_places_df, 'foursquare_places.csv'),
    (foursquare_categories_df, 'foursquare_categories.csv'),
):
    filename = dirname + basename
    frame.to_csv(filename, sep=',', index=False)

In [14]:
# export is the project helper imported from utils; it is called here with a
# DataFrame and a target file name.
export(foursquare_places_df, 'foursquare_places.csv')
# The second call was truncated and referenced an undefined name; export the
# categories table the same way as the places table.
export(foursquare_categories_df, 'foursquare_categories.csv')

## Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
def yelp_business_search(
    latitude: float,
    longitude: float,
    radius: int,
    limit: int
) -> 'requests.Response | None':
    """
    Return a Yelp business search response.

    Make a get request to Yelp's business search using latitude,
    longitude, and radius, asking for a JSON response. The request is
    delegated to the project's ``get`` helper, the same way
    foursquare_search does it.

    :param latitude: latitude of a place
    :type latitude: float

    :param longitude: longitude of a place
    :type longitude: float

    :param radius: metres around a place considered nearby
    :type radius: int

    :param limit: max number of results to include in response
    :type limit: int

    :return: whatever ``get`` returns -- presumably the search response,
        or None when the request fails (callers check for None);
        confirm against utils.get
    :rtype: requests.Response | None
    """
    url = 'https://api.yelp.com/v3/businesses/search'

    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'limit': limit,
    }

    headers = {
        'accept': 'application/json',
        'authorization': f'Bearer {yelp_api_key}',
    }

    # The requests module is not imported in this notebook, so calling it
    # directly (or naming it in an evaluated annotation) fails; the shared
    # helper already used for Foursquare performs the request instead.
    return get(url, params, headers)

In [None]:
def row_to_businesses(
    stations_df: pd.DataFrame, 
    row: int,
    radius: int = 1000,
    limit: int = 50,
) -> list[dict] | None:
    """
    Return the businesses list from a Yelp business search.

    Given a particular row, *i.e.,* a bike station, from the
    stations_df DataFrame previously assembled, return a list
    of businesses nearby the bike station in question. Return None
    if the Yelp get request fails or if the request succeeds but
    the corresponding response does not contain businesses.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param row: the number of a row in stations_df
    :type row: int

    :param radius: metres around the station considered nearby
    :type radius: int

    :param limit: max number of businesses to request
    :type limit: int

    :return: the businesses list from a Yelp business search, or None
    :rtype: list[dict] | None
    """
    # Only the coordinates are needed for the search; the station id is
    # attached later, when individual results are converted to rows.
    lat, long = stations_df.loc[row, ['latitude', 'longitude']].to_list()

    response = yelp_business_search(lat, long, radius, limit)
    if response is None:
        return None

    data = response.json()

    try:
        return data['businesses']
    except KeyError as err:
        print(f'response does not contain results, {err}')
        return None

In [None]:
def result_to_businesses(
    stations_df: pd.DataFrame,
    result: dict, 
    row: int
) -> dict:
    """
    Build one row of Yelp business information.

    Copy the fields of interest out of a single business from a Yelp
    business search and tag the row with the id of the bike station
    the search was run for. Every copied field except ``price`` must
    be present in the result (a missing one raises KeyError);
    a missing ``price`` becomes None.

    :param stations_df: bike stations data
    :type stations_df: DataFrame

    :param result: a single business from a Yelp business search
    :type result: dict

    :param row: the number of a row in stations_df
    :type row: int

    :return: a dictionary to be added as a row to a DataFrame
    :rtype: dict
    """
    business = {'yelp_id': result['id']}
    for field in ('name', 'is_closed', 'review_count'):
        business[field] = result[field]
    # price is the one field read leniently.
    business['price'] = result.get('price')
    for field in ('rating', 'distance'):
        business[field] = result[field]
    # Link the business back to the station it was found near.
    business['station_id'] = stations_df.loc[row, 'station_id']
    return business

In [None]:
def result_to_bus_categories(result: dict, row: int) -> list[dict]:
    """
    Return a list of category rows for one Yelp business.

    Given a single business from a Yelp business search, produce one
    dictionary per category, each pairing the business id with the
    category title.

    :param result: a single business from a Yelp business search
    :type result: dict

    :param row: the number of a row in stations_df (currently unused)
    :type row: int

    :return: a list of dictionaries to be added as rows to a DataFrame
    :rtype: list[dict]
    """
    return [
        {'yelp_id': result['id'], 'category_name': entry['title']}
        for entry in result['categories']
    ]

In [None]:
# Spot-check: search around the first station and inspect one business.
results = row_to_businesses(stations_df, 0)
results[0]

In [None]:
# NOTE(review): results was fetched for row 0, but row 1 is passed here, so
# the station_id attached to this business is row 1's -- confirm intent.
result_to_businesses(stations_df, results[1], 1)

In [None]:
# Spot-check the category rows produced for the second business in results.
result_to_bus_categories(results[1], 1)

Parse the response to extract the details you want (name, rating, location, etc.) for each point of interest (POI), such as restaurants and bars.

In [None]:
# Accumulators for the station loop: one dict per business, and one dict per
# (business, category) pair.
yelp_places, yelp_categories = [], []

In [None]:
# Report whether the accumulators start empty: re-running this cell without
# re-initializing them would append duplicate rows.
print(f'yelp_places empty: {not yelp_places}')
print(f'yelp_categories empty: {not yelp_categories}')
print('running...')
start = datetime.now()

# Only the first 3 stations are queried for now. Capping with min() keeps the
# loop from asking for rows that stations_df does not have.
num_stations = min(stations_df.shape[0], 3)

for row in range(num_stations):
    results = row_to_businesses(stations_df, row)
    if results is None:
        # The request failed or the response carried no businesses; skip
        # this station instead of failing on iteration over None.
        continue
    for result in results:
        # Convert both views of the result before storing either, so a
        # failure leaves the two lists consistent with each other.
        new_place = result_to_businesses(stations_df, result, row)
        new_categories = result_to_bus_categories(result, row)
        yelp_places.append(new_place)
        yelp_categories.extend(new_categories)

end = datetime.now()
runtime = end - start
# total_seconds() covers the whole elapsed time; .seconds is only the
# seconds component of the timedelta.
print(f'...done after {int(runtime.total_seconds())} s')

In [None]:
# Display the collected business rows.
yelp_places

In [None]:
# Display the collected category rows.
yelp_categories

Put your parsed results into a DataFrame

In [None]:
# Each dict collected in yelp_places becomes one row of the frame.
yelp_places_df = pd.DataFrame(yelp_places)
yelp_places_df.head()

In [None]:
# Each dict collected in yelp_categories becomes one row of the frame.
yelp_categories_df = pd.DataFrame(yelp_categories)
yelp_categories_df.head()

## Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating