# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [1]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os
from typing import List, Dict, Optional
from api_utils import get_city_bikes_data # imports get_city_bikes_data function


load_dotenv()
FOURSQUARE_API_KEY = os.getenv('FOURSQUARE_KEY')

def get_foursquare_data(
    lat: float, 
    lon: float, 
    radius: int = 1000, 
    categories: List[str] = ['13065', '13032', '11044']
) -> pd.DataFrame:
    """
    Fetch Foursquare POI data for a given location.
    
    Args:
        lat: Location latitude
        lon: Location longitude
        radius: Search radius in meters
        categories: Category IDs to filter results
                   13065: Restaurant
                   13032: Bar
                   11044: Point of Interest
    
    Returns:
        DataFrame with POI details
    """
    if not FOURSQUARE_API_KEY:
        raise ValueError("Foursquare API key not found in environment variables")

    search_url = 'https://api.foursquare.com/v3/places/search'
    headers = {
        "Authorization": FOURSQUARE_API_KEY,
        "Accept": "application/json"
    }
    params = {
        'll': f'{lat},{lon}',
        'radius': radius,
        'categories': ','.join(categories),
        'limit': 50,
        'sort': 'DISTANCE'
    }

    try:
        response = requests.get(search_url, params=params, headers=headers)
        response.raise_for_status()
        
        foursquare_pois = response.json().get('results', [])
        
        if not foursquare_pois:
            return pd.DataFrame()

        foursquare_results = [{
            'POI_name': poi.get('name'),
            'address': poi.get('location', {}).get('formatted_address', 'N/A'),
            'latitude': poi.get('geocodes', {}).get('main', {}).get('latitude'),
            'longitude': poi.get('geocodes', {}).get('main', {}).get('longitude'),
            'rating': poi.get('rating', 'N/A'),
            'distance_meters': poi.get('distance'),
            'category_id': poi.get('categories', [{}])[0].get('id'),
            'category_name': poi.get('categories', [{}])[0].get('name', 'N/A'),
            'total_ratings': poi.get('stats', {}).get('total_ratings', 0)
        } for poi in foursquare_pois]

        return pd.DataFrame(foursquare_results)

    except requests.exceptions.RequestException as e:
        raise Exception(f"Foursquare API request failed: {str(e)}")
    except Exception as e:
        raise Exception(f"Error processing Foursquare data: {str(e)}")

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:

"""
Fetches city bike data for a given city and retrieves Foursquare data for the first bike station.
"""
city_name = 'hamilton'
stations_df = get_city_bikes_data(city_name)

# Direct access to latitude and longitude for the first bike station
latitude = stations_df['latitude'][0]
longitude = stations_df['longitude'][0]

print(f"Fetching Foursquare data for coordinates: {latitude}, {longitude}")
foursquare_df = get_foursquare_data(latitude, longitude)

Fetching Foursquare data for coordinates: 43.26785864558976, -79.8679232597351


Put your parsed results into a DataFrame

In [3]:

# Display the retrieved POI data
foursquare_df.head()

Unnamed: 0,POI_name,address,latitude,longitude,rating,distance_meters,category_id,category_name,total_ratings
0,Hutch's Harbour Front,"325 Bay St N, Hamilton ON L8L 1M5",43.267592,-79.868115,,33,13051,Fish and Chips Shop,0
1,Thai Asian Bowl,"81 Stuart St, Hamilton ON L8L 1B5",43.266504,-79.868441,,156,13352,Thai Restaurant,0
2,Neit Bar,"337 James St N (Murray), Hamilton ON L8L 1H3",43.265031,-79.865781,,358,13065,Restaurant,0
3,This Ain't Hollywood,"345 James St N (at Murray), Hamilton ON L8L 1H3",43.264986,-79.865634,,369,10000,Arts and Entertainment,0
4,Route 66,"468 James St N, Hamilton ON L8L 1J1",43.268901,-79.863467,,378,13065,Restaurant,0


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [4]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os
from api_utils import get_city_bikes_data # imports get_city_bikes_data function

# Load environment variables
load_dotenv()
YELP_API_KEY = os.getenv('YELP_API')

def get_yelp_data(lat, lon, radius=1000, terms=['restaurants', 'libraries', 'shopping']):
    """
    Fetch Yelp POI data for given latitude and longitude, including distance in meters.
    
    Parameters:
    - lat (float): Latitude of the location.
    - lon (float): Longitude of the location.
    - radius (int): Search radius in meters (default 1000).
    - terms (list): List of search terms to query.
    
    Returns:
    - pd.DataFrame: DataFrame containing Yelp POI information.
    """
    api_key = YELP_API_KEY
    api_url = "https://api.yelp.com/v3/businesses/search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    all_yelp_results = []

    for term in terms:
        params = {
            'term': term,
            'latitude': lat,
            'longitude': lon,
            'radius': radius,
            'limit': 50
        }

        print(f"Making request to Yelp for term '{term}'...")
        response = requests.get(api_url, headers=headers, params=params)

        if response.status_code == 200:
            try:
                yelp_pois = response.json().get('businesses', [])
                for poi in yelp_pois:
                    all_yelp_results.append({
                        'name': poi.get('name'),
                        'address': poi.get('location', {}).get('address1', 'N/A'),
                        'latitude': poi.get('coordinates', {}).get('latitude'),
                        'longitude': poi.get('coordinates', {}).get('longitude'),
                        'rating': poi.get('rating', 'N/A'),
                        'review_count': poi.get('review_count', 'N/A'),
                        'distance_meters': poi.get('distance'),  # Distance in meters
                        'POI_Type': term  # Include the term/category as a field
                    })
            except Exception as e:
                print(f"Error parsing Yelp response for term '{term}':", e)
        else:
            print(f"Error fetching data from Yelp for term '{term}': {response.status_code}")
            print(response.text)

    yelp_df = pd.DataFrame(all_yelp_results)
    return yelp_df



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [5]:
# Example usage
city_name = 'hamilton'
stations_df = get_city_bikes_data(city_name)

# Direct access to latitude and longitude for the first bike station
latitude = stations_df['latitude'][0]
longitude = stations_df['longitude'][0]              

yelp_df = get_yelp_data(latitude, longitude)

print("Yelp DataFrame:")


Making request to Yelp for term 'restaurants'...
Making request to Yelp for term 'libraries'...
Making request to Yelp for term 'shopping'...
Yelp DataFrame:


Put your parsed results into a DataFrame

In [6]:
yelp_df.head()

Unnamed: 0,name,address,latitude,longitude,rating,review_count,distance_meters,POI_Type
0,Berkeley North,31 King William Street,43.257405,-79.867715,4.3,42,1162.56421,restaurants
1,Charred Chicken,244 James Street N,43.262736,-79.866384,4.2,121,583.045938,restaurants
2,Mesa,255 James Street N,43.26306,-79.86645,4.0,57,545.679223,restaurants
3,El Grito Mexicano,236 James Street N,43.2626,-79.866237,5.0,1,600.437633,restaurants
4,Born and Raised,224 James Street N,43.26235,-79.86626,3.8,27,618.142975,restaurants


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [176]:
def collect_multi_location_data(stations_df: pd.DataFrame, num_locations: int = 5) -> tuple:
    """
    Collects data from both APIs for multiple bike station locations.
    
    Args:
        stations_df: DataFrame containing bike stations
        num_locations: Number of locations to sample
    
    Returns:
        tuple: (all_foursquare_data, all_yelp_data)
    """
    # Sample stations if there are more than requested
    if len(stations_df) > num_locations:
        sampled_stations = stations_df.sample(n=num_locations, random_state=42)
    else:
        sampled_stations = stations_df
        print(f"Warning: Only {len(stations_df)} stations available")
    
    all_foursquare_data = []
    all_yelp_data = []
    
    for idx, station in sampled_stations.iterrows():
        lat, lon = station['latitude'], station['longitude']
        print(f"\nFetching data for station: {station['name']}")
        print(f"Coordinates: {lat}, {lon}")
        
        try:
            # Get Foursquare data
            foursquare_df = get_foursquare_data(lat, lon)
            foursquare_df['station_id'] = station['id']
            foursquare_df['station_name'] = station['name']
            all_foursquare_data.append(foursquare_df)
            
            # Get Yelp data
            yelp_df = get_yelp_data(lat, lon)
            yelp_df['station_id'] = station['id']
            yelp_df['station_name'] = station['name']
            all_yelp_data.append(yelp_df)
            
        except Exception as e:
            print(f"Error collecting data for station {station['name']}: {str(e)}")
            continue
    
    # Combine all data
    combined_foursquare = pd.concat(all_foursquare_data, ignore_index=True) if all_foursquare_data else pd.DataFrame()
    combined_yelp = pd.concat(all_yelp_data, ignore_index=True) if all_yelp_data else pd.DataFrame()
    
    return combined_foursquare, combined_yelp
def compare_api_quality(foursquare_df: pd.DataFrame, yelp_df: pd.DataFrame) -> str:
    """
    Enhanced comparison of Foursquare and Yelp API results.
    """
    # Get unique stations
    foursquare_stations = foursquare_df['station_name'].unique()
    yelp_stations = yelp_df['station_name'].unique()
    
    comparison_text = f"""
API Comparison Results (across {len(foursquare_stations)} locations):

Coverage Metrics:
----------------"""
    
    # Overall statistics
    total_foursquare = len(foursquare_df)
    total_yelp = len(yelp_df)
    
    # Per-station metrics
    station_metrics = []
    
    for station in foursquare_stations:
        f_data = foursquare_df[foursquare_df['station_name'] == station]
        y_data = yelp_df[yelp_df['station_name'] == station]
        
        metrics = {
                    'station': station,
                    'foursquare_count': len(f_data),
                    'yelp_count': len(y_data),
                    'foursquare_rated': f_data['rating'].replace('N/A', pd.NA).notna().sum(),
                    'yelp_rated': y_data['rating'].replace('N/A', pd.NA).notna().sum(),
                    'foursquare_avg_rating': f_data['rating']
                        .replace('N/A', pd.NA)
                        .dropna()
                        .astype(float)
                        .mean(),
                    'yelp_avg_rating': y_data['rating']
                        .replace('N/A', pd.NA)
                        .dropna()
                        .astype(float)
                        .mean()
}

        station_metrics.append(metrics)
    
    # Add per-station results
    comparison_text += f"""

Total POIs found:
- Foursquare: {total_foursquare} ({total_foursquare/len(foursquare_stations):.1f} per station)
- Yelp: {total_yelp} ({total_yelp/len(yelp_stations):.1f} per station)

Per-Station Breakdown:
--------------------"""
    
    for metrics in station_metrics:
        comparison_text += f"""
Station: {metrics['station']}
- Foursquare POIs: {metrics['foursquare_count']} (rated: {metrics['foursquare_rated']})
- Yelp POIs: {metrics['yelp_count']} (rated: {metrics['yelp_rated']})
- Avg Ratings: Foursquare={metrics['foursquare_avg_rating']:.2f}, Yelp={metrics['yelp_avg_rating']:.2f}
"""
    
    # Overall conclusion
    comparison_text += "\nConclusions:\n-----------\n"
    if total_foursquare > total_yelp:
        comparison_text += "- Foursquare provides more POIs overall\n"
    else:
        comparison_text += "- Yelp provides more POIs overall\n"
    
    foursquare_rated = foursquare_df['rating'].replace('N/A', pd.NA).notna().sum()
    yelp_rated = yelp_df['rating'].replace('N/A', pd.NA).notna().sum()
    
    if foursquare_rated/total_foursquare > yelp_rated/total_yelp:
        comparison_text += "- Foursquare has a higher percentage of rated POIs\n"
    else:
        comparison_text += "- Yelp has a higher percentage of rated POIs\n"
    
    return comparison_text



In [177]:


# Usage example:
try:
    # Get bike stations
    stations_df = get_city_bikes_data(city='hamilton')
    
    # Collect data for multiple locations
    foursquare_data, yelp_data = collect_multi_location_data(stations_df, num_locations=5)
    
    # Compare the results
    comparison_results = compare_api_quality(foursquare_data, yelp_data)
    print(comparison_results)
    
except Exception as e:
    print(f"Error in API comparison: {str(e)}")


Fetching data for station: Pearl Street Bridge
Coordinates: 43.25650634283747, -79.88345324993132
Making request to Yelp for term 'restaurants'...
Making request to Yelp for term 'libraries'...
Making request to Yelp for term 'shopping'...

Fetching data for station: Hunter at Ferguson
Coordinates: 43.251798383773966, -79.86318781971931
Making request to Yelp for term 'restaurants'...
Making request to Yelp for term 'libraries'...
Making request to Yelp for term 'shopping'...

Fetching data for station: James at Colbourne
Coordinates: 43.2637774120007, -79.8660421593136
Making request to Yelp for term 'restaurants'...
Making request to Yelp for term 'libraries'...
Making request to Yelp for term 'shopping'...

Fetching data for station: MAC Rack Go Terminal
Coordinates: 43.26177410041038, -79.92258936166763
Making request to Yelp for term 'restaurants'...
Making request to Yelp for term 'libraries'...
Making request to Yelp for term 'shopping'...

Fetching data for station: MAC Rack M

Get the top 10 restaurants according to their rating

In [None]:
def get_top_restaurants(yelp_df):
    """
    Retrieves the top 10 restaurants from both API results based on rating.

    Args:
       yelp_df (pandas.DataFrame): Dataframe of yelp data

    Returns:
        dict: A dictionary containing the top 10 restaurants from each api, according to their rating
    """
    yelp_top_10 = yelp_df.dropna(subset=['rating'])
    yelp_top_10 = yelp_top_10.sort_values('rating', ascending=False).head(10)
    top_restaurants = yelp_top_10[['name', 'address', 'rating', 'POI_Type']]

    return {"yelp_top_10 ":  top_restaurants}

In [186]:
print(get_top_restaurants(yelp_df))

{'yelp_top_10\n ':                            name                  address  rating     POI_Type
0             El Grito Mexicano       236 James Street N     5.0  restaurants
65                Rock 'N' Tees        199 King Street E     5.0     shopping
40       Mehfill Indian Cuisine  135 King William Street     5.0  restaurants
21                        Tomah       132 Queen Street S     5.0  restaurants
47   Hammerheads Fresh On Locke       140 Locke Street S     5.0  restaurants
19                  Boom Scorch        187 King Street E     5.0  restaurants
100       The Framing Warehouse         98 John Street N     5.0     shopping
17                 Mystic Ramen   51 King William Street     5.0  restaurants
58            The Pale Blue Dot         240 James Street     5.0     shopping
15                      Tacomex        162 King Street W     5.0  restaurants}
