In [1]:
# imports
import pandas as pd
import os # use this to access your environment variables
import requests # this will be used to call the APIs
from IPython.display import JSON #for nice JSON (or dict) display in JupyterLab (not currently supported in Jupyter Notebook)
from pprint import pprint # you will most likely use this instead of the JSON printing function

In [None]:
FOURSQUARE_KEY = os.getenv('FOURSQUARE_API_KEY')
YELP_API_KEY = os.getenv('YELP_API_KEY')

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
# Read in the milan csv
df_milan = pd.read_csv('/Users/blairjdaniel/lighthouse/lighthouse/statistical_project/Statistical-Modelling-Project/notebooks/milan.csv')
df_milan


Unnamed: 0.1,Unnamed: 0,name,longitude,latitude,timestamp,free_bikes,empty_slots,total_slots
0,0,368 - Tolstoi - Lorenteggio,9.149430,45.453710,2025-02-06T20:49:44.915500Z,20,6,30
1,1,25 - Centrale 1,9.202572,45.485456,2025-02-06T20:49:44.945428Z,8,13,24
2,2,161 - Coni Zugna - Solari,9.168010,45.457079,2025-02-06T20:49:44.919206Z,25,0,36
3,3,16 - Moscova,9.184560,45.477534,2025-02-06T20:49:44.945565Z,14,7,24
4,4,359 - Tertulliano - Caroncini,9.218048,45.449209,2025-02-06T20:49:44.915729Z,11,15,27
...,...,...,...,...,...,...,...,...
317,317,185 - Santissima Trinità,9.176445,45.478022,2025-02-06T20:49:44.919397Z,24,7,36
318,318,64 - Diaz,9.189676,45.462064,2025-02-06T20:49:44.944672Z,10,24,39
319,319,173 - Vercelli - Cherubini,9.159315,45.466847,2025-02-06T20:49:44.918566Z,25,6,36
320,320,98 - San Marco,9.189180,45.478693,2025-02-06T20:49:44.944535Z,10,12,24


In [65]:
# Define the get_stations function for Foursquare
def get_stations(latitude, longitude, radius=1000, limit=50, api_key=FOURSQUARE_KEY, categories=None, fields=None):
    """
    Get venues from Foursquare within a specified radius

    Args:
        latitude and longitude (float): Latitude and Longitude for query
        radius (int): given venues within radius
        limit: limit to 1 request for testing
        offset: offset for pagination
        api_key: for access to the API
        categories: types of places I want to filter for from the Foursquare API
        fields: specific fields to include in the response

    Returns:
        dict: Json response from the Foursquare API
    """
    # Define the headers for the request
    headers = {
        'Accept': 'application/json',
        'Authorization': f"{api_key}"
    }

    # Define the endpoint and parameters for the request
    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius,
        'limit': limit,
        
    }

    # Add categories to the parameters if provided
    if categories:
        params['categories'] = categories

    # Add fields to the parameters if provided
    if fields:
        params['fields'] = fields

    # Send the request to the Foursquare API
    response = requests.get(url, headers=headers, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        response.raise_for_status()


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [79]:
# Define the categories variable for Foursquare with valid category IDs
categories = '4bf58dd8d48988d16d941735,4bf58dd8d48988d1fe931735,63be6904847c3692a84b9bec'

# Define the fields to include in the response
fields = 'name,location,geocodes,categories,rating'

# Initialize an empty list to store the station data
all_stations = []

# Loop through each row of the DataFrame
for index, row in df_milan.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    name = row['name']
    stations_fs = get_stations(latitude, longitude, categories=categories, fields=fields)
    
    # Flatten and extract relevant fields from the JSON response
    for venue in stations_fs.get('results', []):
        station_info = {
            'name': venue.get('name'),
            'latitude': venue['geocodes']['main']['latitude'],
            'longitude': venue['geocodes']['main']['longitude'],
            'address': venue['location'].get('address', ''),
            'category': ', '.join([category['name'] for category in venue.get('categories', [])]),
            'rating': venue.get('rating', None)
        
            
        }
        all_stations.append(station_info)
    




In [None]:
len(stations_fs['results'])

Put your parsed results into a DataFrame

In [109]:
# Create a DataFrame from the flattened list of station data
stations_df = pd.DataFrame(all_stations)

# Drop duplicates based on the 'name' column
stations_df = stations_df.drop_duplicates(subset='name').reset_index()

# Count the number of items in the 'category' column
stations_df['category_count'] = stations_df['category'].apply(lambda x: len(x.split(',')))
stations_df

# Display the resulting DataFrame but sorted by rati
#stations_df.sort_values(by=['rating'], ascending=False).head(50)

Unnamed: 0,index,name,latitude,longitude,address,category,rating,category_count
0,0,Blanco Caffè,45.450255,9.151228,Via Leone Tolstoj 9,"Bar, Café, Sandwich Spot",7.7,3
1,1,Lieti Calici,45.456005,9.157317,Via Stendhal 57,"Bar, Café, Restaurant",8.0,3
2,2,Il Caffe Ambrosiano,45.461335,9.155175,Via washington 51,"Bar, Café, Restaurant",6.7,3
3,3,Original Marines,45.454318,9.150712,Via Lorenteggio 25,"Children's Clothing Store, Clothing Store",,2
4,4,Insolite Follie,45.456182,9.152393,Via Tolstoi Leone 45,Clothing Store,,1
...,...,...,...,...,...,...,...,...
1260,11695,MC Kenzy - Milano Romana,45.458234,9.191991,Corso di Porta Romana 46,Clothing Store,,1
1261,11738,Passi Italiani,45.480447,9.210632,Piazza Lima 1,Shoe Store,,1
1262,11755,Miss Marilyn Milano,45.477924,9.208932,Corso Buenos aires 24,Fashion Retail,,1
1263,11784,Anna Ceruti Per Sposa Bella,45.445258,9.213100,Corso Lodi 52,Bridal Store,,1


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [35]:
# Read in the milan csv
df_milan = pd.read_csv('/Users/blairjdaniel/lighthouse/lighthouse/statistical_project/Statistical-Modelling-Project/notebooks/milan.csv')

# Define the get_stations function for Yelp
def get_yelp_stations(latitude, longitude, radius=1000, limit=20, api_key=YELP_API_KEY, categories=None):
    """
    Get venues from Yelp within a specified radius

    Args:
        latitude and longitude (float): Latitude and Longitude for query
        radius (int): given venues within radius
        limit: limit to 1 request for testing
        api_key: for access to the API
        categories: types of places I want to filter for from the Yelp API

    Returns:
        dict: Json response from the Yelp API
    """
    # Define the headers for the request
    headers = {
        'Authorization': f'Bearer {api_key}',
    }

    # Define the endpoint and parameters for the request
    url = 'https://api.yelp.com/v3/businesses/search'
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'limit': limit
    }

    # Add categories to the parameters if provided
    if categories:
        params['categories'] = categories

    # Send the request to the Yelp API
    response = requests.get(url, headers=headers, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        response.raise_for_status()




Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [92]:
# Categories list
category = ['fashion', 'coffee', 'buses']
# Initialize an empty list to store the station data
all_stations_yelp = []

# Loop through just one row of the DataFrame
for index, row in df_milan.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    name = row['name']
    stations_yelp_one = get_yelp_stations(latitude, longitude, categories=category)
    
    # Extract relevant information from the JSON response
    for business in stations_yelp_one.get('businesses', []):
        station_info_yelp = {
            'name': business.get('name'),
            'latitude': business['coordinates']['latitude'],
            'longitude': business['coordinates']['longitude'],
            'address': ', '.join(business['location']['display_address']),
            'category': ', '.join([cat['title'] for cat in business.get('categories', [])]),
            'rating': business.get('rating')
        }
        all_stations_yelp.append(station_info_yelp)

# Create a DataFrame from the list of station data
stations_df_yelp = pd.DataFrame(all_stations_yelp)
stations_df_yelp.sort_values(by=['rating'], ascending=False)


Unnamed: 0,name,latitude,longitude,address,category,rating
3208,Taste of England,45.460140,9.190320,"Piazza Velasca 5, 20122 Milan, Italy","Specialty Food, Coffee & Tea",5.0
2653,Sakurasan,45.455770,9.205990,"viale Lazio, 6, 20135 Milan, Italy","Arts & Crafts, Accessories, Women's Clothing",5.0
2730,Naturino,45.463860,9.197760,"via durini, 4, 20122 Milan, Italy",Shoe Stores,5.0
2725,Gucci,45.467962,9.195424,"Via Monte Napoleone 5/7, 20121 Milan, Italy","Children's Clothing, Men's Clothing, Women's C...",5.0
2710,Wait and See,45.461883,9.183036,"Via Santa Marta 14, 20123 Milan, Italy",Women's Clothing,5.0
...,...,...,...,...,...,...
2229,Lucchino Alessandro Boutique,45.471890,9.232120,"Via Folranini, 19, 20133 Milan, Italy",Fashion,0.0
3878,Wild West,45.503622,9.179436,"Via Legnone 81, 20158 Milan, Italy","Men's Clothing, Women's Clothing",0.0
3879,La Cialdina,45.504913,9.175958,"Via Giuseppe Tartini 12, 20158 Milan, Italy",Coffee & Tea,0.0
2223,Salvagente Bimbi,45.473340,9.231480,"Via Strambio Gaetano, 6, 20100 Milan, Italy",Children's Clothing,0.0


In [103]:
stations_df_yelp

Unnamed: 0,name,latitude,longitude,address,category,rating
0,Il Gigante,45.454650,9.151550,"Via Lorenteggio 3, 20147 Milan, Italy",Department Stores,3.5
1,Original Marines,45.454637,9.150784,"Vie Lorenteggio 25, 20146 Milan, Italy","Children's Clothing, Men's Clothing, Women's C...",4.0
2,Babet,45.452600,9.149960,"Via Leone Tolstoi 45, 20146 Milan, Italy","Used, Vintage & Consignment",0.0
3,OVS,45.452881,9.144893,"Via Redaelli 2/a, 20146 Milan, Italy",Department Stores,3.5
4,Tresor,45.454658,9.142772,"20146 Milan, Italy",Women's Clothing,5.0
...,...,...,...,...,...,...
6411,Deus Ex Machina,45.490897,9.189025,"Via Thaon de Revel 3, 20159 Milan, Italy",Sports Wear,4.0
6412,Dammann,45.480845,9.188004,"Viale Monte Grappa 11, 20121 Milan, Italy",Coffee & Tea,4.5
6413,Gucci,45.467962,9.195424,"Via Monte Napoleone 5/7, 20121 Milan, Italy","Children's Clothing, Men's Clothing, Women's C...",5.0
6414,Agatha Ruiz de La Prada,45.482500,9.183550,"Via Maroncelli 5, 20154 Milan, Italy","Fashion, Luggage, Baby Gear & Furniture",3.0


In [106]:
# Count the number of items in the 'category' column
stations_df_yelp['category_count'] = stations_df_yelp['category'].apply(lambda x: len(x.split(',')))
stations_df_yelp

Unnamed: 0,name,latitude,longitude,address,category,rating,category_count
0,Il Gigante,45.454650,9.151550,"Via Lorenteggio 3, 20147 Milan, Italy",Department Stores,3.5,1
1,Original Marines,45.454637,9.150784,"Vie Lorenteggio 25, 20146 Milan, Italy","Children's Clothing, Men's Clothing, Women's C...",4.0,3
2,Babet,45.452600,9.149960,"Via Leone Tolstoi 45, 20146 Milan, Italy","Used, Vintage & Consignment",0.0,2
3,OVS,45.452881,9.144893,"Via Redaelli 2/a, 20146 Milan, Italy",Department Stores,3.5,1
4,Tresor,45.454658,9.142772,"20146 Milan, Italy",Women's Clothing,5.0,1
...,...,...,...,...,...,...,...
6411,Deus Ex Machina,45.490897,9.189025,"Via Thaon de Revel 3, 20159 Milan, Italy",Sports Wear,4.0,1
6412,Dammann,45.480845,9.188004,"Viale Monte Grappa 11, 20121 Milan, Italy",Coffee & Tea,4.5,1
6413,Gucci,45.467962,9.195424,"Via Monte Napoleone 5/7, 20121 Milan, Italy","Children's Clothing, Men's Clothing, Women's C...",5.0,3
6414,Agatha Ruiz de La Prada,45.482500,9.183550,"Via Maroncelli 5, 20154 Milan, Italy","Fashion, Luggage, Baby Gear & Furniture",3.0,3


In [110]:
stations_df_yelp.to_csv('yelp.csv')
stations_df.to_csv('foursquare.csv')

Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

I'd say the Foursquare had the most complete data but the Yelp key was free to use and way easier to access. I really struggled with the Foursquare API and trying to access the fsq using fields. My data was not clean, so I eliminated that step. 
I found it way easier to get the ratings from the Yelp API.

Get the top 10 restaurants according to their rating

In [95]:
# Filter the DataFrame to include only restaurants and sort by rating
restaurants_df = stations_df_yelp[stations_df_yelp['category'].str.contains('Coffee & Tea', case=False, na=False)]
top_10_restaurants = restaurants_df.sort_values(by='rating', ascending=False).head(10)

top_10_restaurants

Unnamed: 0,name,latitude,longitude,address,category,rating
5,Sbarbaro e Visigalli,45.45183,9.15285,"Via Giambellino 5, 20146 Milan, Italy","Coffee & Tea, Candy Stores",5.0
4768,Taipy,45.47269,9.159056,"Via Tiziano 21, 20145 Milan, Italy","Bistros, Coffee & Tea, Bars",5.0
3107,Taipy,45.47269,9.159056,"Via Tiziano 21, 20145 Milan, Italy","Bistros, Coffee & Tea, Bars",5.0
1086,Sbarbaro e Visigalli,45.45183,9.15285,"Via Giambellino 5, 20146 Milan, Italy","Coffee & Tea, Candy Stores",5.0
3182,Ai Sapori Del Sud,45.488934,9.193025,"Via Pola 15, 20124 Milan, Italy","Bakeries, Desserts, Coffee & Tea",5.0
3208,Taste of England,45.46014,9.19032,"Piazza Velasca 5, 20122 Milan, Italy","Specialty Food, Coffee & Tea",5.0
3435,Latteria e Caffetteria Nilo,45.52579,9.20857,"Viale Suzzani 275, 20162 Milan, Italy","Bars, Coffee & Tea",5.0
3647,Ai Sapori Del Sud,45.488934,9.193025,"Via Pola 15, 20124 Milan, Italy","Bakeries, Desserts, Coffee & Tea",5.0
940,Sbarbaro e Visigalli,45.45183,9.15285,"Via Giambellino 5, 20146 Milan, Italy","Coffee & Tea, Candy Stores",5.0
929,Taipy,45.47269,9.159056,"Via Tiziano 21, 20145 Milan, Italy","Bistros, Coffee & Tea, Bars",5.0


In [98]:
# Filter the DataFrame to include only restaurants and sort by rating
restaurants_df_fs = stations_df[stations_df['category'].str.contains('Café', case=False, na=False)]
top_10_restaurants_fs = restaurants_df_fs.sort_values(by='rating', ascending=False).head(10)

top_10_restaurants_fs

Unnamed: 0,index,name,latitude,longitude,address,category,rating
627,1558,Orsonero Coffee,45.477871,9.211206,Via Broggi 15,"Café, Coffee Shop, Restaurant",9.4
190,201,Pasticceria Marchesi,45.46823,9.194959,Via Monte Napoleone 9,"Bakery, Café, Pastry Shop",9.0
196,208,Chocolat,45.467684,9.174152,Via Giovanni Boccaccio 9,"Bar, Café, Ice Cream Parlor",9.0
68,69,Mag Cafè,45.451214,9.173414,Ripa di Porta Ticinese 43,"Cocktail Bar, Café, Restaurant",8.9
105,108,10 Corso Como Cafe,45.481935,9.187596,Corso Como 10,"Café, Italian Restaurant, Mediterranean Restau...",8.8
353,617,Mag Mastri Artigiani del Gelato,45.481822,9.221399,Viale Gran Sasso 27,"Café, Frozen Yogurt Shop, Ice Cream Parlor",8.7
378,643,Bar Luce,45.444167,9.205268,Largo Isarco 2 — lodi,"Bar, Café",8.6
331,582,Deus Cafe Milano,45.491041,9.188769,Via Thaon di Revel 3,"Bar, Café, Restaurant",8.5
654,1839,Zinc Bar,45.446659,9.195914,Via Cesare Balbo 36,"Cocktail Bar, Café, Restaurant",8.5
354,618,Pasticceria Antica Sicilia,45.480975,9.216635,Via Carlo Matteucci 4,"Dessert Shop, Café",8.5


There are just under 100 Coffee & Tea houses with a 5 star rating in Milan but Foursquare goes deeper with a 10 star rating with a decimal system in place. This allows for easier distinction between the top ten.