In [23]:
import os
import time

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from geopandas.tools import sjoin
from shapely.geometry import Point
from skimpy import skim

In [3]:
# Load the neighborhood polygons (one row per neighborhood) from GeoJSON
NEIGHBORHOODS_GEOJSON = "C:/Users/edoar/combined_quartieri.geojson"
gdf_combined = gpd.read_file(NEIGHBORHOODS_GEOJSON)

# YELP API

In [6]:
# Build one "<neighborhood>, Milan" search location per polygon
neighborhood_names = gdf_combined["Neighborhood"].tolist()
neighborhoods = ["{}, Milan".format(name) for name in neighborhood_names]
neighborhoods

['Parco delle Abbazie, Milan',
 'Adriano, Milan',
 'Affori, Milan',
 'Baggio, Milan',
 'Bande Nere, Milan',
 'Barona, Milan',
 'Bicocca, Milan',
 'Bovisasca, Milan',
 'Bovisa, Milan',
 'Brera, Milan',
 'Bruzzano, Milan',
 'Buenos Aires - Venezia, Milan',
 'Cantalupa, Milan',
 'Cascina Triulza - Expo, Milan',
 'Centrale, Milan',
 'Chiaravalle, Milan',
 'Città Studi, Milan',
 'Comasina, Milan',
 'Corsica, Milan',
 'De Angeli - Monte Rosa, Milan',
 'Dergano, Milan',
 'Duomo, Milan',
 'Ex Om - Morivione, Milan',
 'Farini, Milan',
 'GARIBALDI REPUBBLICA, Milan',
 'Gallaratese, Milan',
 'Ghisolfa, Milan',
 'Giambellino, Milan',
 'Giardini Porta Venezia, Milan',
 'Gratosoglio - Ticinello, Milan',
 'Greco, Milan',
 'Guastalla, Milan',
 'Isola, Milan',
 'Lambrate, Milan',
 'Lodi - Corvetto, Milan',
 'Lorenteggio, Milan',
 'Loreto, Milan',
 'Maciachini - Maggiolina, Milan',
 'Magenta - San Vittore, Milan',
 'Maggiore - Musocco, Milan',
 'Mecenate, Milan',
 'Muggiano, Milan',
 'Navigli, Milan',
 

In [8]:
# The Yelp API key is a secret, so it is read from the YELP_API_KEY environment
# variable instead of being stored in the notebook.
# NOTE(review): any key that was ever pasted into this notebook should be
# treated as leaked and rotated.
API_KEY = os.environ.get("YELP_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the notebook still loads; the requests
    # themselves are expected to fail without a valid key.
    print("Warning: YELP_API_KEY is not set; Yelp API requests are expected to fail.")
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Constants
businesses_per_request = 48  # Page size sent to Yelp as the `limit` search parameter

def make_request(url, params=None, timeout=10):
    """
    Makes a GET request to the Yelp API.

    Parameters
    ----------
    url : str
        Endpoint URL to request.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 10).
        ``requests`` applies no timeout unless one is passed, so without it a
        stalled connection would block the whole scrape indefinitely.

    Returns
    -------
    requests.Response
        The response object, after its status has been checked.

    Raises
    ------
    requests.HTTPError
        If the response carries a 4xx/5xx status (raised by ``raise_for_status``).
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    response.raise_for_status()
    return response

def search_businesses(location, term="restaurant", limit=businesses_per_request, offset=0):
    """
    Fetches one page of business-search results for a location.

    The four arguments are sent as the query parameters of the same name.
    Returns the list stored under the "businesses" key of the JSON response,
    or an empty list when that key is absent.
    """
    endpoint = "https://api.yelp.com/v3/businesses/search"
    query = dict(location=location, term=term, limit=limit, offset=offset)
    payload = make_request(endpoint, params=query).json()
    return payload.get("businesses", [])

# Collect restaurants for every neighborhood from the Yelp search endpoint and
# turn them into the `Restaurants` GeoDataFrame (EPSG:4326 points).
#
# Unexpected errors are intentionally NOT swallowed here: a failure surfaces as
# a traceback in this cell instead of leaving `Restaurants` undefined for the
# cells below.

# Stop paginating a neighborhood once this many results have been fetched
# (the per-query ceiling this notebook assumes for Yelp -- TODO confirm against
# the current API documentation).
MAX_RESULTS_PER_QUERY = 240

# Columns of the collected records. Passing them explicitly keeps the
# DataFrame well-formed even when no business was fetched at all.
RECORD_COLUMNS = [
    "Fetch Location",
    "Business Name",
    "Business Address",
    "Categories",
    "Average Star Rating",
    "Review Count",
    "Price",
    "Latitude",
    "Longitude",
]


def _business_to_record(biz, fetch_location):
    """Flatten one business dict from the search response into one output row."""
    # `or {}` / `or []` also covers keys that are present but null in the JSON,
    # where `dict.get(key, default)` would hand back None.
    location_info = biz.get("location") or {}
    coordinates = biz.get("coordinates") or {}
    titles = [cat.get("title") for cat in (biz.get("categories") or []) if cat.get("title")]
    return {
        "Fetch Location": fetch_location,
        "Business Name": biz.get("name"),
        "Business Address": location_info.get("address1"),
        "Categories": ", ".join(titles) if titles else None,
        "Average Star Rating": biz.get("rating"),
        "Review Count": biz.get("review_count"),
        "Price": biz.get("price"),
        "Latitude": coordinates.get("latitude"),
        "Longitude": coordinates.get("longitude"),
    }


# Initialize data storage
data = []

for neighborhood in neighborhoods:  # Loop through each neighborhood
    offset = 0
    while offset < MAX_RESULTS_PER_QUERY:
        print(f"Fetching businesses in {neighborhood} with offset: {offset}...")

        try:
            # Fetch one page using the current offset and location
            businesses = search_businesses(location=neighborhood, term="restaurant", limit=businesses_per_request, offset=offset)
        except requests.RequestException as err:
            # HTTP errors, timeouts and connection failures only cost us this
            # neighborhood; the crawl continues with the next one.
            print(f"Request failed for {neighborhood}: {err}")
            break

        if not businesses:
            # No more businesses to fetch
            print(f"No more businesses returned for {neighborhood}.")
            break

        data.extend(_business_to_record(biz, neighborhood) for biz in businesses)

        # Advance by what was actually returned (the last page may be short)
        offset += len(businesses)

        # Sleep to respect API rate limits
        time.sleep(0.5)
    else:
        # The while condition went false, i.e. the pagination ceiling was hit
        print(f"Reached maximum results for {neighborhood}.")

# Convert the collected data into a DataFrame
df = pd.DataFrame(data, columns=RECORD_COLUMNS)

# Build point geometries. Missing coordinates arrive as None/NaN; NaN is truthy,
# so a plain truthiness test would create Point(nan, nan) and would also drop a
# legitimate 0.0 coordinate. Test for missing values explicitly instead.
has_coordinates = df["Latitude"].notna() & df["Longitude"].notna()
df["geometry"] = [
    Point(lon, lat) if present else None
    for lon, lat, present in zip(df["Longitude"], df["Latitude"], has_coordinates)
]
Restaurants = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Print summary
print(Restaurants.head())
print(f"Total businesses collected: {len(data)}")

Fetching businesses in Parco delle Abbazie, Milan with offset: 0...
Fetching businesses in Parco delle Abbazie, Milan with offset: 48...
Fetching businesses in Parco delle Abbazie, Milan with offset: 96...
Fetching businesses in Parco delle Abbazie, Milan with offset: 98...
No more businesses returned for Parco delle Abbazie, Milan.
Fetching businesses in Adriano, Milan with offset: 0...
HTTP error occurred for Adriano, Milan: 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Adriano%2C+Milan&term=restaurant&limit=48&offset=0
Fetching businesses in Affori, Milan with offset: 0...
Fetching businesses in Affori, Milan with offset: 48...
Fetching businesses in Affori, Milan with offset: 96...
Fetching businesses in Affori, Milan with offset: 144...
Fetching businesses in Affori, Milan with offset: 192...
Reached maximum results for Affori, Milan.
Fetching businesses in Baggio, Milan with offset: 0...
Fetching businesses in Baggio, Milan with offset:

In [10]:
# Display the GeoDataFrame of collected businesses (before de-duplication)
Restaurants

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",The Kitchen,Via Monte Nero 73,"Italian, Pubs, Burgers",5.0,2,€€,45.470379,8.690606,POINT (8.69061 45.47038)
1,"Parco delle Abbazie, Milan",La locanda delle Due Suocere,Corso Trieste 44A,Italian,5.0,7,€€€,45.449084,8.638340,POINT (8.63834 45.44908)
2,"Parco delle Abbazie, Milan",Cannavacciuolo,Piazza Martiri della Libertà 1,"Bistros, Italian",4.4,5,,45.445820,8.617884,POINT (8.61788 45.44582)
3,"Parco delle Abbazie, Milan",Santa Maria,Via Novara 39,Trattorie,5.0,1,,45.431625,8.728954,POINT (8.72895 45.43163)
4,"Parco delle Abbazie, Milan",Wallaby Australian Pub,Via Gioacchino Rossini 7,"Burgers, Pubs, Sandwiches",4.5,2,€€,45.469280,8.890330,POINT (8.89033 45.46928)
...,...,...,...,...,...,...,...,...,...,...
18096,"Forze Armate, Milan",Tutti i Gusti,Via Zumbini 36,Pizza,3.3,3,€,45.440950,9.156340,POINT (9.15634 45.44095)
18097,"Forze Armate, Milan",MooKuzai,Via Arona 18,Japanese,5.0,1,,45.482830,9.158510,POINT (9.15851 45.48283)
18098,"Forze Armate, Milan",Gemelli Diversi,Via Moroni 32,"Italian, Pizza",3.4,12,€€,45.465750,9.138610,POINT (9.13861 45.46575)
18099,"Forze Armate, Milan",Homu,Via Carlo Dolci 8,"Japanese, Chinese, Buffets",3.8,10,€€,45.472372,9.142926,POINT (9.14293 45.47237)


In [12]:
# Drop businesses that were returned by more than one neighborhood query.
# "Fetch Location" is deliberately not part of the key: it differs between
# otherwise identical rows.
dedup_keys = [
    "Business Name",
    "Business Address",
    "Categories",
    "Average Star Rating",
    "Review Count",
    "Price",
    "geometry",
]
Restaurants_nodup = Restaurants.drop_duplicates(subset=dedup_keys)
Restaurants_nodup

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",The Kitchen,Via Monte Nero 73,"Italian, Pubs, Burgers",5.0,2,€€,45.470379,8.690606,POINT (8.69061 45.47038)
1,"Parco delle Abbazie, Milan",La locanda delle Due Suocere,Corso Trieste 44A,Italian,5.0,7,€€€,45.449084,8.638340,POINT (8.63834 45.44908)
2,"Parco delle Abbazie, Milan",Cannavacciuolo,Piazza Martiri della Libertà 1,"Bistros, Italian",4.4,5,,45.445820,8.617884,POINT (8.61788 45.44582)
3,"Parco delle Abbazie, Milan",Santa Maria,Via Novara 39,Trattorie,5.0,1,,45.431625,8.728954,POINT (8.72895 45.43163)
4,"Parco delle Abbazie, Milan",Wallaby Australian Pub,Via Gioacchino Rossini 7,"Burgers, Pubs, Sandwiches",4.5,2,€€,45.469280,8.890330,POINT (8.89033 45.46928)
...,...,...,...,...,...,...,...,...,...,...
17853,"XXII Marzo, Milan",Bianca Maria,Viale Bianca Maria 4,"Lumbard, Lounges, Bistros",5.0,1,,45.463536,9.206917,POINT (9.20692 45.46354)
17856,"XXII Marzo, Milan",Saporimaestri,Via Maestri Campionesi 4,"Pizza, Italian, Mediterranean",4.0,1,,45.457377,9.213376,POINT (9.21338 45.45738)
17857,"XXII Marzo, Milan",Spoon,Via Marcona 6,"Japanese, Sushi Bars",4.0,1,€€€,45.463469,9.208250,POINT (9.20825 45.46347)
17983,"Forze Armate, Milan",Roadhouse Grill,Viale Cristoforo Colombo 13,"Steakhouses, Barbeque",4.0,2,€€,45.424919,9.067463,POINT (9.06746 45.42492)


In [14]:
# Both layers need the same CRS for the spatial join:
# restaurants first, then the neighborhood polygons
for layer in (Restaurants_nodup, gdf_combined):
    print(layer.crs)

EPSG:4326
EPSG:4326


# Spatial Join

In [17]:
# Attach to each restaurant the neighborhood polygon it lies within;
# how="left" keeps restaurants that fall outside every polygon
joined = gpd.sjoin(
    left_df=Restaurants_nodup,
    right_df=gdf_combined,
    how="left",
    predicate="within",
)

# Inspect the joined table
joined

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
0,"Parco delle Abbazie, Milan",The Kitchen,Via Monte Nero 73,"Italian, Pubs, Burgers",5.0,2,€€,45.470379,8.690606,POINT (8.69061 45.47038),,
1,"Parco delle Abbazie, Milan",La locanda delle Due Suocere,Corso Trieste 44A,Italian,5.0,7,€€€,45.449084,8.638340,POINT (8.63834 45.44908),,
2,"Parco delle Abbazie, Milan",Cannavacciuolo,Piazza Martiri della Libertà 1,"Bistros, Italian",4.4,5,,45.445820,8.617884,POINT (8.61788 45.44582),,
3,"Parco delle Abbazie, Milan",Santa Maria,Via Novara 39,Trattorie,5.0,1,,45.431625,8.728954,POINT (8.72895 45.43163),,
4,"Parco delle Abbazie, Milan",Wallaby Australian Pub,Via Gioacchino Rossini 7,"Burgers, Pubs, Sandwiches",4.5,2,€€,45.469280,8.890330,POINT (8.89033 45.46928),,
...,...,...,...,...,...,...,...,...,...,...,...,...
17853,"XXII Marzo, Milan",Bianca Maria,Viale Bianca Maria 4,"Lumbard, Lounges, Bistros",5.0,1,,45.463536,9.206917,POINT (9.20692 45.46354),31.0,Guastalla
17856,"XXII Marzo, Milan",Saporimaestri,Via Maestri Campionesi 4,"Pizza, Italian, Mediterranean",4.0,1,,45.457377,9.213376,POINT (9.21338 45.45738),83.0,XXII Marzo
17857,"XXII Marzo, Milan",Spoon,Via Marcona 6,"Japanese, Sushi Bars",4.0,1,€€€,45.463469,9.208250,POINT (9.20825 45.46347),83.0,XXII Marzo
17983,"Forze Armate, Milan",Roadhouse Grill,Viale Cristoforo Colombo 13,"Steakhouses, Barbeque",4.0,2,€€,45.424919,9.067463,POINT (9.06746 45.42492),,


In [19]:
# Spot check on one known restaurant: all correct
joined.loc[joined["Business Name"] == "Tartufotto"]

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
1608,"Brera, Milan",Tartufotto,Via Cusani 8,"Italian, Wine Bars, Bistros",4.4,39,€€€,45.468923,9.184549,POINT (9.18455 45.46892),9.0,Brera


In [15]:
# Number of distinct values per column; the Price count looks wrong and is inspected next
joined.nunique() # there is something wrong with the prices

Fetch Location           70
Business Name          3110
Business Address       3105
Categories             1305
Average Star Rating      37
Review Count            113
Price                     9
Latitude               3161
Longitude              3184
geometry               3264
index_right              83
Neighborhood             83
dtype: int64

In [25]:
# List the distinct price labels: some use "$" instead of "€"
joined["Price"].unique() # there seem to be prices not in euros

array(['€€', '€€€', None, '€€€€', '€', '$$$$', '$$$', '$$', '$'],
      dtype=object)

In [27]:
# Rows priced in "$$": they correspond to geometries outside of the neighborhoods in Milan
joined.loc[joined["Price"] == "$$"]

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
1889,"Buenos Aires - Venezia, Milan",Güerrín,Av. Corrientes 1368,Pizza,4.3,535,$$,-34.604085,-58.385987,POINT (-58.38599 -34.60408),,
1890,"Buenos Aires - Venezia, Milan",El Preferido de Palermo,Jorge Luis Borges 2108,"Pubs, Argentine",4.1,82,$$,-34.585412,-58.425359,POINT (-58.42536 -34.58541),,
1892,"Buenos Aires - Venezia, Milan",Santos Manjares,Paraguay 938,"Argentine, Steakhouses",4.4,62,$$,-34.597871,-58.380344,POINT (-58.38034 -34.59787),,
1893,"Buenos Aires - Venezia, Milan",1810 Cocina Regional,Julián Álvarez 1998,"Argentine, Empanadas",4.3,70,$$,-34.589390,-58.419222,POINT (-58.41922 -34.58939),,
1895,"Buenos Aires - Venezia, Milan",Parrilla Peña,Rodríguez Peña 682,"Argentine, Steakhouses",4.4,221,$$,-34.600975,-58.391444,POINT (-58.39144 -34.60097),,
...,...,...,...,...,...,...,...,...,...,...,...,...
2117,"Buenos Aires - Venezia, Milan",El Secreto Del Polaco,Monroe 3915,,4.3,21,$$,-34.567183,-58.474369,POINT (-58.47437 -34.56718),,
2119,"Buenos Aires - Venezia, Milan",Melo,Pacheco de Melo 1833,International,4.3,14,$$,-34.592283,-58.393141,POINT (-58.39314 -34.59228),,
2120,"Buenos Aires - Venezia, Milan",Burger Joint,Jorge Luis Borges 1766,Burgers,4.3,448,$$,-34.587558,-58.428625,POINT (-58.42863 -34.58756),,
2121,"Buenos Aires - Venezia, Milan",La Pulperia,Uriarte 1667,Argentine,4.8,8,$$,-34.587035,-58.431410,POINT (-58.43141 -34.58703),,


In [29]:
# Variables kept for the analysis
relevant_columns = [
    "Business Name",
    "Business Address",
    "Categories",
    "Average Star Rating",
    "Review Count",
    "Price",
    "geometry",
    "Neighborhood",
]

# Keep only restaurants that were matched to a neighborhood polygon
# (rows whose Neighborhood is missing fell outside every polygon),
# renumber the rows, then select the relevant variables
PolyRestaurants = (
    joined[joined["Neighborhood"].notna()]
    .reset_index(drop=True)
    [relevant_columns]
)

# Show the cleaned GeoDataFrame
PolyRestaurants

Unnamed: 0,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,geometry,Neighborhood
0,L'Immagine,Via Varesina 61,"Italian, Bistros, Cafes",4.9,97,€€,POINT (9.14548 45.49722),Villapizzone
1,Affori,Via Michele Novaro 1,Italian,5.0,2,,POINT (9.17001 45.51939),Affori
2,L' Oasi,Viale Certosa 119,"Pizza, Italian",4.9,13,,POINT (9.1407 45.49475),Villapizzone
3,Osteria del Biliardo,Via Cialdini 107,"Italian, Breweries",4.0,4,€€,POINT (9.1695 45.51492),Affori
4,Bistrot della Pesa,Via Maroncelli 1,"Lumbard, Bistros",4.7,3,,POINT (9.18401 45.4819),GARIBALDI REPUBBLICA
...,...,...,...,...,...,...,...,...
2592,Al Pizzetta,Viale Monte Nero 73,Pizza,3.8,17,€,POINT (9.20759 45.46095),Guastalla
2593,L'OV Milano,Viale Premuda 14,"Bistros, American, Breakfast & Brunch",3.2,24,€€,POINT (9.20764 45.46419),XXII Marzo
2594,Bianca Maria,Viale Bianca Maria 4,"Lumbard, Lounges, Bistros",5.0,1,,POINT (9.20692 45.46354),Guastalla
2595,Saporimaestri,Via Maestri Campionesi 4,"Pizza, Italian, Mediterranean",4.0,1,,POINT (9.21338 45.45738),XXII Marzo


### Further checking for missing values or inconsistencies

In [32]:
# Summarize PolyRestaurants with skimpy to check for missing values or inconsistencies
skim(PolyRestaurants)

### Quite a few entries (almost 40%) have a missing price. There is not much we can do about this except acknowledge that the data retrieved through the Yelp API is not ideal for calculating statistics about restaurant prices.

In [36]:
# Write the cleaned restaurants layer to a GeoJSON file in the working directory
output_path = "PolyRestaurants.geojson"
PolyRestaurants.to_file(output_path, driver="GeoJSON")