In [5]:
import os
import time

import geopandas as gpd
import pandas as pd
import requests
from geopandas.tools import sjoin
from shapely.geometry import Point

In [3]:
# Import of the Polygons
# The file location can be overridden with the QUARTIERI_GEOJSON environment
# variable so the notebook is not tied to a single machine; when the variable
# is not set, the original path is used unchanged.
gdf_combined = gpd.read_file(
    os.environ.get("QUARTIERI_GEOJSON", "C:/Users/edoar/combined_quartieri.geojson")
)

# YELP API

In [6]:
# Turn every polygon name into the free-text location string passed to the
# Yelp search ("<neighborhood>, Milan").
neighborhoods = list(
    map(lambda name: f"{name}, Milan", gdf_combined["Neighborhood"].tolist())
)
neighborhoods

['Parco delle Abbazie, Milan',
 'Adriano, Milan',
 'Affori, Milan',
 'Baggio, Milan',
 'Bande Nere, Milan',
 'Barona, Milan',
 'Bicocca, Milan',
 'Bovisasca, Milan',
 'Bovisa, Milan',
 'Brera, Milan',
 'Bruzzano, Milan',
 'Buenos Aires - Venezia, Milan',
 'Cantalupa, Milan',
 'Cascina Triulza - Expo, Milan',
 'Centrale, Milan',
 'Chiaravalle, Milan',
 'Città Studi, Milan',
 'Comasina, Milan',
 'Corsica, Milan',
 'De Angeli - Monte Rosa, Milan',
 'Dergano, Milan',
 'Duomo, Milan',
 'Ex Om - Morivione, Milan',
 'Farini, Milan',
 'GARIBALDI REPUBBLICA, Milan',
 'Gallaratese, Milan',
 'Ghisolfa, Milan',
 'Giambellino, Milan',
 'Giardini Porta Venezia, Milan',
 'Gratosoglio - Ticinello, Milan',
 'Greco, Milan',
 'Guastalla, Milan',
 'Isola, Milan',
 'Lambrate, Milan',
 'Lodi - Corvetto, Milan',
 'Lorenteggio, Milan',
 'Loreto, Milan',
 'Maciachini - Maggiolina, Milan',
 'Magenta - San Vittore, Milan',
 'Maggiore - Musocco, Milan',
 'Mecenate, Milan',
 'Muggiano, Milan',
 'Navigli, Milan',
 

In [8]:
# The Yelp API key is read from the YELP_API_KEY environment variable so that
# the secret is never stored in the notebook. The key that used to be written
# here in clear text must be considered leaked and should be revoked.
API_KEY = os.environ.get("YELP_API_KEY", "")
if not API_KEY:
    print("WARNING: YELP_API_KEY is not set; Yelp requests will fail authentication.")
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Constants
# Page size sent as `limit` on every search request. Five pages of 48 add up
# exactly to the 240-result cap applied by the fetch loop.
businesses_per_request = 48

def make_request(url, params=None, timeout=30):
    """
    Makes a GET request to the Yelp API using the module-level HEADERS.

    Parameters
    ----------
    url : str
        Endpoint to call.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (forwarded to
        ``requests.get``). Without it a stalled connection would block the
        notebook indefinitely.

    Returns
    -------
    requests.Response
        The response object, returned only when the status code is not an error.

    Raises
    ------
    requests.HTTPError
        When the server answers with a 4xx/5xx status (``raise_for_status``).
    requests.Timeout
        When no answer arrives within ``timeout``.
    """
    response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    response.raise_for_status()
    return response

def search_businesses(location, term="restaurant", limit=businesses_per_request, offset=0):
    """
    Fetches one page of the Yelp business search for a location.

    ``location`` and ``term`` are passed through as the search query, while
    ``limit`` and ``offset`` select the page. Returns the list stored under
    the "businesses" key of the JSON answer, or an empty list when that key
    is missing.
    """
    query = dict(location=location, term=term, limit=limit, offset=offset)
    payload = make_request(
        "https://api.yelp.com/v3/businesses/search",
        params=query,
    ).json()
    return payload.get("businesses", [])

# Pagination stops for a neighborhood once this many results have been fetched
# (maximum results per query).
MAX_RESULTS_PER_QUERY = 240
# Pause between two consecutive page requests to respect the API rate limits.
REQUEST_PAUSE_SECONDS = 0.5
# Column order of the collected records; also guarantees the columns exist
# when no business at all could be fetched.
RECORD_COLUMNS = [
    "Fetch Location",
    "Business Name",
    "Business Address",
    "Categories",
    "Average Star Rating",
    "Review Count",
    "Price",
    "Latitude",
    "Longitude",
]


def _business_to_record(biz, fetch_location):
    """Flatten one business entry of the search answer into a flat record dict."""
    # `or {}` / `or []` also covers keys that are present but null, which a
    # plain .get(key, default) would pass through as None.
    location_info = biz.get("location") or {}
    coordinates = biz.get("coordinates") or {}
    category_list = [
        cat.get("title", "") for cat in (biz.get("categories") or []) if cat.get("title")
    ]
    return {
        "Fetch Location": fetch_location,
        "Business Name": biz.get("name", "N/A"),
        "Business Address": location_info.get("address1", "N/A"),
        "Categories": ", ".join(category_list) if category_list else "N/A",
        "Average Star Rating": biz.get("rating", "N/A"),
        "Review Count": biz.get("review_count", "N/A"),
        "Price": biz.get("price", "N/A"),
        "Latitude": coordinates.get("latitude", None),
        "Longitude": coordinates.get("longitude", None),
    }


def _fetch_neighborhood(neighborhood):
    """Page through the search results of one neighborhood and return its records.

    A failed request ends the pagination for this neighborhood only; the
    records gathered before the failure are still returned.
    """
    records = []
    offset = 0
    while True:  # fetching data until no more results
        print(f"Fetching businesses in {neighborhood} with offset: {offset}...")
        try:
            businesses = search_businesses(
                location=neighborhood,
                term="restaurant",
                limit=businesses_per_request,
                offset=offset,
            )
        except requests.RequestException as err:
            # RequestException covers HTTP errors as well as timeouts and
            # connection failures, so one bad request cannot abort the run.
            print(f"Request failed for {neighborhood}: {err}")
            break

        if not businesses:
            print(f"No more businesses returned for {neighborhood}.")
            break

        records.extend(_business_to_record(biz, neighborhood) for biz in businesses)

        # Increment offset for the next batch
        offset += len(businesses)
        time.sleep(REQUEST_PAUSE_SECONDS)

        if offset >= MAX_RESULTS_PER_QUERY:
            print(f"Reached maximum results for {neighborhood}.")
            break
    return records


# Initialize data storage
data = []
for neighborhood in neighborhoods:  # Loop through each neighborhood
    data.extend(_fetch_neighborhood(neighborhood))

# Convert the collected data into a DataFrame. This part is intentionally not
# wrapped in a catch-all `except`: if it fails, the error must be visible here
# instead of surfacing later as an undefined `Restaurants`.
df = pd.DataFrame(data, columns=RECORD_COLUMNS)

# Build point geometries. pd.notna is used instead of a truthiness test because
# missing coordinates become NaN in the DataFrame (NaN is truthy) and a valid
# coordinate of 0.0 is falsy.
df["geometry"] = [
    Point(lon, lat) if pd.notna(lon) and pd.notna(lat) else None
    for lon, lat in zip(df["Longitude"], df["Latitude"])
]
Restaurants = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Print summary
print(Restaurants.head())
print(f"Total businesses collected: {len(data)}")

Fetching businesses in Parco delle Abbazie, Milan with offset: 0...
Fetching businesses in Parco delle Abbazie, Milan with offset: 48...
Fetching businesses in Parco delle Abbazie, Milan with offset: 96...
Fetching businesses in Parco delle Abbazie, Milan with offset: 144...
Fetching businesses in Parco delle Abbazie, Milan with offset: 192...
Reached maximum results for Parco delle Abbazie, Milan.
Fetching businesses in Adriano, Milan with offset: 0...
HTTP error occurred for Adriano, Milan: 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Adriano%2C+Milan&term=restaurant&limit=48&offset=0
Fetching businesses in Affori, Milan with offset: 0...
Fetching businesses in Affori, Milan with offset: 48...
Fetching businesses in Affori, Milan with offset: 96...
Fetching businesses in Affori, Milan with offset: 144...
Fetching businesses in Affori, Milan with offset: 192...
Reached maximum results for Affori, Milan.
Fetching businesses in Baggio, Milan 

In [9]:
# Display the GeoDataFrame built by the fetch cell (one row per returned business).
Restaurants

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",Il Castigo,Piazza Scipione 7,"Wine Bars, Italian",4.8,40,€€,45.684583,8.706904,POINT (8.7069 45.68458)
1,"Parco delle Abbazie, Milan",Settembrini 18,Via Settembrini 18,"Italian, Seafood, Pizza",4.5,560,€€,45.481243,9.205606,POINT (9.20561 45.48124)
2,"Parco delle Abbazie, Milan",Da Oscar,Via Lazzaro Palazzi 4,"Barbeque, Seafood, Wine Bars",4.4,316,€€,45.476440,9.205910,POINT (9.20591 45.47644)
3,"Parco delle Abbazie, Milan",Osteria della Pista,Via Verbano 1,"Seafood, Italian, Pizza",4.6,41,€€,45.674290,8.739960,POINT (8.73996 45.67429)
4,"Parco delle Abbazie, Milan",Salsamenteria di Parma,Via San Pietro all'Orto 9,Emilian,4.4,181,€€,45.466267,9.195122,POINT (9.19512 45.46627)
...,...,...,...,...,...,...,...,...,...,...
18522,"Forze Armate, Milan",Tokyo Nomi,Via Novara 123,"Japanese, Chinese, Sushi Bars",3.0,1,,45.473480,9.117590,POINT (9.11759 45.47348)
18523,"Forze Armate, Milan",Felice,Viale Certosa 97,Chinese,4.0,3,,45.493960,9.142450,POINT (9.14245 45.49396)
18524,"Forze Armate, Milan",Mezzaluna,Via Marostica 29,"Italian, Seafood, Pizza",4.0,1,,45.461540,9.143417,POINT (9.14342 45.46154)
18525,"Forze Armate, Milan",Pizzeria dell'Angelo,Via Belfiore 7,Pizza,4.0,5,€€,45.467230,9.158300,POINT (9.1583 45.46723)


In [10]:
# Remove duplicated businesses. Rows are compared on every business attribute
# and on the geometry, but not on "Fetch Location", so a business returned by
# the searches of several neighborhoods is kept only once (first occurrence).
Restaurants_nodup = Restaurants.drop_duplicates(
    subset=[
        "Business Name",
        "Business Address",
        "Categories",
        "Average Star Rating",
        "Review Count",
        "Price",
        "geometry",
    ],
    keep="first",
)
Restaurants_nodup

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",Il Castigo,Piazza Scipione 7,"Wine Bars, Italian",4.8,40,€€,45.684583,8.706904,POINT (8.7069 45.68458)
1,"Parco delle Abbazie, Milan",Settembrini 18,Via Settembrini 18,"Italian, Seafood, Pizza",4.5,560,€€,45.481243,9.205606,POINT (9.20561 45.48124)
2,"Parco delle Abbazie, Milan",Da Oscar,Via Lazzaro Palazzi 4,"Barbeque, Seafood, Wine Bars",4.4,316,€€,45.476440,9.205910,POINT (9.20591 45.47644)
3,"Parco delle Abbazie, Milan",Osteria della Pista,Via Verbano 1,"Seafood, Italian, Pizza",4.6,41,€€,45.674290,8.739960,POINT (8.73996 45.67429)
4,"Parco delle Abbazie, Milan",Salsamenteria di Parma,Via San Pietro all'Orto 9,Emilian,4.4,181,€€,45.466267,9.195122,POINT (9.19512 45.46627)
...,...,...,...,...,...,...,...,...,...,...
18273,"XXII Marzo, Milan",Mani In Pasta,Via Carlo Pisacane 47,"Pizza, Italian",3.7,6,,45.472920,9.210990,POINT (9.21099 45.47292)
18275,"XXII Marzo, Milan",La Risacca 6,Via Marcona 6,Seafood,3.3,16,€€€€,45.463390,9.208240,POINT (9.20824 45.46339)
18281,"XXII Marzo, Milan",Pandenus,Corso Concordia 11,"Cocktail Bars, Breakfast & Brunch, Bakeries",3.2,10,€€,45.468204,9.210167,POINT (9.21017 45.4682)
18286,"XXII Marzo, Milan",Spoon,Via Marcona 6,"Japanese, Sushi Bars",4.0,1,€€€,45.463469,9.208250,POINT (9.20825 45.46347)


In [11]:
# Both layers have to share the same CRS before they are spatially joined.
print(Restaurants_nodup.crs)  # Restaurants CRS
print(gdf_combined.crs)  # Neighborhood polygons CRS

# Enforce the requirement instead of relying on a visual comparison of the prints.
assert Restaurants_nodup.crs == gdf_combined.crs, "CRS mismatch between restaurants and neighborhood polygons"

EPSG:4326
EPSG:4326


# Spatial Join

In [13]:
# Attach to every restaurant the neighborhood polygon it lies within.
# how="left" keeps restaurants that fall in no polygon; their joined columns
# stay empty.
joined = sjoin(
    left_df=Restaurants_nodup,
    right_df=gdf_combined,
    how="left",
    predicate="within",
)

# Inspect the joined table
joined

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
0,"Parco delle Abbazie, Milan",Il Castigo,Piazza Scipione 7,"Wine Bars, Italian",4.8,40,€€,45.684583,8.706904,POINT (8.7069 45.68458),,
1,"Parco delle Abbazie, Milan",Settembrini 18,Via Settembrini 18,"Italian, Seafood, Pizza",4.5,560,€€,45.481243,9.205606,POINT (9.20561 45.48124),11.0,Buenos Aires - Venezia
2,"Parco delle Abbazie, Milan",Da Oscar,Via Lazzaro Palazzi 4,"Barbeque, Seafood, Wine Bars",4.4,316,€€,45.476440,9.205910,POINT (9.20591 45.47644),11.0,Buenos Aires - Venezia
3,"Parco delle Abbazie, Milan",Osteria della Pista,Via Verbano 1,"Seafood, Italian, Pizza",4.6,41,€€,45.674290,8.739960,POINT (8.73996 45.67429),,
4,"Parco delle Abbazie, Milan",Salsamenteria di Parma,Via San Pietro all'Orto 9,Emilian,4.4,181,€€,45.466267,9.195122,POINT (9.19512 45.46627),21.0,Duomo
...,...,...,...,...,...,...,...,...,...,...,...,...
18273,"XXII Marzo, Milan",Mani In Pasta,Via Carlo Pisacane 47,"Pizza, Italian",3.7,6,,45.472920,9.210990,POINT (9.21099 45.47292),11.0,Buenos Aires - Venezia
18275,"XXII Marzo, Milan",La Risacca 6,Via Marcona 6,Seafood,3.3,16,€€€€,45.463390,9.208240,POINT (9.20824 45.46339),83.0,XXII Marzo
18281,"XXII Marzo, Milan",Pandenus,Corso Concordia 11,"Cocktail Bars, Breakfast & Brunch, Bakeries",3.2,10,€€,45.468204,9.210167,POINT (9.21017 45.4682),11.0,Buenos Aires - Venezia
18286,"XXII Marzo, Milan",Spoon,Via Marcona 6,"Japanese, Sushi Bars",4.0,1,€€€,45.463469,9.208250,POINT (9.20825 45.46347),83.0,XXII Marzo


In [14]:
# Spot check on a single restaurant: the assigned neighborhood is correct.
joined.loc[joined["Business Name"] == "Tartufotto"]

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
47,"Parco delle Abbazie, Milan",Tartufotto,Via Cusani 8,"Italian, Wine Bars, Bistros",4.4,39,€€€,45.468923,9.184549,POINT (9.18455 45.46892),9.0,Brera


In [15]:
# Number of distinct values per column, used as a quick sanity check of the join result.
joined.nunique() # there is something wrong with the prices: more distinct labels than one currency scale explains

Fetch Location           70
Business Name          3110
Business Address       3105
Categories             1305
Average Star Rating      37
Review Count            113
Price                     9
Latitude               3161
Longitude              3184
geometry               3264
index_right              83
Neighborhood             83
dtype: int64

In [16]:
# List the distinct price labels to find out which ones are unexpected.
joined["Price"].unique() # there seem to be prices not in euros

array(['€€', 'N/A', '€€€', '€', '$$', '€€€€', '$$$', '$$$$', '$'],
      dtype=object)

In [17]:
# Rows priced in dollars: they correspond to geometries outside of the
# neighborhoods in Milan (no neighborhood was matched by the join).
joined.loc[joined["Price"] == "$$"]

Unnamed: 0,Fetch Location,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,Latitude,Longitude,geometry,index_right,Neighborhood
31,"Parco delle Abbazie, Milan",Restaurant Jägerstube,,Swiss Food,5.0,1,$$,46.214226,7.854204,POINT (7.8542 46.21423),,
2031,"Buenos Aires - Venezia, Milan",Parrilla Peña,Rodríguez Peña 682,"Argentine, Steakhouses",4.4,221,$$,-34.600975,-58.391444,POINT (-58.39144 -34.60097),,
2033,"Buenos Aires - Venezia, Milan",El Preferido de Palermo,Jorge Luis Borges 2108,"Pubs, Argentine",4.1,82,$$,-34.585412,-58.425359,POINT (-58.42536 -34.58541),,
2038,"Buenos Aires - Venezia, Milan",Chori,Thames 1653,"Steakhouses, Salad, Vegetarian",4.3,136,$$,-34.587838,-58.430244,POINT (-58.43024 -34.58784),,
2039,"Buenos Aires - Venezia, Milan",Santos Manjares,Paraguay 938,"Argentine, Steakhouses",4.4,62,$$,-34.597871,-58.380344,POINT (-58.38034 -34.59787),,
...,...,...,...,...,...,...,...,...,...,...,...,...
2265,"Buenos Aires - Venezia, Milan",Los Portugueses,Av. Angel Gallardo 800,Portuguese,3.3,12,$$,-34.606311,-58.441771,POINT (-58.44177 -34.60631),,
11633,"Parco dei Navigli, Milan",Ristorante Argentino,Piazza Riforma,"Bistros, Pizza",4.3,24,$$,46.004090,8.951460,POINT (8.95146 46.00409),,
16909,"Umbria - Molise, Milan",Al Battello,Riva dal Drèra,Italian,5.0,3,$$,45.922900,8.917160,POINT (8.91716 45.9229),,
16939,"Umbria - Molise, Milan",Locanda Locarnese,Via Bossi 1,Italian,4.9,7,$$,46.169083,8.794450,POINT (8.79445 46.16908),,


In [18]:
# Keep only the restaurants that were matched to a neighborhood polygon
# (rows whose "Neighborhood" is missing fell outside every polygon), restrict
# the table to the relevant variables and renumber the rows from 0.
PolyRestaurants = joined.loc[
    joined["Neighborhood"].notna(),
    [
        "Business Name",
        "Business Address",
        "Categories",
        "Average Star Rating",
        "Review Count",
        "Price",
        "geometry",
        "Neighborhood",
    ],
].reset_index(drop=True)

# Write the cleaned layer to disk as GeoJSON
PolyRestaurants.to_file("PolyRestaurants.geojson", driver="GeoJSON")

# Show the cleaned GeoDataFrame
PolyRestaurants

Unnamed: 0,Business Name,Business Address,Categories,Average Star Rating,Review Count,Price,geometry,Neighborhood
0,Settembrini 18,Via Settembrini 18,"Italian, Seafood, Pizza",4.5,560,€€,POINT (9.20561 45.48124),Buenos Aires - Venezia
1,Da Oscar,Via Lazzaro Palazzi 4,"Barbeque, Seafood, Wine Bars",4.4,316,€€,POINT (9.20591 45.47644),Buenos Aires - Venezia
2,Salsamenteria di Parma,Via San Pietro all'Orto 9,Emilian,4.4,181,€€,POINT (9.19512 45.46627),Duomo
3,Rizzo Come a Casa,Via Varesina 163,Italian,5.0,10,,POINT (9.13914 45.50287),Villapizzone
4,L'Immagine,Via Varesina 61,"Italian, Bistros, Cafes",4.9,97,€€,POINT (9.14548 45.49722),Villapizzone
...,...,...,...,...,...,...,...,...
2649,Cucineria,Viale Monte Nero 76,Italian,3.3,3,€€,POINT (9.20814 45.46076),XXII Marzo
2650,Mani In Pasta,Via Carlo Pisacane 47,"Pizza, Italian",3.7,6,,POINT (9.21099 45.47292),Buenos Aires - Venezia
2651,La Risacca 6,Via Marcona 6,Seafood,3.3,16,€€€€,POINT (9.20824 45.46339),XXII Marzo
2652,Pandenus,Corso Concordia 11,"Cocktail Bars, Breakfast & Brunch, Bakeries",3.2,10,€€,POINT (9.21017 45.4682),Buenos Aires - Venezia


### Quite a few restaurants have no price information. There is not much we can do about it except acknowledge that the data retrieved through the Yelp API is not ideal for calculating statistics about restaurant prices.

In [17]:
# Count the restaurants whose price is the "N/A" placeholder
count_na = int(PolyRestaurants["Price"].eq("N/A").sum())

print(f"Number of rows with Price = 'N/A': {count_na}")

Number of rows with Price = 'N/A': 1063
