In [1]:
import os
import time

import geopandas as gpd
import pandas as pd
import requests
from geopandas.tools import sjoin
from shapely.geometry import Point

In [3]:
# Import of the Polygons
# The file location can be overridden with the QUARTIERI_GEOJSON environment
# variable so the notebook also runs on other machines; the default is the
# original machine-specific path, kept so existing runs behave the same.
POLYGONS_PATH = os.environ.get("QUARTIERI_GEOJSON", "C:/Users/edoar/combined_quartieri.geojson")
gdf_combined = gpd.read_file(POLYGONS_PATH)

# API di YELP

In [6]:
# Build the "<neighborhood>, Milan" strings that are later passed to Yelp as
# the search location, one per polygon in gdf_combined
neighborhood_names = gdf_combined["Neighborhood"].tolist()
neighborhoods = list(map("{}, Milan".format, neighborhood_names))
neighborhoods

['Parco delle Abbazie, Milan',
 'Adriano, Milan',
 'Affori, Milan',
 'Baggio, Milan',
 'Bande Nere, Milan',
 'Barona, Milan',
 'Bicocca, Milan',
 'Bovisasca, Milan',
 'Bovisa, Milan',
 'Brera, Milan',
 'Bruzzano, Milan',
 'Buenos Aires - Venezia, Milan',
 'Cantalupa, Milan',
 'Cascina Triulza - Expo, Milan',
 'Centrale, Milan',
 'Chiaravalle, Milan',
 'Città Studi, Milan',
 'Comasina, Milan',
 'Corsica, Milan',
 'De Angeli - Monte Rosa, Milan',
 'Dergano, Milan',
 'Duomo, Milan',
 'Ex Om - Morivione, Milan',
 'Farini, Milan',
 'GARIBALDI REPUBBLICA, Milan',
 'Gallaratese, Milan',
 'Ghisolfa, Milan',
 'Giambellino, Milan',
 'Giardini Porta Venezia, Milan',
 'Gratosoglio - Ticinello, Milan',
 'Greco, Milan',
 'Guastalla, Milan',
 'Isola, Milan',
 'Lambrate, Milan',
 'Lodi - Corvetto, Milan',
 'Lorenteggio, Milan',
 'Loreto, Milan',
 'Maciachini - Maggiolina, Milan',
 'Magenta - San Vittore, Milan',
 'Maggiore - Musocco, Milan',
 'Mecenate, Milan',
 'Muggiano, Milan',
 'Navigli, Milan',
 

In [8]:
# Yelp API key, read from the YELP_API_KEY environment variable so the secret
# is never stored in the notebook.
# NOTE(review): a key used to be hardcoded in this cell; treat it as
# compromised and rotate it.
API_KEY = os.environ.get("YELP_API_KEY", "")
if not API_KEY:
    # Warn right away instead of failing later, request by request
    print("WARNING: YELP_API_KEY is not set; requests to the Yelp API will be rejected.")
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Constants
# Page size used for every search request. 48 splits the 240-result cap used
# by the collection loop into five full pages.
# NOTE(review): Yelp's documented per-request maximum is believed to be 50,
# not 48 - confirm against the API reference.
businesses_per_request = 48

def make_request(url, params=None, timeout=10):
    """
    Makes a GET request to the Yelp API using the module-level HEADERS.

    Parameters:
        url: Full URL of the endpoint to call.
        params: Optional dict of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely and a stalled connection
            would block the whole notebook.

    Returns:
        The requests.Response object of a successful call.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within `timeout`.
    """
    response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    response.raise_for_status()
    return response

def search_businesses(location, term="gym", limit=businesses_per_request, offset=0):
    """
    Runs one page of a Yelp business search and returns the matches.

    Parameters:
        location: Neighborhood or address to search around.
        term: Search term, e.g. "gym" or "palestre".
        limit: Number of businesses requested for this page.
        offset: Number of results to skip (used for pagination).

    Returns:
        The list stored under "businesses" in the JSON response, or an empty
        list when that key is absent.
    """
    query = dict(location=location, term=term, limit=limit, offset=offset)
    payload = make_request("https://api.yelp.com/v3/businesses/search", params=query).json()
    return payload.get("businesses", [])

# Initialize data storage
data = []

# Pagination for one location stops once this many results were fetched.
# NOTE(review): taken from the original "maximum 240 results per query" note;
# confirm the cap against the current Yelp documentation.
MAX_RESULTS_PER_QUERY = 240

# Column order of the collected table. Passing it to the DataFrame constructor
# keeps the schema stable even when no business was collected at all.
GYM_COLUMNS = [
    "Fetch Location", "Gym Name", "Gym Address", "Categories",
    "Average Star Rating", "Review Count", "Latitude", "Longitude",
]


def _business_to_record(biz, fetch_location):
    """
    Flattens one business entry of the search response into a table row.

    Parameters:
        biz: Dict describing a single business, as returned by search_businesses.
        fetch_location: The location string the business was fetched for.

    Returns:
        A dict keyed by GYM_COLUMNS. Absent name/address/rating/review count
        become "N/A"; absent coordinates become None.
    """
    # "or" also covers keys that are present but null in the JSON payload,
    # where dict.get(key, default) would hand back None and break the lookups
    location_info = biz.get("location") or {}
    coordinates = biz.get("coordinates") or {}
    categories = biz.get("categories") or []

    category_list = [cat.get("title", "") for cat in categories if cat.get("title")]

    return {
        "Fetch Location": fetch_location,
        "Gym Name": biz.get("name", "N/A"),
        "Gym Address": location_info.get("address1", "N/A"),
        "Categories": ", ".join(category_list) if category_list else "N/A",
        "Average Star Rating": biz.get("rating", "N/A"),
        "Review Count": biz.get("review_count", "N/A"),
        "Latitude": coordinates.get("latitude", None),
        "Longitude": coordinates.get("longitude", None),
    }


# Unexpected errors are deliberately NOT swallowed here: a failure surfaces
# with its traceback instead of leaving `Gyms` silently undefined for the
# cells below. Records gathered so far stay available in `data`.
for neighborhood in neighborhoods:  # Loop through each neighborhood
    offset = 0
    while True:  # Keep fetching data until no more results
        print(f"Fetching gyms in {neighborhood} with offset: {offset}...")

        try:
            # Fetch businesses using the current offset and location
            businesses = search_businesses(location=neighborhood, term="gym",
                                           limit=businesses_per_request, offset=offset)
        except requests.HTTPError as he:
            # Log the error and skip the rest of this neighborhood
            print(f"HTTP error occurred for {neighborhood}: {he}")
            break

        if not businesses:
            # No more gyms to fetch
            print(f"No more gyms returned for {neighborhood}.")
            break

        data.extend(_business_to_record(biz, neighborhood) for biz in businesses)

        # Advance by what was actually returned, which may be less than the page size
        offset += len(businesses)

        # Optional: Sleep to respect API rate limits
        time.sleep(0.5)

        if offset >= MAX_RESULTS_PER_QUERY:
            print(f"Reached maximum results for {neighborhood}.")
            break

# Convert the collected data into a DataFrame
df = pd.DataFrame(data, columns=GYM_COLUMNS)

# Build the point geometries. pd.notna is used instead of a truthiness test:
# missing coordinates end up as NaN in the DataFrame, NaN is truthy and would
# otherwise yield Point(nan, nan), while a legitimate 0.0 would be dropped.
df["geometry"] = [
    Point(lon, lat) if pd.notna(lon) and pd.notna(lat) else None
    for lon, lat in zip(df["Longitude"], df["Latitude"])
]
Gyms = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Print summary
print(Gyms.head())
print(f"Total gyms collected: {len(data)}")


Fetching gyms in Parco delle Abbazie, Milan with offset: 0...
Fetching gyms in Parco delle Abbazie, Milan with offset: 48...
Fetching gyms in Parco delle Abbazie, Milan with offset: 96...
Fetching gyms in Parco delle Abbazie, Milan with offset: 144...
Fetching gyms in Parco delle Abbazie, Milan with offset: 192...
Reached maximum results for Parco delle Abbazie, Milan.
Fetching gyms in Adriano, Milan with offset: 0...
HTTP error occurred for Adriano, Milan: 400 Client Error: Bad Request for url: https://api.yelp.com/v3/businesses/search?location=Adriano%2C+Milan&term=gym&limit=48&offset=0
Fetching gyms in Affori, Milan with offset: 0...
Fetching gyms in Affori, Milan with offset: 48...
Fetching gyms in Affori, Milan with offset: 93...
No more gyms returned for Affori, Milan.
Fetching gyms in Baggio, Milan with offset: 0...
Fetching gyms in Baggio, Milan with offset: 48...
Fetching gyms in Baggio, Milan with offset: 96...
Fetching gyms in Baggio, Milan with offset: 144...
Fetching gyms 

In [10]:
Gyms

Unnamed: 0,Fetch Location,Gym Name,Gym Address,Categories,Average Star Rating,Review Count,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",McFit,Via Pier Francesco Mola 48,Gyms,4.0,1,45.495993,9.139686,POINT (9.13969 45.49599)
1,"Parco delle Abbazie, Milan",Hard Candy Fitness,Via Parini 1,Gyms,3.7,3,45.477589,9.195213,POINT (9.19521 45.47759)
2,"Parco delle Abbazie, Milan",Get Fit Express,Piazza Città di Lombardia 5,Gyms,4.3,3,45.483848,9.192945,POINT (9.19294 45.48385)
3,"Parco delle Abbazie, Milan",Centro balneare Lido Locarno,Via Gioacchino Respini 11,"Gyms, Swimming Pools, Swimming Lessons/Schools",4.7,3,46.161382,8.803369,POINT (8.80337 46.16138)
4,"Parco delle Abbazie, Milan",Tonic,Via Giambellino 5,Gyms,4.0,3,45.451830,9.152850,POINT (9.15285 45.45183)
...,...,...,...,...,...,...,...,...,...
12879,"Forze Armate, Milan",Alina Quintana,Galleria San Babila 4A,Dance Studios,0.0,0,45.466360,9.198580,POINT (9.19858 45.46636)
12880,"Forze Armate, Milan",Virgin Active,Via Carlo Imbonati 24A,"Gyms, Trainers, Pilates",2.0,1,45.502156,9.182671,POINT (9.18267 45.50216)
12881,"Forze Armate, Milan",Nonsolodanza,Via Pastrengo 16,Dance Studios,0.0,0,45.486760,9.185670,POINT (9.18567 45.48676)
12882,"Forze Armate, Milan",Let's Dance,Via Puricelli 20,Dance Studios,5.0,1,45.464980,9.129650,POINT (9.12965 45.46498)


In [12]:
# Drop repeated listings. "Fetch Location" is left out of the comparison, so a
# gym returned by several neighbourhood queries is kept only once (first hit).
dedup_keys = ["Gym Name", "Gym Address", "Categories", "Average Star Rating",
              "Review Count", "geometry"]
Gyms_nodup = Gyms.drop_duplicates(subset=dedup_keys)

# Persist the de-duplicated layer as GeoJSON
Gyms_nodup.to_file("Gyms.geojson", driver="GeoJSON")

Gyms_nodup

Unnamed: 0,Fetch Location,Gym Name,Gym Address,Categories,Average Star Rating,Review Count,Latitude,Longitude,geometry
0,"Parco delle Abbazie, Milan",McFit,Via Pier Francesco Mola 48,Gyms,4.0,1,45.495993,9.139686,POINT (9.13969 45.49599)
1,"Parco delle Abbazie, Milan",Hard Candy Fitness,Via Parini 1,Gyms,3.7,3,45.477589,9.195213,POINT (9.19521 45.47759)
2,"Parco delle Abbazie, Milan",Get Fit Express,Piazza Città di Lombardia 5,Gyms,4.3,3,45.483848,9.192945,POINT (9.19294 45.48385)
3,"Parco delle Abbazie, Milan",Centro balneare Lido Locarno,Via Gioacchino Respini 11,"Gyms, Swimming Pools, Swimming Lessons/Schools",4.7,3,46.161382,8.803369,POINT (8.80337 46.16138)
4,"Parco delle Abbazie, Milan",Tonic,Via Giambellino 5,Gyms,4.0,3,45.451830,9.152850,POINT (9.15285 45.45183)
...,...,...,...,...,...,...,...,...,...
7763,"Parco Nord, Milan",Elena Sokolova,"Via Mentana, 6",Dance Studios,0.0,0,45.577499,9.276400,POINT (9.2764 45.5775)
7764,"Parco Nord, Milan",Studio Pole Dance,"Via Mentana, 6",Dance Studios,0.0,0,45.577499,9.276400,POINT (9.2764 45.5775)
7766,"Parco Nord, Milan",La Palestra S.S.D.R.L.,"Via SAN Gottardo, 58",Dance Studios,0.0,0,45.584100,9.268260,POINT (9.26826 45.5841)
11596,"Triulzo Superiore, Milan","Accademia di Musica e Danza, F. Gaffurio","LG. Carlo Crocetta, 8",Dance Studios,0.0,0,45.358787,9.329503,POINT (9.3295 45.35879)


In [14]:
# Both layers must use the same coordinate reference system before the join
print(Gyms_nodup.crs)  # Gyms CRS
print(gdf_combined.crs)  # Neighborhood polygons CRS

# Enforce the requirement instead of relying on a visual comparison of the prints
assert Gyms_nodup.crs == gdf_combined.crs, (
    "CRS mismatch: reproject one layer with .to_crs() before the spatial join"
)

EPSG:4326
EPSG:4326


# Join Spaziale

In [16]:
# Attach to every gym the neighbourhood polygon that contains it. The left
# join keeps gyms that fall outside all polygons, with empty polygon columns.
joined = sjoin(
    left_df=Gyms_nodup,
    right_df=gdf_combined,
    how="left",
    predicate="within",
)

# Display the joined table
joined

Unnamed: 0,Fetch Location,Gym Name,Gym Address,Categories,Average Star Rating,Review Count,Latitude,Longitude,geometry,index_right,Neighborhood
0,"Parco delle Abbazie, Milan",McFit,Via Pier Francesco Mola 48,Gyms,4.0,1,45.495993,9.139686,POINT (9.13969 45.49599),81.0,Villapizzone
1,"Parco delle Abbazie, Milan",Hard Candy Fitness,Via Parini 1,Gyms,3.7,3,45.477589,9.195213,POINT (9.19521 45.47759),9.0,Brera
2,"Parco delle Abbazie, Milan",Get Fit Express,Piazza Città di Lombardia 5,Gyms,4.3,3,45.483848,9.192945,POINT (9.19294 45.48385),24.0,GARIBALDI REPUBBLICA
3,"Parco delle Abbazie, Milan",Centro balneare Lido Locarno,Via Gioacchino Respini 11,"Gyms, Swimming Pools, Swimming Lessons/Schools",4.7,3,46.161382,8.803369,POINT (8.80337 46.16138),,
4,"Parco delle Abbazie, Milan",Tonic,Via Giambellino 5,Gyms,4.0,3,45.451830,9.152850,POINT (9.15285 45.45183),27.0,Giambellino
...,...,...,...,...,...,...,...,...,...,...,...
7763,"Parco Nord, Milan",Elena Sokolova,"Via Mentana, 6",Dance Studios,0.0,0,45.577499,9.276400,POINT (9.2764 45.5775),,
7764,"Parco Nord, Milan",Studio Pole Dance,"Via Mentana, 6",Dance Studios,0.0,0,45.577499,9.276400,POINT (9.2764 45.5775),,
7766,"Parco Nord, Milan",La Palestra S.S.D.R.L.,"Via SAN Gottardo, 58",Dance Studios,0.0,0,45.584100,9.268260,POINT (9.26826 45.5841),,
11596,"Triulzo Superiore, Milan","Accademia di Musica e Danza, F. Gaffurio","LG. Carlo Crocetta, 8",Dance Studios,0.0,0,45.358787,9.329503,POINT (9.3295 45.35879),,


In [18]:
# Spot check on a known gym: the assigned neighbourhoods are correct
joined.loc[joined["Gym Name"] == "McFit"]

Unnamed: 0,Fetch Location,Gym Name,Gym Address,Categories,Average Star Rating,Review Count,Latitude,Longitude,geometry,index_right,Neighborhood
0,"Parco delle Abbazie, Milan",McFit,Via Pier Francesco Mola 48,Gyms,4.0,1,45.495993,9.139686,POINT (9.13969 45.49599),81.0,Villapizzone
800,"Bicocca, Milan",McFit,Via Luisa Battistotti Sassi 11,Gyms,0.0,0,45.46338,9.227671,POINT (9.22767 45.46338),18.0,Corsica


In [20]:
# Number of distinct values in each column of the joined table
joined.nunique()

Fetch Location          22
Gym Name               747
Gym Address            808
Categories             170
Average Star Rating     27
Review Count            14
Latitude               808
Longitude              806
geometry               811
index_right             58
Neighborhood            58
dtype: int64

In [28]:
# Keep only the gyms matched to a neighbourhood polygon: rows whose
# "Neighborhood" is empty after the left join are discarded
inside_polygon = joined["Neighborhood"].notna()

# Columns carried into the final layer; name and address get Italian headers
kept_columns = ["Gym Name", "Gym Address", "Categories", "Average Star Rating",
                "Review Count", "geometry", "Neighborhood"]
italian_headers = {"Gym Name": "Nome", "Gym Address": "Indirizzo"}

PolyGyms = (
    joined.loc[inside_polygon]
    .reset_index(drop=True)[kept_columns]
    .rename(columns=italian_headers)
    .assign(info="PALESTRA")  # constant tag marking every row as a gym
)

# Write the cleaned layer to disk
PolyGyms.to_file("PolyGyms.geojson", driver="GeoJSON")

# Display the cleaned GeoDataFrame
PolyGyms

Unnamed: 0,Nome,Indirizzo,Categories,Average Star Rating,Review Count,geometry,Neighborhood,info
0,McFit,Via Pier Francesco Mola 48,Gyms,4.0,1,POINT (9.13969 45.49599),Villapizzone,PALESTRA
1,Hard Candy Fitness,Via Parini 1,Gyms,3.7,3,POINT (9.19521 45.47759),Brera,PALESTRA
2,Get Fit Express,Piazza Città di Lombardia 5,Gyms,4.3,3,POINT (9.19294 45.48385),GARIBALDI REPUBBLICA,PALESTRA
3,Tonic,Via Giambellino 5,Gyms,4.0,3,POINT (9.15285 45.45183),Giambellino,PALESTRA
4,Milanimal,Via Lodovico Muratori 34,"Martial Arts, Gyms",4.8,4,POINT (9.20972 45.45129),Porta Romana,PALESTRA
...,...,...,...,...,...,...,...,...
324,Level 2,Via Pitteri 8,"Martial Arts, Gyms",4.0,1,POINT (9.24887 45.47333),Lambrate,PALESTRA
325,Associazione Sportiva Dilettantistica Lo Studi...,"Via Rutilia, 16",Dance Studios,0.0,0,POINT (9.19761 45.44014),Ex Om - Morivione,PALESTRA
326,Tilt Spazio Danza,"Via Averardo Buschi, 1",Dance Studios,0.0,0,POINT (9.23254 45.48233),Città Studi,PALESTRA
327,A.s.fitness Professionals FIA,"Via Monte Nevoso, 17","Sports Clubs, Fitness & Instruction",0.0,0,POINT (9.23403 45.48763),Loreto,PALESTRA
