In [None]:
import pandas as pd
import geopandas as gpd
import osmnx as ox

### Auxiliary function, extracted from the Enrichment class, used to generate a POI dataset from OSM data.

In [None]:
def download_poi_osm(list_pois: list[str], place: str, write_intermediate_files: bool = False) -> gpd.GeoDataFrame:
    """Download named POIs from OpenStreetMap and merge them into a single GeoDataFrame.

    For every OSM tag key in ``list_pois`` the features located in ``place`` are
    downloaded with ``ox.features_from_place(place, tags={key: True})``. Features
    without a name are discarded, only the columns listed in ``list_columns_df_poi``
    are kept, and the value of the tag ``key`` is stored in the ``category`` column
    (the generic value ``'yes'`` is replaced by the key itself).

    Parameters
    ----------
    list_pois : list[str]
        OSM tag keys to download (e.g. ``'amenity'``, ``'shop'``). May be empty.
    place : str
        Place name passed as-is to ``ox.features_from_place``.
    write_intermediate_files : bool, default False
        If True, the cleaned POIs of each key are also written to
        ``./<key>.parquet`` in the current working directory.

    Returns
    -------
    gpd.GeoDataFrame
        All retained POIs with a fresh 0..n-1 index. An empty GeoDataFrame
        (expected columns, CRS EPSG:4326) is returned when ``list_pois`` is empty
        or no category yields a usable POI.
    """
    # Final list of the columns that are expected to be found in the POI dataframe.
    list_columns_df_poi = ['osmid', 'element_type', 'name', 'name:en', 'wikidata', 'geometry', 'category']

    # Empty template: fixes the column order and CRS of the result and doubles as
    # the return value when there is nothing to download.
    gdf = gpd.GeoDataFrame(columns=list_columns_df_poi, crs="EPSG:4326")
    if not list_pois:
        return gdf

    print(f"Downloading POIs from OSM for the location {place}...")

    # Collect the per-category frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = [gdf]
    for key in list_pois:

        # Downloading the POIs tagged with this key.
        print(f"Downloading {key} POIs from OSM...")
        poi = ox.features_from_place(place, tags={key: True})
        print("Download completed!")

        # Skip (rather than abort on) a category that has no usable POI, so the
        # remaining categories are still processed. A frame without a 'name'
        # column cannot contain any named POI either.
        if poi.empty or 'name' not in poi.columns:
            print(f"No POI found for category {key}!")
            continue

        # Turn the index levels into regular columns.
        # NOTE(review): 'osmid' and 'element_type' are expected to come from the index
        # of the frame returned by OSMnx; newer OSMnx releases may name these levels
        # differently -- confirm against the installed version.
        poi = poi.reset_index()
        poi = poi.drop(columns='category', errors='ignore')  # Delete the column 'category' if it exists.
        poi = poi.rename(columns={key: 'category'})
        poi = poi.drop(columns=poi.columns.difference(list_columns_df_poi))

        # Remove the POIs that do not have a name. The explicit copy makes the
        # following column assignment act on an independent frame.
        poi = poi.loc[poi['name'].notna()].copy()
        # Assign the result back: an in-place replace on the selected column is a
        # chained assignment that may not propagate to the frame.
        poi['category'] = poi['category'].replace({'yes': key})

        # Now write out this subset of POIs to a file.
        if write_intermediate_files:
            poi.to_parquet('./' + key + '.parquet')

        frames.append(poi)

    # Concatenate every subset of POIs and renumber the rows.
    return pd.concat(frames, ignore_index=True)

### Use the above function to generate a POI dataset from OpenStreetMap data

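Note: provide (1) a list of categories, i.e. the OSM tag keys to download (such as `amenity`, `shop`, or `tourism`), (2) the name of the place containing the POIs, and (3) the path to the output file. In the call below the third argument is `True`, so one intermediate file `./<category>.parquet` is also written for each category.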

In [None]:
# Inputs of the run: the OSM tag keys used as POI categories, the place to
# query, and the location of the final output file.
list_POI_cats = ['amenity', 'shop', 'tourism']
place = 'Rome, Italy'
path_output = './pois.parquet'

# Download the POIs of every category; the intermediate per-category parquet
# files are requested as well.
gdf = download_poi_osm(
    list_pois=list_POI_cats,
    place=place,
    write_intermediate_files=True,
)

# Print a summary (columns, dtypes, non-null counts) of the merged dataset.
gdf.info()

# Persist the merged dataset.
gdf.to_parquet(path_output)