# Use case 1: Ingesting and Filtering Data from OpenStreetMap

In [8]:
import os
import urllib.request
import quackosm
import osmnx as ox

In [9]:
# --- Paths and URLs ---------------------------------------------------------
# Both directories are relative to the notebook's working directory.
RAW_DATA_DIR = '../data/raw'
PROCESSED_DATA_DIR = '../data/processed'

# The PBF file will be downloaded from this URL; the local filename is taken
# from the last path segment of the URL.
PBF_URL = "https://download.geofabrik.de/europe/italy-latest.osm.pbf"
PBF_FILENAME = PBF_URL.split('/')[-1]
PBF_FILEPATH = os.path.join(RAW_DATA_DIR, PBF_FILENAME)

# The final processed file
OUTPUT_FILE = os.path.join(PROCESSED_DATA_DIR, 'milan_buildings.geoparquet')

# --- Create directories and download data (if needed) -----------------------
os.makedirs(RAW_DATA_DIR, exist_ok=True)
os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)

# Check if the PBF file already exists before downloading
if not os.path.exists(PBF_FILEPATH):
    print(f"PBF file not found. Downloading from {PBF_URL}...")
    # Download into a temporary sibling file and only move it into place once
    # the transfer has finished. Writing straight to PBF_FILEPATH would leave a
    # truncated file behind if the download is interrupted, and the existence
    # check above would then skip the download on every later run.
    partial_filepath = PBF_FILEPATH + '.part'
    try:
        urllib.request.urlretrieve(PBF_URL, partial_filepath)
        os.replace(partial_filepath, PBF_FILEPATH)
    finally:
        # After a successful os.replace the partial file no longer exists;
        # on failure this removes the incomplete leftover.
        if os.path.exists(partial_filepath):
            os.remove(partial_filepath)
    print(f"Download complete. File saved to {PBF_FILEPATH}")
else:
    print(f"PBF file already exists at {PBF_FILEPATH}. Skipping download.")

# --- Define the Area of Interest (Milan's boundary) -------------------------
print("\nFetching the boundary for Milan.")
place_name = "Milan, Italy"
milan_gdf = ox.geocode_to_gdf(place_name)
print("Boundary fetched.")

# --- Filter the PBF file ----------------------------------------------------
print(f"Starting building extraction from PBF file: {PBF_FILEPATH}.")

# Create the reader object, configured with the two filters: the first geometry
# of the geocoded boundary and a tag filter on the 'building' key.
pbf_reader = quackosm.PbfFileReader(
    geometry_filter=milan_gdf.geometry.iloc[0],
    tags_filter={'building': True}
)

buildings_gdf = pbf_reader.convert_pbf_to_geodataframe(PBF_FILEPATH)

print(f"Extraction complete. Found {len(buildings_gdf)} buildings.")

# --- Save to GeoParquet format ----------------------------------------------
print(f"Saving data to {OUTPUT_FILE}...")
buildings_gdf.to_parquet(OUTPUT_FILE)

print(f"\nDone! The GeoParquet file was created successfully at: {OUTPUT_FILE}.")

PBF file already exists at ../data/raw\italy-latest.osm.pbf. Skipping download.

Fetching the boundary for Milan...


Output()

Boundary fetched.
Starting building extraction from PBF file: ../data/raw\italy-latest.osm.pbf


Extraction complete. Found 62133 buildings.
Saving data to ../data/processed\milan_buildings.geoparquet...

Done! The GeoParquet file was created successfully at: ../data/processed\milan_buildings.geoparquet
