In [18]:
#!pip install osm2geojson

In [21]:
#full domain
import requests
import geopandas as gpd
from shapely.geometry import MultiPolygon
from shapely.ops import unary_union
import osm2geojson  # pip install osm2geojson

# Bounding box for the larger Paris area
south, west = 48.783461, 2.197275
north, east = 48.9291117, 2.4811851

# Overpass query: includes both ways and relations
overpass_url = "https://overpass-api.de/api/interpreter"
query = f"""
[out:json][timeout:180];
(
  way["building"]({south},{west},{north},{east});
  relation["building"]({south},{west},{north},{east});
);
out body;
>;
out skel qt;
"""

# Send the request
print("⏳ Sending Overpass API request...")
response = requests.get(overpass_url, params={"data": query})
data = response.json()

# Convert to valid GeoJSON (handles multipolygons properly)
print("🔄 Converting OSM to GeoJSON...")
geojson = osm2geojson.json2geojson(data)

# Load into GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(geojson["features"])
gdf.set_crs("EPSG:4326", inplace=True)

# Optional: Clean invalid geometries
print("🧼 Fixing invalid geometries...")
gdf["geometry"] = gdf["geometry"].buffer(0)

# Merge all overlapping/touching buildings
print("🔗 Merging geometries...")
merged = unary_union(gdf["geometry"])

# Handle single vs multiple polygons
if isinstance(merged, MultiPolygon):
    merged_gdf = gpd.GeoDataFrame(geometry=list(merged.geoms), crs=gdf.crs)
else:
    merged_gdf = gpd.GeoDataFrame(geometry=[merged], crs=gdf.crs)

# Save to file
output_path = "paris_buildings_large_merged.gpkg"
merged_gdf.to_file(output_path, driver="GPKG")

print(f"✅ Done! Merged {len(gdf)} buildings into {len(merged_gdf)} shapes.")
print(f"📦 Saved to: {output_path}")


⏳ Sending Overpass API request...
🔄 Converting OSM to GeoJSON...
🧼 Fixing invalid geometries...
🔗 Merging geometries...


  collections = lib.create_collection(


✅ Done! Merged 484071 buildings into 160762 shapes.
📦 Saved to: paris_buildings_large_merged.gpkg


In [23]:
ll -h

total 194320
drwxr-xr-x    4 gabeiras3j  staff   128B Apr 22 10:28 [1m[36m__pycache__[m[m/
drwxr-xr-x  288 gabeiras3j  staff   9.0K Apr 23 12:59 [1m[36mcache[m[m/
-rw-r--r--    1 gabeiras3j  staff    44K Apr 16 10:04 downloaded_log.txt
-rw-r--r--    1 gabeiras3j  staff    21K Apr 22 10:28 downloader_updated.py
-rw-r--r--    1 gabeiras3j  staff   3.6M Apr 23 12:40 get_lidar_paris.ipynb
-rw-r--r--    1 gabeiras3j  staff   727K Apr 23 12:40 get_lidar_paris_clean01-test01.ipynb
-rw-r--r--    1 gabeiras3j  staff   714K Apr 23 12:40 get_lidar_paris_clean01.ipynb
drwxr-xr-x    3 gabeiras3j  staff    96B Apr 22 10:23 [1m[36mglobus_utils[m[m/
-rw-r--r--    1 gabeiras3j  staff   1.0M Apr 16 17:35 lambda_b_by_cell_Paris_MS.pkl
-rw-r--r--    1 gabeiras3j  staff   201K Apr 16 17:15 lambda_b_updated_Paris.tif
-rw-r--r--    1 gabeiras3j  staff   201K Apr 21 11:22 lambda_b_updated_Paris_lidar.tif
-rw-r--r--    1 gabeiras3j  staff   196K Apr 21 09:32 lambda_b_updated_Paris_lidar.tif2nd
-rw-

In [19]:
import requests
import geopandas as gpd
from shapely.geometry import MultiPolygon
from shapely.ops import unary_union
import osm2geojson  # pip install osm2geojson

# Overpass query: includes ways and relations
overpass_url = "https://overpass-api.de/api/interpreter"
query = """
[out:json][timeout:25];
(
  way["building"](48.8519,2.3266,48.8659,2.3449);
  relation["building"](48.8519,2.3266,48.8659,2.3449);
);
out body;
>;
out skel qt;
"""

# Send request
response = requests.get(overpass_url, params={"data": query})
data = response.json()

# Convert to GeoJSON
geojson = osm2geojson.json2geojson(data)

# Load into GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(geojson["features"])
gdf.set_crs("EPSG:4326", inplace=True)

# Optional: Clean invalid geometries
gdf["geometry"] = gdf["geometry"].buffer(0)

# Merge all touching/overlapping buildings
merged = unary_union(gdf["geometry"])

# Handle result: single or multi geometry
if isinstance(merged, MultiPolygon):
    merged_gdf = gpd.GeoDataFrame(geometry=[geom for geom in merged.geoms], crs=gdf.crs)
else:
    merged_gdf = gpd.GeoDataFrame(geometry=[merged], crs=gdf.crs)

# Save to GeoPackage
merged_gdf.to_file("paris_buildings_merged.gpkg", driver="GPKG")
print(f"✅ Merged {len(gdf)} buildings into {len(merged_gdf)} merged shapes.")


✅ Merged 2789 buildings into 261 merged shapes.


In [14]:
import requests
import geopandas as gpd
from shapely.geometry import shape
import osm2geojson  # pip install osm2geojson

# Overpass query: includes all building geometries
overpass_url = "https://overpass-api.de/api/interpreter"
query = """
[out:json][timeout:25];
(
  way["building"](48.8519,2.3266,48.8659,2.3449);
  relation["building"](48.8519,2.3266,48.8659,2.3449);
);
out body;
>;
out skel qt;
"""

# Fetch data
response = requests.get(overpass_url, params={"data": query})
data = response.json()

# Convert to GeoJSON (handles relations properly)
geojson = osm2geojson.json2geojson(data)

# Create GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(geojson["features"])
gdf.set_crs("EPSG:4326", inplace=True)

# Merge all geometries into one MultiPolygon
gdf_merged = gdf.dissolve()

# Save
gdf_merged.to_file("paris_buildings_merged.gpkg", driver="GPKG")
print("✅ Done! Saved merged building geometry.")


ModuleNotFoundError: No module named 'osm2geojson'

In [16]:
import requests
import geopandas as gpd
from shapely.geometry import shape
from shapely.geometry import MultiPolygon

# Overpass Turbo-style query
overpass_url = "https://overpass-api.de/api/interpreter"
query = """
[out:json][timeout:25];
(
  way["building"](48.8519,2.3266,48.8659,2.3449);
  relation["building"](48.8519,2.3266,48.8659,2.3449);
);
out body;
>;
out skel qt;
"""


# Send request
response = requests.get(overpass_url, params={"data": query})
data = response.json()

# Convert to GeoDataFrame
elements = data['elements']
nodes = {el['id']: (el['lon'], el['lat']) for el in elements if el['type'] == 'node'}

features = []
for el in elements:
    if el['type'] == 'way' and 'nodes' in el:
        coords = [nodes[nid] for nid in el['nodes'] if nid in nodes]
        if len(coords) >= 3:
            try:
                features.append(shape({"type": "Polygon", "coordinates": [coords]}))
            except Exception:
                pass


gdf = gpd.GeoDataFrame(geometry=features, crs="EPSG:4326")


# Optional: clean geometries (fix invalid ones)
gdf["geometry"] = gdf["geometry"].buffer(0)

# Merge all touching (adjacent or overlapping) buildings
merged = gdf.unary_union  # returns a single MultiPolygon or Polygon

# If result is a MultiPolygon, turn it into separate rows
if isinstance(merged, MultiPolygon):
    merged_gdf = gpd.GeoDataFrame(geometry=[geom for geom in merged.geoms], crs=gdf.crs)
else:
    merged_gdf = gpd.GeoDataFrame(geometry=[merged], crs=gdf.crs)


merged_gdf.to_file("paris_buildings_fast_merged.gpkg", driver="GPKG")
print("✅ Done! Like Overpass Turbo, but in Python.")


  merged = gdf.unary_union  # returns a single MultiPolygon or Polygon


✅ Done! Like Overpass Turbo, but in Python.


In [13]:
import requests
import geopandas as gpd
from shapely.geometry import shape

# Overpass Turbo-style query
overpass_url = "https://overpass-api.de/api/interpreter"
query = """
[out:json][timeout:25];
(
  way["building"](48.8519,2.3266,48.8659,2.3449);
  relation["building"](48.8519,2.3266,48.8659,2.3449);
);
out body;
>;
out skel qt;
"""


# Send request
response = requests.get(overpass_url, params={"data": query})
data = response.json()

# Convert to GeoDataFrame
elements = data['elements']
nodes = {el['id']: (el['lon'], el['lat']) for el in elements if el['type'] == 'node'}

features = []
for el in elements:
    if el['type'] == 'way' and 'nodes' in el:
        coords = [nodes[nid] for nid in el['nodes'] if nid in nodes]
        if len(coords) >= 3:
            try:
                features.append(shape({"type": "Polygon", "coordinates": [coords]}))
            except Exception:
                pass

gdf = gpd.GeoDataFrame(geometry=features, crs="EPSG:4326")
gdf.to_file("paris_buildings_fast.gpkg", driver="GPKG")
print("✅ Done! Like Overpass Turbo, but in Python.")


✅ Done! Like Overpass Turbo, but in Python.


In [11]:
import osmnx as ox
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm

# Settings
ox.settings.log_console = False
ox.settings.use_cache = True

# Bounding box for Paris area
north = 48.865924303683215
south = 48.85196374476617
west = 2.326650288914438
east = 2.3448515182972

# Tile size in degrees (smaller = more tiles, but safer)
step = 0.001  # ~100 m at this latitude

# Generate grid
lats = np.arange(south, north, step)
lons = np.arange(west, east, step)

# Collect buildings
all_buildings = []

# Loop with tqdm
for lat in tqdm(lats, desc="Latitude tiles"):
    for lon in lons:
        bbox = (lat + step, lat, lon + step, lon)
        try:
            gdf = ox.features_from_bbox(bbox=bbox, tags={"building": True})
            if not gdf.empty:
                all_buildings.append(gdf)
        except Exception as e:
            print(f"⚠️ Failed at tile ({lat:.4f}, {lon:.4f}): {e}")

# Merge and save
if all_buildings:
    gdf_all = gpd.GeoDataFrame(pd.concat(all_buildings, ignore_index=True))
    gdf_all.to_file("paris_buildings.gpkg", driver="GPKG")
    print("✅ Done! Saved to paris_buildings.gpkg")
else:
    print("❌ No buildings found or all tiles failed.")


  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)
Latitude tiles:   0%|                                    | 0/14 [03:41<?, ?it/s]


KeyboardInterrupt: 

In [8]:
import osmnx as ox

# Define bounding box for the area in Paris
north = 48.865924303683215
south = 48.85196374476617
west = 2.326650288914438
east = 2.3448515182972
bbox = (north, south, east, west)

# Define the tag for OSM buildings
tags = {"building": True}

# Retrieve building footprints within the bounding box
gdf = ox.features_from_bbox(bbox=bbox, tags=tags)

# Save to GeoPackage
gdf.to_file("paris_buildings.gpkg", driver="GPKG")
print("✅ Done! Exported to paris_buildings.gpkg")


  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


KeyboardInterrupt: 

In [3]:
import osmnx as ox
print(ox.__version__)


2.0.2
