In [32]:
import pandas as pd
import geopandas as gpd
import mercantile
from tqdm import tqdm
import os
import tempfile
import requests

In [None]:
def get_bounds(geom):
    """
    Get bounds of a GeoJSON geometry.
    """
    from shapely import geometry
    aoi_shape = geometry.shape(aoi_geom)
    minx, miny, maxx, maxy = aoi_shape.bounds
    return (minx, miny, maxx, maxy)

# Geometry copied from https://geojson.io
# aoi_geom = {
#     "coordinates": [
#         [
#             [-122.16484503187519, 47.69090474454916],
#             [-122.16484503187519, 47.6217555345674],
#             [-122.06529607517405, 47.6217555345674],
#             [-122.06529607517405, 47.69090474454916],
#             [-122.16484503187519, 47.69090474454916],
#         ]
#     ],
#     "type": "Polygon",
# }
aoi_geom = {
    "coordinates": [
        [
            [-76.15741548689954, 43.05692144640927], 
            [-76.15741548689954, 43.05635088078997],  
            [-76.15648427005196, 43.05635088078997],  
            [-76.15648427005196, 43.05692144640927],  
            [-76.15741548689954, 43.05692144640927], 
        ]
    ],
    "type": "Polygon"
}
aoi_shape = geometry.shape(aoi_geom)

minx, miny, maxx, maxy = aoi_shape.bounds

In [None]:
# zoom value 9 to match https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv 
quad_keys = set()
for tile in list(mercantile.tiles(minx, miny, maxx, maxy, zooms=9)):
    quad_keys.add(mercantile.quadkey(tile))
quad_keys = list(quad_keys)
print(f"The input area spans {len(quad_keys)} tiles: {quad_keys}")

The input area spans 1 tiles: ['030232211']


In [21]:
df = pd.read_csv(
    "https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv", dtype=str
)
df.head()

Unnamed: 0,Location,QuadKey,Url,Size,UploadDate
0,Abyei,122320113,https://minedbuildings.z5.web.core.windows.net...,74.5KB,2025-02-28
1,Abyei,122320131,https://minedbuildings.z5.web.core.windows.net...,8.3KB,2025-02-28
2,Abyei,122321002,https://minedbuildings.z5.web.core.windows.net...,392.2KB,2025-02-28
3,Abyei,122321003,https://minedbuildings.z5.web.core.windows.net...,72.8KB,2025-02-28
4,Abyei,122321020,https://minedbuildings.z5.web.core.windows.net...,1.2MB,2025-02-28


In [None]:
idx = 0
combined_gdf = gpd.GeoDataFrame()
with tempfile.TemporaryDirectory() as tmpdir:
    # Download the GeoJSON files for each tile that intersects the input geometry
    tmp_fns = []
    for quad_key in tqdm(quad_keys):
        rows = df[df["QuadKey"] == quad_key]
        if rows.shape[0] == 1:
            url = rows.iloc[0]["Url"]

            # Save JSON in the same directory as the notebook
            json_fn = os.path.join('/Users/huajunchen/Library/Project/Python/segment-geo/segment_geospatial_api/notebook/global_buildings', f"{quad_key}_raw.json")
            if not os.path.exists(json_fn):
                response = requests.get(url)
                # 检查响应状态
                response.raise_for_status()
                # 验证内容是否为空
                if not response.text.strip():
                    raise ValueError(f"Empty response from URL: {url}")
                # 保存原始响应
                with open(json_fn, 'w') as f:
                    f.write(response.text)
            # 尝试逐行读取JSON
            try:
                df2 = pd.read_json(json_fn, lines=True)
            except ValueError as e:
                print(f"Error reading {json_fn}: {str(e)}")
                # 如果读取失败，尝试重新下载
                response = requests.get(url)
                response.raise_for_status()
                with open(json_fn, 'w') as f:
                    f.write(response.text)
                df2 = pd.read_json(json_fn, lines=True)
            df2["geometry"] = df2["geometry"].apply(geometry.shape)

            gdf = gpd.GeoDataFrame(df2, crs=4326)
            fn = os.path.join(tmpdir, f"{quad_key}.geojson")
            tmp_fns.append(fn)
            if not os.path.exists(fn):
                gdf.to_file(fn, driver="GeoJSON")
        elif rows.shape[0] > 1:
            raise ValueError(f"Multiple rows found for QuadKey: {quad_key}")
        else:
            raise ValueError(f"QuadKey not found in dataset: {quad_key}")

    # Merge the GeoJSON files into a single file
    for fn in tmp_fns:
        gdf = gpd.read_file(fn)  # Read each file into a GeoDataFrame
        gdf = gdf[gdf.geometry.within(aoi_shape)]  # Filter geometries within the AOI
        gdf['id'] = range(idx, idx + len(gdf))  # Update 'id' based on idx
        idx += len(gdf)
        combined_gdf = pd.concat([combined_gdf,gdf],ignore_index=True)

  0%|          | 0/1 [00:02<?, ?it/s]


ValueError: Expected object or value

In [29]:
output_fn = "example_building_footprints.geojson"
combined_gdf = combined_gdf.to_crs('EPSG:4326')
combined_gdf.to_file(output_fn, driver='GeoJSON')