In [5]:
import geopandas as gpd
import networkx as nx
import osmnx as ox

In [6]:
# Load the cleaned road-edge layer. read_file already returns a brand-new
# GeoDataFrame, so no extra .copy() is needed before working on it.
gdf = gpd.read_file("../data/Roads/edges_clean.gpkg")

In [7]:
# NOTE: disabled attribute filter, kept for reference only; nothing in this
# cell runs. If re-enabled it would keep edges whose `access` value is missing
# or matches one of the permitted values (exact match or substring), drop
# edges with tunnel == 'yes', and drop 'unclassified' highways that have no
# `access` value, then print how many edges remain.
# filtered = gdf[
#     (
#         gdf["access"].isna() |
#         gdf["access"].isin(['yes', 'permissive', 'designated', 'customers', 'destination']) |
#         gdf["access"].astype(str).str.contains("yes", na=False) |
#         gdf["access"].astype(str).str.contains("permissive", na=False) |
#         gdf["access"].astype(str).str.contains("destination", na=False)
#     ) &
#     (gdf["tunnel"] != 'yes') &
#     ~((gdf["highway"] == 'unclassified') & (gdf["access"].isna()))
# ]

# print(f"属性筛选后剩余边数: {len(filtered)}")

In [8]:
# Build an undirected NetworkX graph from the edge table and keep only the
# edges that belong to sufficiently large connected components.

# Minimum number of nodes a connected component needs in order to be kept.
MIN_COMPONENT_NODES = 300

# The edge table must carry 'u' and 'v' columns holding the endpoint node IDs.
if "u" not in gdf.columns or "v" not in gdf.columns:
    raise ValueError("边数据中必须包含 'u' 和 'v' 列来表示网络节点")

# Add all edges in one pass over the columns instead of DataFrame.iterrows(),
# which builds a Series per row and is slow on large edge tables.
# Each edge still records the row label it came from under the 'index' key;
# for repeated (u, v) rows the last row wins, as with repeated add_edge calls.
G = nx.Graph()
G.add_edges_from(
    (u, v, {"index": idx})
    for idx, u, v in zip(gdf.index, gdf["u"], gdf["v"])
)

# Node sets of every connected component.
subgraphs = list(nx.connected_components(G))

# Union of the nodes of all components that meet the size threshold.
kept_nodes = set()
for component in subgraphs:
    if len(component) >= MIN_COMPONENT_NODES:
        kept_nodes.update(component)

# Keep the edges whose two endpoints are both in a retained component.
filtered_final = gdf[
    gdf['u'].isin(kept_nodes) & gdf['v'].isin(kept_nodes)
]

# NOTE: the message below says "largest connected subgraph", but the count
# covers every component with at least MIN_COMPONENT_NODES nodes.
print(f"最大联通子图内的边数: {len(filtered_final)}")

最大联通子图内的边数: 248066


In [9]:
# Write the component-filtered edges to a GeoPackage file and report the path.
output_path = "../data/Roads/edges_final.gpkg"
filtered_final.to_file(filename=output_path, driver="GPKG")

print(f"已导出为 GPKG 文件：{output_path}")

已导出为 GPKG 文件：../data/Roads/edges_final.gpkg
