<a href="https://colab.research.google.com/github/Van-Wu1/cycle/blob/main/scr/py/s3_index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ============= 安装依赖（Colab 第一格运行） =============
!pip -q install geopandas shapely pyproj fiona rtree python-igraph tqdm

In [2]:
# ============= 导入包 =============
import random
from pathlib import Path

import geopandas as gpd
import igraph as ig
import numpy as np
from tqdm.auto import tqdm

In [15]:
# Mount Google Drive under /content/drive (requires the google.colab package).
from google.colab import drive
drive.mount('/content/drive')
# List the project data folder on the mounted drive.
!ls '/content/drive/MyDrive/CASA0004_Cycling/data'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
BoroughShp  GreatLondonShp  s1	s2_Env	s3


In [16]:
# ============= Parameters (edit as needed) =============
# Input: road network (line features) already clipped to the London extent
IN_GPKG = "/content/drive/MyDrive/CASA0004_Cycling/data/s3/emptyroad/edges_s3.gpkg"
# Alternative test input (disabled):
#IN_GPKG = "/content/drive/MyDrive/CASA0004_Cycling/data/s3/emptyroad/CQIwithoutslop9.geojson"
IN_LAYER = None  # For a multi-layer GPKG set the layer name; use None for a single-layer file or GeoJSON

# Output path. NOTE: despite the variable name, this path has a .gpkg extension.
OUT_GEOJSON = "/content/drive/MyDrive/CASA0004_Cycling/data/s3/export/cen_s3.gpkg"

In [17]:
# Peek at the attribute columns of the input before processing.
# IN_LAYER is honoured here so that, for a multi-layer GPKG, the columns shown
# come from the same layer that the loading cell reads.
if IN_LAYER:
    ziduan = gpd.read_file(IN_GPKG, layer=IN_LAYER)
else:
    ziduan = gpd.read_file(IN_GPKG)
print(ziduan.columns)

Index(['id', 'name', 'way_type', 'geometry'], dtype='object')


In [19]:
# Name of the length-weight column (geometry.length is used when it is absent)
LEN_FIELD = "metres"    # if this column is missing, geometry.length is used instead
TOL = 1.0               # endpoint quantisation tolerance (metres), used to snap nearby endpoints together
SEED = 42               # random seed

# Betweenness computation mode
BET_MODE = "approx"     # "exact" or "approx"
K_SAMPLES = 1200        # number of source nodes sampled in approx mode (suggested: 500-3000, tune to the machine)

# Closeness option
CLOSENESS_HARMONIC = True   # the harmonic definition is recommended for disconnected graphs

In [20]:
# ============= Load and preprocess =============
# Read the road network, selecting IN_LAYER when one is configured.
if IN_LAYER:
    roads = gpd.read_file(IN_GPKG, layer=IN_LAYER)
else:
    roads = gpd.read_file(IN_GPKG)

# Bring the data into EPSG:27700 (British National Grid, metres) so that
# geometry lengths and the snapping tolerance are expressed in metres.
if roads.crs is None:
    # No CRS recorded: the coordinates are taken to be EPSG:27700 already.
    roads = roads.set_crs(27700)
elif str(roads.crs).endswith("4326"):
    # A file labelled EPSG:4326 is either genuinely lon/lat (must be
    # reprojected) or projected data carrying the wrong label (must only be
    # relabelled). The coordinate range tells the two cases apart: lon/lat
    # always fits inside [-180, 180] x [-90, 90], grid coordinates for London
    # do not.
    minx, miny, maxx, maxy = roads.total_bounds
    looks_geographic = (-180 <= minx <= maxx <= 180) and (-90 <= miny <= maxy <= 90)
    if looks_geographic:
        roads = roads.to_crs(27700)
    else:
        roads = roads.set_crs(27700, allow_override=True)

# Split multi-part geometries into single parts and drop empty/missing ones.
roads = roads.explode(index_parts=False, ignore_index=True)
roads = roads[~roads.geometry.is_empty & roads.geometry.notna()].copy()

# Edge length used as the graph weight.
# NOTE(review): explode() repeats attribute values on every part, so a
# LEN_FIELD value coming from a multi-part feature is copied to each of its
# parts - confirm the input is single-part when LEN_FIELD is present.
if LEN_FIELD in roads.columns:
    roads["length_m"] = roads[LEN_FIELD].astype(float)
else:
    roads["length_m"] = roads.geometry.length

In [21]:
# ============= 建图：端点 -> 节点；路段 -> 边（带 orig_road 映射） =============
def qpt(xy, tol=TOL):
    """Snap a coordinate to the nearest multiple of ``tol`` on both axes.

    Only the first two components of ``xy`` are read. Returns an ``(x, y)``
    tuple, so coordinates that round to the same grid point compare equal.
    """
    x, y = xy[0], xy[1]
    snapped_x = round(x / tol) * tol
    snapped_y = round(y / tol) * tol
    return (snapped_x, snapped_y)

# Remove missing, empty and zero-length geometries, then renumber the rows so
# that row labels and row positions coincide.
valid_geom = ~(roads.geometry.isna() | roads.geometry.is_empty)
roads = roads.loc[valid_geom].copy()

geom_len = roads.geometry.length
bad_zero = geom_len.le(0)
n_zero = bad_zero.sum()
if n_zero > 0:
    print(f"[WARN] 发现 {n_zero} 条零长度几何，已剔除。")
    roads = roads.loc[~bad_zero].copy()

roads = roads.reset_index(drop=True)

# Containers filled while walking the road rows.
node_index = {}      # snapped (x, y) -> vertex id
nodes_xy = []        # vertex id -> snapped (x, y)
edges_uv = []        # one (u, v) vertex-id pair per road row
edge_weights = []    # edge weight, parallel to edges_uv
edge_orig_row = []   # row position in `roads`, parallel to edges_uv

road_iter = tqdm(zip(roads.geometry, roads["length_m"]), total=len(roads), desc="Build graph")
for i, (geom, w) in enumerate(road_iter):
    coords = list(geom.coords)
    u_xy, v_xy = qpt(coords[0]), qpt(coords[-1])
    # Give an id to every endpoint that has not been seen before.
    for xy in (u_xy, v_xy):
        vid = node_index.setdefault(xy, len(nodes_xy))
        if vid == len(nodes_xy):
            nodes_xy.append(xy)
    u, v = node_index[u_xy], node_index[v_xy]
    edges_uv.append((u, v))
    length = float(w)
    edge_weights.append(length if length > 0 else 1e-6)  # keep weights strictly positive
    edge_orig_row.append(i)

# Undirected graph; parallel edges are kept as separate edges.
g = ig.Graph(n=len(nodes_xy), edges=edges_uv)
g.vs["xy"] = nodes_xy
g.es["length"] = edge_weights
g.es["orig_road"] = edge_orig_row

# Optionally restrict the analysis to the largest connected component.
USE_GIANT = False  # set True to reduce computation; roads outside the giant component then stay NaN
if USE_GIANT:
    # Graph.clusters() is deprecated in recent python-igraph releases in favour
    # of connected_components(); fall back to clusters() on older versions.
    find_components = getattr(g, "connected_components", None) or g.clusters
    comps = find_components()
    gi = comps.giant()
    # The giant subgraph keeps the edge/vertex attributes (orig_road, xy, ...).
    G = gi
    print(f"[INFO] 使用 giant 子图：V={G.vcount()} E={G.ecount()} / 原图 V={g.vcount()} E={g.ecount()}")
else:
    G = g
    print(f"[INFO] 使用全图：V={G.vcount()} E={G.ecount()}")

Build graph:   0%|          | 0/164138 [00:00<?, ?it/s]

[INFO] 使用全图：V=184964 E=164138


In [None]:
# ============= Closeness (harmonic when requested and available; otherwise per-component closeness) =============
# Node scores end up in `closeness_nodes`; each edge then receives the mean of
# its two endpoint scores, stored as the edge attribute "closeness_hc".
closeness_nodes = None

if CLOSENESS_HARMONIC:
    try:
        # Available in recent python-igraph releases.
        closeness_nodes = np.array(G.harmonic_centrality(weights="length"), dtype=float)
        print("[INFO] 使用 G.harmonic_centrality(weights='length')")
    except AttributeError as e:
        # Only a missing method triggers the fallback. Any other failure is
        # raised, so a different metric is never substituted silently.
        print(f"[INFO] harmonic_centrality 不可用，改为分连通分量的 closeness；原因：{type(e).__name__}: {e}")
else:
    print("[INFO] CLOSENESS_HARMONIC=False，改用分连通分量的 closeness")

if closeness_nodes is None:
    # Standard closeness computed inside each connected component and written
    # back to the positions of that component's vertices in the full graph.
    # NOTE: these values are a different measure from harmonic centrality and
    # are not comparable with it.
    find_components = getattr(G, "connected_components", None) or G.clusters
    comps = find_components()
    closeness_nodes = np.zeros(G.vcount(), dtype=float)
    for comp in comps:  # comp: list of vertex ids belonging to one component
        if len(comp) == 1:
            continue  # isolated vertex: score stays 0.0
        H = G.induced_subgraph(comp)
        vals = np.array(H.closeness(weights="length"), dtype=float)
        closeness_nodes[np.array(comp, dtype=int)] = vals

# Map node scores to edges: mean of the two endpoints.
# reshape(-1, 2) keeps the column indexing valid for a graph without edges.
endpoints = np.array(G.get_edgelist(), dtype=int).reshape(-1, 2)
s_idx = endpoints[:, 0]
t_idx = endpoints[:, 1]
closeness_edges = ((closeness_nodes[s_idx] + closeness_nodes[t_idx]) / 2.0).astype(float)

# Stored under the same attribute name regardless of which measure was used.
G.es["closeness_hc"] = closeness_edges.tolist()

In [11]:
# ============= Edge betweenness =============
# Writes the edge attribute "betweenness_edge" on G, either exactly or from a
# random sample of source nodes, depending on BET_MODE.
random.seed(SEED)
np.random.seed(SEED)

if BET_MODE not in ("exact", "approx"):
    # A misspelt mode would otherwise fall through to the approximate branch.
    raise ValueError(f"BET_MODE must be 'exact' or 'approx', got {BET_MODE!r}")

if BET_MODE == "exact":
    with tqdm(total=1, desc="Edge betweenness (exact)") as pbar:
        eb = G.edge_betweenness(weights="length")
        G.es["betweenness_edge"] = eb
        pbar.update(1)
else:
    if int(K_SAMPLES) < 1:
        raise ValueError(f"K_SAMPLES must be >= 1, got {K_SAMPLES!r}")

    # Approximation: pick random source nodes and count, for every edge, how
    # many of the returned shortest paths (one per source/target pair) use it.
    all_nodes = list(range(G.vcount()))
    # The clamped sample size is kept in a local name so the K_SAMPLES setting
    # is not overwritten when this cell runs on a small graph.
    n_sources = min(int(K_SAMPLES), len(all_nodes))
    counts = np.zeros(G.ecount(), dtype=np.float64)
    sources = random.sample(all_nodes, n_sources)

    for s in tqdm(sources, desc=f"Edge betweenness approx (K={n_sources})"):
        epaths = G.get_shortest_paths(s, to=all_nodes, weights="length", output="epath")
        for epath in epaths:
            if not epath:  # the source itself, or an unreachable target
                continue
            counts[epath] += 1.0

    # Mean number of paths per sampled source. NOTE: this is not on the same
    # scale as the values produced by the "exact" branch.
    if n_sources:
        counts /= n_sources
    G.es["betweenness_edge"] = counts.tolist()

Edge betweenness approx (K=1200):   0%|          | 0/1200 [00:00<?, ?it/s]

  epaths = G.get_shortest_paths(s, to=all_nodes, weights="length", output="epath")


In [12]:
# ============= Write results back to the GeoDataFrame (matched through orig_road) =============
# Pull the row references and the two metrics out of the graph.
orig_idx = np.array(G.es["orig_road"])
bet_vals = np.array(G.es["betweenness_edge"], dtype=float)
clo_vals = np.array(G.es["closeness_hc"], dtype=float)

# Each column starts as NaN and is then filled only for the rows that have an
# edge in G (rows dropped by the giant-component option or by filtering stay NaN).
for column, values in (("edge_betweenness", bet_vals), ("edge_closeness_hc", clo_vals)):
    roads[column] = np.nan
    roads.loc[orig_idx, column] = values

# Summary report
tot = len(roads)
nb_nan_bet = roads["edge_betweenness"].isna().sum()
nb_nan_clo = roads["edge_closeness_hc"].isna().sum()
report_lines = (
    f"[STAT] 回写完成：总边 {tot:,}",
    f"       betweenness  NaN: {nb_nan_bet:,}  ({nb_nan_bet/tot:.1%})",
    f"       closeness_hc NaN: {nb_nan_clo:,}  ({nb_nan_clo/tot:.1%})",
)
for line in report_lines:
    print(line)

# If many NaN values remain, check:
# 1) whether USE_GIANT=True is set (rows outside the giant component are always NaN)
# 2) whether zero-length geometries are still present (they are removed / given a floor weight earlier)
# 3) whether the CRS is EPSG:27700 (lengths are distorted otherwise)
# 4) whether any MultiLineString was left unexploded (explode is applied earlier)

[STAT] 回写完成：总边 6,555
       betweenness  NaN: 0  (0.0%)
       closeness_hc NaN: 0  (0.0%)


In [13]:
# ============= Export =============
# Create the export folder if needed so the write does not fail on a missing
# directory.
Path(OUT_GEOJSON).parent.mkdir(parents=True, exist_ok=True)
# "GPKG" is the canonical GDAL driver name for GeoPackage; the lower-case
# spelling is not accepted by every I/O backend.
roads.to_file(OUT_GEOJSON, driver="GPKG")
print("Saved:", OUT_GEOJSON)
print("Rows:", len(roads))

Saved: /content/drive/MyDrive/CASA0004_Cycling/data/s3/export/cen_s3.gpkg
Rows: 6555


In [14]:
# List the columns currently present on `roads`.
print(roads.columns)

Index(['id', 'name', 'way_type', 'index', 'index_10', 'stress_level', 'offset',
       'side', 'proc_width', 'proc_surface', 'proc_smoothness', 'proc_oneway',
       'proc_sidepath', 'proc_highway', 'proc_maxspeed',
       'proc_traffic_mode_left', 'proc_traffic_mode_right',
       'proc_separation_left', 'proc_separation_right', 'proc_buffer_left',
       'proc_buffer_right', 'proc_mandatory', 'proc_traffic_sign', 'fac_width',
       'fac_surface', 'fac_highway', 'fac_maxspeed', 'base_index', 'fac_1',
       'fac_2', 'fac_3', 'fac_4', 'data_bonus', 'data_malus',
       'data_incompleteness', 'data_missing', 'data_missing_width',
       'data_missing_surface', 'data_missing_smoothness',
       'data_missing_maxspeed', 'data_missing_parking', 'data_missing_lit',
       'filter_usable', 'filter_way_type', 'proc_slope', 'fac_5', 'geometry',
       'length_m', 'edge_betweenness', 'edge_closeness_hc'],
      dtype='object')
