<a href="https://colab.research.google.com/github/Van-Wu1/cycle/blob/main/scr/py/s1_road_speed_trans.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# IPython shell escape: list the contents of the project data folder.
!ls '/content/drive/MyDrive/CASA0004_Cycling/data'

Mounted at /content/drive
BoroughShp  GreatLondonShp  s1	s2_Env	s3


In [15]:
import json
import re
import os
import geopandas as gpd

from pyproj import CRS

In [19]:
# Input / output folder paths
in_dir = "/content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTcleanedV2"
out_dir = "/content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted"
# Create the output folder if it does not exist yet (no error if it does).
os.makedirs(out_dir, exist_ok=True)

# Layer loaded from the GreatLondonShp shapefile.
shp = gpd.read_file("/content/drive/MyDrive/CASA0004_Cycling/data/GreatLondonShp/GreatLondonShp.shp")

In [20]:
# ========= Coordinate reference systems =========
TARGET_CRS = CRS("EPSG:27700")   # Target: British National Grid (metre units)
ASSUME_SRC = CRS("EPSG:4326")    # Fallback when the data carries no CRS: assume WGS84

def to_target_crs(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return *gdf* expressed in ``TARGET_CRS``.

    A frame without any CRS is first labelled with ``ASSUME_SRC``; the frame
    is then reprojected unless its CRS string already equals the target's.
    """
    target = TARGET_CRS.to_string()

    # No CRS recorded: tag the coordinates with the assumed source CRS.
    if gdf.crs is None:
        gdf = gdf.set_crs(ASSUME_SRC.to_string(), allow_override=True)

    # Already in the target CRS (compared by string form): nothing to do.
    if gdf.crs.to_string() == target:
        return gdf

    return gdf.to_crs(target)

# Bring the clip boundary into the target CRS as well, up front.
shp = to_target_crs(shp)

# ========= mph -> km/h =========
# A number (integer or decimal), optional whitespace, then the word "mph".
mph_re = re.compile(r'(\d+(?:\.\d+)?)\s*mph\b', re.IGNORECASE)


def convert_maxspeed(val):
    """Convert a ``maxspeed`` value given in mph to a km/h string.

    Args:
        val: Raw attribute value (string, number, ``None`` or NaN).

    Returns:
        * ``None`` / float NaN unchanged, so missing values stay missing
          instead of turning into the literal text ``"nan"``.
        * The rounded km/h figure as a string when the value contains
          ``<number> mph`` (first occurrence, case-insensitive).
        * ``str(val)`` otherwise.
    """
    if val is None:
        return val
    # Float NaN is the only value that is not equal to itself; pass it through
    # untouched rather than stringifying it.
    if isinstance(val, float) and val != val:
        return val
    s = str(val)
    m = mph_re.search(s)          # search, not match: tolerate leading characters
    if not m:
        return s
    kmh = round(float(m.group(1)) * 1.60934)
    return str(kmh)

# ========= Keep line geometries only =========
def keep_only_lines(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Filter *gdf* down to line features and split multi-part lines.

    Rows with a missing or empty geometry are dropped, then only
    ``LineString`` / ``MultiLineString`` rows are kept. If nothing is left the
    (empty) filtered copy is returned as is; otherwise the frame is exploded
    with a fresh index.
    """
    geoms = gdf.geometry
    has_geometry = geoms.notna() & ~geoms.is_empty
    populated = gdf[has_geometry]

    is_line = populated.geometry.geom_type.isin(["LineString", "MultiLineString"])
    lines = populated[is_line].copy()
    if lines.empty:
        return lines

    # MultiLineString -> individual LineString rows
    return lines.explode(index_parts=False, ignore_index=True)

# ========= Batch processing =========
# For every GeoJSON file in in_dir: convert maxspeed, reproject, keep lines,
# clip to the boundary layer and write the result into out_dir.
for fname in sorted(os.listdir(in_dir)):
    if not fname.lower().endswith(".geojson"):
        continue

    # Strip the extension by length instead of str.replace(): the check above
    # is case-insensitive, whereas replace() is case-sensitive and would also
    # rewrite any ".geojson" occurring in the middle of the name.
    stem = fname[: -len(".geojson")]

    in_path  = os.path.join(in_dir, fname)
    out_path = os.path.join(out_dir, f"{stem}_converted_clipped.geojson")

    gdf = gpd.read_file(in_path)

    # 1) maxspeed conversion (only when the column exists)
    if "maxspeed" in gdf.columns:
        gdf["maxspeed"] = gdf["maxspeed"].apply(convert_maxspeed)

    # 2) unify the coordinate reference system
    gdf = to_target_crs(gdf)

    # 3) keep lines only (drop points / polygons)
    gdf = keep_only_lines(gdf)

    # 4) clip (the polygon layer can be used directly as the mask)
    gdf_clip = gpd.clip(gdf, shp)

    # 5) save
    gdf_clip.to_file(out_path, driver="GeoJSON")
    print(f"{fname}: {len(gdf_clip)} features → {out_path}")

print("✅ 全部文件完成")


export (1).geojson: 5079 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (1)_converted_clipped.geojson
export (2).geojson: 15729 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (2)_converted_clipped.geojson
export (3).geojson: 2857 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (3)_converted_clipped.geojson
export (4).geojson: 25379 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (4)_converted_clipped.geojson
export (5).geojson: 58731 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (5)_converted_clipped.geojson
export (6).geojson: 15570 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (6)_converted_clipped.geojson
export (7).geojson: 10153 features → /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/exp

In [4]:
# Regex for values that start with "<number> mph"
pattern = re.compile(r'(\d+(?:\.\d+)?)\s*mph', re.IGNORECASE)

# Walk every .geojson file in the input folder (sorted for a stable order)
for filename in sorted(os.listdir(in_dir)):
    if not filename.lower().endswith('.geojson'):
        continue

    # Strip the extension by length instead of str.replace(): the check above
    # is case-insensitive, whereas replace() is case-sensitive and would also
    # rewrite any ".geojson" occurring in the middle of the name.
    stem = filename[: -len('.geojson')]

    in_path = os.path.join(in_dir, filename)
    out_path = os.path.join(out_dir, f'{stem}_converted.geojson')

    with open(in_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Number of maxspeed values rewritten in this file
    count = 0
    for feat in data.get('features', []):
        # GeoJSON permits "properties": null, so .get() may return None;
        # treat that the same as "no properties" instead of crashing.
        props = feat.get('properties') or {}
        ms = props.get('maxspeed', '')
        m = pattern.match(str(ms))
        if m:
            mph = float(m.group(1))
            kmh = mph * 1.60934
            # Stored back as a rounded integer string, in place.
            props['maxspeed'] = f'{round(kmh)}'
            count += 1

    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f'{filename} 处理完成：共转换 {count} 条 maxspeed，输出到 {out_path}')

print("✅ 全部文件处理完成！")


export (1).geojson 处理完成：共转换 6052 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (1)_converted.geojson
export (2).geojson 处理完成：共转换 7974 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (2)_converted.geojson
export (3).geojson 处理完成：共转换 1927 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (3)_converted.geojson
export (4).geojson 处理完成：共转换 11993 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (4)_converted.geojson
export (5).geojson 处理完成：共转换 40852 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (5)_converted.geojson
export (6).geojson 处理完成：共转换 6931 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data/s1/Roads_OT/OTSpeedConverted/export (6)_converted.geojson
export (7).geojson 处理完成：共转换 8055 条 maxspeed，输出到 /content/drive/MyDrive/CASA0004_Cycling/data