In [1]:
import copy
import os
import time
from pathlib import Path

import utils
from loguru import logger
from retrying import retry

In [2]:
# Every downloaded artifact is written beneath ../data (relative to the
# notebook's working directory); create the directory tree if it is missing.
data_dirpath = Path("../data")
data_dirpath.mkdir(exist_ok=True, parents=True)

# Output GeoJSON files: the county-level districts and the nine-dash line.
district_filepath = data_dirpath.joinpath("cn_district.json")
nine_line_filepath = data_dirpath.joinpath("nine_line.json")

高德 web API 查询行政区域：[https://lbs.amap.com/api/webservice/guide/api/district](https://lbs.amap.com/api/webservice/guide/api/district)

- 按入门指南注册并申请密钥（key）
- 拼接 HTTP 请求的 URL
- 解析返回的数据，转换成 GeoJSON 格式并保存
- 下载区县数据，后续用来合成省市数据

In [3]:
# AMap web-service key. It is read from the AMAP_KEY environment variable so
# the secret never has to be saved inside the notebook. When the variable is
# unset this falls back to the empty string (the previous placeholder), which
# can still be overridden locally -- but do not commit a real key.
amap_key = os.environ.get("AMAP_KEY", "")

In [4]:
@retry(wait_fixed=1000, stop_max_attempt_number=5)
def get_amap_json(url: str) -> dict:
    """Request an AMap web API URL and return the decoded JSON response.

    The call is wrapped by ``retry(wait_fixed=1000, stop_max_attempt_number=5)``,
    so any exception raised here (including the ``RuntimeError`` below) causes
    the request to be attempted again until the attempt limit is reached.

    Args:
        url: Fully assembled request URL, including the ``key`` parameter.

    Returns:
        The response dictionary as returned by ``utils.get_api_json``.

    Raises:
        RuntimeError: If the response reports ``status == "0"``.
    """
    data = utils.get_api_json(url)
    if data["status"] == "0":
        # Surface the API's own diagnostics so a failure (bad key, quota, ...)
        # can be told apart. The URL is deliberately left out of the message
        # because it embeds the API key.
        raise RuntimeError(
            "AMap API request failed: "
            f"info={data.get('info')!r}, infocode={data.get('infocode')!r}"
        )

    return data

收集所有区县的元数据。没有区县的省市用它自己代替区县。

In [5]:
# Fetch the administrative tree with three levels of sub-districts in a single
# request (subdistrict=3).
url = f"https://restapi.amap.com/v3/config/district?key={amap_key}&subdistrict=3"
data = get_amap_json(url)


def make_district_properties(province: dict, city: dict, district: dict) -> dict:
    """Build the metadata record of one district.

    Each argument is a node of the AMap district tree; only its ``"name"`` and
    ``"adcode"`` entries are read. The same node may be passed for several
    levels when a region has no finer subdivision.
    """
    return {
        "province_name": province["name"],
        "province_adcode": province["adcode"],
        "city_name": city["name"],
        "city_adcode": city["adcode"],
        "district_name": district["name"],
        "district_adcode": district["adcode"],
    }


properties_list = []

# Walk the provinces.
for province in data["districts"][0]["districts"]:
    # Taiwan only has province-level data: the province stands in for both the
    # city and the district.
    if not province["districts"]:
        properties_list.append(make_district_properties(province, province, province))
        continue

    # Walk the cities.
    for city in province["districts"]:
        # Hong Kong and Macau: the level right below the province is already a
        # district, so the province doubles as the city.
        if city["level"] == "district":
            properties_list.append(make_district_properties(province, province, city))
            continue

        # A city node without any children would otherwise be dropped
        # silently; use the city itself as its only district.
        if not city["districts"]:
            properties_list.append(make_district_properties(province, city, city))
            continue

        # Walk the districts.
        for district in city["districts"]:
            # Some cities and province-administered counties have streets
            # directly beneath them: record the city itself once and stop.
            if district["level"] == "street":
                properties_list.append(make_district_properties(province, city, city))
                break

            properties_list.append(make_district_properties(province, city, district))

for properties in properties_list:
    # Strip the parenthesised remark from this one district name.
    if properties["district_name"] == "澳门大学横琴校区(由澳门实施管辖)":
        properties["district_name"] = "澳门大学横琴校区"
    properties["short_name"] = utils.shorten_district_name(properties["district_name"])
    # The adcodes are converted to integers so that records can be sorted
    # numerically below.
    for adcode_key in ("province_adcode", "city_adcode", "district_adcode"):
        properties[adcode_key] = int(properties[adcode_key])

properties_list.sort(key=lambda x: x["district_adcode"])

高德 API 返回的 `polyline` 字符串表示多边形坐标序列，经度和纬度用 `,` 分隔，点与点之间用 `;` 分隔，`MultiPolygon` 类型的多个多边形用 `|` 分隔坐标序列。问题是：如何明确指定多边形的洞？

测试后发现，`polyline` 不是用单独的环表示洞，而是将带洞的多边形切成两个独立的多边形，当这两个多边形拼在一起时就会凑出一个洞。这里直接用 `shapely.union_all` 合并一条 `polyline` 里含有的所有多边形，就自然能产生洞。具体实现见 `utils.polyline_to_polygon` 函数。

将 `polyline` 转换成 `shapely` 的多边形对象后，外环坐标沿顺时针方向（CW）排列，内环坐标沿逆时针方向（CCW）排列，这刚好和 GeoJSON 推荐的方向相反。为此在 `utils.polygon_to_geometry` 函数中用 shapely 自带的调整方向的函数进行调节。

高德 API 的数据应该是 GCJ-02 坐标系的，这里采用 PRCoords 库转换成 WGS84 坐标系。

In [None]:
# Download the boundary of every collected district, one request per adcode,
# and turn each into a GeoJSON feature.
features = []
for district_properties in properties_list:
    adcode = district_properties["district_adcode"]
    url = (
        "https://restapi.amap.com/v3/config/district"
        f"?key={amap_key}&keywords={adcode}"
        "&subdistrict=0&extensions=all"
    )
    response = get_amap_json(url)

    # Build the polygon from the raw polyline string, then convert its
    # GeoJSON-style dict from GCJ-02 to WGS84 with 6 decimals.
    polygon = utils.polyline_to_polygon(response["districts"][0]["polyline"])
    geometry_dict = utils.gcj_geometry_dict_to_wgs(
        utils.geometry_to_geometry_dict(polygon),
        decimals=6,
        check_validity=True,
    )

    # Work on a copy so the entries of properties_list stay untouched.
    feature_properties = copy.deepcopy(district_properties)
    # NOTE(review): the center is computed from `polygon`, which has not gone
    # through the GCJ-02 -> WGS84 conversion applied to `geometry_dict` --
    # confirm whether lon/lat are meant to be in the same system as the geometry.
    lon, lat = utils.get_geometry_center(polygon)
    feature_properties.update(lon=round(lon, 6), lat=round(lat, 6))

    features.append(utils.make_feature(geometry_dict, feature_properties))
    logger.info(feature_properties)
    # Short pause between requests (presumably to respect the API rate limit).
    time.sleep(0.25)

district_data = utils.make_geojson(features)
utils.dump_json(district_filepath, district_data)
logger.info("区县数据下载完成")

阿里云 DataV 数据来源：[https://datav.aliyun.com/portal/school/atlas/area_selector](https://datav.aliyun.com/portal/school/atlas/area_selector)

阿里云的全国 GeoJSON 数据里含有多边形类型的九段线数据，以此制作九段线的 GeoJSON 文件。

In [None]:
# Fetch the nationwide GeoJSON published by Aliyun DataV.
nine_line_url = "https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json"
china_geojson = utils.get_api_json(nine_line_url)

# The last feature of the file is taken as the nine-dash line; its geometry is
# run through the same GCJ-02 -> WGS84 conversion used for the district data.
nine_line_geometry = utils.gcj_geometry_dict_to_wgs(
    china_geojson["features"][-1]["geometry"],
    decimals=6,
    check_validity=True,
)

# Wrap the geometry into a single-feature GeoJSON document and save it.
nine_line_feature = utils.make_feature(nine_line_geometry, {"name": "九段线"})
nine_line_data = utils.make_geojson([nine_line_feature])
utils.dump_json(nine_line_filepath, nine_line_data)
logger.info("九段线数据下载完成")