In [1]:
import json
import os
import time
from pathlib import Path

import requests
import shapefile

from amap import (
    polygon_center,
    shorten_dt_name,
    polyline_to_polygon,
    polygon_to_polys,
    gcj_to_wgs,
    make_prj_file,
)

In [2]:
# Output directory for every file this notebook produces. Create it together
# with any missing parents, and do nothing if it already exists, so the cell
# can be re-run safely.
data_dirpath = Path('../data')
data_dirpath.mkdir(parents=True, exist_ok=True)

# Scratch shapefile, plus the district (cn_district) and nine-dash-line
# (nine_line) outputs with their .prj companions.
temp_filepath = data_dirpath / 'temp.shp'
dt_shp_filepath = data_dirpath / 'cn_district.shp'
dt_prj_filepath = data_dirpath / 'cn_district.prj'
nl_shp_filepath = data_dirpath / 'nine_line.shp'
nl_prj_filepath = data_dirpath / 'nine_line.prj'

高德 web API 查询行政区域：[https://lbs.amap.com/api/webservice/guide/api/district](https://lbs.amap.com/api/webservice/guide/api/district)

按入门指南注册并申请密钥（key），拼接 HTTP 请求的 URL，接收并解析返回的数据。

In [None]:
# AMap web-service key. Prefer the AMAP_KEY environment variable so a real key
# never has to be saved inside the notebook; the literal is only a placeholder
# kept as a fallback for backward compatibility.
key = os.environ.get('AMAP_KEY', '1145141919810')

收集 `level=district` 的记录。没有区县的省市用它自己代替。

DBF 文件字段名称长度限制为 10 个字符。

In [None]:
# Fetch the administrative hierarchy in a single request.
url = f'https://restapi.amap.com/v3/config/district?key={key}&subdistrict=3'
# A timeout keeps a stalled connection from hanging the notebook forever.
response = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of on a confusing KeyError further down.
response.raise_for_status()
content = response.json()
# The service reports its own failures (e.g. an invalid key) in the payload:
# "status" is "1" on success and "info" carries the reason otherwise.
if str(content.get('status')) != '1':
    raise RuntimeError(f"AMap district query failed: {content.get('info')!r}")

In [None]:
def make_record(province, city, district):
    """Build one attribute record from three district dicts of the API response.

    Each argument is a dict with at least ``name`` and ``adcode`` keys. The
    same dict may be passed for several levels when a level is missing.
    """
    return {
        'pr_name': province['name'],
        'pr_adcode': province['adcode'],
        'ct_name': city['name'],
        'ct_adcode': city['adcode'],
        'dt_name': district['name'],
        'dt_adcode': district['adcode'],
    }


def collect_records(country):
    """Flatten the nested district tree under ``country`` into a list of records.

    ``country`` is the top-level district dict; its ``districts`` list holds
    the provinces. Returns the records sorted by ``dt_adcode``.
    """
    collected = []
    for province in country['districts']:
        cities = province['districts']
        # Taiwan only has province-level data: the province stands in for
        # the city and the district as well.
        if not cities:
            collected.append(make_record(province, province, province))
            continue

        for city in cities:
            # Hong Kong and Macau: the level right below is already the district.
            if city['level'] == 'district':
                collected.append(make_record(province, province, city))
                continue

            children = city['districts']
            # A city without any children would otherwise vanish from the
            # result; let it stand in for its own district.
            if not children:
                collected.append(make_record(province, city, city))
                continue

            for child in children:
                # Province-administered cities and the like: the next level
                # is streets, so the city itself is used as the district.
                if child['level'] == 'street':
                    collected.append(make_record(province, city, city))
                    break

                # Districts and counties of ordinary provinces and municipalities.
                collected.append(make_record(province, city, child))

    # Sort by adcode.
    collected.sort(key=lambda rec: rec['dt_adcode'])
    return collected


records = collect_records(content['districts'][0])
len(records)

处理特殊名称，添加简称。

In [None]:
# Replace the one district name that carries a parenthesised note with its
# plain form, then attach the short name derived from the final name.
dt_name_fixes = {'澳门大学横琴校区(由澳门实施管辖)': '澳门大学横琴校区'}
for rec in records:
    full_name = dt_name_fixes.get(rec['dt_name'], rec['dt_name'])
    rec['dt_name'] = full_name
    rec['short_name'] = shorten_dt_name(full_name)

`polyline` 用字符串表示多边形坐标序列。不同多边形用 `|` 分隔，不同点用 `;` 分隔，xy 用 `,` 分隔。多边形环的绕行方向都是顺时针，那么问题来了，怎么判断多边形的洞，怎么判断 `MultiPolygon` 呢？

测试后大致发现，`polyline` 里不是用单独的多边形表示洞，而是将带洞的多边形切成两个独立的多边形，当这两个多边形拼在一起时，就会凑出一个洞。因此这里的策略是直接用 `unary_union` 合并 `polyline` 里的所有多边形。

关于多边形绕行方向：

- shapely 里构造 `Polygon` 时方向无所谓，因为已经通过 `shell` 和 `holes` 参数明确指定了外环和内环。但经过运算后会变成外环顺时针内环逆时针。
- Shapefile 要求外环顺时针，内环逆时针；GeoJSON 要求外环逆时针，内环顺时针，不过并不强制。
- PyShp 的 `__geo_interface__` 接口能将 shapefile 转为 GeoJSON，但除了 2.2.0 版本外不会改变底层数据的绕行方向。
- `sgeom.shape` 会用到 `__geo_interface__` 接口。
- `mpath.Path` 要求内外环方向不一致即可。

总结：用 shapely 和 PyShp 处理全为顺时针的 `polyline` 数据，最后能得到外环顺时针，内环逆时针的 shapefile 文件。后续用于 Matplotlib 时能区分出洞。

In [None]:
def fetch_polyline(adcode, max_tries=10, wait=3, timeout=30):
    """Return the ``polyline`` boundary string of the district with ``adcode``.

    Network errors, HTTP errors and responses without the expected
    ``districts[0].polyline`` entry are printed and retried after ``wait``
    seconds. Once ``max_tries`` attempts have failed the last error is
    re-raised, so a persistent problem cannot loop forever.
    """
    url = f'https://restapi.amap.com/v3/config/district?key={key}&keywords={adcode}&subdistrict=0&extensions=all'
    for attempt in range(1, max_tries + 1):
        try:
            # The timeout turns a stalled connection into a retryable error.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response.json()['districts'][0]['polyline']
        except (requests.RequestException, LookupError, ValueError) as e:
            print(repr(e))
            if attempt == max_tries:
                raise
            time.sleep(wait)


# shapeType=5 is the polygon shape type.
with shapefile.Writer(str(temp_filepath), shapeType=5) as writer:
    writer.fields = [
        ['pr_name', 'C', 80, 0],
        ['pr_adcode', 'N', 6, 0],
        ['ct_name', 'C', 80, 0],
        ['ct_adcode', 'N', 6, 0],
        ['dt_name', 'C', 80, 0],
        ['dt_adcode', 'N', 6, 0],
        ['short_name', 'C', 80, 0],
        ['lon', 'N', 7, 3],
        ['lat', 'N', 7, 3],
    ]
    for record in records:
        polyline = fetch_polyline(record['dt_adcode'])
        polygon = polyline_to_polygon(polyline)
        polys = polygon_to_polys(polygon)

        # Work on a copy so the dicts in `records` are not given lon/lat
        # keys, which keeps this cell safe to re-run.
        record = record.copy()
        record['lon'], record['lat'] = polygon_center(polygon)

        writer.poly(polys)
        writer.record(**record)
        print(record)
        # Short pause between requests (presumably to stay under the API
        # rate limit).
        time.sleep(0.2)

# Convert from the "Mars" coordinate system (GCJ) to WGS84.
gcj_to_wgs(temp_filepath, dt_shp_filepath)
make_prj_file(dt_prj_filepath)

[https://datav.aliyun.com/portal/school/atlas/area_selector](https://datav.aliyun.com/portal/school/atlas/area_selector)

全国的 GeoJSON 数据里含多边形表示的九段线，以此制作九段线的 shapefile 文件。注意 `writer.shape` 会自动将 GeoJSON 里逆时针的外环改为顺时针。

In [3]:
# Download and parse the GeoJSON before opening the writer, so a failed
# request cannot leave a half-initialised shapefile behind.
url = 'https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json'
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.json()
# The geometry of the last feature in the collection is used as the
# nine-dash line.
geometry = content['features'][-1]['geometry']

with shapefile.Writer(str(temp_filepath), shapeType=5) as writer:
    writer.fields = [
        ['cn_adcode', 'N', 6, 0],
        ['cn_name', 'C', 80, 0],
    ]
    writer.record(cn_adcode=100000, cn_name='九段线')
    writer.shape(geometry)

# Same GCJ -> WGS84 conversion and .prj generation as for the district file.
gcj_to_wgs(temp_filepath, nl_shp_filepath)
make_prj_file(nl_prj_filepath)

清理临时文件。

In [4]:
# Delete every entry of the data directory whose stem is 'temp'
# (temp.shp and the files sharing its base name).
temp_paths = [path for path in data_dirpath.iterdir() if path.stem == 'temp']
for path in temp_paths:
    path.unlink()