[国土数値情報 \| 道路データ](https://nlftp.mlit.go.jp/ksj/gmlold/datalist/gmlold_KsjTmplt-N01.html)

東京：https://nlftp.mlit.go.jp/ksj/gmlold/data/N01/N01-07L/N01-07L-13-01.0a_GML.zip

In [None]:
import pandas as pd
import geopandas as gpd
import os
import urllib.request as rq
import plotly.express as px
import matplotlib.pyplot as plt

# zipファイルの取得


In [None]:
def get_data(n):
    '''
    Download one N01 (road data) zip archive and store it as raw/<nn>.zip.

    parameters
        n: int
            number that is zero-padded to two digits and embedded in both
            the download URL and the local file name
            (presumably the prefecture code, 1-47 -- confirm with caller)
    '''
    code = f'{n:02}'
    url = f"https://nlftp.mlit.go.jp/ksj/gmlold/data/N01/N01-07L/N01-07L-{code}-01.0a_GML.zip"
    path = f"raw/{code}.zip"

    # Fetch the whole payload before touching the disk so that a failed
    # request does not leave an empty file behind, and close the HTTP
    # response deterministically.
    with rq.urlopen(url) as response:
        payload = response.read()

    # Create the output directory on first use instead of assuming it exists.
    os.makedirs("raw", exist_ok=True)
    with open(path, "wb") as f:
        f.write(payload)


In [None]:
def get_all():
    '''
    Call get_data for every number from 1 to 47, in ascending order.
    '''
    code = 1
    while code <= 47:
        get_data(code)
        code += 1

In [None]:
# Download all archives (performs network requests and writes zip files under raw/).
get_all()

# 前処理

In [None]:
def pre_proc(gdf):
    '''
    Combine the route attributes into a single name column.

    The name is N01_002 + N01_003 + "(" + N01_004 + ")". A missing part
    contributes an empty string, and the parentheses are omitted entirely
    when N01_004 is missing.

    parameters
        gdf: frame with the columns N01_002, N01_003 and N01_004
            The input frame is left unmodified.

    return
        new frame of the same type restricted to the columns
        ['name', 'geometry'] (columns that do not exist are skipped)
    '''
    # Wrap before filling: a missing value stays missing through the string
    # concatenation, so it ends up as "" instead of "()".
    suffix = ("(" + gdf["N01_004"] + ")").fillna("")
    name = gdf["N01_002"].fillna("") + gdf["N01_003"].fillna("") + suffix

    # assign() works on a copy, so the caller's frame keeps its original
    # columns and values; only the string columns are filled, never geometry.
    return gdf.assign(name=name).filter(items=["name", "geometry"])

## LineStringをまとめる

In [None]:
from enum import Enum
from shapely.geometry import LineString
def reduce_lines(gdf, to, name):
    '''
    Chain the line segments of one route into continuous polylines.

    Segments that share an end point are joined end to end. When the
    current chain cannot be extended at either end it is closed and a new
    chain (group) is started with the next remaining segment.

    parameters
        gdf: geopandas.GeoDataFrame
            geometry column holds LineString / MultiLineString objects;
            any other geometry type, missing value or empty line is ignored
        to: str
            "pandas" or "geopandas"
        name: str
            stored in the name column of the result and, for "pandas",
            used as the prefix of the group labels ("<name>_<n>")

    return
        to == "pandas": pandas.DataFrame
            columns: ['lon', 'lat', 'group', 'name'], one row per vertex
        to == "geopandas": geopandas.GeoDataFrame
            columns: ['geometry', 'name'], all chains dissolved into one row

    raises
        TypeError: 'to' is neither "pandas" nor "geopandas"
        ValueError: gdf contains no usable line (pandas.concat is given an
            empty list)
    '''

    class Direction(Enum):
        # Despite the names, BACKWARD grows the chain at its tail (work[-1])
        # and FORWARD grows it at its head (work[0]).
        FORWARD = 0
        BACKWARD = 1

    def list_to_gdf(coords):
        '''
        parameters
            coords: list of tuple of lon/lat
        return
            geopandas.GeoDataFrame
            columns: ['geometry']
        '''
        return gpd.GeoDataFrame({"geometry": [LineString(coords)]})

    def list_to_df(coords):
        '''
        parameters
            coords: list of tuple of lon/lat
        return
            pandas.DataFrame
            columns: ['lon', 'lat', 'group']
        '''
        df = pd.DataFrame(coords, columns=['lon', 'lat'])
        # group_count is read from the enclosing scope at call time.
        df['group'] = f'{name}_{group_count}'
        return df

    if to == "pandas":
        list_to = list_to_df
    elif to == "geopandas":
        list_to = list_to_gdf
    else:
        raise TypeError("'to' argument must be 'pandas' or 'geopandas'")

    # Imported here because the surrounding file only imports LineString.
    from shapely.geometry import MultiLineString

    def line_to_tuple(geometries, list_of_tuple):
        '''Append the coordinate tuple of every line found in geometries.'''
        for geom in geometries:
            if isinstance(geom, LineString):
                coords = tuple(geom.coords)
                if coords:  # an empty LineString has nothing to chain
                    list_of_tuple.append(coords)
            elif isinstance(geom, MultiLineString):
                # Multi-part geometries are not iterable in Shapely 2;
                # their members are exposed through .geoms.
                line_to_tuple(geom.geoms, list_of_tuple)

    work = []    # coordinates of the chain currently being built
    frames = []  # one frame per finished chain
    lines = []
    line_to_tuple(gdf.geometry, lines)

    # Drop exact duplicate segments.
    lines = list(set(lines))

    direction = Direction.BACKWARD
    group_count = 1

    while lines:
        if not work:
            # Seed a new chain with the next remaining segment.
            work.extend(lines.pop(0))
            continue

        if direction == Direction.BACKWARD:
            tail = work[-1]
            for i, line in enumerate(lines):
                # Match end points only: joining at an interior vertex
                # would silently drop part of the matched segment.
                if tail in (line[0], line[-1]):
                    segment = list(lines.pop(i))
                    if segment[0] != tail:
                        segment.reverse()
                    work.extend(segment[1:])  # skip the shared vertex
                    break
            else:
                # Nothing continues the tail; try to grow the head instead.
                direction = Direction.FORWARD
        else:
            head = work[0]
            for i, line in enumerate(lines):
                if head in (line[0], line[-1]):
                    segment = list(lines.pop(i))
                    if segment[-1] != head:
                        segment.reverse()
                    work[0:0] = segment[:-1]  # prepend without the shared vertex
                    break
            else:
                # Neither end can grow: close this chain and start a new one.
                frames.append(list_to(work))
                group_count += 1
                work = []
                direction = Direction.BACKWARD

    if work:
        frames.append(list_to(work))

    merged = pd.concat(frames)

    if to == "geopandas":
        merged = merged.dissolve()
    else:
        merged.reset_index(drop=True, inplace=True)

    merged["name"] = name
    return merged

In [None]:
def convert_all(gdf, to):
    '''
    Run reduce_lines once per distinct route name and stack the results.

    parameters
        gdf: frame with a string 'name' column and a geometry column
        to: str
            "pandas" or "geopandas", forwarded to reduce_lines

    return
        concatenation of all per-name results, sorted by name, with a
        fresh 0..n-1 index
    '''
    names = gdf["name"]
    frames = []
    for name in names.unique():
        # Select rows with a boolean mask rather than an interpolated
        # query string, which broke on names containing quotes or
        # backslashes and evaluated data as an expression.
        # NOTE(review): the prefix match is kept from the original; rows
        # whose name merely starts with `name` are included too, and an
        # empty name matches every row -- confirm this is intended.
        selected = gdf[names.str.startswith(name, na=False)]
        frames.append(reduce_lines(selected, to, name))

    all_df = pd.concat(frames)
    all_df = all_df.sort_values(by="name").reset_index(drop=True)
    return all_df

# geojsonに変換

In [None]:
def main():
    '''
    Convert raw/<nn>.zip into <nn>.geo.json for nn = 01 .. 47.

    Each archive is read, reduced by pre_proc, merged per route by
    convert_all in "geopandas" mode, and written out as JSON text.
    '''
    for code in (f"{number:02}" for number in range(1, 48)):
        source = gpd.read_file(f'raw/{code}.zip')
        routes = convert_all(pre_proc(source), "geopandas")
        with open(f"{code}.geo.json", "w") as out:
            out.write(routes.to_json())
        
# Run the conversion for every archive as soon as this cell is executed.
main()

In [None]:
# Check: load the GeoJSON file written for code 13 and display it.
foo = gpd.read_file("13.geo.json")
foo

# work

In [None]:
# Load the raw archive for code 47 for ad-hoc inspection
# (presumably Okinawa, judging by the variable name -- confirm).
oki = gpd.read_file("raw/47.zip")

In [None]:
# Reduce the frame to the combined name column plus geometry.
oki = pre_proc(oki)

In [None]:
# Display the preprocessed frame.
oki

In [None]:
# Plot only the rows whose name is exactly "糸満与那原線".
oki.query('name == "糸満与那原線"').plot(); plt.show()