# Cel
Wyciągnięcie danych na temat dróg i POI w województwie śląskim.

In [1]:
import json
import glob

import pandas as pd
import geopandas as gpd
from OSMPythonTools.nominatim import Nominatim
from tqdm import tqdm

In [2]:
%%writefile overpass.py

import urllib

from OSMPythonTools.overpass import Overpass


class CustomOverpass(Overpass):
    """Overpass client that sends queries as GET requests with a hand-built URL.

    The query string is assembled from raw ``key=value`` pairs and is NOT
    percent-encoded, so callers must pass query text that is already safe to
    place in a URL (see ``from_statements``, which uses ``+`` between the
    ``out`` modifiers instead of spaces).
    """

    def _queryRequest(self, endpoint, queryString, params=None):
        """Build the GET request for ``queryString``.

        Parameters
        ----------
        endpoint : str
            Base URL; ``interpreter?<query string>`` is appended to it verbatim.
        queryString : str
            Query text, sent as the ``data`` field.
        params : dict, optional
            Extra ``key=value`` fields appended after ``data``.

        Returns
        -------
        urllib.request.Request
            A request object with ``method="GET"``.
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.request`` has been loaded.
        import urllib.request

        # urllib.parse.urlencode was tried here and disabled by the author
        # (original note: "is the overpass server broken?"), so the pairs are
        # joined as-is without any escaping.
        fields = {'data': queryString, **(params or {})}
        qstr = "&".join(f"{k}={v}" for k, v in fields.items())
        url = endpoint + f"interpreter?{qstr}"

        # Echo the final URL so the request can be inspected from the notebook.
        print(url)
        return urllib.request.Request(url, method="GET")

    def from_statements(self, statements, **kwargs):
        """Run the union of ``statements`` and return whatever ``self.query`` returns.

        The statements are joined with ``;``, wrapped in ``( ... ;);`` and
        followed by ``out+skel+body+meta+geom;``. Because the URL is not
        percent-encoded, the statements must not contain characters that are
        invalid in a URL.
        NOTE(review): the ``+`` signs are presumably decoded as spaces by the
        server - confirm.

        Parameters
        ----------
        statements : iterable of str
            Overpass QL statements without the trailing ``;``.
        **kwargs
            Forwarded unchanged to ``self.query``.
        """
        query = (
            "("
            + ";".join(statements)
            + ";);"
            + "out+skel+body+meta+geom;"
        )
        return self.query(query, **kwargs)

Overwriting overpass.py


In [3]:
# Import the client by name rather than with a wildcard, so it is obvious where
# `CustomOverpass` comes from (the local overpass.py module).
from overpass import CustomOverpass

nominatim = Nominatim()
overpass = CustomOverpass()

In [4]:
# Query Nominatim for the voivodeship (passing polygon_geojson=1 as a request
# parameter) and keep the first entry of the JSON response.
silesia = nominatim.query("województwo śląskie", params={"polygon_geojson": 1}).toJSON()[0]
# NOTE(review): presumably [min_lat, max_lat, min_lon, max_lon] given as strings - confirm
# against the Nominatim output format; the entries are later joined with ",".
bbox = silesia["boundingbox"]

In [5]:
%%time
# Download the road segments (ways carrying a `highway` key) with all of their tags.
# The bounding box entries are sent in the order (2, 0, 3, 1) as the `bbox` request parameter.
road_segments = overpass.from_statements(
    statements=["way[highway](bbox)", "node(w)"],
    params={"bbox": ",".join(bbox[position] for position in (2, 0, 3, 1))},
    timeout=3600,
)

CPU times: user 45.5 s, sys: 5.73 s, total: 51.2 s
Wall time: 52.8 s


In [6]:
def build_feature(element, coords, shape_type, values_to_ignore=None):
    """Build a GeoJSON ``Feature`` dict from a single Overpass element.

    Parameters
    ----------
    element : dict
        Overpass element. ``"type"`` and ``"id"`` are required; ``"tags"`` is
        optional and treated as empty when missing.
    coords
        Stored unchanged under ``geometry.coordinates``.
    shape_type : str
        Stored under ``geometry.type`` (e.g. ``"Point"``, ``"LineString"``).
    values_to_ignore : container, optional
        Tags whose *value* is contained here are left out of the properties.
        ``None`` (the default) keeps every tag.

    Returns
    -------
    dict
        Feature with ``type``, ``id``, ``properties`` and ``geometry`` keys.
        The id is ``"<element type>/<element id>"``.
    """
    # Treat None as "ignore nothing"; using None directly in the membership
    # test below would raise TypeError for every element that has tags.
    ignored = () if values_to_ignore is None else values_to_ignore

    tags = {
        key: value
        for key, value in element.get("tags", {}).items()
        if value not in ignored
    }
    feature_id = f"{element['type']}/{element['id']}"

    return {
        "type": "Feature",
        "id": feature_id,
        # Tags are spread last, so a tag literally named "id" overrides the
        # synthetic id inside the properties.
        "properties": {"id": feature_id, **tags},
        "geometry": {
            "type": shape_type,
            "coordinates": coords,
        },
    }


def transform_to_geojson(overpass_elements, types=None, values_to_ignore=None, **kwargs):
    """Convert Overpass elements into a GeoJSON ``FeatureCollection`` dict.

    Parameters
    ----------
    overpass_elements : sequence of dict
        Overpass elements; each must carry a ``"type"`` key. The sequence is
        scanned once per requested element kind.
    types : container of str, optional
        Which element kinds to convert: any of ``"nodes"``, ``"ways"``,
        ``"relations"``. Empty or ``None`` yields a collection with no features.
    values_to_ignore : container, optional
        Forwarded to ``build_feature`` for every feature.
    **kwargs
        ``node_condition``: optional callable ``element -> bool``; only nodes
        for which it returns a truthy value are converted. Defaults to
        accepting every node.

    Returns
    -------
    dict
        ``{"type": "FeatureCollection", "features": [...]}`` with node features
        first, then ways, then relations.

    Notes
    -----
    Nodes become ``Point`` features from their ``lon``/``lat``; ways become
    ``LineString`` features from their ``geometry`` list; relations become
    ``MultiLineString`` features built from the ``geometry`` of their members
    of type ``"way"`` (other members are skipped).
    """
    requested = types or []
    # Resolve the predicate once instead of on every node.
    node_condition = kwargs.get("node_condition") or (lambda element: True)

    def of_type(kind):
        # All elements of the given Overpass kind, in input order.
        return [element for element in overpass_elements if element["type"] == kind]

    features = []

    if "nodes" in requested:
        features.extend(
            build_feature(
                node,
                (node["lon"], node["lat"]),
                shape_type="Point",
                # Previously referenced an undefined name and raised NameError.
                values_to_ignore=values_to_ignore,
            )
            for node in of_type("node")
            if node_condition(node)
        )

    if "ways" in requested:
        features.extend(
            build_feature(
                way,
                [(point["lon"], point["lat"]) for point in way["geometry"]],
                shape_type="LineString",
                values_to_ignore=values_to_ignore,
            )
            for way in of_type("way")
        )

    if "relations" in requested:
        features.extend(
            build_feature(
                relation,
                [
                    tuple((point["lon"], point["lat"]) for point in member["geometry"])
                    for member in relation["members"]
                    if member["type"] == "way"
                ],
                shape_type="MultiLineString",
                # Previously referenced an undefined name and raised NameError.
                values_to_ignore=values_to_ignore,
            )
            for relation in of_type("relation")
        )

    return {"type": "FeatureCollection", "features": features}

In [7]:
%%time

def batch_entries(collection, batch_size=10000):
    """Yield consecutive slices of ``collection`` holding up to ``batch_size`` items.

    Iteration ends at the first empty slice, so an empty collection yields
    nothing. ``collection`` must support slicing.
    """
    start = 0

    while True:
        chunk = collection[start:start + batch_size]
        if not chunk:
            return
        yield chunk
        start += batch_size


# Index of the next output file; it only advances when a file is written, so
# batches that produce no way features do not leave gaps in the numbering.
ix = 0
for collection in tqdm(batch_entries(road_segments.toJSON()["elements"])):
    # Tags with the value "2017-09-31" (not a real calendar date) are left out.
    # NOTE(review): presumably because that value breaks downstream parsing - confirm.
    data = transform_to_geojson(collection, types=["ways"], values_to_ignore=["2017-09-31"])

    if data["features"]:
        with open(f"../output/silesia_{ix:04}.geojson", "w") as f:
            f.write(json.dumps(data))
        ix += 1

444it [00:20, 21.34it/s]

CPU times: user 15.3 s, sys: 4.02 s, total: 19.3 s
Wall time: 20.9 s





In [8]:
%%time
# Load the Nominatim result for the voivodeship as a GeoDataFrame.
shape_gdf = gpd.read_file(json.dumps(silesia))

# Read every exported batch file in name order and spatially join it against
# the voivodeship shape with op="within"; one joined frame is collected per file.
roads_gdfs = []
geojson_paths = sorted(glob.glob("../output/silesia*.geojson"))
for file in tqdm(geojson_paths):
    batch_gdf = gpd.read_file(file, crs="epsg:4326")
    roads_gdfs.append(gpd.sjoin(batch_gdf, shape_gdf, op="within"))

100%|██████████| 66/66 [04:10<00:00,  3.79s/it]

CPU times: user 3min 51s, sys: 10.7 s, total: 4min 2s
Wall time: 4min 10s





In [9]:
%%time
# Concatenate the per-file join results collected in roads_gdfs into one object.
roads = pd.concat(roads_gdfs)

CPU times: user 36.8 s, sys: 2.59 s, total: 39.4 s
Wall time: 40.3 s


In [11]:
%%time

import pickle

def save_pickle(fname, data, output_dir="../output"):
    """Pickle ``data`` to ``<output_dir>/<fname>.pickle`` using pickle protocol 4.

    Parameters
    ----------
    fname : str
        File name without the ``.pickle`` extension.
    data
        Any picklable object.
    output_dir : str, optional
        Directory the file is written to; defaults to ``"../output"``. The
        directory is not created here and must already exist.
    """
    with open(f"{output_dir}/{fname}.pickle", "wb") as f:
        pickle.dump(data, f, protocol=4)

# Persist the concatenated roads object as ../output/roads.pickle.
save_pickle("roads", roads)

CPU times: user 15.9 s, sys: 4.11 s, total: 20 s
Wall time: 21.2 s
