In [2]:
'''
03_create-paths.ipynb
Create a dataset of shortest paths between all destinations
'''

import ast
import json
import math
import multiprocessing
import os
import pathlib
import shutil
import subprocess
import urllib.request

import requests
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import nearest_points, unary_union
import swifter
import geojson
import gdal
import h3
import networkit as nk
from joblib import Parallel, delayed
# number of CPUs reported by the operating system
num_cores = multiprocessing.cpu_count()

# project folders, resolved two levels above the current working directory
PATH_ROOT = os.path.join(pathlib.Path().absolute(), '../..')
PATH_IN = f'{PATH_ROOT}/data/02_processed/'
PATH_OUT = f'{PATH_ROOT}/data/03_paths/'

# constants
# H3 resolution used for the edge length, the polyfill and the destination lookup
CONST_graph_detail_lvl = 8
# base speed on flat ground (presumably km per day, going by the name -- TODO confirm)
CONST_speed_day_km = 40
# exponent applied to the slope coefficient in calculate_path_time
CONST_slope_effect_multiplier = 2
# not referenced by any code visible in this notebook
CONST_distance_bridge = 10 / 111
# pseudo-elevation given to river hexes without a bridge and to points outside the raster
CONST_river_value = 9999

# critical slope algorithm
# slope (in percent) at which slope_coeff drops to 0.5
CONST_critical_slope = 4

# for tobler application
# only referenced by the commented-out exponential formula in slope_coeff
CONST_elevation_coefficient = 1

def slope_coeff(slope):
    """Return the speed coefficient for a slope given as rise over run.

    The absolute slope is converted to percent and divided by
    CONST_critical_slope; the coefficient is 1 / (1 + ratio ** 2).
    Flat ground therefore yields 1, a slope equal to the critical slope
    yields 0.5, and the sign of the slope is ignored.

    An exponential alternative,
    exp(-3.5 * abs(slope + 0.05)) ** CONST_elevation_coefficient,
    was tried earlier and is disabled.
    """
    slope_ratio = (abs(slope) * 100) / CONST_critical_slope
    return 1 / (1 + slope_ratio ** 2)

def calculate_path_time(value_from, value_to, dist):
    """Return the travel time between two neighbouring cells.

    Parameters
    ----------
    value_from, value_to : number
        Values of the two cells; their difference is divided by 1000
        (presumably metres -> km -- TODO confirm the raster units).
    dist : number
        Horizontal distance between the cells, in the same unit as the
        scaled rise. Must be non-zero.

    Returns
    -------
    float
        Length of the inclined segment divided by the slope-adjusted speed.
        The result is the same for both travel directions because only
        the absolute slope and the squared rise are used.
    """
    climb = (value_to - value_from) / 1000
    gradient_factor = slope_coeff(climb / dist)

    # base speed scaled down by the slope coefficient raised to the configured power
    speed = CONST_speed_day_km * abs(gradient_factor ** CONST_slope_effect_multiplier)

    # length of the segment along the incline
    travelled = math.sqrt((climb ** 2) + (dist ** 2))
    return travelled / speed

# sanity check: the travel time is identical uphill and downhill
for elevation_from, elevation_to in ((50, 200), (200, 50)):
    print(calculate_path_time(elevation_from, elevation_to, 10))

crs4326 = {'init': 'epsg:4326'}

# distance used between neighbouring hex centres: twice the H3 edge length
# NOTE(review): for a regular hexagon neighbouring centres are sqrt(3) * edge
# apart, so this looks like an over-estimate of about 15 % -- confirm it is intended
hex_distance = h3.edge_length(CONST_graph_detail_lvl) * 2
print('hex_distance', hex_distance)

# the same distance expressed in degrees (divides by 111, presumably km per degree)
CONST_hex_dist = hex_distance / 111

'''
Load datasets
'''

destinations = gpd.read_file(f'{PATH_IN}destinations.geojson')

# all river geometries merged into one geometry for distance queries
rivers_df = gpd.read_file(f'{PATH_IN}rivers.geojson')
rivers = unary_union(list(rivers_df['geometry']))

# every bridge is buffered by hex_distance / 222 (half of hex_distance / 111)
# before the buffers are merged into one geometry
bridges_df = gpd.read_file(f'{PATH_IN}bridges.geojson')
bridge_buffer = hex_distance / 222
bridges = unary_union(
    [bridge_geom.buffer(bridge_buffer) for bridge_geom in bridges_df['geometry']]
)

# bounding box of the study area as (minx, miny, maxx, maxy)
bbox = gpd.read_file(f'{PATH_IN}bbox.geojson')
bb_xy = bbox.total_bounds
west, south, east, north = bb_xy

# closed rectangular ring in (x, y) order, wrapped as a GeoJSON polygon dict
bounds_json = dict(
    geojson.Polygon([[
        [west, south],
        [west, north],
        [east, north],
        [east, south],
        [west, south],
    ]])
)


0.3252929369373301
0.3252929369373301
hex_distance 0.922709368


In [3]:
# load elevation

elevation_path = PATH_IN + 'elevation.tif'
elevation = gdal.Open(elevation_path)
if elevation is None:
    # gdal.Open signals failure by returning None unless gdal.UseExceptions()
    # is enabled; fail here instead of with an AttributeError further down
    raise OSError('Could not open elevation raster: ' + elevation_path)

band = elevation.GetRasterBand(1)
cols = elevation.RasterXSize
rows = elevation.RasterYSize

# geotransform: [0]/[3] are the x/y origin, [1]/[5] the pixel width/height
# ([5] is negated here so that pixelHeight is positive for north-up rasters);
# the rotation terms [2] and [4] are not used
transform = elevation.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = -transform[5]

# whole first band read into memory as an array indexed [row][col]
elevation_data = band.ReadAsArray(0, 0, cols, rows)

# check whether there is river or a bridge
def path_value_point(hex_center):
    """Return the value used for a hex when computing path times.

    A hex whose centre lies closer than half of CONST_hex_dist to the merged
    river geometry, and not closer than that to the merged bridge geometry,
    gets CONST_river_value. Every other hex gets the raster elevation at its
    centre. The bridge distance is only evaluated for hexes near a river.
    """
    reach = CONST_hex_dist / 2
    blocked_by_river = (
        hex_center.distance(rivers) < reach
        and not hex_center.distance(bridges) < reach
    )
    if blocked_by_river:
        return CONST_river_value
    return elevation_point(hex_center)

# get elevation value for the given geographical point
def elevation_point(point):
    """Return the raster value under ``point`` as an int.

    ``point`` must expose ``x`` and ``y`` in the raster's coordinate system.
    Points that fall outside the raster on any side return
    CONST_river_value.
    """
    # floor (not int()) so that offsets in (-1, 0) become index -1 and are
    # rejected below instead of being truncated onto the first row/column
    row = math.floor((yOrigin - point.y) / pixelHeight)
    col = math.floor((point.x - xOrigin) / pixelWidth)

    n_rows, n_cols = elevation_data.shape[0], elevation_data.shape[1]
    # negative indices must be rejected explicitly: they would otherwise
    # wrap around and read from the opposite edge of the raster
    if 0 <= row < n_rows and 0 <= col < n_cols:
        return int(elevation_data[row][col])
    return CONST_river_value

In [4]:
# construct hexes dataframe

# NOTE(review): bounds_json holds (x, y) pairs and is passed to h3.polyfill
# without geo_json_conformant, and h3_to_geo output goes straight into Point();
# h3 v3 documents (lat, lng) order for both. The destination lookup uses the
# same argument order, so the grid is self-consistent -- confirm this axis
# swap is intended before changing any single call.
hex_ids = list(h3.polyfill(bounds_json, CONST_graph_detail_lvl))


def _hex_center(row):
    """Centre of the hex in the row's 'id' column, wrapped as a shapely Point."""
    return Point(h3.h3_to_geo(row['id']))


def _hex_value(row):
    """Path value (elevation or river marker) at the hex centre."""
    return path_value_point(row['center'])


def _hex_neighbors(row):
    """Ids of the hexes reached by each outgoing edge of the hex in the row index."""
    outgoing = h3.get_h3_unidirectional_edges_from_hexagon(row.name)
    return [
        h3.get_destination_h3_index_from_unidirectional_edge(edge)
        for edge in outgoing
    ]


hexes_df = pd.DataFrame({'id': hex_ids})
hexes_df['center'] = hexes_df.swifter.apply(_hex_center, axis=1)
hexes_df["value"] = hexes_df.swifter.apply(_hex_value, axis=1)
hexes_df = hexes_df.set_index('id')

# find neighboring hexes
hexes_df['neighbors'] = hexes_df.swifter.apply(_hex_neighbors, axis=1)

# save hexes_df (CSV stores the Point and list columns as plain text)
hexes_df.to_csv(PATH_OUT + 'hexes.csv')

Pandas Apply: 100%|██████████| 1711237/1711237 [00:49<00:00, 34859.88it/s]
Pandas Apply: 100%|██████████| 1711237/1711237 [03:29<00:00, 8155.04it/s]


In [5]:
# display the hex table built in the previous cell
hexes_df

Unnamed: 0_level_0,center,value
id,Unnamed: 1_level_1,Unnamed: 2_level_1
8852ebb2cbfffff,POINT (13.19953559539785 48.82979234725212),695
8863a95995fffff,POINT (12.34096338024553 55.01436353936992),0
8852661a0dfffff,POINT (20.87966753906016 49.92540026666737),9999
8852633443fffff,POINT (19.52047183970761 51.72534509867218),196
88526ab457fffff,POINT (18.72553839568407 52.95208387694156),46
...,...,...
88524c6199fffff,POINT (17.03542974907975 52.68680324413737),103
8852544c3bfffff,POINT (17.05209686683553 47.82612975280578),121
885248d253fffff,POINT (16.90772593505346 52.62834335432925),88
8852509801fffff,POINT (16.85600152557521 48.2814518533422),149


In [11]:
# create weighted non-directional graph for all combinations of neighboring hexes

hexes_df = pd.read_csv(PATH_OUT + 'hexes.csv').set_index('id')

# CSV stores every cell as text: restore the geometry and list columns,
# otherwise iterating 'neighbors' walks over single characters (no edge is
# ever added) and 'center' cannot be used to build path geometries
hexes_df['center'] = hexes_df['center'].apply(wkt.loads)
hexes_df['neighbors'] = hexes_df['neighbors'].apply(ast.literal_eval)

g = nk.Graph(directed=False, weighted=True)

# one graph node per hex, in dataframe order
hexes_df['node'] = [g.addNode() for _ in range(len(hexes_df))]

# node_id -> hex_id shorthand dictionary
node_to_hex = dict(zip(hexes_df['node'].tolist(), hexes_df.index))

# hex_id -> node / value lookups; plain dicts avoid a dataframe lookup per edge
hex_to_node = dict(zip(hexes_df.index, hexes_df['node'].tolist()))
hex_to_value = dict(zip(hexes_df.index, hexes_df['value'].tolist()))

for hex_from_id, neighbors in zip(hexes_df.index, hexes_df['neighbors']):
    node_from = hex_to_node[hex_from_id]
    value_from = hex_to_value[hex_from_id]
    for hex_to_id in neighbors:
        node_to = hex_to_node.get(hex_to_id)
        # neighbours outside the grid are skipped
        if node_to is None:
            continue
        # the graph is undirected and the path time is the same in both
        # directions, so each pair is added once (from its lower node id)
        # instead of once from each side
        if node_to <= node_from:
            continue
        g.addEdge(
            node_from,
            node_to,
            calculate_path_time(value_from, hex_to_value[hex_to_id], hex_distance)
        )



                                                      center  value  \
id                                                                    
8852ebb2cbfffff  POINT (13.19953559539785 48.82979234725212)    695   
8863a95995fffff  POINT (12.34096338024553 55.01436353936992)      0   
8852661a0dfffff  POINT (20.87966753906016 49.92540026666737)   9999   
8852633443fffff  POINT (19.52047183970761 51.72534509867218)    196   
88526ab457fffff  POINT (18.72553839568407 52.95208387694156)     46   
...                                                      ...    ...   
88524c6199fffff  POINT (17.03542974907975 52.68680324413737)    103   
8852544c3bfffff  POINT (17.05209686683553 47.82612975280578)    121   
885248d253fffff  POINT (16.90772593505346 52.62834335432925)     88   
8852509801fffff   POINT (16.85600152557521 48.2814518533422)    149   
8863167b13fffff  POINT (15.08871614983771 53.84410139479593)     28   

                                                         neighbors  
id     

In [12]:
# locate every destination on the hex grid and on the graph

# NOTE(review): geometry.x / geometry.y are passed in the positions that h3 v3
# documents as (lat, lng). The hex grid in this notebook is built with the
# same argument order, so the lookup is self-consistent -- confirm before
# changing it.
destinations['hex_id'] = [
    h3.geo_to_h3(location.x, location.y, CONST_graph_detail_lvl)
    for location in destinations['geometry']
]

# graph node of the hex each destination falls into
destinations['node_id'] = [
    hexes_df.at[hex_id, 'node']
    for hex_id in destinations['hex_id']
]

# An earlier variant stored one nk.distance.Dijkstra solver per destination in
# a 'dij' column (via a calculate_paths(hex_id) helper); it is disabled.

destinations

Unnamed: 0,id,name,no_alle,no_activity,no_reise,no_bi,no_gg,x,y,prazision,typ,noten,region1,region2,region3,geometry,hex_id,node_id
0,7.0,Altenburg,6.0,1.0,1.0,4.0,0.0,48.65,15.59,3.0,gemeinde,,Niederösterreich,Nordwestniederösterreich,Nordwestniederösterreich,POINT (15.59000 48.65000),88525310e7fffff,545553
1,14.0,Arnoldstein,2.0,1.0,1.0,0.0,0.0,46.55,13.75,1.0,gemeinde,,Kärnten,Ostkärnten,Ostkärnten,POINT (13.75000 46.55000),8852e32f69fffff,1348611
2,21.0,Balga,2.0,1.0,1.0,0.0,0.0,54.56,19.97,1.0,gemeinde,,Preussen (Ausland),Preussen (Ausland),Preussen (Ausland),POINT (19.97000 54.56000),8843969553fffff,1414481
3,30.0,Beroun,4.0,2.0,2.0,0.0,0.0,49.96,14.07,1.0,gemeinde,,Böhmen,Mittelböhmen,Mittelböhmen,POINT (14.07000 49.96000),88525a75d7fffff,63085
4,41.0,Bratislava,29.0,12.0,12.0,5.0,0.0,48.14,17.10,1.0,gemeinde,,Ungarn,Westungarn,Westungarn,POINT (17.10000 48.14000),8852547529fffff,1150170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,652.0,Žatec,4.0,1.0,1.0,1.0,1.0,50.33,13.54,1.0,gemeinde,,Böhmen,Nordwestböhmen,Nordwestböhmen,POINT (13.54000 50.33000),8863b4a865fffff,1142308
128,653.0,Zbraslav,3.0,3.0,0.0,0.0,0.0,49.98,14.39,1.0,gemeinde,,Böhmen,Mittelböhmen,Mittelböhmen,POINT (14.39000 49.98000),88525a070bfffff,1490973
129,667.0,Znojmo,59.0,27.0,27.0,3.0,2.0,48.86,16.05,1.0,gemeinde,,Mähren,Znaimer Teil,Znaimer Teil,POINT (16.05000 48.86000),885251593bfffff,1509496
130,668.0,Zvíkov,5.0,2.0,2.0,1.0,0.0,49.44,14.19,1.0,gemeinde,,Böhmen,Südböhmen,Südböhmen,POINT (14.19000 49.44000),88525a58c5fffff,514661


In [13]:
# one record per ordered (origin, destination) pair, including a destination
# paired with itself
paths = []
for (id_from, name_from, hex_from, node_from) in destinations[['name', 'hex_id', 'node_id']].itertuples():
    for (id_to, name_to, hex_to, node_to) in destinations[['name', 'hex_id', 'node_id']].itertuples():
        paths.append({
            "name_from": name_from,
            "name_to": name_to,
            "hex_from": hex_from,
            "hex_to": hex_to,
            "node_from": node_from,
            "node_to": node_to,
        })
paths_df = pd.DataFrame(paths)

def calculate_path(path):
    """Run Dijkstra from path['node_from'] towards path['node_to'].

    Returns the finished nk.distance.Dijkstra solver for that pair.
    """
    dij = nk.distance.Dijkstra(g, path['node_from'], True, False, target = path['node_to'])
    dij.run()
    return dij

def _path_distance(path):
    """Shortest-path distance for one pair; the solver is dropped right away."""
    return calculate_path(path).distance(path['node_to'])

# Only the scalar distance is kept. Storing the solver itself for every pair
# (the former 'dij' column) keeps one set of per-node arrays for the whole
# graph alive per row, which does not fit in memory for a grid of this size.
paths_df['dist'] = paths_df.apply(_path_distance, axis=1)

In [1]:
# display the pairwise table; paths_df must already exist in the running kernel
paths_df

NameError: name 'paths_df' is not defined

In [2]:
# compute the shortest path (distance and geometry) between every pair of destinations

paths = []


def _node_center(node_id):
    """Return the centre point of the hex that belongs to a graph node."""
    center = hexes_df.at[node_to_hex[node_id], 'center']
    # 'center' is WKT text when hexes_df was re-read from hexes.csv
    return wkt.loads(center) if isinstance(center, str) else center


destination_rows = list(destinations[['name', 'hex_id', 'node_id']].itertuples())

for (id_from, name_from, hex_from, node_from) in destination_rows:
    # a single run without a target yields the distances and paths to every
    # destination, so the solver runs once per origin instead of once per pair
    dij = nk.distance.Dijkstra(g, node_from, True, False)
    dij.run()

    for (id_to, name_to, hex_to, node_to) in destination_rows:
        dist = dij.distance(node_to)
        path = dij.getPath(node_to)

        path_points = [_node_center(i) for i in path]
        if len(path_points) == 1:
            # origin and destination share a node: LineString rejects a single
            # vertex, so the point is repeated to form a zero-length line
            path_points = path_points * 2
        path_hexes = LineString(path_points)

        paths.append(
            {
                "from": name_from,
                "to": name_to,
                "dist": dist,
                "geometry": path_hexes
            }
        )


NameError: name 'destinations' is not defined

In [1]:
# distance between every pair of destinations (variant without path geometry)

paths = []

destination_rows = list(destinations[['name', 'hex_id', 'node_id']].itertuples())

for (id_from, name_from, hex_from, node_from) in destination_rows:
    # The solver is created here, once per origin. This cell used to read it
    # from a destinations['dij'] column, but the only code that would fill
    # that column is commented out.
    dij = nk.distance.Dijkstra(g, node_from, True, False)
    dij.run()

    for (id_to, name_to, hex_to, node_to) in destination_rows:
        dist = dij.distance(node_to)

        # geometry is intentionally left out in this variant:
        #path_hexes = LineString([hexes_df.at[node_to_hex[i], 'center'] for i in dij.getPath(node_to)])

        paths.append(
            {
                "from": name_from,
                "to": name_to,
                "dist": dist,
                #"geometry": path_hexes
            }
        )

In [2]:
# export paths

paths_df = gpd.GeoDataFrame(paths, crs="epsg:4326")

# the same table is written twice: as a shapefile and as GeoJSON
export_targets = (
    ('paths.shp', {"driver": "ESRI Shapefile", "encoding": "utf-8"}),
    ('paths.geojson', {"driver": "GeoJSON"}),
)
for file_name, writer_options in export_targets:
    paths_df.to_file(PATH_OUT + file_name, **writer_options)

NameError: name 'gpd' is not defined

In [111]:
# create a destination distance matrix table

# read the exported paths; the context manager closes the file and the
# encoding is set explicitly so place names do not depend on the platform default
with open(PATH_OUT + 'paths.geojson', encoding='utf-8') as paths_file:
    paths_dict = json.load(paths_file)

# one row per origin: {'origin': name, <destination name>: distance, ...}
origins = {}
for feat in paths_dict['features']:
    properties = feat['properties']
    origin_row = origins.setdefault(properties['from'], {'origin': properties['from']})
    origin_row[properties['to']] = properties['dist']

dist_df = pd.DataFrame(list(origins.values())).set_index('origin')
dist_df.to_csv(PATH_OUT + 'dist_m.csv')

# spot check: both directions of one pair
print(dist_df.at['Brno', 'Praha'])
print(dist_df.at['Praha', 'Brno'])


8.65660766104275
8.65660766104275
