In [10]:
'''
03_create-paths.ipynb
Create a dataset of shortest paths between all destinations
'''

import pathlib
import os
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import nearest_points, unary_union
import math
import swifter
import json
import geojson
import h3
import networkit as nk
from osgeo import gdal

# Project root: two levels above the directory the notebook is run from.
PATH_ROOT = os.path.join(pathlib.Path().absolute(), '../..' )
# Folder the inputs are read from and folder the results are written to.
# Both stay plain strings with a trailing slash because the cells below build
# file names by concatenation (PATH_IN + 'name').
PATH_IN = PATH_ROOT + '/data/02_processed/'
PATH_OUT = PATH_ROOT + '/data/03_paths/'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of testing os.path.exists first.
os.makedirs(PATH_OUT, exist_ok=True)

# constants
# H3 resolution used for the hexagon grid (passed to h3.polyfill / h3.edge_length / h3.geo_to_h3)
CONST_graph_detail_lvl = 8
# base speed that calculate_path_time scales down on slopes
# (km per day, judging by the name — TODO confirm the unit)
CONST_speed_day_km = 40
# exponent applied to the slope coefficient in calculate_path_time
CONST_slope_effect_multiplier = 2
# not referenced in the cells visible here; presumably 10 km expressed in
# degrees (~111 km per degree) — TODO confirm
CONST_distance_bridge = 10 / 111
# sentinel stored instead of an elevation for river, sea and off-raster cells
CONST_river_value = 9999

# critical slope algorithm
# slope, in percent, at which slope_coeff returns exactly 0.5
CONST_critical_slope = 4

# for tobler application
# not referenced in the cells visible here
CONST_elevation_coefficient = 1

def slope_coeff(slope):
    """Return the speed-reduction factor for a slope.

    ``slope`` is rise over run as a plain ratio; its sign is ignored, so
    climbing and descending are penalised equally. The result is 1 on flat
    ground, 0.5 when the slope in percent equals CONST_critical_slope, and
    approaches 0 as the slope gets steeper.
    """
    slope_percent = abs(slope) * 100
    relative_steepness = slope_percent / CONST_critical_slope
    return 1 / (1 + relative_steepness ** 2)

def calculate_path_time(value_from, value_to, dist):
    """Return the cost of one step between two neighbouring cells.

    ``value_from`` / ``value_to`` are the cell values (an elevation, or
    CONST_river_value) and ``dist`` is the horizontal distance between the
    cells. The difference of the two values is divided by 1000 before it is
    combined with ``dist`` (presumably metres to kilometres — confirm).

    A step between two river cells costs a flat 10000. In every other case,
    including a step where only one end carries CONST_river_value (which is
    then treated like an ordinary elevation), the cost is the sloped length
    of the step divided by the slope-reduced speed.
    """
    if value_from == CONST_river_value and value_to == CONST_river_value:
        return 10000

    climb = (value_to - value_from) / 1000
    gradient = climb / dist

    # base speed scaled by the slope coefficient raised to the configured power
    penalty = slope_coeff(gradient) ** CONST_slope_effect_multiplier
    speed = CONST_speed_day_km * abs(penalty)

    # length of the step along the slope rather than on the map
    sloped_dist = math.sqrt((climb ** 2) + (dist ** 2))
    return sloped_dist / speed

# sanity-check the cost function: a river-to-river step, then the same
# height difference taken uphill and downhill
for sample_args in (
    (CONST_river_value, CONST_river_value, 10),
    (50, 200, 40),
    (200, 50, 40),
):
    print(calculate_path_time(*sample_args))

# Length assigned to one step between neighbouring hexagons: twice the H3
# edge length at the chosen resolution.
# NOTE(review): for a regular hexagon the centres of two neighbours are
# sqrt(3) x edge apart (about 1.73 x); 2 x edge is the corner-to-corner width.
# Confirm the factor 2 is an intended approximation.
hex_distance = 2 * h3.edge_length(CONST_graph_detail_lvl)
print('hex_distance', hex_distance)

# load datasets

# points between which the paths are computed
destinations = gpd.read_file(PATH_IN + 'destinations.geojson')

# every river geometry merged into a single geometry for distance queries
rivers_df = gpd.read_file(PATH_IN + 'rivers.geojson')
rivers = unary_union(list(rivers_df.geometry))

# every bridge grown by hex_distance / 222 and merged the same way
# (presumably half a hex step converted to degrees at ~111 km per degree — confirm)
bridges_df = gpd.read_file(PATH_IN + 'bridges.geojson')
bridge_buffer = hex_distance / 222
bridges = unary_union(
    [bridge_geom.buffer(bridge_buffer) for bridge_geom in bridges_df.geometry]
)

# bounding rectangle of the study area as a closed ring, corner by corner,
# in the order (min x, min y) -> (min x, max y) -> (max x, max y) -> (max x, min y)
bbox = gpd.read_file(PATH_IN + 'bbox.geojson')
bb_xy = bbox.total_bounds

bbox_ring = [
    [bb_xy[0], bb_xy[1]],
    [bb_xy[0], bb_xy[3]],
    [bb_xy[2], bb_xy[3]],
    [bb_xy[2], bb_xy[1]],
    [bb_xy[0], bb_xy[1]],
]
bounds_json = dict(geojson.Polygon([bbox_ring]))


10000
1.017662527983812
1.017662527983812
hex_distance 0.922709368


In [11]:
# load elevation
elevation_path = PATH_IN + 'elevation.tif'
elevation = gdal.Open(elevation_path)
if elevation is None:
    # gdal.Open reports failure by returning None unless gdal.UseExceptions()
    # is active; fail here with the path instead of an AttributeError on the
    # next line.
    raise RuntimeError('could not open elevation raster: ' + elevation_path)

band = elevation.GetRasterBand(1)
cols = elevation.RasterXSize
rows = elevation.RasterYSize

# geotransform entries used by elevation_point: coordinates of the raster
# origin and the pixel size in x and y
transform = elevation.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
# sign flipped so that rows can be counted as (yOrigin - y) / pixelHeight
pixelHeight = -transform[5]

# whole first band as an array indexed [row][col]
elevation_data = band.ReadAsArray(0, 0, cols, rows)

# check whether there is river or a bridge
def path_value_point(hex_center):
    """Return the value stored for a hexagon, given its centre point.

    A centre closer than hex_distance / 200 to the rivers, and not equally
    close to a (buffered) bridge, gets CONST_river_value. Every other centre
    gets whatever elevation_point returns for it. The bridge distance is only
    evaluated for centres that are close to a river.
    """
    proximity_limit = hex_distance / 200

    beside_river = hex_center.distance(rivers) < proximity_limit
    if beside_river and not (hex_center.distance(bridges) < proximity_limit):
        return CONST_river_value

    return elevation_point(hex_center)

# get elevation value for the given geographical point
def elevation_point(point):
    """Return the raster elevation under ``point`` or CONST_river_value.

    ``point`` needs ``x`` and ``y`` attributes in the raster's coordinates.
    CONST_river_value is returned when the point lies outside the raster on
    any side, or when the raster value there is <= 0 (treated as sea).
    Otherwise the raster value is returned as an int.
    """
    # floor (not int) so that a point just outside the top/left edge maps to
    # index -1 instead of being truncated to 0 and counted as inside
    row = math.floor((yOrigin - point.y) / pixelHeight)
    col = math.floor((point.x - xOrigin) / pixelWidth)

    # reject negative indices as well: numpy would otherwise wrap them around
    # and read the value from the opposite edge of the raster
    n_rows, n_cols = elevation_data.shape[0], elevation_data.shape[1]
    if not (0 <= row < n_rows and 0 <= col < n_cols):
        return CONST_river_value

    el_value = int(elevation_data[row][col])
    if el_value <= 0: # sea
        return CONST_river_value
    return el_value

In [12]:
# construct hexes dataframe

# NOTE(review): h3-py 3.x documents polyfill (with its default
# geo_json_conformant=False) and h3_to_geo as working in (lat, lng) order,
# while bounds_json and the Points built here are used as (x, y). The same
# order is used when destinations are mapped to hexes, so ids stay
# self-consistent, but the grid is presumably laid out with the axes swapped —
# confirm, and if so fix this cell together with the destination lookup.
hex_ids = list(h3.polyfill(bounds_json, CONST_graph_detail_lvl))
hexes_df = pd.DataFrame({'id': hex_ids})

# centre of every hexagon, built from the coordinate pair h3_to_geo returns
hexes_df['center'] = hexes_df.swifter.apply(
    lambda hex_row: Point(h3.h3_to_geo(hex_row['id'])),
    axis=1,
)

# value of every hexagon as decided by path_value_point
hexes_df['value'] = hexes_df.swifter.apply(
    lambda hex_row: path_value_point(hex_row['center']),
    axis=1,
)
hexes_df = hexes_df.set_index('id')

# find neighboring hexes: the destination of every edge leaving the hexagon
hexes_df['neighbors'] = hexes_df.swifter.apply(
    lambda hex_row: [
        h3.get_destination_h3_index_from_unidirectional_edge(edge)
        for edge in h3.get_h3_unidirectional_edges_from_hexagon(hex_row.name)
    ],
    axis=1,
)

# save hexes_df
hexes_df.to_csv(PATH_OUT + 'hexes.csv')

Dask Apply: 100%|██████████| 16/16 [00:33<00:00,  2.08s/it]
Dask Apply: 100%|██████████| 16/16 [00:55<00:00,  3.49s/it]
Pandas Apply: 100%|██████████| 1709641/1709641 [00:29<00:00, 57887.34it/s]


In [13]:
# preview the hexes table (columns: center, value, neighbors; indexed by hex id)
hexes_df

Unnamed: 0_level_0,center,value,neighbors
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
885240acd9fffff,POINT (17.8961824010076 51.25719386424838),185,"[885241db65fffff, 885240acd1fffff, 885240acdbf..."
885256c637fffff,POINT (16.79759787357567 47.22245680002387),177,"[885256c633fffff, 885256c45dfffff, 885256c459f..."
885251c9bdfffff,POINT (16.27404624365018 48.95542206754432),237,"[885251c9abfffff, 885251c987fffff, 885251c9b9f..."
885260566dfffff,POINT (20.56098346523435 50.79819521510542),226,"[885260cdbbfffff, 885260cdb3fffff, 8852605665f..."
8863adc0a3fffff,POINT (13.57264222972097 55.0287826928452),9999,"[8863adc0a1fffff, 8863adce49fffff, 8863adc0bdf..."
...,...,...,...
88535a2831fffff,POINT (20.86569124443697 48.25892511831216),225,"[88535a2835fffff, 88535a283bfffff, 88535a283df..."
8863a01999fffff,POINT (13.88838576022147 52.53377500903377),68,"[8863a01991fffff, 8863a018a5fffff, 8863a0199bf..."
8863849295fffff,POINT (12.77036712139547 52.23874374043172),56,"[8863849297fffff, 8863849291fffff, 88638492b9f..."
88520b7267fffff,POINT (16.5646954418298 46.30323703474963),149,"[88520b7263fffff, 88520b722dfffff, 88520b7261f..."


In [14]:
# create weighted non-directional graph for all combinations of neighboring hexes

from ast import literal_eval

# Reload the table saved as hexes.csv. It is a plain CSV, so pandas reads it
# directly; 'center' holds WKT text and 'neighbors' holds a Python list
# literal, and both are parsed back into objects here.
hexes_df = pd.read_csv(PATH_OUT + 'hexes.csv', dtype={'id': str})
hexes_df['center'] = hexes_df['center'].apply(wkt.loads)
hexes_df['neighbors'] = hexes_df['neighbors'].apply(literal_eval)
hexes_df["value"] = pd.to_numeric(hexes_df["value"])
hexes_df.set_index('id', inplace=True)

# one graph node per hexagon; node ids are the row positions 0..n-1
n_hexes = len(hexes_df)
g = nk.Graph(n_hexes, weighted=True, directed=False)
hexes_df['node'] = np.arange(n_hexes)

# node_id -> hex_id shorthand dictionary
node_to_hex = dict(zip(hexes_df['node'].tolist(), hexes_df.index.tolist()))

# plain dictionaries make the per-edge lookups below cheap
hex_to_node = hexes_df['node'].to_dict()
hex_to_value = hexes_df['value'].to_dict()

for hex_from_id, neighbors in hexes_df['neighbors'].items():
    node_from = hex_to_node[hex_from_id]
    value_from = hex_to_value[hex_from_id]
    for hex_to_id in neighbors:
        node_to = hex_to_node.get(hex_to_id)
        # skip neighbours outside the table, and add every undirected edge
        # only once (from its lower-numbered end): each pair is seen from
        # both sides and addEdge would otherwise store a parallel duplicate
        if node_to is None or node_to <= node_from:
            continue
        g.addEdge(
            node_from,
            node_to,
            calculate_path_time(value_from, hex_to_value[hex_to_id], hex_distance),
        )

g.indexEdges()

In [15]:
# calculate hex id and graph node for each destination

# NOTE(review): geometry.x / geometry.y are passed in that order although
# h3-py 3.x documents geo_to_h3 as (lat, lng, resolution). This matches the
# order used when the hex grid was built, so the ids line up — confirm before
# changing either side on its own.
destinations['hex_id'] = [
    h3.geo_to_h3(dest_point.x, dest_point.y, CONST_graph_detail_lvl)
    for dest_point in destinations['geometry']
]

# graph node of the hexagon each destination falls into; a destination whose
# hexagon is missing from hexes_df raises KeyError here
destinations['node_id'] = [
    hexes_df.at[dest_hex, 'node'] for dest_hex in destinations['hex_id']
]

In [16]:
# run dijkstra algorithm for all destinations

# One record per ordered (from, to) pair: names, total cost and the path as a
# line through the centres of the hexagons it visits.
# NOTE(review): the inner loop also visits the origin itself — confirm the
# resulting from == to records are wanted in the output.
paths = []
destination_nodes = destinations[['name', 'hex_id', 'node_id']]

for (id_from, name_from, hex_from, node_from) in destination_nodes.itertuples():

    print('finding paths for destinations {}/{} ({}%)'.format(id_from, len(destinations), int(id_from / len(destinations) * 100)))

    # single-source run from this destination; the same result serves every target
    dij = nk.distance.Dijkstra(g, node_from, True, False)
    dij.run()

    for (id_to, name_to, hex_to, node_to) in destination_nodes.itertuples():
        node_path = dij.getPath(node_to)
        path_centers = [hexes_df.at[node_to_hex[node], 'center'] for node in node_path]

        paths.append(
            {
                "from": name_from,
                "to": name_to,
                "dist": dij.distance(node_to),
                "geometry": LineString(path_centers)
            }
        )


finding paths for destinations 0/126 (0%)
finding paths for destinations 1/126 (0%)
finding paths for destinations 2/126 (1%)
finding paths for destinations 3/126 (2%)
finding paths for destinations 4/126 (3%)
finding paths for destinations 5/126 (3%)
finding paths for destinations 6/126 (4%)
finding paths for destinations 7/126 (5%)
finding paths for destinations 8/126 (6%)
finding paths for destinations 9/126 (7%)
finding paths for destinations 10/126 (7%)
finding paths for destinations 11/126 (8%)
finding paths for destinations 12/126 (9%)
finding paths for destinations 13/126 (10%)
finding paths for destinations 14/126 (11%)
finding paths for destinations 15/126 (11%)
finding paths for destinations 16/126 (12%)
finding paths for destinations 17/126 (13%)
finding paths for destinations 18/126 (14%)
finding paths for destinations 19/126 (15%)
finding paths for destinations 20/126 (15%)
finding paths for destinations 21/126 (16%)
finding paths for destinations 22/126 (17%)
finding pat

In [17]:
# export paths

def _simplified_rounded(path_row):
    """Simplify a path geometry (tolerance 0.01) and round it to 3 decimals.

    Rounding is done by writing the geometry to WKT with 3 decimals and
    reading it back.
    """
    simplified = path_row['geometry'].simplify(0.01, preserve_topology=True)
    return wkt.loads(wkt.dumps(simplified, rounding_precision=3))

# load paths
paths_df = gpd.GeoDataFrame(paths, crs="epsg:4326")

# simplify geometries
paths_df['geometry'] = paths_df.apply(_simplified_rounded, axis=1)

paths_df.to_file(PATH_OUT + 'paths.geojson', driver="GeoJSON")

In [18]:
# create a destination matrix table

# Read the exported paths back. The context manager closes the file handle
# and the explicit encoding avoids depending on the platform default.
with open(PATH_OUT + 'paths.geojson', encoding='utf-8') as paths_file:
    paths_dict = json.load(paths_file)

# origin name -> row of the matrix: {'origin': name, <destination>: cost, ...}
origins = {}
for feat in paths_dict['features']:
    props = feat['properties']
    matrix_row = origins.setdefault(props['from'], {'origin': props['from']})
    matrix_row[props['to']] = props['dist']

dist_df = pd.DataFrame(origins.values())

dist_df.set_index('origin', inplace=True)
dist_df.to_csv(PATH_OUT + 'dist_m.csv')

# spot check: the two directions of one pair should agree up to rounding
print(dist_df.at['Brno', 'Praha'])
print(dist_df.at['Praha', 'Brno'])


9.298953769272481
9.298953769272513
