In [79]:
'''
03_create-paths.ipynb
Create a dataset of shortest paths between all destinations
'''

import requests
import urllib.request
import pathlib
import os
import shutil
import subprocess
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import nearest_points, unary_union
import math
import swifter

import json
import geojson
import gdal
import h3

import networkit as nk

import multiprocessing
from joblib import Parallel, delayed
num_cores = multiprocessing.cpu_count()

# Project directories, resolved relative to the current working directory.
PATH_ROOT = os.path.join(pathlib.Path().absolute(), '../..')
PATH_IN = '{}/data/02_processed/'.format(PATH_ROOT)
PATH_OUT = '{}/data/03_paths/'.format(PATH_ROOT)

# --- constants ---

# H3 resolution used for the hexagon grid and for locating destinations.
CONST_graph_detail_lvl = 8

# Base travel speed on flat terrain used by calculate_path_time
# (presumably kilometres per day, judging by the name - TODO confirm).
CONST_speed_day_km = 40

# Exponent applied to the slope coefficient when it scales the base speed.
CONST_slope_effect_multiplier = 2

# Not referenced in the code visible in this notebook
# (presumably 10 km expressed in degrees - TODO confirm).
CONST_distance_bridge = 10 / 111

# Sentinel "elevation" given to hexes that are not passable in the model
# (river without bridge, sea, outside the elevation raster).
CONST_river_value = 9999

# critical slope algorithm: slope (in percent) at which slope_coeff drops to 0.5
CONST_critical_slope = 4

# for tobler application (only used by the commented-out variant of slope_coeff)
CONST_elevation_coefficient = 1

def slope_coeff(slope):
    """Return the speed-reduction coefficient for a slope.

    ``slope`` is rise over run as a plain ratio. Its absolute value is
    converted to percent and divided by ``CONST_critical_slope``; the result
    is ``1 / (1 + ratio ** 2)``. Flat terrain therefore gives 1, a slope equal
    to the critical slope gives 0.5, and steeper slopes tend towards 0.
    Because the sign is dropped, uphill and downhill are treated alike.
    """
    # Earlier Tobler-style variant, kept for reference:
    #   math.exp(-3.5 * abs(slope + 0.05)) ** CONST_elevation_coefficient
    slope_percent = abs(slope) * 100
    relative_steepness = slope_percent / CONST_critical_slope
    return 1 / (1 + relative_steepness ** 2)

def calculate_path_time(value_from, value_to, dist):
    """Return the travel time between two neighbouring hexes.

    Parameters
    ----------
    value_from, value_to : number
        Path values (elevations) of the two hexes. Their difference is divided
        by 1000, so they are presumably metres converted to kilometres -
        TODO confirm.
    dist : number
        Horizontal distance between the hexes, in the same unit as the
        converted rise. Must not be zero.

    Returns
    -------
    float
        Length of the sloped segment divided by the slope-adjusted speed
        ``CONST_speed_day_km * slope_coeff(slope) ** CONST_slope_effect_multiplier``.
        The result does not depend on direction because only the magnitude of
        the slope is used.
    """
    rise = (value_to - value_from) / 1000
    slope_c = slope_coeff(rise / dist)

    # speed after the slope penalty has been applied
    speed = CONST_speed_day_km * abs(slope_c ** CONST_slope_effect_multiplier)

    # length of the segment along the incline (hypotenuse of rise and run)
    el_dist = math.sqrt((rise ** 2) + (dist ** 2))

    return el_dist / speed

# Sanity check: the travel time must be the same uphill and downhill.
print(calculate_path_time(50, 200, 10))
print(calculate_path_time(200, 50, 10))

crs4326 = {'init': 'epsg:4326'}

# Distance used between the centers of two neighboring hexes.
# NOTE(review): for a regular hexagon the center-to-center spacing of
# neighbors is sqrt(3) * edge length, while 2 * edge length is the
# vertex-to-vertex diameter - confirm this approximation is intended.
hex_distance = h3.edge_length(CONST_graph_detail_lvl) * 2
print('hex_distance', hex_distance)

# the same distance expressed in degrees (roughly 111 km per degree)
CONST_hex_dist = hex_distance / 111

# --- Load datasets ---

destinations = gpd.read_file(PATH_IN + 'destinations.geojson')

# all river geometries merged into a single geometry
rivers_df = gpd.read_file(PATH_IN + 'rivers.geojson')
rivers = unary_union(list(rivers_df['geometry']))

# every bridge is buffered by hex_distance / 222 before the geometries are merged
bridges_df = gpd.read_file(PATH_IN + 'bridges.geojson')
bridges = unary_union(
    [bridge_geom.buffer(hex_distance / 222) for bridge_geom in bridges_df['geometry']]
)

# bounding box of the study area as a closed GeoJSON polygon ring;
# total_bounds is (minx, miny, maxx, maxy)
bbox = gpd.read_file(PATH_IN + 'bbox.geojson')
bb_xy = bbox.total_bounds

bounds_json = dict(
    geojson.Polygon(
        [[
            [bb_xy[xi], bb_xy[yi]]
            for xi, yi in ((0, 1), (0, 3), (2, 3), (2, 1), (0, 1))
        ]]
    ))


0.3252929369373301
0.3252929369373301
hex_distance 0.922709368


In [80]:
# load elevation

elevation = gdal.Open(PATH_IN + 'elevation.tif')
# gdal.Open returns None instead of raising when exceptions are not enabled;
# fail here with a clear message rather than with an AttributeError below.
if elevation is None:
    raise FileNotFoundError('could not open elevation raster: ' + PATH_IN + 'elevation.tif')

band = elevation.GetRasterBand(1)
cols = elevation.RasterXSize
rows = elevation.RasterYSize

# geotransform: [0] x of the raster origin, [1] pixel width,
#               [3] y of the raster origin, [5] pixel height
# The sign of [5] is flipped so that pixelHeight can be used as a positive
# step when converting a y coordinate into a row index.
transform = elevation.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = -transform[5]

# whole first band as a 2D array indexed [row][col]
elevation_data = band.ReadAsArray(0, 0, cols, rows)

# check whether there is river or a bridge
def path_value_point (hex_center):
    """Return the path value of a hex given its center point.

    A hex whose center lies closer than half a hex distance
    (``CONST_hex_dist / 2``) to ``rivers`` and not that close to ``bridges``
    gets ``CONST_river_value``. Every other hex gets the value returned by
    ``elevation_point`` for its center.
    """
    reach = CONST_hex_dist / 2

    # the bridge distance is only evaluated for hexes that are on a river
    if hex_center.distance(rivers) < reach and not hex_center.distance(bridges) < reach:
        return CONST_river_value

    return elevation_point(hex_center)

# get elevation value for the given geographical point
def elevation_point (point):
    """Return the elevation raster value under ``point``.

    ``point`` must expose ``x`` and ``y`` attributes in the coordinate system
    of the raster. The coordinates are converted to a (row, col) cell index
    using the raster origin and pixel size.

    Returns ``CONST_river_value`` when the point falls outside the raster on
    any side, or when the cell value is <= 0 (treated as sea); otherwise the
    cell value as ``int``.
    """
    # floor (not int) so that coordinates just outside the top/left edge map
    # to index -1 instead of being truncated to 0
    row = math.floor((yOrigin - point.y) / pixelHeight)
    col = math.floor((point.x - xOrigin) / pixelWidth)

    # check both bounds: a negative index would otherwise silently wrap
    # around to the opposite side of the array
    inside = 0 <= row < elevation_data.shape[0] and 0 <= col < elevation_data.shape[1]
    if not inside:
        return CONST_river_value

    el_value = int(elevation_data[row][col])
    if el_value <= 0: # sea
        return CONST_river_value
    return el_value

In [81]:
# construct hexes dataframe

# NOTE(review): bounds_json holds [x, y] pairs while h3.polyfill is called
# without geo_json_conformant=True, and h3.h3_to_geo returns (lat, lng) that is
# passed straight to Point(x, y). The two swaps appear to cancel out (centers
# come out as POINT(lng lat)) - confirm the resulting grid is what is intended.
hex_ids = list(h3.polyfill(bounds_json, CONST_graph_detail_lvl))

hexes_df = pd.DataFrame(hex_ids, columns=['id'])

# center point of every hex
hexes_df['center'] = hexes_df.swifter.apply(
    lambda x: Point(h3.h3_to_geo(x['id'])),
    axis = 1
)

# path value of every hex: elevation, or CONST_river_value where not passable
hexes_df["value"] = hexes_df.swifter.apply(
    lambda x: path_value_point(x['center']),
    axis = 1
)
hexes_df.set_index('id', inplace=True)

# find neighboring hexes: destination hex of every edge leaving the hex
hexes_df['neighbors'] = hexes_df.swifter.apply(
    lambda x: \
        [
            h3.get_destination_h3_index_from_unidirectional_edge(n) 
            for n in h3.get_h3_unidirectional_edges_from_hexagon(x.name)\
        ],
    axis = 1
)

# save hexes_df; make sure the output directory exists, otherwise to_csv
# fails only after the long computations above have finished
os.makedirs(PATH_OUT, exist_ok=True)
hexes_df.to_csv(PATH_OUT + 'hexes.csv')

Pandas Apply: 100%|██████████| 1711237/1711237 [01:23<00:00, 20616.54it/s]
Pandas Apply: 100%|██████████| 1711237/1711237 [05:09<00:00, 5530.55it/s]
Pandas Apply: 100%|██████████| 1711237/1711237 [02:07<00:00, 13391.96it/s]


In [82]:
# preview of the hexes table: center point, path value and neighbor ids per hex id
hexes_df

Unnamed: 0_level_0,center,value,neighbors
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8863abc46dfffff,POINT (11.98753205650361 54.6062090099005),0,"[8863abc717fffff, 8863abc733fffff, 8863abc469f..."
885243744bfffff,POINT (16.63318451837275 51.12748356732553),133,"[8852437637fffff, 885243745dfffff, 8852437441f..."
8863120921fffff,POINT (14.35346628184218 54.06483614073171),0,"[8863120929fffff, 8863120927fffff, 886312092df..."
88525078a5fffff,POINT (16.02774787360705 48.25449179193012),222,"[88525078a7fffff, 88525078a1fffff, 88525079d3f..."
8843b232a9fffff,POINT (21.13037741250674 53.95912358150742),140,"[8843b23285fffff, 8843b232a1fffff, 8843b232e3f..."
...,...,...,...
885240c205fffff,POINT (18.08612404648967 50.5061869149276),227,"[885240c207fffff, 885240c229fffff, 885240c201f..."
8852564acdfffff,POINT (16.33577573641552 46.98197703675595),9999,"[8852564127fffff, 8852564ac9fffff, 8852564a1bf..."
8852440357fffff,POINT (18.85013231874983 50.54521500297682),294,"[8852440319fffff, 885244030bfffff, 8852440355f..."
8863a2b8e7fffff,POINT (13.34503156747253 52.17901731590549),37,"[8863a2b8adfffff, 8863a2b8e1fffff, 8863a2b8e5f..."


In [83]:
# create weighted non-directional graph for all combinations of neighboring hexes

from ast import literal_eval

# hexes.csv is a plain table without a geometry layer, so it is read with
# pandas; columns that were serialised as text are restored afterwards.
hexes_df = pd.read_csv(PATH_OUT + 'hexes.csv', dtype={'id': str})
hexes_df['center'] = hexes_df['center'].apply(wkt.loads)
hexes_df['neighbors'] = hexes_df['neighbors'].apply(literal_eval)
hexes_df["value"] = pd.to_numeric(hexes_df["value"])
hexes_df.set_index('id', inplace=True)

# One graph node per hex. The nodes are allocated up front, so the node id of
# a hex is simply its row position (0 .. n-1) and does not depend on how often
# pandas would call a row-wise function.
g = nk.Graph(len(hexes_df), weighted=True, directed=False)
hexes_df['node'] = range(len(hexes_df))

# node_id -> hex_id shorthand dictionary
node_to_hex = dict(zip(hexes_df['node'], hexes_df.index))

# hex_id -> node_id / value dictionaries (plain dict lookups in the edge loop)
hex_to_node = dict(zip(hexes_df.index, hexes_df['node']))
hex_to_value = dict(zip(hexes_df.index, hexes_df['value']))

for hex_from_id, neighbors in zip(hexes_df.index, hexes_df['neighbors']):
    node_from = hex_to_node[hex_from_id]
    value_from = hex_to_value[hex_from_id]
    for hex_to_id in neighbors:
        # neighbors outside the study area are not part of the graph
        node_to = hex_to_node.get(hex_to_id)
        if node_to is None:
            continue
        # Each pair of neighbors lists the other one, and the travel time is
        # the same in both directions; adding the edge only from the lower
        # node id avoids storing every undirected edge twice.
        if node_to <= node_from:
            continue
        value_to = hex_to_value[hex_to_id]
        g.addEdge(node_from, node_to, calculate_path_time(value_from, value_to, hex_distance))

g.indexEdges()

In [84]:
# locate every destination in the hex grid (preparation for the dijkstra runs)

# id of the hex that contains the destination point
# NOTE(review): the point's x and y are passed as the first and second
# argument of h3.geo_to_h3, the same ordering with which the hex centers were
# built - verify this against the (lat, lng) order h3 documents.
destinations['hex_id'] = [
    h3.geo_to_h3(point.x, point.y, CONST_graph_detail_lvl)
    for point in destinations['geometry']
]

# graph node that belongs to that hex
destinations['node_id'] = [
    hexes_df.at[hex_id, 'node']
    for hex_id in destinations['hex_id']
]


# def calculate_paths(hex_id):
#     dij = nk.distance.Dijkstra(g, hexes_df.at[hex_id, 'node'], True, False)
#     dij.run()
#     return dij

# destinations['dij'] = destinations.swifter.apply(
#     lambda x: calculate_paths(x['hex_id']),
#     axis=1
# )
#destinations.dtypes

In [85]:
# find the shortest path between every ordered pair of destinations

paths = []

# (index, name, hex_id, node_id) of every destination, read once and reused
# by the outer and the inner loop
destination_rows = list(destinations[['name', 'hex_id', 'node_id']].itertuples())
destination_count = len(destinations)

for (id_from, name_from, hex_from, node_from) in destination_rows:

    percent_done = int(id_from / destination_count * 100)
    print('finding paths for destinations {}/{} ({}%)'.format(id_from, destination_count, percent_done))

    # one single-source run per origin; paths are stored so that getPath
    # can be used for every target afterwards
    dij = nk.distance.Dijkstra(g, node_from, True, False)
    dij.run()

    for (id_to, name_to, hex_to, node_to) in destination_rows:
        dist = dij.distance(node_to)
        path_nodes = dij.getPath(node_to)

        # the path as a line through the centers of the visited hexes
        path_line = LineString(
            [hexes_df.at[node_to_hex[node], 'center'] for node in path_nodes]
        )

        paths.append(
            {
                "from": name_from,
                "to": name_to,
                "dist": dist,
                "geometry": path_line
            }
        )


finding paths for destinations 0/132 (0%)
finding paths for destinations 1/132 (0%)
finding paths for destinations 2/132 (1%)
finding paths for destinations 3/132 (2%)
finding paths for destinations 4/132 (3%)
finding paths for destinations 5/132 (3%)
finding paths for destinations 6/132 (4%)
finding paths for destinations 7/132 (5%)
finding paths for destinations 8/132 (6%)
finding paths for destinations 9/132 (6%)
finding paths for destinations 10/132 (7%)
finding paths for destinations 11/132 (8%)
finding paths for destinations 12/132 (9%)
finding paths for destinations 13/132 (9%)
finding paths for destinations 14/132 (10%)
finding paths for destinations 15/132 (11%)
finding paths for destinations 16/132 (12%)
finding paths for destinations 17/132 (12%)
finding paths for destinations 18/132 (13%)
finding paths for destinations 19/132 (14%)
finding paths for destinations 20/132 (15%)
finding paths for destinations 21/132 (15%)
finding paths for destinations 22/132 (16%)
finding path

In [86]:
# export paths

def _simplified_geometry(row):
    """Return the row's geometry simplified (tolerance 0.01) and rounded to 3 decimals.

    The rounding is done by a WKT round trip with ``rounding_precision=3``.
    """
    simplified = row['geometry'].simplify(0.01, preserve_topology=True)
    rounded_wkt = wkt.dumps(simplified, rounding_precision=3)
    return wkt.loads(rounded_wkt)

paths_df = gpd.GeoDataFrame(paths, crs="epsg:4326")

# simplify geometries
paths_df['geometry'] = paths_df.apply(_simplified_geometry, axis=1)

#paths_df.to_file(PATH_OUT + 'paths.shp', driver="ESRI Shapefile", encoding="utf-8")
paths_df.to_file(PATH_OUT + 'paths.geojson', driver="GeoJSON")

ERROR:fiona._env:/home/adam/projects/itinerary-analysis_premysl-otakar/src/processing/../../data/03_paths/paths.geojson: No such file or directory


In [87]:
# create a destination distance matrix table

# Parse the exported GeoJSON directly from the file; the context manager
# closes the handle and the explicit encoding keeps non-ASCII destination
# names intact regardless of the platform default.
with open(PATH_OUT + 'paths.geojson', encoding='utf-8') as paths_file:
    paths_dict = json.load(paths_file)

# one record per origin: {'origin': <name>, <destination name>: <distance>, ...}
origins = {}
for feat in paths_dict['features']:
    properties = feat['properties']
    orig = properties['from']
    dest = properties['to']
    dist = properties['dist']
    origins.setdefault(orig, {'origin': orig})[dest] = dist

# rows are origins, columns are destinations
dist_df = pd.DataFrame(list(origins.values()))
dist_df.set_index('origin', inplace=True)
dist_df.to_csv(PATH_OUT + 'dist_m.csv')

# spot check: both directions should give (nearly) the same distance
print(dist_df.at['Brno', 'Praha'])
print(dist_df.at['Praha', 'Brno'])


9.614468463881705
9.614468463881702
