# Elevation Cleaning and Assignment to Links
Future features, if there were time:
- Add in more topology checks to identify bridges
    - see if road crosses tunnel/river/railroad/etc

In [None]:
import geopandas as gpd
import pandas as pd
import rasterio
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from shapely.geometry import box, mapping
from shapely.ops import Point
from tqdm import tqdm
import pickle

from bikewaysim.paths import config
from bikewaysim.network import elevation_tools

## Import non-network version of osm

In [None]:
# load the 'raw' layer of the downloaded OSM geopackage
osm_fp = config['osmdwnld_fp'] / "osm.gpkg"
links = gpd.read_file(osm_fp, layer='raw')

# the DEM CRS is read from a text file; reproject the links to it
dem_crs = (config['network_fp'] / 'dem_crs.txt').read_text()
links.to_crs(dem_crs, inplace=True)

## Import sampled elevation data

In [None]:
# load the dict of interpolated points with sampled elevation data
# (indexed by link id in the cells below; each entry exposes an 'elevations' array)
elevation_fp = config['network_fp'] / 'elevation_w_lidar.pkl'
with elevation_fp.open('rb') as src:
    interpolated_points_dict = pickle.load(src)

## Remove links if no elevation data found

In [None]:
#some below zero elevations near the airport
# collect the id of every link with at least one negative elevation sample
error = [
    linkid
    for linkid, item in interpolated_points_dict.items()
    if (item['elevations'] < 0).any()
]
len(error)
# links[links['osmid'].isin(error)].explore()

In [None]:
# Drop every link flagged in `error` (links with a below-zero elevation sample).
# A set makes each membership test O(1) instead of scanning the list per key.
flagged = set(error)
interpolated_points_dict = {key:item for key,item in interpolated_points_dict.items() if key not in flagged}

# Keep only the links that still have elevation data. The explicit .copy()
# makes `links` an independent frame, so the column assignments made later
# (e.g. links['label'] = ...) do not trigger pandas' SettingWithCopyWarning.
links = links[links['osmid'].isin(list(interpolated_points_dict))].copy()

In [None]:
# overall elevation range: take each link's own max/min, then reduce across links
link_maxima = [item['elevations'].max() for item in interpolated_points_dict.values()]
link_minima = [item['elevations'].min() for item in interpolated_points_dict.values()]
max_elev = np.max(link_maxima)
min_elev = np.min(link_minima)
print('Max Elevation:',max_elev,'m','Min Elevation:',min_elev,'m')

In [None]:
# many of the low elevation points near rivers
# min_elevs = [key for key, item in interpolated_points_dict.items() if any(item['elevations'] < 250) ]
# links[links['osmid'].isin(min_elevs)].explore()

# Selected Examples

In [None]:
# linkid = 79424672
# grade_threshold = 10
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

In [None]:
# linkid = 42106698
# grade_threshold = 20
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

In [None]:
# linkid = 26800206
# grade_threshold = 20
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

In [None]:
# interpolated_points_dict[linkid] = elevation_tools.point_knockout(interpolated_points_dict[linkid],8)
# interpolated_points_dict[linkid]['elevations']
# linkid = 26800206
# grade_threshold = 8
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)
# x = interpolated_points_dict[linkid]['distances']
# y = interpolated_points_dict[linkid]['elevations']
# test = elevation_tools.elevation_stats(x,y,80)
# test.keys()
# test['descent_grade']
# test['bad_ascent_grades']
# test['bad_descent_grades']
# test['distance_deltas']

<!-- Local road that I know has steep grades: -->

In [None]:
# linkid = 340365816
# grade_threshold = 15
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True,lidar=True)

<!-- Northside Drive as "tertiary" road with a small segment above 15% -->

In [None]:
# linkid = 352003174
# grade_threshold = 15
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True,lidar=True)

<!-- Unpaved trail -->

In [None]:
# linkid = 1087991070
# grade_threshold = 30
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

<!-- BeltLine -->

In [None]:
# linkid = 226119768
# grade_threshold = 15
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

In [None]:
# linkid = 741964053
# grade_threshold = 30
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

<!-- Circular golf course loop -->

In [None]:
# linkid = 1087991070
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

In [None]:
# linkid = 569529892
# grade_threshold = 4
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True,lidar=True)

# Replace dem elevation values with lidar where available

In [None]:
# Where a link carries lidar samples, prefer them over the DEM samples and
# fall back to the DEM value at any position whose lidar value is NaN.
for key, item in interpolated_points_dict.items():
    if 'lidar' not in item:
        continue
    merged = [
        dem if np.isnan(lidar) else lidar
        for lidar, dem in zip(item['lidar'], item['elevations'])
    ]
    # `item` is the same dict object stored under `key`, so this updates the entry in place
    item['elevations'] = np.array(merged)

# Point Knockout
- Calculate segment grades (rise/10m or rise/length if link length was less than 10m)
- Define a segment grade threshold by OSM highway type (e.g., motorway, local, secondary, etc.)
    - Highways/Interstates > 8%
    - Most local roads > 15%
    - Few local roads > 25%
- Knock out elevations where the associated grade change exceeds the threshold
- Repeat until no grade changes are above the set threshold
    - If threshold is too low this will remove too many points
    - Just start and end will be used 
- Spline fit on the remaining data for the interpolation step

How it differs from Hongyu's Method:
- Find first grade change (ascent or descent) exceeding threshold
- Search 30m, 100m, or 150m after and find the last opposite grade change exceeding threshold
- Remove first to last point
- Infill with the spline fit
- Our segments are too short for this but this approach could be explored in the future

## Define Grade Thresholds
Don't set too high of a threshold

In [None]:
# Grade threshold applied to each link label during point knockout.
# Insertion order is kept because the knockout cell iterates over this dict.
grade_thresholds = dict(
    tunnel=8,
    bridge=8,
    roads=8,
    local=20,
    bikeped=20,
    everything_else=25,
)

In [None]:
# Assign exactly one label per link. Rules are applied in order and a rule only
# fills links that are still unlabelled, so earlier rules take precedence.
links['label'] = None

# highway values grouped under 'roads'
# (besides motorway/arterial classes this list also holds tertiary, service,
# unclassified, living_street, raceway and proposed)
motorway = ['motorway','motorway_link',
            'trunk', 'trunk_link',
            'primary','primary_link',
            'secondary','secondary_link',
            'raceway', 'proposed','tertiary','tertiary_link','service', 'unclassified','living_street']

# highway values grouped under 'local'
local = ['residential']

# highway values grouped under 'bikeped'
bikeped = ['path','footway','pedestrian','cycleway']

# (label, candidate mask) pairs in priority order; any non-null tunnel/bridge
# value counts as a match
label_rules = [
    ('tunnel', links['tunnel'].notna()),
    ('bridge', links['bridge'].notna()),
    ('roads', links['highway'].isin(motorway)),
    ('local', links['highway'].isin(local)),
    ('bikeped', links['highway'].isin(bikeped)),
]
for label_name, matches in label_rules:
    links.loc[matches & links['label'].isna(),'label'] = label_name

# whatever is still unlabelled falls into the catch-all group
links.loc[links['label'].isna(),'label'] = 'everything_else'

In [None]:
# tally how many links received each label
links['label'].value_counts()

In [None]:
from importlib import reload
# re-import the module so that edits to elevation_tools are picked up in this session
reload(elevation_tools)

# Run point knockout label by label, using the grade threshold set for that label.
for label, grade_threshold in grade_thresholds.items():
    #identify links with grades exceeding the threshold
    labelled_links = links.loc[links['label']==label,'osmid'].tolist()
    exceeds = elevation_tools.exceeds_threshold(labelled_links,interpolated_points_dict,grade_threshold)
    # NOTE(review): the denominator is the total number of links in the dict,
    # not the number of links carrying this label
    print(len(exceeds),'/',len(interpolated_points_dict),label,'links exceed the threshold')
    #for the links that exceed the threshold, do point knockout
    for linkid in tqdm(exceeds):
        # Index directly: an id missing from the dict now fails here with a
        # KeyError naming the link, instead of a placeholder 0 being handed to
        # point_knockout.
        item = interpolated_points_dict[linkid]
        item = elevation_tools.point_knockout(item,grade_threshold)
        interpolated_points_dict[linkid] = item

# links whose elevation array now contains at least one NaN
check = [key for key, item in interpolated_points_dict.items() if np.isnan(item['elevations']).any()]
print(len(check),'links had at least one point knocked out')

Check some of the results

In [None]:
import random
# pick one link at random from `check` (links with at least one NaN elevation);
# random.choice raises IndexError if `check` is empty
linkid = random.choice(check)
print(linkid)
print(interpolated_points_dict[linkid]['elevations'])
# NOTE(review): fixed at 10 here, independent of the per-label thresholds in grade_thresholds
grade_threshold = 10
# last expression of the cell, so whatever visualize returns is shown as the cell output
elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

Export into QGIS to examine

In [None]:
# per-link elevation statistics, one row per link id
# (columns come from elevation_tools.simple_elevation_stats; the ones used
# below are ascent_m, descent_m, ascent_grade_% and descent_grade_%)
export = pd.DataFrame.from_dict(
    {
        linkid: elevation_tools.simple_elevation_stats(item['distances'], item['elevations'])
        for linkid, item in interpolated_points_dict.items()
    },
    orient='index',
)

# attach the statistics to the link geometries through the osmid
df = links.merge(export, left_on='osmid', right_index=True)

# metres -> feet
FT_PER_M = 3.28084
df['ascent_ft'] = df['ascent_m'] * FT_PER_M
df['descent_ft'] = df['descent_m'] * FT_PER_M

# larger of the two grade magnitudes on each row
grade_values = df[['ascent_grade_%','descent_grade_%']].to_numpy()
df['max_grade'] = np.abs(grade_values).max(axis=1)

# reproject and write to a scratch geopackage for inspection in QGIS
gdf = gpd.GeoDataFrame(df, crs=dem_crs)
gdf.to_crs(config['projected_crs_epsg'], inplace=True)
scratch_fp = Path.home()/'Downloads/scratch.gpkg'
gdf.to_file(scratch_fp, layer='raw_grade')

In [None]:
# gdf[(gdf['ascent_m']>100) | (gdf['descent_m']>100)].explore()

In [None]:
# number of links whose max_grade is above 20
gdf.loc[gdf['max_grade']>20].shape[0]

# Spline Fit
For all the links, fit a spline for the next step.

In [None]:
# #spline fit
# import numpy as np
# from scipy.interpolate import splrep, splev, BSpline

# key = random.choice(list(interpolated_points_dict.keys()))
# item = interpolated_points_dict[key]

# too_short = [ ]

# df = pd.DataFrame({'distance':item['distances'],'elevation':item['elevations']})

# #remove na values
# df = df[df.notna().all(axis=1)]

# #in this case, just do linear interpolation between the two values
# # if df.shape[0] <= 3:
# #     too_short.append(key)
# #     continue

# #fit a spline
# spline = splrep(df['distance'], df['elevation'], s=0.5)

# #add spline to dict
# # interpolated_points_dict[key]['spline'] = spline

# #TODO add this feature
# #get smoothed elevations
# #get new elevation values
# # new_xs = np.arange(0,xs[-1],1)

# new_elevations = splev(item['distances'], spline)

# fig, ax = plt.subplots()
# ax.plot(df['distance'],df['elevation'],'-')
# ax.plot(item['distances'],new_elevations,'-.')
# ax.set_ylim(min_elev,max_elev)

# interpolated_points_dict[key]['smoothed'] = new_elevations

# grade_threshold = 15
# linkid = key
# elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True,lidar=True)

In [None]:
#spline fit
import numpy as np
from scipy.interpolate import splrep, splev, BSpline

# ids of links with too few valid points to fit a spline
too_short = []

for key, item in tqdm(interpolated_points_dict.items()):
    # pair distances with elevations and discard any row with a missing value
    profile = pd.DataFrame({'distance': item['distances'], 'elevation': item['elevations']}).dropna()

    # 3 or fewer usable points: no spline is fitted, the link id is only recorded
    if len(profile) <= 3:
        too_short.append(key)
        continue

    # fit a smoothing spline of elevation against distance (s=0.5) and store
    # it on the link's entry for the next step
    item['spline'] = splrep(profile['distance'], profile['elevation'], s=0.5)

    #TODO add this feature
    #get smoothed elevations

In [None]:
# number of links skipped by the spline fit because they had 3 or fewer valid points
print(len(too_short))

In [None]:
# inspect one of the skipped links (raises IndexError if too_short has fewer than 4 entries)
interpolated_points_dict[too_short[3]]

In [None]:
# links[links['osmid'].isin(too_short)].explore()

In [None]:
# save the dict (with the fitted splines added above) for the next step
spline_fp = config['network_fp'] / "spline_fit_elevation.pkl"
with spline_fp.open('wb') as out:
    pickle.dump(interpolated_points_dict, out)

# Move on to step 7

In [None]:
# ## Find underpasses/tunnels and knockout elevated segments
# - Should have a distinct section that's high above everything else
# - Need to also bring in railroads
# - Can also be tagged as tunnels
# # #grab ones that are not bridges and don't exceed the threshold
# # not_bridges = exceeds_threshold.loc[exceeds_threshold['bridge'].isna(),['id','geometry']]
# # print(len(not_bridges),'of thse are not tagged as bridges')
# # bridges = links.loc[~links['bridge'].isna(),['id','geometry']]
# Check if bridge crosses non-bridge (won't include where a link connects to a bridge)

# # crossing_links = []
# # for idx, row in bridges.iterrows():
# #     bridge = row['geometry']
# #     crosses = not_bridges.loc[not_bridges.crosses(bridge),'id'].tolist()
# #     if len(crosses) > 0:
# #         crossing_links = crossing_links + crosses
# # mask = list(set(crossing_links))

# ## Smooth with [Whittaker-Eilers Method](https://towardsdatascience.com/the-perfect-way-to-smooth-your-noisy-data-4f3fe6b44440)

# Need fitted function for interpolation
# # for linkid, item in interpolated_points_dict.items():
    
# #     distances = np.array(item['distances'])
    
# #     if linkid in lidar_found:
# #         elevations = np.array(item['lidar'])
# #     else: 
# #         elevations = np.array(item['elevations'])

# #     whittaker_smoother = WhittakerSmoother(
# #     lmbda=150, order=2, data_length=len(elevations)
# #     )

# #     smoothed = whittaker_smoother.smooth(elevations)

# #     output = elevation_tools.elevation_stats(distances,smoothed,grade_threshold)

# #     #assign new entry in the dictionary
# #     interpolated_points_dict[linkid].update({
# #         'smoothed': smoothed,
# #         'smoothed_ascent': output['ascent'],
# #         'smoothed_descent': output['descent'], 
# #         'smoothed_ascent_grade': output['ascent_grade'],
# #         'smoothed_descent_grade': output['descent_grade'],
# #         'smoothed_bad_ascent_grades': output['bad_ascent_grades'],
# #         'smoothed_bad_descent_grades': output['bad_descent_grades'],
# #         'smoothed_segment_grades': output['segment_grades']
# #     })

# # #for storing the interpolated points with sampled elevation data
# # with (export_fp/'smoothed_elevation.pkl').open('wb') as fh:
# #     pickle.dump(interpolated_points_dict,fh)
# ## Measuring Grade
# Length of grade + grade matters. Grade is change in elevation over a length of road. 
# - Grade can be averaged over the entire link (separating up and down)
# - Grade can be averaged over just the section of up or down (expected grade going uphill)
# - Grade can be categorized (>3%, >6%, >10%, etc.) and the length in each category could be calculated

# If we're just concerned about the impact on travel time then an average value is probably fine, but if we're more concerned about a preference then categorizing and finding the lengths of each is probably more useful.

# In the route choice literature, studies either look at the average grade of the entire route (Hood 2011, Prato 2018) or break it into categories (e.g., the proportion of the route at 2-4% grade) (Broach 2012). Since we're estimating link-level impedances, we can be flexible and avoid taking averages if desired.

# Broach:
# - 2-4%
# - 4-6%
# - more than 6%