# Elevation Cleaning
- Resample bridge deck elevation from labelled point cloud data if points exceed grade change and link is a bridge
- For underpass or tunnel links that intersect with bridge links, remove points representing elevated section

### Updated: 4/22/24:
Instead of using the network graph version, use the full network version. This means that when finally assigning the elevation stats, we'll need to interpolate using the dictionary results

# Imports

In [None]:
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import geometry_mask
from rasterio.plot import show
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from shapely.geometry import box, mapping
from shapely.ops import Point
from tqdm import tqdm
import pyproj
import math
from shapely.ops import LineString
import pickle
import contextily as cx

from whittaker_eilers import WhittakerSmoother

import src.elevation_tools as elevation_tools

In [None]:
import json
# Load the project configuration. read_bytes() opens and closes the file itself,
# so no file handle is left open (json.loads accepts bytes directly).
config = json.loads((Path.cwd().parent / 'config.json').read_bytes())

# Folder holding the OSM download and folder the network outputs are written to
network_fp = Path(config['project_directory']) / 'OSM_Download'
export_fp = Path(config['project_directory']) / 'Network'

# Create the OSM download folder on first run
if not network_fp.exists():
    network_fp.mkdir()

# Display the loaded configuration
config

## Import non-network version of osm

In [None]:
# Folder containing the network products (the sampled elevation pickle is read from here)
network_filepath = Path(config['project_directory']) / 'Network'

# Read the 'raw' layer of the OSM geopackage for the configured Geofabrik year
osm_gpkg = Path(config['project_directory']) / 'OSM_Download' / f"osm_{config['geofabrik_year']}.gpkg"
links = gpd.read_file(osm_gpkg, layer='raw')

## Reproject network to DEM crs

In [None]:
# Keep the CRS the links were read in (not used again in the cells shown here)
prev_crs = links.crs
tiff_links = list((Path(config['usgs']) / 'dem_files').glob('*.tif'))

# Fail with a clear message instead of an IndexError when no DEM tiles are present
if not tiff_links:
    raise FileNotFoundError(f"No .tif DEM files found in {Path(config['usgs']) / 'dem_files'}")

# Open the first tile just to get the CRS (assumes all tiles share it -- TODO confirm).
# The context manager closes the dataset even if reading fails, and the handle is no
# longer bound to `src`, which is also the name of the project package.
with rasterio.open(tiff_links[0]) as dem_dataset:
    dem_crs = dem_dataset.crs

# Reproject the links in place so they line up with the DEM
links.to_crs(dem_crs, inplace=True)

## Import sampled elevation data

In [None]:
# Load the dictionary of interpolated points with sampled elevation data
# (looked up by the links' 'osmid' values in the cells below)
elevation_pkl = network_filepath / 'elevation.pkl'
with elevation_pkl.open('rb') as pkl_file:
    interpolated_points_dict = pickle.load(pkl_file)

# Selected Examples

Local road that I know has steep grades:

In [None]:
# Link to inspect and the grade threshold handed to the visualizer
linkid, grade_threshold = 340365816, 15
elevation_tools.visualize(
    links,
    dem_crs,
    interpolated_points_dict,
    [linkid],
    grade_threshold,
    None,
    config['maptilerapikey'],
    one_off=True,
)

Northside Drive as "tertiary" road with a small segment above 15%

In [None]:
# Link to inspect and the grade threshold handed to the visualizer
linkid, grade_threshold = 352003174, 15
elevation_tools.visualize(
    links,
    dem_crs,
    interpolated_points_dict,
    [linkid],
    grade_threshold,
    None,
    config['maptilerapikey'],
    one_off=True,
)

In [None]:
# Link to inspect and the grade threshold handed to the visualizer
linkid, grade_threshold = 44097075, 30
elevation_tools.visualize(
    links,
    dem_crs,
    interpolated_points_dict,
    [linkid],
    grade_threshold,
    None,
    config['maptilerapikey'],
    one_off=True,
)

BeltLine

In [None]:
# Link to inspect and the grade threshold handed to the visualizer
linkid, grade_threshold = 226119768, 15
elevation_tools.visualize(
    links,
    dem_crs,
    interpolated_points_dict,
    [linkid],
    grade_threshold,
    None,
    config['maptilerapikey'],
    one_off=True,
)

In [None]:
# Link to inspect and the grade threshold handed to the visualizer
linkid, grade_threshold = 741964053, 30
elevation_tools.visualize(
    links,
    dem_crs,
    interpolated_points_dict,
    [linkid],
    grade_threshold,
    None,
    config['maptilerapikey'],
    one_off=True,
)

Circular golf course loop

In [None]:
linkid = 1087991070
# Set the threshold explicitly instead of inheriting whatever the last-run example
# cell left behind. 30 is the value the preceding cell sets when the notebook is run
# top to bottom -- NOTE(review): confirm this is the intended threshold for this link.
grade_threshold = 30
elevation_tools.visualize(links,dem_crs,interpolated_points_dict,[linkid],grade_threshold,None,config['maptilerapikey'],one_off=True)

# Point Knockout
- Calculate segment grade changes using the sampling distance
- Define a segment grade threshold by OSM highway type (e.g., motorway, local, secondary, etc.)
    - Highways/Interstates > 8%
    - Most local roads > 15%
    - Few local roads > 25%
- Knockout all grade changes that exceed threshold
- Repeat until no grade changes are above threshold
    - If this removes too many points 
- Spline fit on the remaining data for the interpolation step

Hongyu Method:
- Find first grade change (ascent or descent) exceeding threshold
- Search 30m, 100m, or 150m after and find the last opposite grade change exceeding threshold
- Remove first to last point
- Infill with the spline fit



## First, set elevation points to nan for large grade changes (> 30%)

In [None]:
import importlib
from importlib import reload

In [None]:
# Re-execute src.elevation_tools so edits to it are picked up in this session;
# reload() returns the refreshed module object, so rebinding the name here
# takes the place of a second import statement
elevation_tools = reload(elevation_tools)

In [None]:
# Grade threshold (%) for the first knockout pass over every link
large_threshold = 30

#identifies which links exceed threshold
exceeds_1 = elevation_tools.exceeds_threshold(links['osmid'].tolist(),interpolated_points_dict,large_threshold)

#exceeds_threshold[['id','highway','tunnel','bridge','geometry']].explore(popup=True)

#knockout points
for linkid in tqdm(exceeds_1):
    item = interpolated_points_dict.get(linkid)
    if item is None:
        # no sampled profile for this link: skip it rather than writing a
        # placeholder value into the dictionary
        continue
    # use this cell's own threshold; grade_threshold is a leftover from the
    # example cells above and only matches when they were run last with 30
    elevation_tools.point_knockout(item,large_threshold)
    interpolated_points_dict[linkid] = item


## Set different grade thresholds by the assigned labels
Lists are collectively exhaustive of all the highway tags in the study area.

In [None]:
#initialize the label field
links['label'] = None

#tunnel
links.loc[links['tunnel'].notna() & links['label'].isna(),'label'] = 'tunnel'

#bridge
links.loc[links['bridge'].notna() & links['label'].isna(),'label'] = 'bridge'

#motorway/major arterials
motorway = ['motorway','motorway_link',
            'trunk', 'trunk_link',
            'primary','primary_link',
            'secondary','secondary_link',
            'raceway', 'proposed']
links.loc[links['highway'].isin(motorway) & links['label'].isna(),'label'] = 'motorway'

#local/service roads
local = ['tertiary','tertiary_link','residential','service', 'unclassified','living_street']
links.loc[links['highway'].isin(local) & links['label'].isna(),'label'] = 'local'

#pedestrian paths/steps may not follow grade thresholds
ped = ['path','footway','track','pedestrian','cycleway','platform']
links.loc[links['highway'].isin(ped) & links['label'].isna(),'label'] = 'ped'

#ways where grade doesn't matter
exclude = ['steps','construction','disused', 'corridor','services']
links.loc[links['highway'].isin(exclude) & links['label'].isna(),'label'] = 'exclude'

# links[links['label'].isna()]
# links['highway'].nunique()#.tolist()

In [None]:
# Show the distinct labels assigned above; a None entry means some links matched none of the rules
links['label'].unique()

Knockout points on motorways

In [None]:
# Grade threshold (%) applied to links labelled 'motorway'
motorway_threshold = 8
motorway_ids = links.loc[links['label']=='motorway','osmid'].tolist()

#identifies which links exceed threshold
exceeds_2 = elevation_tools.exceeds_threshold(motorway_ids,interpolated_points_dict,motorway_threshold)

#exceeds_threshold[['id','highway','tunnel','bridge','geometry']].explore(popup=True)

#knockout points
for linkid in tqdm(exceeds_2):
    item = interpolated_points_dict.get(linkid)
    if item is None:
        # no sampled profile for this link: skip it rather than writing a
        # placeholder value into the dictionary
        continue
    elevation_tools.point_knockout(item,motorway_threshold)
    interpolated_points_dict[linkid] = item

Knockout points on local roads and other facilities

In [None]:
# Grade threshold (%) applied to everything that is not labelled 'motorway'
local_threshold = 20

# No link is ever labelled 'highway', so comparing against it selected every link.
# Excluding the 'motorway' label keeps this pass to local roads and other facilities;
# links with no label at all are still included (None != 'motorway').
local_ids = links.loc[links['label']!='motorway','osmid'].tolist()

#identifies which links exceed threshold (own name so the motorway result above is kept)
exceeds_3 = elevation_tools.exceeds_threshold(local_ids,interpolated_points_dict,local_threshold)

#exceeds_threshold[['id','highway','tunnel','bridge','geometry']].explore(popup=True)

#knockout points
for linkid in tqdm(exceeds_3):
    item = interpolated_points_dict.get(linkid)
    if item is None:
        # no sampled profile for this link: skip it rather than writing a
        # placeholder value into the dictionary
        continue
    elevation_tools.point_knockout(item,local_threshold)
    interpolated_points_dict[linkid] = item

# Sample bridge decks (use lidar here)
Need to fix this

Import processed lidar points

In [None]:
# Load the pickled lidar points, reproject them to the DEM CRS in place,
# and take their spatial index for the bounding-box queries in the next cell
lidar_pkl = Path(config['usgs']) / 'lidar_points.pkl'
with lidar_pkl.open('rb') as pkl_file:
    lidar_points = pickle.load(pkl_file)

lidar_points.to_crs(dem_crs, inplace=True)
spatial_index = lidar_points.sindex

In [None]:
# ids of bridge links for which at least one lidar point was found
lidar_found = []

# search radius around each sampled point, in the units of dem_crs; constant, so set once
buffer_m = 20

for linkid in tqdm(links.loc[links['label']=='bridge','osmid'].tolist()):

    item = interpolated_points_dict.get(linkid)

    #skip bridge links that have no sampled profile (or an empty one)
    if item is None or len(item['geometry']) == 0:
        continue

    geometry = [Point(x,y) for x,y in item['geometry']]
    gdf = gpd.GeoDataFrame({'geometry':geometry},crs=dem_crs)

    #buffer the data
    gdf.geometry = gdf.buffer(buffer_m)

    #use spatial index to only select a small number of points; only the overall
    #bounding box of the buffers is needed, so total_bounds replaces the costlier
    #unary_union / convex_hull (same bounds)
    possible_matches_index = list(spatial_index.intersection(tuple(gdf.total_bounds)))

    #nothing inside the bounding box means nothing can intersect the buffers
    if not possible_matches_index:
        continue
    possible_matches = lidar_points.iloc[possible_matches_index]

    #add an index column for the overlay part
    gdf.reset_index(inplace=True)
    precise_matches = gpd.overlay(possible_matches,gdf,how='intersection')

    #if no matches found take no action
    if len(precise_matches) == 0:
        continue
    lidar_found.append(linkid)

    #take average of all nearby values; sampled points without lidar nearby map to NaN
    new_values = precise_matches.groupby('index')['elevation_m'].mean()
    gdf['new_elevation_m'] = gdf['index'].map(new_values)
    new_values = np.array(gdf['new_elevation_m'])

    #element-wise maximum of lidar mean and existing elevation, ignoring NaN on either side
    new_values = np.nanmax([new_values,item['elevations']],axis = 0)

    #output = elevation_tools.elevation_stats(item['distances'],new_values,grade_threshold)

    #replace existing values
    interpolated_points_dict[linkid]['elevations'] = new_values

In [None]:
# Release the lidar data now that the bridge decks are resampled. The spatial index
# is dropped as well because it was built from lidar_points and presumably keeps
# the point geometries referenced, so deleting the frame alone would not free them.
del lidar_points, spatial_index

In [None]:
# Number of bridge links whose elevations were replaced using lidar points
len(lidar_found)

# Spline Fit
For all the links, fit a spline for the next step.

In [None]:
#spline fit
import numpy as np
from scipy.interpolate import splrep, splev, BSpline

# keys of the entries that have too few valid samples to fit a spline
too_short = []

for key, item in tqdm(interpolated_points_dict.items()):
    # pair each distance with its elevation and drop any pair with a missing value
    profile = pd.DataFrame({
        'distance': item['distances'],
        'elevation': item['elevations'],
    }).dropna()

    # three or fewer valid points: no spline is fitted, the key is only recorded
    if len(profile) <= 3:
        too_short.append(key)
        continue

    # fit a smoothing spline (s=0.5) and store the splrep result under 'spline';
    # item is the same object held in the dict, so this updates the entry in place
    item['spline'] = splrep(profile['distance'], profile['elevation'], s=0.5)

In [None]:
# 9245034 goes under a railroad track

In [None]:
# # Add a check here
# import random

# osmid = 751119047#random.choice(list(interpolated_points_dict.keys()))
# item = interpolated_points_dict[osmid]

# spline = item.get('spline',0)
# xs = item.get('distances',0)
# elevations = item.get('elevations',0)

# #get new elevation values
# new_xs = np.arange(0,xs[-1],1)
# new_elevations = splev(new_xs, spline)

# fig, ax = plt.subplots()
# ax.plot(xs,elevations,'-')
# ax.plot(new_xs,new_elevations,'-.')

In [None]:
# Write the dictionary (now including the fitted splines) for the next step
spline_pkl = export_fp / "spline_fit_elevation.pkl"
with spline_pkl.open('wb') as pkl_file:
    pickle.dump(interpolated_points_dict, pkl_file)

# Move on to step 7

<!-- ## Find underpasses/tunnels and knockout elevated segments
- Should have a distinct section that's high above everything else
- Need to also bring in railroads
- Can also be tagged as tunnels
# #grab ones that are not bridges and don't exceed the threshold
# not_bridges = exceeds_threshold.loc[exceeds_threshold['bridge'].isna(),['id','geometry']]
# print(len(not_bridges),'of thse are not tagged as bridges')
# bridges = links.loc[~links['bridge'].isna(),['id','geometry']]
Check if bridge crosses non-bridge (won't include where a link connects to a bridge)

# crossing_links = []
# for idx, row in bridges.iterrows():
#     bridge = row['geometry']
#     crosses = not_bridges.loc[not_bridges.crosses(bridge),'id'].tolist()
#     if len(crosses) > 0:
#         crossing_links = crossing_links + crosses
# mask = list(set(crossing_links))

## Smooth with [Whittaker-Eilers Method](https://towardsdatascience.com/the-perfect-way-to-smooth-your-noisy-data-4f3fe6b44440)

Need fitted function for interpolation
# for linkid, item in interpolated_points_dict.items():
    
#     distances = np.array(item['distances'])
    
#     if linkid in lidar_found:
#         elevations = np.array(item['lidar'])
#     else: 
#         elevations = np.array(item['elevations'])

#     whittaker_smoother = WhittakerSmoother(
#     lmbda=150, order=2, data_length=len(elevations)
#     )

#     smoothed = whittaker_smoother.smooth(elevations)

#     output = elevation_tools.elevation_stats(distances,smoothed,grade_threshold)

#     #assign new entry in the dictionary
#     interpolated_points_dict[linkid].update({
#         'smoothed': smoothed,
#         'smoothed_ascent': output['ascent'],
#         'smoothed_descent': output['descent'], 
#         'smoothed_ascent_grade': output['ascent_grade'],
#         'smoothed_descent_grade': output['descent_grade'],
#         'smoothed_bad_ascent_grades': output['bad_ascent_grades'],
#         'smoothed_bad_descent_grades': output['bad_descent_grades'],
#         'smoothed_segment_grades': output['segment_grades']
#     })

# #for storing the interpolated points with sampled elevation data
# with (export_fp/'smoothed_elevation.pkl').open('wb') as fh:
#     pickle.dump(interpolated_points_dict,fh)
## Measuring Grade
Length of grade + grade matters. Grade is change in elevation over a length of road. 
- Grade can be averaged over the entire link (separating up and down)
- Grade can be averaged over just the section of up or down (expected grade going uphill)
- Grade can be categorized (>3%, >6%, >10%, etc.) and the length in each category could be calculated

If we're just concerned about the impact on travel time then an average value is probably fine, but if we're more concerned about a preference then categorizing and finding the lengths of each is probably more useful.

In route choice literature, they either look at the average grade of the entire route (Hood 2011, Prato 2018) or break it into categories (e.g., proportion of the route at 2-4% grade) (Broach 2012). Since we're estimating link level impedances, we can be flexible and avoid taking averages if desired.

Broach:
- 2-4%
- 4-6%
- more than 6% -->