# Step 0: OSM Network Download
Start here. OSM data is downloaded from Geofabrik in the '.pbf' format to ensure that all tags are retrieved. The OSM data is then turned into network routing format. Lastly, OSM cycling infrastructure is extracted.

In [None]:
from pathlib import Path
import geopandas as gpd
import osmnx as ox
import json
from shapely.ops import LineString, Point
import pandas as pd
import numpy as np
import pickle

In [None]:
#custom
from bikewaysim.paths import config
from bikewaysim.network import osm_download_functions

# Download Geofabrik Extract(s)
Use `download_geofabrik` to download Geofabrik '.pbf' extract(s) for desired US state(s) for the specified year provided in 'YY' format. Type 'current' instead of year to get the most current extract. Alternatively, provide your own extract.

**NOTE:**
- If you change the year or download a more current extract, be sure to delete the old ones
- If downloading multiple states, put the names in all lowercase (full name) separated by commas

## TODO:
- Get turn restrictions from the extracts
- Add a merging feature if multiple states are needed
- Finish .bat scripts

In [None]:
# osm_download_functions.download_geofabrik(config['geofabrik_state'],config['geofabrik_year'],config['geofabrik_fp'])

## Import / export the study area for osmium script

In [None]:
# Load the study-area boundary, then reproject it to EPSG:4326
studyarea_geo = gpd.read_file(config['studyarea_fp'])
studyarea_geo = studyarea_geo.to_crs('epsg:4326')

# Write the reprojected boundary as GeoJSON into the Geofabrik folder so the
# script steps can read it
studyarea_out_fp = config['geofabrik_fp'] / 'studyarea.geojson'
studyarea_geo.to_file(studyarea_out_fp)

# Osmium Scripts
In these next steps, we'll have to exit the notebook to run some commands on the conda command line interface. Example ".bat" and ".sh" scripts have been prepared for running all the years/locations downloaded.

- Install osmium in its own conda environment and activate it:
    - `conda create --name osmium -c conda-forge osmium-tool`
    - `conda activate osmium`

- Change directory to where you downloaded the Geofabrik extracts:
    - On Windows: `chdir /D D:\RAW\OSM\Geofabrik_GA_Extracts`
    - On MacOS/Linux: `cd /path/to/Geofabrik_GA_Extracts`

- On Windows, run the osmium.bat script

- On MacOS/Linux, run the osmium.sh script (make sure to make it executable first, e.g. `chmod +x osmium.sh`)

# Use OSMnx to process the raw '.osm' version into a network graph

In [None]:
# Build an unsimplified graph from the '.osm' file in the Geofabrik folder.
# Simplification is deferred to the explicit call below so that its
# behavior can be controlled.
osm_xml_fp = config['geofabrik_fp'] / 'osm_data.osm'
G = ox.graph.graph_from_xml(osm_xml_fp, simplify=False, retain_all=False)

# Simplify the graph, but list 'osmid' in edge_attrs_differ so edges are not
# merged across different OSM ids. Add more attribute names to that list to
# change where edges are kept separate.
G = ox.simplification.simplify_graph(G, edge_attrs_differ=['osmid'])

# Experimenting with the consolidate intersections feature
# G_proj = ox.projection.project_graph(G,to_crs=config['projected_crs_epsg'])
# G_consolidate = ox.simplification.consolidate_intersections(G_proj,tolerance=25,rebuild_graph=True,reconnect_edges=True)
# nodes_consolidate, links_consolidate = ox.convert.graph_to_gdfs(G_consolidate)
# links_consolidate.to_file(Path.home()/'Downloads/consolidate.gpkg',layer='links')
# nodes_consolidate.to_file(Path.home()/'Downloads/consolidate.gpkg',layer='nodes')

# Convert graph to links
Note that OSMnx creates three columns to identify the new links: u, v, key.
- u: starting node
- v: ending node
- key: number assigned if there are multiple links with the same u and v

In [None]:
# Convert the graph edges to a GeoDataFrame and move the index levels
# (u, v, key) into ordinary columns
links = ox.convert.graph_to_gdfs(G, nodes=False)
links = links.reset_index()

# Project links to the configured projected CRS
links = links.to_crs(config['projected_crs_epsg'])

# Drop reverse links. The explicit .copy() makes the filtered frame an
# independent object, so the column assignment below does not raise pandas'
# SettingWithCopyWarning.
links = links[links['reversed'] == False].copy()  # noqa: E712

# Re-calculate the length of the links using the projected geometry
# NOTE(review): the 'length_ft' name assumes the projected CRS uses feet — confirm
links['length_ft'] = links.length

# Remove self-loops as we can't use these for routing unless we split the
# self-loop in half. The mask is computed once and reused for the report and
# the filter.
is_self_loop = links['u'] == links['v']
print(is_self_loop.sum(), 'self-loops in the network')
links = links[~is_self_loop].copy()

In [None]:
# examining sidewalks
# sidewalks = raw_links[raw_links['all_tags'].apply(lambda x: x.get('footway',None)).notna()]
# sidewalks = sidewalks[sidewalks['footway'] == 'sidewalk']
# sidewalks.explore()
# # links[links['highway']=='footway'].columns

# Create a raw gpkg version of the OSM data.

In [None]:
# GeoJSON version of the OSM data, read from the Geofabrik folder
osm_extract = config['geofabrik_fp'] / 'osm_data.geojson'

# Metadata fields to strip from the all-tags dict
remove_tags = ['@id', '@timestamp', '@version', '@type']

# Fields to include as columns of the main dataframe: the metadata fields
# first, followed by the OSM tags of interest
include_tags = [
    *remove_tags,
    'highway', 'oneway', 'name',
    'bridge', 'tunnel',
    'cycleway', 'service',
    'footway', 'sidewalk',
    'bicycle', 'foot', 'access', 'area', 'surface',
]

raw_links, raw_nodes = osm_download_functions.import_raw_osm_from_geojson(
    osm_extract, include_tags, remove_tags
)

# Dict of the node sequence for each way (used for elevation)
line_node_ids = osm_download_functions.get_way_node_seq(raw_links)


def _without_way_nodes(tags):
    """Return a copy of a tag dict with the '@way_nodes' entry left out."""
    return {name: value for name, value in tags.items() if name != '@way_nodes'}


# The node sequence has been extracted above, so remove it from the all-tags field
raw_links['all_tags'] = raw_links['all_tags'].apply(_without_way_nodes)

# Filter the raw links to remove disconnected features and self-loops

In [None]:
# Keep only the raw ways whose OSM id still appears in the processed links
retained_osmids = set(links['osmid'].tolist())
in_network = raw_links['osmid'].isin(retained_osmids)
raw_links = raw_links[in_network]

# Get start and end node distances
For each new OSM edge, get the distance from the start of its OSM way to the edge's start node and end node. In a few cases, the end node will come before the start node along the way because the edge loops back on itself.

In [None]:
# Pass the OSMnx links, the raw OSM ways, and the per-way node sequences to
# add_start_end_dists and keep the returned links.
# NOTE(review): the later merge step selects 'start_dist' and 'end_dist' from
# links, so those columns are presumably created here — confirm in
# osm_download_functions.
links = osm_download_functions.add_start_end_dists(links,raw_links,line_node_ids)

# Add attributes from raw links to osmnx links

In [None]:
# Columns kept from the OSMnx links; the remaining attributes are joined in
# from the raw links (minus their geometry) by matching on osmid
osmnx_cols = ['u', 'v', 'osmid', 'length_ft', 'start_dist', 'end_dist', 'geometry']
raw_attrs = raw_links.drop(columns=['geometry'])
links = pd.merge(left=links[osmnx_cols], right=raw_attrs, on="osmid")

# Sort so the row order is the same every time the data is imported
links = links.sort_values(['u', 'v', 'osmid', 'length_ft'])

# Number the sorted links sequentially, starting at 0, to give each a unique linkid
links['linkid'] = range(len(links))


# Export

In [None]:
# All three outputs are written as separate layers of one GeoPackage
osm_gpkg_fp = config['osmdwnld_fp'] / 'osm.gpkg'
layers_to_write = (
    ('edges', links),
    ('raw', raw_links),
    ('highway_nodes', raw_nodes),
)
for layer_name, layer_gdf in layers_to_write:
    layer_gdf.to_file(osm_gpkg_fp, layer=layer_name)

In [None]:
# #get the last two digits for year
# year = config['geofabrik_year'][-2::]
# extract_fp = list(Path(config['geofabrik_fp']).glob(f'*-{year}0101.osm.pbf'))[0]
# extract_fp
# fileyear = extract_fp.stem.split('-')[1][0:2]
# print(f"Processing 20{fileyear} data")
# osm = OSM(str(extract_fp), bounding_box=studyarea_geo)

# #OSMNX Simplification Next
# nodes, edges = osm.get_network(network_type='all',nodes=True)
# G = osm.to_graph(nodes,edges,graph_type='networkx')

# #simplify graph unless different osm ids
# #can change columns to change this behavior (i.e., )
# G = ox.simplification.simplify_graph(G, edge_attrs_differ=['osmid'])

# #remove directed links (do this seperately)
# #this doesn't seem to be working at the moment
# #G = ox.convert.to_undirected(G)

# #this will create an unsimplified graph
# nodes, links = ox.convert.graph_to_gdfs(G)
# del G

# links.reset_index(inplace=True)
# nodes.reset_index(drop=True,inplace=True)
#add attributes from raw?
# raw = gpd.read_file(config['osmdwnld_fp']/f"osm_{config['geofabrik_year']}.gpkg",layer="raw",ignore_geometry=True)
#raw.drop(columns=["length"],inplace=True)
# import sys
# sys.path.insert(0,str(Path.cwd().parent))
# from network.src import network_filter
# nodes['osm_N'] = nodes['osmid']
# import pandas as pd
# links = pd.merge(links[['osmid','geometry']],raw,on="osmid")
# links = network_filter.add_ref_ids(links,nodes,'osm')
# #remove loops can't use these for routing
# print((links['osm_A'] == links['osm_B']).sum(),'loops in the network')
# links = links[links['osm_A'] != links['osm_B']]
# import numpy as np
# non_reverse_link = []

# for row in links.itertuples():
#     a = row.osm_A
#     b = row.osm_B
#     node_ids = np.array(json.loads(row.all_tags)['@way_nodes'])

#     a_check = (node_ids == a).sum()
#     b_check = (node_ids == b).sum()

#     a_idx = (node_ids == a).argmax()
#     b_idx = (node_ids == b).argmax()
    
#     if a_idx < b_idx:
#         non_reverse_link.append(row.Index)
# len(non_reverse_link)
# links = links.loc[non_reverse_link]
# Remove duplicated links using the assigned start and end nodes and linkid
# # import sys
# # sys.path.append(config["code_directory"])
# # from network.src.network_filter import remove_directed_links

# # links = remove_directed_links(links,"u","v","osmid")
# links.drop(columns=['osm_A','osm_B'],inplace=True)
# nodes.drop(columns=['osm_N'],inplace=True)

#deprecated version that used pyrosm but was ditched because it edited the osm geometry
# extract_fps = list(Path(config['geofabrik_fp']).glob('*.osm.pbf'))
# extract_fps
# osm = OSM(str(extract_fps[-2]), bounding_box=studyarea_geo)

# #6 mins for current network
# #older versions take less time
# raw_edges = osm.get_network(network_type='all')

# #export raw ways
# raw_edges.to_file(config['osmdwnld_fp']/f"osm_2023.gpkg",layer="raw")
# this function appears to modify the osm geometry somewhat
# for extract_fp in extract_fps:
#     fileyear = extract_fp.stem.split('-')[1][0:2]
#     print(f"Processing 20{fileyear} data")
#     osm = OSM(str(extract_fp), bounding_box=studyarea_geo)
    
#     #6 mins for current network
#     #older versions take less time
#     raw_edges = osm.get_network(network_type='all')

#     #export raw ways
#     raw_edges.to_file(config['osmdwnld_fp']/f"osm_20{fileyear}.gpkg",layer="raw")
    
#     del raw_edges
#     del osm