# GATIS Sample Data (using OpenStreetMap as a base)
This notebook converts OSM data into a sample GATIS dataset.

Open questions:
- How should isolated features (those not connected to the rest of the network) be handled?
- How should polygon features be handled?

In [None]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import random

from importlib import reload
from gatis_sample_data import utils, osm_functions, create_maps

## Define a bounding box

In [None]:
# Candidate study areas keyed by "city, state". Each entry records a bounding-box
# centre, a bounding-box length in feet, the notable features found there, and a
# land-use type label.
# NOTE(review): despite the key name "bbox_center_lonlat", the pairs below read as
# [latitude, longitude] (the second value is beyond +/-90 in several entries, so it
# cannot be a latitude) — confirm this is the order
# osm_functions.download_osm_from_bbox expects.
bboxes = {
    "carmel, in": {
        "bbox_center_lonlat": [39.975280251411064, -86.127094177054],
        "bbox_length_ft": 5280,
        "features": ["roundabouts"],
        "type": "suburban"
    },
    "washington, dc": {
        "bbox_center_lonlat": [38.92878332898851, -77.03263427740389],
        "bbox_length_ft": 2640,
        "features": ["ped scramble", "cycletrack", "sidewalks"],
        "type": "urban"
    },
    "detroit, or": {
        "bbox_center_lonlat": [44.735233200754266, -122.15372770979087],
        "bbox_length_ft": 1760,
        "features": ["ped lane"],
        "type": "rural"
    },
    "seattle, wa": {
        "bbox_center_lonlat": [47.62089297947648, -122.34240301233211],
        "bbox_length_ft": 2640,
        "features": ["protected intersection"],
        "type": "urban"
    },
    "chamblee, ga": {
        "bbox_center_lonlat": [33.84903465591753, -84.321979334293],
        "bbox_length_ft": 1760,
        "features": ["midblock crossing"],
        "type": "suburban"
    }
}

## Download OSM Data Using Overpass and OSMnx (only need to run this once per bounding box)
For this example we'll use Seattle, WA

In [6]:
# Pull the raw OSM layers for the chosen study area.
study_area = bboxes["seattle, wa"]
edges, nodes, zones = osm_functions.download_osm_from_bbox(
    study_area['bbox_center_lonlat'],
    study_area["bbox_length_ft"]
)

# export: make sure ./data exists, then write each non-empty layer as GeoJSON
# (null properties are dropped to keep the files small)
data_dir = Path.cwd()/"data"
if not data_dir.is_dir():
    data_dir.mkdir()

for layer_name, layer in (("edges", edges), ("nodes", nodes), ("zones", zones)):
    if layer.shape[0] > 0:
        with (data_dir/f"osm_{layer_name}.geojson").open('w') as f:
            f.write(layer.to_json(na="drop",indent=4))

Retrieving OSM data...


## Load OSM Data

In [11]:
edges = gpd.read_file(Path.cwd()/"data/osm_edges.geojson")

# process the column names: strip everything up to and including "tags." so OSM
# tags become plain column names; when the bare tag name is already taken, keep
# the full original name behind a "dup_tag." prefix instead
new_cols = []
for original_name in edges.columns:
    _, separator, tag_name = original_name.partition('tags.')
    if not separator:
        new_cols.append(original_name)
    elif tag_name in new_cols:
        new_cols.append("dup_tag."+original_name)
    else:
        new_cols.append(tag_name)
edges.columns = new_cols

# things to throw out of the osm data (service roads), then renumber rows from 0
edges = edges[~edges['highway'].isin(['service'])].reset_index(drop=True)

Skipping field nodes: unsupported OGR type: 13
  return ogr_read(


## Create GATIS Edges DataFrame

In [18]:
# import tier 4 GATIS template so that all of the possible attributes are in there
import requests
gatis_url = "https://raw.githubusercontent.com/dotbts/BPA/main/draft_gatis_specification/specification_jsons"

# A timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of as a confusing JSON
# decoding / KeyError failure further down the notebook.
edges_spec_response = requests.get(gatis_url + "/edges.json", timeout=30)
edges_spec_response.raise_for_status()
edges_spec = edges_spec_response.json()

In [19]:
# regular columns: every attribute name listed in the edges specification
regular_cols = [attribute['name'] for attribute in edges_spec['attributes']]

# left and right columns for road aligned sidewalks and bikeways:
# only attributes whose presence entry mentions 'recommended' or 'required'
sidewalk_attrs = [
    attribute['name'] for attribute in edges_spec['attributes']
    if 'recommended' in attribute['presence']['sidewalk'] or 'required' in attribute['presence']['sidewalk']
]
sidewalk_cols = ["sidewalk:left:presence","sidewalk:right:presence"]
sidewalk_cols = sidewalk_cols + ["sidewalk:left:"+name for name in sidewalk_attrs]
sidewalk_cols = sidewalk_cols + ["sidewalk:right:"+name for name in sidewalk_attrs]

bikeway_attrs = [
    attribute['name'] for attribute in edges_spec['attributes']
    if 'recommended' in attribute['presence']['bikeway'] or 'required' in attribute['presence']['bikeway']
]
bikeway_cols = ["bikeway:left:presence","bikeway:right:presence"]
bikeway_cols = bikeway_cols + ["bikeway:left:"+name for name in bikeway_attrs]
bikeway_cols = bikeway_cols + ["bikeway:right:"+name for name in bikeway_attrs]

# create empty dataframe: one all-None row per OSM edge
all_columns = regular_cols + sidewalk_cols + bikeway_cols
empty_rows = [[None]*len(all_columns) for _ in range(edges.shape[0])]
gatis_edges = pd.DataFrame(data=empty_rows,columns=all_columns)

# assign geometry to dataframe
gatis_edges = gpd.GeoDataFrame(gatis_edges,geometry=edges["geometry"])

# infill edge ids (sequential, starting at 1)
gatis_edges['edge_id'] = range(1,gatis_edges.shape[0]+1)

In [20]:
# http://wiki.openstreetmap.org/wiki/Key:highway#Roads

# Map OSM highway/footway/cycleway tagging onto GATIS edge_type values.
# Not every OSM extract carries every tag (e.g. a small rural bounding box may have
# no `conveying` or `segregated` column at all). reindex() guarantees each column
# exists, filled with NaN when absent, so the comparisons below simply evaluate to
# False instead of raising KeyError.
osm_tags = pd.DataFrame(edges).reindex(
    columns=['highway', 'footway', 'conveying', 'foot', 'segregated']
)
highway = osm_tags['highway']
footway = osm_tags['footway']
foot = osm_tags['foot']
is_footway = highway == 'footway'

# road
is_road = highway.isin([
    "motorway", "trunk", "primary",
    "secondary", "tertiary", "unclassified",
    "residential", "motorway_link", "trunk_link",
    "primary_link", "secondary_link", "tertiary_link",
    "living_street"
])
gatis_edges.loc[is_road,'edge_type'] = 'road'

# sidewalk
is_sidewalk = is_footway & (footway == 'sidewalk')
gatis_edges.loc[is_sidewalk,'edge_type'] = 'sidewalk'

# footpath: paths, pedestrian streets, and footways that are not one of the
# more specific footway kinds handled separately
# have a surface condition too?
is_footpath = (
    (highway == 'path')
    | (is_footway & ~footway.isin(['sidewalk','crossing','traffic_island']))
    | (highway == 'pedestrian')
)
gatis_edges.loc[is_footpath,'edge_type'] = 'footpath'

# crossing
is_crossing = is_footway & (footway == 'crossing')
gatis_edges.loc[is_crossing,'edge_type'] = 'crossing'

# traffic island
is_traffic_island = is_footway & (footway == 'traffic_island')
gatis_edges.loc[is_traffic_island,'edge_type'] = 'traffic_island'

# escalator: steps with a conveying direction
is_escalator = (highway == 'steps') & osm_tags['conveying'].isin(['yes','forward','backward','reversible'])

# steps: every other highway=steps, including an explicit conveying=no, which would
# otherwise be left without any edge_type
is_steps = (highway == 'steps') & ~is_escalator
gatis_edges.loc[is_steps,'edge_type'] = 'steps'
gatis_edges.loc[is_escalator,'edge_type'] = 'escalator'

# bikeway: cycleways with no foot tag or with foot access denied
is_bikeway = (highway == 'cycleway') & (foot.isna() | foot.isin(['no']))
gatis_edges.loc[is_bikeway,'edge_type'] = 'bikeway'

# multi_use_path: assigned last so it takes precedence over 'bikeway' when a
# cycleway matches both (e.g. no foot tag but segregated=no)
is_multi_use_path = (highway == 'cycleway') & ((osm_tags['segregated'] == 'no') | foot.isin(['designated','yes']))
gatis_edges.loc[is_multi_use_path,'edge_type'] = 'multi_use_path'

### Classify Road aligned features

In [21]:
def road_associated_tags(string):
    """Return the OSM key `string` followed by its ":both", ":left" and ":right" variants."""
    side_variants = [string + ":" + side for side in ("both", "left", "right")]
    return [string] + side_variants
def access_columns(cols,df):
    """Return the entries of `cols` that are columns of `df`, in their original order.

    Names that are not columns of `df` are reported with a single printed message.
    They are listed once each, in the order they appear in `cols`, so the message
    is the same on every run (a set difference would give an arbitrary order).
    """
    filtered = [col for col in cols if col in df.columns]
    # dict.fromkeys de-duplicates while preserving first-seen order
    dropped = list(dict.fromkeys(col for col in cols if col not in df.columns))
    if len(dropped) > 0:
        print(f"{dropped} are not in dataframe")
    return filtered

# OSM tag columns (plain, :both, :left, :right) that actually exist in this extract
cycleway_cols, sidewalk_cols = (
    access_columns(road_associated_tags(osm_key), edges)
    for osm_key in ("cycleway", "sidewalk")
)

In [22]:
# Show the distinct values present in each cycleway tag column.
for col in cycleway_cols:
    distinct_values = edges[col].unique().tolist()
    print(col, distinct_values, sep="\n")

cycleway
[None, 'lane', 'shared_lane', 'no', 'separate', 'track', 'crossing']
cycleway:both
[None, 'no']
cycleway:left
[None, 'track', 'lane', 'separate', 'no', 'shared_lane']
cycleway:right
[None, 'lane', 'shared_lane', 'no', 'separate', 'track']


In [23]:
# Show the distinct values present in each sidewalk tag column.
for col in sidewalk_cols:
    distinct_values = edges[col].unique().tolist()
    print(col, distinct_values, sep="\n")

sidewalk
[None, 'separate', 'right', 'both', 'no']
sidewalk:both
[None, 'separate']
sidewalk:left
['no', None, 'separate']
sidewalk:right
['separate', None, 'no']


In [24]:
# Flag which side(s) of each road edge carry a sidewalk according to the OSM
# sidewalk tags.
on_road = gatis_edges['edge_type'] == 'road'

# Some extracts lack one or more of these tag columns; reindex() supplies any
# missing column as all-NaN so the membership tests are simply False rather than
# raising KeyError.
sidewalk_tags = pd.DataFrame(edges).reindex(
    columns=['sidewalk', 'sidewalk:both', 'sidewalk:left', 'sidewalk:right']
)

# tagging that implies a sidewalk on both sides
both_sides = sidewalk_tags['sidewalk'].isin(['both','yes']) | sidewalk_tags['sidewalk:both'].isin(['yes'])
left_sidewalk = both_sides | sidewalk_tags['sidewalk'].isin(['left']) | sidewalk_tags['sidewalk:left'].isin(['yes'])
right_sidewalk = both_sides | sidewalk_tags['sidewalk'].isin(['right']) | sidewalk_tags['sidewalk:right'].isin(['yes'])

gatis_edges.loc[left_sidewalk & on_road,'sidewalk:left:presence'] = 'yes'
gatis_edges.loc[right_sidewalk & on_road,'sidewalk:right:presence'] = 'yes'

In [25]:
# identify separated bike lanes
# TODO two-way / contra-flow / etc
# Stand-alone cycleways are given edge_type 'bikeway' when edge types are assigned
# (no edge is ever labelled 'cycleway'), so 'bikeway' is the value to match here.
gatis_edges.loc[gatis_edges['edge_type']=='bikeway','bikeway_type'] = 'separated bike lane'

# road aligned bicycle facilities
on_road = gatis_edges['edge_type'] == 'road'

# Some extracts lack one or more of these tag columns; reindex() supplies any
# missing column as all-NaN so the comparisons are simply False rather than
# raising KeyError.
cycleway_tags = pd.DataFrame(edges).reindex(
    columns=['cycleway', 'cycleway:both', 'cycleway:left', 'cycleway:right']
)

# (OSM cycleway value, GATIS bikeway_type), applied in this order so that when the
# tags on one edge disagree, the later entry wins.
# 'share_busway' is tested on every tag column, including the :left/:right ones;
# testing those for 'shared_lane' would relabel every sharrow as a bus lane.
facility_types = [
    ('lane', 'bike lane'),            # bike lanes
    ('track', 'separated bike lane'), # cycletracks
    ('shared_lane', 'sharrow'),       # sharrows
    ('share_busway', 'bus lane'),     # share busway
]

# NOTE(review): the template columns created for gatis_edges are prefixed
# "bikeway:left:" / "bikeway:right:", while the columns written here are prefixed
# "cycleway:" — confirm which prefix the specification expects.
sides = ('left', 'right')
has_facility = {side: pd.Series(False, index=edges.index) for side in sides}
for osm_value, gatis_value in facility_types:
    for side in sides:
        # the plain and :both tags apply to either side; the side tag only to its own
        relevant_tags = cycleway_tags[['cycleway', 'cycleway:both', f'cycleway:{side}']]
        matches = relevant_tags.eq(osm_value).any(axis=1)
        gatis_edges.loc[matches & on_road, f'cycleway:{side}:bikeway_type'] = gatis_value
        has_facility[side] = has_facility[side] | matches

#presence
for side in sides:
    gatis_edges.loc[has_facility[side] & on_road, f'cycleway:{side}:presence'] = 'yes'

In [26]:
# drop road if no sidewalk or cycleway is present on either side
presence_cols = [
    'cycleway:left:presence','cycleway:right:presence',
    'sidewalk:left:presence',"sidewalk:right:presence"]
is_road = gatis_edges['edge_type'].eq('road')
no_cycleway_or_sidewalk = gatis_edges[presence_cols].isna().all(axis=1)
gatis_edges = gatis_edges[~(is_road & no_cycleway_or_sidewalk)]

In [27]:
# surface_issue = [x for x in edges_spec['attributes'] if x['name'] == 'surface_issue'][0]

# # edges for which this attribute is recommended or required for
# edge_types = [key for key, item in surface_issue['presence'].items() if ("recommended" in item) | ("required" in item)]
# cond = gatis_edges[gatis_edges['edge_type'].isin(edge_types)].index

# # hard-coded (need to change if keys change) have for each field
# #TODO change to list of tuples
# # weights are normalized so they don't need to add up to one
# # higher means more frequent, equal means equally likely
# weights = [
#     ('yes', 10),
#     ('no', 5),
#     ('cracking', 1),
#     ('scaling', 1),
#     ('spalling', 1),
#     ('overgrowth', 1),
#     ('uneven/displacement', 1),
#     ('frequent water pooling', 1),
#     ('heaving', 1),
#     ('missing bricks / stones', 1),
#     ('grates / utility covers / other surface impediments', 1),
#     ('potholes / holes', 1),
#     ('slickness', 1),
#     ('uneven joints', 1),
#     ('markings worn / missing', 1),
#     ('detectable warning surface damage', 1),
#     ('other', 1)
# ]
# if len(set.difference(set(surface_issue['listed_values']), set(list(weights.keys())))) != 0:
#     print("Key mismatch")
    
# sampleList = list(weights.keys())
# weights = [x[1] for x in weights.items()]

# randomList = random.choices(sampleList,weights=weights,k=len(cond))
# gatis_edges.loc[gatis_edges['edge_type'].isin(edge_types),'surface_issue'] = pd.Series(randomList,index=cond)

In [28]:
# gatis_edges.to_file("test.geojson")
# NOTE: GeoPandas' to_file is deliberately not used because there are a lot of
# null values; serialising with na="drop" omits them from each record so that the
# resulting geojson isn't as large
edges_geojson = gatis_edges.to_json(na="drop",indent=2)
with Path("osm_sample_edges.geojson").open("w") as f:
    f.write(edges_geojson)

## Create GATIS Nodes

In [30]:
nodes = gpd.read_file(Path.cwd()/"data/osm_nodes.geojson")

# Strip everything up to and including "tags." so OSM tags become plain column
# names; when the bare tag name is already taken, keep the full original name
# behind a "dup_tag." prefix instead.
new_cols = []
for original_name in nodes.columns:
    _, separator, tag_name = original_name.partition('tags.')
    if not separator:
        renamed = original_name
    elif tag_name in new_cols:
        renamed = "dup_tag."+original_name
    else:
        renamed = tag_name
    new_cols.append(renamed)
nodes.columns = new_cols

In [33]:
import requests
gatis_url = "https://raw.githubusercontent.com/dotbts/BPA/main/draft_gatis_specification/specification_jsons"

# A timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of as a confusing JSON
# decoding / KeyError failure further down the notebook.
nodes_spec_response = requests.get(gatis_url + "/nodes.json", timeout=30)
nodes_spec_response.raise_for_status()
nodes_spec = nodes_spec_response.json()

In [34]:
# regular columns: every attribute name listed in the nodes specification
regular_cols = [attribute['name'] for attribute in nodes_spec['attributes']]

# create empty dataframe: one all-None row per OSM node
empty_rows = [[None]*len(regular_cols) for _ in range(nodes.shape[0])]
gatis_nodes = pd.DataFrame(data=empty_rows,columns=regular_cols)

# assign geometry to dataframe
gatis_nodes = gpd.GeoDataFrame(gatis_nodes,geometry=nodes["geometry"])

# infill node ids (sequential, starting at 1)
gatis_nodes['node_id'] = range(1,gatis_nodes.shape[0]+1)

In [35]:
# Map OSM node tags onto GATIS node_type values and keep only classified nodes.
# Some extracts lack one or more of these tag columns (the gtfs columns below are
# already guarded the same way); reindex() supplies any missing column as all-NaN
# so the comparisons are simply False rather than raising KeyError.
node_tags = pd.DataFrame(nodes).reindex(columns=['barrier', 'kerb', 'highway'])

# curb_ramp: nodes tagged barrier=kerb or carrying any kerb=* value
has_kerb_value = node_tags['kerb'].notna()
curbs = (node_tags['barrier'] == 'kerb') | has_kerb_value
if curbs.any():
    gatis_nodes.loc[curbs,"node_type"] = "curb_ramp"
    # the right-hand side is aligned on the index, so each node gets its own kerb value
    gatis_nodes.loc[has_kerb_value,"curb_type"] = node_tags['kerb']

# transit_stop (assigned after curbs, so a node matching both ends up a transit stop)
bus_stops = node_tags['highway'] == 'bus_stop'
if bus_stops.any():
    gatis_nodes.loc[bus_stops,"node_type"] = "transit_stop"
    if "gtfs:dataset_id" in nodes.columns:
        gatis_nodes.loc[bus_stops,"agency_id"] = nodes.loc[bus_stops,'gtfs:dataset_id']
    if "gtfs:stop_id" in nodes.columns:
        gatis_nodes.loc[bus_stops,'stop_id'] = nodes.loc[bus_stops,'gtfs:stop_id']

# assign non-assigned values as generic nodes?
# gatis_nodes.loc[gatis_nodes['node_type'].isna(),'node_type'] = "generic"

# or drop entirely?
gatis_nodes = gatis_nodes.loc[gatis_nodes['node_type'].notna()]

In [36]:
# Serialise with na="drop" so null attributes are omitted from each feature.
nodes_geojson = gatis_nodes.to_json(na="drop",indent=2)
with Path("osm_sample_nodes.geojson").open("w") as f:
    f.write(nodes_geojson)

## GATIS Zones

In [37]:
# zone features exported by the download step
zones = gpd.read_file(Path.cwd()/"data/osm_zones.geojson")

# Strip everything up to and including "tags." so OSM tags become plain column
# names; when the bare tag name is already taken, keep the full original name
# behind a "dup_tag." prefix instead.
new_cols = []
for original_name in zones.columns:
    _, separator, tag_name = original_name.partition('tags.')
    if not separator:
        renamed = original_name
    elif tag_name in new_cols:
        renamed = "dup_tag."+original_name
    else:
        renamed = tag_name
    new_cols.append(renamed)
zones.columns = new_cols

Skipping field nodes: unsupported OGR type: 13


In [38]:
# Number zones sequentially from 1, matching the convention used for edge_id and
# node_id (both start at 1).
zones['zone_id'] = range(1,zones.shape[0]+1)

# every zone is labelled with the same zone_type
zones['zone_type'] = 'pedestrian'

In [39]:
# keep only the identifier, type and geometry of each zone
gatis_zone_columns = ['zone_id','zone_type','geometry']
gatis_zones = zones[gatis_zone_columns]

In [41]:
# Serialise with na="drop" so null attributes are omitted from each feature.
zones_geojson = gatis_zones.to_json(na="drop",indent=2)
with (Path.cwd()/"osm_sample_zones.geojson").open("w") as f:
    f.write(zones_geojson)

## Write everything into a leaflet map using folium

In [None]:
# m = create_maps.display_layers(gatis_edges, gatis_nodes, gatis_zones, edge_categories="edge_type", node_categories="node_type", polygon_categories="zone_type")
# m.save(paths.root / f"maps/seattle_osm.html")

# maps_dict = {
#             "key": "seattle_osm",
#             "label": "Seattle, WA (OpenStreetMap)",
#             "description": """This sample dataset was created from existing OpenStreetMap data. See the full process here."""
#         }
# # saves to maps.json and creates a new key for the path to the map for the GATIS Explorer
# create_maps.add_to_json(maps_dict)