# Scratch code for examining OSM attribute completion

In [None]:
from pathlib import Path
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd

In [None]:
# Folder holding the network files, resolved under the current user's home directory.
fp = Path.home() / 'Documents/TransitSimData/networks'

# Deserialize the pickled OSM table.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
osm_pickle = fp / 'osm.pkl'
with open(osm_pickle, 'rb') as fh:
    osm = pickle.load(fh)

In [None]:
# Number of columns in the table as loaded.
osm.shape[1]

Public roads not including Interstates

In [None]:
# Values of the 'highway' column that are kept as public roads.
public_roads = [
    'primary', 'primary_link',
    'residential',
    'secondary', 'secondary_link',
    'tertiary', 'tertiary_link',
    'trunk', 'trunk_link',
]

# Keep only the rows whose highway value is in the list above.
is_public_road = osm['highway'].isin(public_roads)
osm = osm[is_public_road].copy()

Drop completely empty columns

In [None]:
# A column survives if at least one of its rows holds a value.
has_any_value = osm.notna().any()
osm = osm.loc[:, has_any_value].copy()

# Report how many columns remain.
print(len(osm.columns))

In [None]:
# Per-row length and the network-wide total.
# NOTE(review): `osm.length` is presumably the geometric length of each feature
# (osm is assumed to be a GeoDataFrame) - confirm.
network_length = osm.length
total_network_length = network_length.sum()

# Length-weighted completion: for every column, the share of total network
# length (in percent) that lies on rows where the column is populated.
attr_completion = osm.notna().apply(lambda col: col * network_length, axis=0).sum() / total_network_length * 100
# Floating-point summation can land a hair above 100 (or below 0); clip so such
# values are not silently left out of the binning below.
attr_completion = attr_completion.clip(lower=0, upper=100)
attr_completion.name = 'pct_complete'

# Bin edges every 10 percentage points and the matching axis labels.
intervals = list(range(0, 110, 10))
labels = ['0-10'] + [f'{lower + 1}-{lower + 10}' for lower in range(10, 100, 10)]

# pd.cut bins are open on the left by default, which would drop an exact 0.0;
# include_lowest=True makes the first bin honour its '0-10' label.
categories = pd.cut(attr_completion, bins=intervals, labels=labels, include_lowest=True)
category_counts = categories.value_counts().sort_index()

# Bar chart: number of attributes per completion band.
plt.figure(figsize=(10, 5))
bar_plot = category_counts.plot(kind='bar', color='maroon')
bar_plot.set_xticklabels(bar_plot.get_xticklabels(), rotation=0)
plt.xlabel('Percent of Attribute Complete (%)')
plt.ylabel('Number of Attributes')
plt.show()

In [None]:
# Store each row's length as a regular column so it can be summed later.
# NOTE(review): the "_ft" suffix implies feet, but `.length` is presumably
# reported in the units of the layer's CRS - confirm the CRS is in feet.
osm['length_ft'] = osm.length

Speed Limit

In [None]:
# Collect every column whose name contains "speed", then display the list.
speed_limit_cols = list(filter(lambda name: 'speed' in name, osm.columns))
speed_limit_cols

In [None]:
# speed_limit = osm[['name','highway']+speed_limit_cols+['geometry']].copy()
# speed_limit.loc[~speed_limit[speed_limit_cols].isna().all(axis=1),:].explore()

In [None]:
# True where 'maxspeed' is populated, False everywhere else.
osm['has_speed_limit'] = osm['maxspeed'].notna()

Lanes

In [None]:
# Collect every column whose name contains "lane", then display the list.
lane_cols = list(filter(lambda name: 'lane' in name, osm.columns))
lane_cols

In [None]:
# lanes = osm[['name','highway']+lane_cols+['geometry']].copy()
# lanes.loc[~lanes[lane_cols].isna().all(axis=1),:].explore()

In [None]:
# True where 'lanes' is populated, False everywhere else.
osm['has_lanes'] = osm['lanes'].notna()

Sidewalks

In [None]:
# Collect every column whose name contains "sidewalk", then display the list.
sidewalk_cols = list(filter(lambda name: 'sidewalk' in name, osm.columns))
sidewalk_cols

In [None]:
# Hand-picked sidewalk columns. This assignment replaces whatever value
# sidewalk_cols held before this cell ran.
sidewalk_cols = ['sidewalk',
 'sidewalk:both',
 'sidewalk:left',
 'sidewalk:right',
 'sidewalk_1']

In [None]:
#osm.loc[osm['sidewalk'].isna() & osm[sidewalk_cols].notna().any(axis=1),sidewalk_cols+['sidewalk','geometry']].explore()

In [None]:
#osm.loc[osm['sidewalk_1'].notna(),['sidewalk_1','geometry']].explore()

In [None]:
# Flag rows that carry sidewalk information in at least one sidewalk column.
# `any` (not `all`) is required here: demanding that every sidewalk column be
# populated at once would leave the flag False for virtually every row. This
# also matches how the parking flag is derived.
osm['has_sidewalk'] = False
osm.loc[osm[sidewalk_cols].notna().any(axis=1), 'has_sidewalk'] = True

Parking

In [None]:
# Collect every column whose name contains "parking", then display the list.
parking_cols = list(filter(lambda name: 'parking' in name, osm.columns))
parking_cols

In [None]:
#osm.loc[osm[parking_cols].notna().any(axis=1),['name','geometry']+parking_cols].explore()

In [None]:
# True where at least one parking column is populated, False everywhere else.
osm['has_parking'] = osm[parking_cols].notna().any(axis=1)

In [None]:
# Presence flags to summarise, plus a narrow view holding them and the lengths.
presence_cols = ['has_lanes', 'has_speed_limit', 'has_sidewalk', 'has_parking']
presence_df = osm[presence_cols + ['length_ft']]

# For each flag: the percentage of total length_ft on rows where the flag is True.
test = pd.Series({
    flag: osm.loc[osm[flag] == True, 'length_ft'].sum() / osm['length_ft'].sum() * 100
    for flag in presence_cols
})

In [None]:
# Series.rename returns a new Series rather than renaming in place, so the
# result must be assigned back for the name to stick.
test = test.rename('attribute completion')
# Display the percentages rounded to one decimal place.
test.round(1)

Important variables in figure: lanes, speed limit, sidewalk presence, parking (then mention grade)

In [None]:
# Per-row length and the network-wide total.
# Lengths and null checks both come from `osm` so their row indexes line up;
# mixing in lengths from a different table would align on index and could
# produce NaNs.
network_length = osm.length
total_network_length = network_length.sum()

# Length-weighted completion per column, in percent (0-100).
attr_completion = osm.notna().apply(lambda col: col * network_length, axis=0).sum() / total_network_length * 100
attr_completion.name = 'pct_complete'

# Histogram of attribute completion
marks = np.array(attr_completion)

fig, axis = plt.subplots(figsize=(10, 5))
# The values are percentages, so the bin edges must span 0-100 as well;
# edges on a 0-1 scale would leave nearly every value outside the plotted range.
axis.hist(marks, bins=np.array(range(0, 110, 10)))
plt.xlabel('Percent of Attribute Complete')
plt.ylabel('Frequency')
# Displaying the graph
plt.show()

In [None]:
## Reduce attributes


In [None]:
# Minimum length-weighted completion (in percent) for a column to count as "complete".
percent_complete_cutoff = 10

# Per-row length and the network-wide total.
network_length = osm_links.length
total_network_length = network_length.sum()

# Percent complete per column, weighted by length.
attr_completion = osm_links.notna().apply(lambda col: col * network_length, axis=0).sum() / total_network_length * 100
attr_completion.name = 'percent_complete'

# Columns whose completion exceeds the cutoff.
complete = list(attr_completion[attr_completion > percent_complete_cutoff].index)
#less_complete = list(attr_completion[attr_completion<=percent_complete_cutoff].index)

# Road attributes to retrieve regardless of completion.
road_columns = ['highway','name','highway_1','oneway','lanes','maxspeed','bridge','tunnel','crossing','sidewalk']

# Parking attributes: any column whose name contains "parking".
parking_columns = [x for x in osm_links.columns if 'parking' in x]

# Bike attributes: names containing "cycle" or "bike", excluding "motorcycle"
# (which also contains "cycle"); foot-related columns are appended as well.
bike_columns = [x for x in osm_links.columns if ('cycle' in x or 'bike' in x) and 'motorcycle' not in x]
foot_columns = [x for x in osm_links.columns if 'foot' in x]
bike_columns = bike_columns + foot_columns

# Everything worth keeping.
keep = complete + bike_columns + road_columns + parking_columns

# Select the kept columns with a boolean mask. Indexing with a set is rejected
# by recent pandas and never guaranteed an order; the mask keeps the original
# column order and ignores names in `keep` that are absent from the table.
keep_mask = osm_links.columns.isin(keep)
osm_export = osm_links.loc[:, keep_mask]

# Completion values as a plain array, used for the histogram in a later cell.
marks = np.array(attr_completion)

# Columns that were dropped.
removed_cols = list(osm_links.columns[~keep_mask])

In [None]:
## Attribute Completion

In [None]:
# Histogram of the completion values held in `marks`.
fig, axis = plt.subplots(figsize=(10, 5))
# `marks` holds percentages (0-100), so the bin edges must be on the same
# scale; edges on a 0-1 scale would leave nearly every value outside the
# plotted range.
axis.hist(marks, bins=np.array(range(0, 110, 10)))
plt.xlabel('Percent of Attribute Complete')
plt.ylabel('Frequency')
# Displaying the graph
plt.show()

In [None]:
## Examining Results

In [None]:
#don't run if large network
#osm_links.explore()

In [None]:
### Which columns to keep?
1. Check for completion
1. Check for desired attributes (speed, bike lanes, etc.)
1. Filter to just those

In [None]:
### Examine which attributes are present
Try 'maxspeed' or 'lanes'

In [None]:
# Attributes to inspect on the map (others worth trying: 'cycleway', 'lanes').
attributes_to_check = ['maxspeed']

# Rows where at least one of the chosen attributes is populated.
has_any_attribute = osm_links[attributes_to_check].notna().any(axis=1)

# Show just those rows, with the chosen attributes and the geometry.
osm_links.loc[has_any_attribute, attributes_to_check + ['geometry']].explore()

In [None]:
## Look at value counts to see the possible attribute values

In [None]:
# Tally how often each distinct value of the 'lanes' column occurs.
osm_links['lanes'].value_counts()

In [None]:
# Just Bike Infrastructure

In [None]:
# Work on a copy so osm_links itself is left untouched.
bike_lanes = osm_links.copy()

# Discard rows tagged bicycle == 'no'.
bikes_not_prohibited = bike_lanes['bicycle'] != 'no'
bike_lanes = bike_lanes[bikes_not_prohibited]

# Discard shared lanes ("sharrows"): rows where any bike/foot column equals 'shared_lane'.
has_sharrow = bike_lanes[bike_columns].isin(['shared_lane']).any(axis=1)
not_sharedlane = ~has_sharrow
bike_lanes = bike_lanes[not_sharedlane]

# Discard sidewalks and crossings unless bicycle == 'yes'.
is_walkway = bike_lanes['footway'].isin(['sidewalk', 'crossing'])
bikes_explicitly_allowed = bike_lanes['bicycle'] == 'yes'
remove_sidewalks = is_walkway & ~bikes_explicitly_allowed
bike_lanes = bike_lanes[~remove_sidewalks]

# Keep a row only if it has at least one populated bike/foot column or its
# 'highway' value is 'cycleway'.
# NOTE(review): only 'highway' is tested here; 'highway_1' is not consulted.
has_bike_attribute = bike_lanes[bike_columns].notna().any(axis=1)
is_cycleway = bike_lanes['highway'] == 'cycleway'
bike_lanes = bike_lanes[has_bike_attribute | is_cycleway]

# TODO: need beltline and stuff
map_columns = ['name', 'highway'] + bike_columns + ['geometry']
bike_lanes[map_columns].explore()