In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to them take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Put the parent directory at the front of the module search path
# (presumably where the `osm` module imported below lives - TODO confirm).
import sys

sys.path.insert(0, '..')

In [3]:
import geopandas as gpd
import pandas as pd
from rapidfuzz import process, fuzz

from osm import get_network, process_network

## Load data

In [4]:
# load road network with osm
# get_network (imported from the `osm` module) is called with a place name and
# "bike" (presumably the network type - TODO confirm); its result unpacks into
# the two objects bound to `nodes` and `edges`.
nodes, edges = get_network("Somerville, Massachusetts, USA", "bike")

In [5]:
# Replace `edges` with the output of process_network.
# NOTE(review): this cell rebinds its own input, so running it a second time
# feeds already-processed edges back in; re-run the loading cell first.
# TODO confirm whether process_network is safe to apply twice.
edges = process_network(edges)

In [9]:
# Write the edge table to tmp.csv in the working directory, with the edge index
# moved into ordinary columns first (to_csv then also writes the fresh integer
# index as an unnamed leading column).
# NOTE(review): no visible cell reads tmp.csv back - looks like a scratch export.
edges.reset_index().to_csv("tmp.csv")

In [25]:
# Read the 2024 bike-facility shapefile; the path is relative to the
# notebook's working directory.
bike_facilities = gpd.read_file(
    "../data/Somerville 2024 bike facilities_export/"
    "Somerville_2024_bike_facilities_export.shp"
)

## Match

In [57]:
# Street-type abbreviations (keys) and the full words they expand to (values)
abbr_mapping = dict(
    St='Street',
    Ave='Avenue',
    Rd='Road',
    Blvd='Boulevard',
    Dr='Drive',
    Ln='Lane',
    Pl='Place',
    Ct='Court',
    Sq='Square',
    Pkwy='Parkway',
    Cir='Circle',
)

In [65]:
# Look up the closest candidate for a single street name
def match_name_fuzzy(name, choices, scorer=fuzz.token_sort_ratio, score_cutoff=95):
    """Return the entry of ``choices`` that best matches ``name``, or None.

    Args:
        name: Street name to look up.
        choices: Candidate names to compare against.
        scorer: Scoring function handed to ``process.extractOne``
            (defaults to ``fuzz.token_sort_ratio``).
        score_cutoff: Score threshold handed to ``process.extractOne``
            (defaults to 95).

    Returns:
        The matched string, or None when ``extractOne`` finds no match.
    """
    best = process.extractOne(name, choices, scorer=scorer, score_cutoff=score_cutoff)
    # The first element of a result is the matched string itself.
    return best[0] if best else None

In [66]:
# Function to expand abbreviations in a street name
def expand_street_name(name, mapping=None):
    """Expand street-type abbreviations in ``name``, word by word.

    Each whitespace-separated word is stripped of leading/trailing periods and
    looked up in ``mapping``; words without an entry are kept exactly as
    written. The lookup is case-sensitive and applies to every word, not only
    the last one. Words are re-joined with single spaces.

    Args:
        name: Street name, or a missing value (None / NaN).
        mapping: Optional dict of abbreviation -> full word. When omitted,
            the module-level ``abbr_mapping`` is used.

    Returns:
        The expanded name, or ``name`` unchanged when it is missing.
    """
    if pd.isna(name):
        return name
    if mapping is None:
        # Resolved at call time so later edits to abbr_mapping are picked up.
        mapping = abbr_mapping
    return ' '.join(mapping.get(word.strip('.'), word) for word in name.split())

In [67]:
# Add STNAME_expanded: each STNAME with its abbreviations spelled out
bike_facilities['STNAME_expanded'] = bike_facilities['STNAME'].map(expand_street_name)

In [68]:
# Create a sorted list of the distinct, non-null bike_facility names.
# Sorting gives the candidate list the same order on every kernel restart (a
# set of strings does not), so the order the fuzzy matcher sees is reproducible;
# missing names are dropped because they can never be a meaningful match.
bike_names = sorted(bike_facilities['STNAME_expanded'].dropna().unique())

In [70]:
# Fuzzy-match every street name against the bike-facility names.
# No earlier cell assigns `streets`, so it is built here to keep the notebook
# runnable on a fresh kernel: the processed edges with their index flattened
# into ordinary columns.
# NOTE(review): assumes this is how `streets` was originally derived - confirm.
streets = edges.reset_index()
streets['matched_name'] = streets['name'].apply(lambda x: match_name_fuzzy(x, bike_names))

In [75]:
# streets[streets['matched_name'].notna()]

In [79]:
# Join streets to bike facilities on the expanded facility name.
# `matched_name` holds values drawn from STNAME_expanded, so that column is the
# right-hand key; the raw STNAME still carries abbreviations (e.g.
# "Somerville Ave") and would never equal an expanded match.
# NOTE(review): pandas matches null keys to each other, so a facility without a
# name would pair with every unmatched street - confirm STNAME has no nulls.
merged = streets.merge(
    bike_facilities,
    left_on='matched_name',
    right_on='STNAME_expanded',
    how='outer',
)

In [83]:
# List the merged frame's columns; both inputs have a `geometry` column, so the
# merge suffixes them as geometry_x (streets) and geometry_y (bike facilities).
merged.columns

Index(['u', 'v', 'key', 'osmid', 'highway', 'lanes', 'maxspeed', 'name',
       'width', 'oneway', 'reversed', 'length', 'geometry_x', 'ref', 'service',
       'access', 'bridge', 'tunnel', 'junction', 'maxspeed_int', 'width_float',
       'matched_name', 'OBJECTID', 'STNAME', 'Shape_Leng', 'Facility_T',
       'geometry_y', 'STNAME_expanded'],
      dtype='object')

In [84]:
# Display the streets table, including the `matched_name` column added by the
# fuzzy match (empty in the rows shown in the recorded output).
streets

Unnamed: 0,u,v,key,osmid,highway,lanes,maxspeed,name,width,oneway,...,geometry,ref,service,access,bridge,tunnel,junction,maxspeed_int,width_float,matched_name
0,61151272,61151274,0,8603503,residential,2,20 mph,Munroe Street,15.2,False,...,"LINESTRING (-71.0939 42.38221, -71.09354 42.38...",,,,,,,20.0,15.2,
1,61151272,9588223611,0,8603503,residential,2,20 mph,Munroe Street,15.2,False,...,"LINESTRING (-71.0939 42.38221, -71.09396 42.38...",,,,,,,20.0,15.2,
2,61151272,71921695,0,172307046,residential,,20 mph,Bigelow Street,,False,...,"LINESTRING (-71.0939 42.38221, -71.09387 42.38...",,,,,,,20.0,,
3,61151274,61155754,0,8603503,residential,2,20 mph,Munroe Street,15.2,False,...,"LINESTRING (-71.09332 42.38181, -71.09324 42.3...",,,,,,,20.0,15.2,
4,61151274,61151272,0,8603503,residential,2,20 mph,Munroe Street,15.2,False,...,"LINESTRING (-71.09332 42.38181, -71.0934 42.38...",,,,,,,20.0,15.2,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6968,13295850473,7307811424,0,9429931,unclassified,2,25 mph,Joy Street,15.2,False,...,"LINESTRING (-71.08755 42.37956, -71.08745 42.3...",,,,,,,25.0,15.2,
6969,13295850473,13295850474,0,"[1449558419, 1449558420, 1449558421]",service,,,,,False,...,"LINESTRING (-71.08755 42.37956, -71.08762 42.3...",,,,,building_passage,,,,
6970,13295850474,8534819490,0,8605193,unclassified,2,25 mph,Linwood Street,15.2,False,...,"LINESTRING (-71.08852 42.37902, -71.08779 42.3...",,,,,,,25.0,15.2,
6971,13295850474,7961610380,0,8605193,unclassified,2,25 mph,Linwood Street,15.2,False,...,"LINESTRING (-71.08852 42.37902, -71.08892 42.3...",,,,,,,25.0,15.2,


In [7]:
# Check the container type of `streets` (recorded output: GeoDataFrame).
# NOTE(review): this cell's execution count is out of sequence with the cells
# before it, so it was run in a different kernel session.
type(streets)

geopandas.geodataframe.GeoDataFrame

In [6]:
# Display the bike-facility table.
# NOTE(review): the geometry coordinates in the recorded output (e.g. 756668.771,
# 2970164.673) look projected, while `streets` uses lon/lat-looking values -
# reproject before any spatial comparison; TODO confirm both CRSs.
bike_facilities

Unnamed: 0,OBJECTID,STNAME,Shape_Leng,Facility_T,geometry
0,2305,Community Path,881.813510,Shared Use Path,"LINESTRING (756668.771 2970164.673, 756728.667..."
1,2306,Community Path,1365.715764,Shared Use Path,"LINESTRING (759908.591 2969281.793, 759970.343..."
2,2309,Community Path,1379.832447,Shared Use Path,"LINESTRING (761248.045 2968853.704, 761394.822..."
3,2314,Sylvester Baxter Path,1772.411078,Shared Use Path,"LINESTRING (768661.398 2970195.182, 768738.481..."
4,2315,Great River Path,2583.355088,Shared Use Path,"LINESTRING (770316.087 2969345.606, 770315.466..."
...,...,...,...,...,...
1026,0,Somerville Ave,157.932184,Cycle Track,"LINESTRING (765863.204 2963380.038, 766009.899..."
1027,0,Boston Ave,0.000000,Bike Lane,"LINESTRING (756205.794 2977058.731, 756251.724..."
1028,0,Boston Ave,0.000000,Bike Lane,"LINESTRING (756249.988 2977017.087, 756585.926..."
1029,0,Boston Ave,0.000000,Bike Lane,"LINESTRING (756586.794 2976724.552, 756609.363..."
