In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Put the parent directory first on the import path (presumably so the local
# `osm` module imported below resolves -- confirm against the repo layout).
import sys

sys.path.insert(0, '..')

In [None]:
import geopandas as gpd
import pandas as pd
from rapidfuzz import process, fuzz

from osm import get_network, process_network

## Load data

In [None]:
# Load the "bike" network for Somerville, MA through the project's `osm` helper.
# NOTE(review): presumably this queries OpenStreetMap over the network and may
# be slow -- confirm, and consider caching the result for Restart & Run All.
nodes, edges = get_network("Somerville, Massachusetts, USA", "bike")

In [None]:
# NOTE(review): this rebinds `edges` to its processed form, so re-running the
# cell feeds already-processed edges back into process_network. Confirm that is
# harmless, or keep the raw frame under a separate name.
edges = process_network(edges)

In [None]:
# Write the processed edges to "tmp.csv" in the working directory for manual
# inspection; reset_index() moves the index into regular columns first.
edges.reset_index().to_csv("tmp.csv")

In [None]:
# Read the Somerville 2024 bike-facility shapefile (path is relative to the
# notebook's working directory).
bike_facilities = gpd.read_file(
    "../data/Somerville 2024 bike facilities_export/"
    "Somerville_2024_bike_facilities_export.shp"
)

## Match street names to bike facilities

In [None]:
# Street-type abbreviations and the full words they stand for
# (an example set, not an exhaustive list).
abbr_mapping = dict(
    St='Street',
    Ave='Avenue',
    Rd='Road',
    Blvd='Boulevard',
    Dr='Drive',
    Ln='Lane',
    Pl='Place',
    Ct='Court',
    Sq='Square',
    Pkwy='Parkway',
    Cir='Circle',
)

In [None]:
def match_name_fuzzy(name, choices, scorer=fuzz.token_sort_ratio, score_cutoff=95):
    """Return the entry of ``choices`` that best matches ``name``, or None.

    Thin wrapper around ``rapidfuzz.process.extractOne``: ``scorer`` and
    ``score_cutoff`` are forwarded unchanged, and only the matched string
    (the first element of the result) is handed back.

    Args:
        name: Query string to look up.
        choices: Candidate strings to match against.
        scorer: rapidfuzz scorer used to rate each candidate.
        score_cutoff: Threshold passed to ``extractOne``.

    Returns:
        The matched string, or None when ``extractOne`` returns no result.
    """
    best = process.extractOne(name, choices, scorer=scorer, score_cutoff=score_cutoff)
    return best[0] if best else None

In [None]:
def expand_street_name(name, mapping=None):
    """Expand street-type abbreviations in a street name.

    Each whitespace-separated word after the first is looked up in ``mapping``
    with any leading/trailing periods removed; a hit is replaced by the full
    word, a miss is kept exactly as written. The first word is never expanded
    because a leading "St" or "Dr" is normally a title ("St James Ave" is
    Saint James Avenue), not a street type.

    Args:
        name: Street name. Non-string values (None, NaN, ...) are returned
            unchanged.
        mapping: Optional dict of abbreviation -> full word. Defaults to the
            notebook-level ``abbr_mapping``.

    Returns:
        The expanded name with words joined by single spaces, or ``name``
        itself when it is not a string.
    """
    # Covers missing values as well as unexpected types without raising.
    if not isinstance(name, str):
        return name
    if mapping is None:
        mapping = abbr_mapping
    expanded_words = [
        word if position == 0 else mapping.get(word.strip('.'), word)
        for position, word in enumerate(name.split())
    ]
    return ' '.join(expanded_words)

In [None]:
# Add STNAME_expanded: each STNAME value passed through expand_street_name.
bike_facilities['STNAME_expanded'] = bike_facilities['STNAME'].map(expand_street_name)

In [None]:
# Distinct expanded facility names used as fuzzy-match candidates.
# Missing names are dropped so they are never offered as choices, and the list
# is sorted so candidate order (and therefore tie-breaking) is the same on
# every run -- set iteration order for strings varies between kernel sessions.
bike_names = sorted(bike_facilities['STNAME_expanded'].dropna().unique())

In [None]:
# `streets` was not defined anywhere above, so this cell raised NameError on a
# fresh kernel. NOTE(review): assumes the processed OSM `edges` are the street
# segments to match and that they carry a 'name' column -- confirm.
# The copy keeps `edges` itself free of the extra column.
streets = edges.copy()

# Best-matching expanded bike-facility name for each street (None if no match
# reaches the score cutoff).
streets['matched_name'] = streets['name'].apply(lambda x: match_name_fuzzy(x, bike_names))

In [None]:
# streets[streets['matched_name'].notna()]

In [None]:
# Join on STNAME_expanded, not STNAME: matched_name values are drawn from the
# expanded names, so joining on the raw (abbreviated) STNAME silently dropped
# every facility whose name contained an abbreviation.
# NOTE(review): pandas matches null keys to each other, so with how='outer'
# unmatched streets pair up with any facility lacking a name -- confirm whether
# those rows should be filtered first.
merged = streets.merge(
    bike_facilities,
    left_on='matched_name',
    right_on='STNAME_expanded',
    how='outer',
)

In [None]:
# Show the column labels of the merged frame.
merged.columns

In [None]:
# Display the streets frame, including the matched_name column.
streets

In [None]:
# Show the Python type of `streets`.
type(streets)

In [None]:
# Display the bike-facility frame, including the STNAME_expanded column.
bike_facilities