# Setup

## Import packages

In [128]:
%%writefile config.py
import osmnx as ox # open street maps
import geopandas as gpd # geodata wrangling
import pandas as pd # data wrangling
import numpy as np
import pyomo.environ as pyo 
import shapely # even more geodata wrangling
import gurobipy as gp # solver 2
import json # save solution 
from gurobipy import GRB 
from collections import defaultdict 
from scipy.spatial.distance import cdist # makes the distance matrix computation fast
from shapely.geometry import LineString # creates LineString geometry objects
from shapely.geometry import Point # creates Point geometry objects
from pyomo.environ import * # import Var

Writing config.py


In [134]:
%run config.py

## Load data

In [5]:
# Lexington-Fayette-Urban County Government Urban Service Area
# https://hub.arcgis.com/datasets/4c8db8e014cb49349fd430d96d8994b9_0/about
city_boundary_gdf = (
    gpd.read_file('data/lfucg_usb/Urban_Service_Area.shp')
    .to_crs('EPSG:4326') # project to classic CRS
)
city_boundary=city_boundary_gdf.geometry.iloc[0]

In [6]:
# list of census blocks to remove
census_block_exclude_list = [
    '210670040061008', # rural; 770m from any bike network nodes; 44 ppl 
    '210670038023034', # connection issues; 3 ppl
    '210670039134000', # rural; >450m from any bike network nodes;
    '210670039161008', # rural; >450m from any bike network nodes;
    '210670039182019', # rural; >450m from any bike network nodes;
    '210670042081006', # rural; >450m from any bike network nodes;
    '210670040051003', # rural; >450m from any bike network nodes;
    '210670039182039', # rural; >450m from any bike network nodes;
    '210670039062021', # used debug arcs; try adding back later
    '210670039062023', # used debug arcs; try adding back later
    '210670038023033', # used debug arcs; try adding back later
    '210670037021000', # used debug arcs; try adding back later
    '210670037022012', # used debug arcs; try adding back later
]

In [7]:
# US census data
# census block geometry
census_blocks_gdf = (
    gpd
    .read_file('data/tl_2020_21067_tabblock20/tl_2020_21067_tabblock20.shp')
    .query('GEOID20 not in @census_block_exclude_list') # remove blocks from network
)
# population data
population_df = (
    pd
    .read_csv('data/DECENNIALPL2020.H1_2024-04-02T135509/DECENNIALPL2020.H1-Data.csv')
    .query('GEO_ID != "Geography"') # DECENNIALPL2020 has a "header" row
    .drop_duplicates() # DECENNIALPL2020 has 101 rows that are listed twice 
)

In [8]:
# list of arcs to remove from the bike network
arc_exclude_list = [
    83568, # disconnects census block 210670026005001
    126616, # golf course; disconnects census block 210670019001020
    126608, # golf course; disconnects census block 210670019001003
    126612, # golf course; disconnects census block 210670019001002
    60836, # parking lot; disconnects census block 210670019002002
    60835, # parking lot; disconnects census block 210670019002002
    62039, # parking lot; disconnects census block 210670019002002
    79572, # parking lot; disconnects census block 210670010002002
    85764, # parking lot; disconnects census block 210670010002002
    116486, # sidewalk; disconnects census block 210670039084042
    98492, # sidewalk; disconnects census block 210670039084042
    126711, # golf course; disconnects census block 210670034052004
    126398, # golf course; disconnects census block 210670034052003
    81467, # parking lot; disconnects census block 210670041053003 
    81468, # parking lot; disconnects census block 210670041053003 
    81469, # parking lot; disconnects census block 210670041053003 
    81471, # parking lot; disconnects census block 210670041053003 
    81472, # parking lot; disconnects census block 210670041053003 
    81477, # parking lot; disconnects census block 210670041053003 
    119911, # parking lot; disconnects census block 210670041053003 
    59819, # sidewalk; disconnects census block 210670001011027
    57257, # sidewalk; disconnects census block 210670001011027
    59818, # sidewalk; disconnects census block 210670001011008
    65637, # parking lot; disconnects census block 210670003003006
    50229, # sidewalk; disconnects census block 210670004002004
    50111, # sidewalk; disconnects census block 210670004002004
    59541, # sidewalk; disconnects census block 210670018002015
    54569, # sidewalk; disconnects census block 210670018002015
    54180, # sidewalk; disconnects census block 210670008022002
    73268, # parking lot; disconnects census block 210670007001013
    73296, # parking lot; disconnects census block 210670007001013
    73294, # parking lot; disconnects census block 210670007001013
    73292, # parking lot; disconnects census block 210670007001013
    71562, # parking lot; disconnects census block 210670007002002
    74034, # parking lot; disconnects census block 210670007002002
    126440, # golf course; disconnects census block 210670017002000
    126441, # golf course; disconnects census block 210670017002000
    126442, # golf course; disconnects census block 210670017002000
    126443, # golf course; disconnects census block 210670017002000
    126444, # golf course; disconnects census block 210670017002000
    126445, # golf course; disconnects census block 210670017002000
    126446, # golf course; disconnects census block 210670017002000
    126447, # golf course; disconnects census block 210670017002000
    126448, # golf course; disconnects census block 210670017002000
    126449, # golf course; disconnects census block 210670017002000
    126450, # golf course; disconnects census block 210670017002000
    81470, # parking lot; disconnects census block 210670041053003
    83569, # parking lot; disconnects census block 210670026005001
    83571, # parking lot; disconnects census block 210670026005001
    83569, # parking lot; disconnects census block 210670026005001
    62068, # parking lot; disconnects census block 210670019002002
    62071, # parking lot; disconnects census block 210670019002002
    60837, # parking lot; disconnects census block 210670019002002
    50232, # sidewalk; disconnects census block 210670004002004
    58492, # sidewalk; disconnects census block 210670004002004
    126397, # golf course; disconnects census block 210670034052003
    126396, # golf course; disconnects census block 210670034052003
    126397, # golf course; disconnects census block 210670034052003
    126710, # golf course; disconnects census block 210670034052003
    71233, # golf course; disconnects census block 210670034052003
    72867, # golf course; disconnects census block 210670034052003
    72868, # golf course; disconnects census block 210670034052003
    72869, # golf course; disconnects census block 210670034052003
    72870, # golf course; disconnects census block 210670034052003
    58109, # sidewalk; disconnects census block 210670001021003; maybe
    59254, # sidewalk;disconnects census block 210670001021003; maybe
    79919, # parking lot; disconnects census block 210670040013000; maybe
    79918, # parking lot; disconnects census block 210670040013000; maybe
    79921, # parking lot; disconnects census block 210670040013000; maybe
    79917, # parking lot; disconnects census block 210670040013000; maybe
    114815, # parking lot; disconnects census block 210670032021009; maybe
    109752, # parking lot; disconnects census block 210670032021009; maybe
    109753, # parking lot; disconnects census block 210670032021009; maybe
    106337, # parking lot; disconnects census block 210670038023034; maybe
    106339, # parking lot; disconnects census block 210670038023034; maybe
    106338, # parking lot; disconnects census block 210670038023034; maybe
    101567, # parking lot; disconnects census block 210670038023034; maybe
    106340, # parking lot; disconnects census block 210670038023034; maybe
    86396, # parking lot; disconnects census block 210670037042044; maybe
    94649, # parking lot; disconnects census block 210670037042044; maybe
    #86394, # parking lot; disconnects census block 210670037042044; maybe
    #86395, # parking lot; disconnects census block 210670037042044; maybe
]

In [9]:
# PeopleForBikes cycle network data
pfb_gdf = (
    gpd.read_file('data/people_for_bikes/neighborhood_ways/neighborhood_ways.shp')
    .query('ROAD_ID not in @arc_exclude_list') # remove disconnected edges from the network
    .query("FUNCTIONAL != 'motorway'") # remove the highways
    .query("FUNCTIONAL != 'motorway_link'") # exit ramps etc.
)

In [10]:
# list of amenities to exclude based on local knowledge
# consider editing the source file directly 
dest_exclude_list = [
    'node/12001067429', # 'The Venue Shopping Center Courtyard' is not a real park
    'node/8520821500', # Speigle Heights Park has a node and a way
    'node/12001059631', # Zandale Park has a node and a way
    'node/3197373270' # Red Mile Horse training area isn't a 'park' in the sense we care about
]

In [11]:
# amenity data
amenities_gdf = (
    gpd
    .read_file('data/lex_parks_export.geojson')
    .cx[-84.6 : -84.3,  37.9 : 38.2] # filter non-KY Lexingtons using a bounding box
    .query('id not in @dest_exclude_list') # remove individual entries
    # consider adding (or replacing) this filter with the Urban Service Boundary
)

## Define parameters

In [12]:
# connect any disconnected origin nodes to intermediate nodes within the following radii
radii = [50,100,150,200,250,300,400] 

# upgrade costs (in $million/mile)
cost_per_mile = 2 * 10**6 # $2mil/mi

# convert to $/m
cost_per_meter = cost_per_mile/1609.34

# Data wrangling

## Create nodes

### Origin nodes: census blocks

In [13]:
# merge geo and pop data
origin_nodes_df = (
    population_df
    .assign(
        geoid20=lambda x: x['GEO_ID'].str.slice(9, 24), # reformat the geoid to match geoblocks_df
        H1_001N=lambda x: x['H1_001N'].astype(int) # convert to integer
    ) 
    
    # merge with census blocks geographic data 
    .merge(census_blocks_gdf, left_on='geoid20', right_on='GEOID20', how='right') 

    # create new columns
    .assign(
        name = lambda x: x['GEOID20'].str[-8:], # last eight digits of GEOID20
        node_type = 'origin'
    )

    # rename columns 
    .rename(columns={
        'GEOID20' : 'id_string',
        'H1_001N' : 'netflow', 
        'INTPTLAT20' : 'lat', 
        'INTPTLON20' : 'lon'
    })

    # select nodes with nonzero population
    .query("netflow > 0")

    # select points inside USB
    .assign(
        # define a Point so we can apply .within()
        geometry=lambda df: df.apply(
            lambda x: Point(x['lon'], x['lat']) if (pd.notna(x['lon']) and pd.notna(x['lat'])) else None,
            axis=1
        ),
        # define a truth column
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )
    .query("in_USB == True")

    # reset index — this removes skips in the index (needed for origin_to_intermediate arc creation)
    .reset_index()
    
    # select columns
    [['id_string','name','node_type','netflow','lat','lon']]
)

In [14]:
# calculate the total population (for destination, sink netflows)
total_pop = origin_nodes_df['netflow'].sum()

# 142011

### Intermediate nodes: cycle network

In [15]:
# pull PFB cycling network intersections
# these steps should be similar to latlon.ipynb

In [16]:
# heads 
heads_identifiers_gdf = (
    pfb_gdf
    
    # select columns
    [['INTERSECTI','geometry']]
    
    # change data types, create new columns
    .assign(
        identifier = lambda x: x['INTERSECTI'].astype(int),
        geometry = lambda x: (x['geometry'].to_crs("EPSG:4326")), # convert to classic CRS
        coord = lambda x: (x['geometry'].apply(
            lambda line: (line.coords[0][1], line.coords[0][0]) if line else None) # grab first coord, switch order
        )
    )

    # select points inside USB
    .assign(
        # define a truth column
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )
    .query("in_USB == True")

    # re-select columns
    [['identifier','coord']]
)

In [17]:
# tails
tails_identifiers_gdf = (
    pfb_gdf
    
    # select columns
    [['INTERSE_01','geometry']]
    
    # change data types, create new columns
    .assign(
        identifier = lambda x: x['INTERSE_01'].astype(int),
        geometry = lambda x: (x['geometry'].to_crs("EPSG:4326")), # convert to classic CRS
        coord = lambda x: (x['geometry'].apply(
            lambda line: (line.coords[-1][1], line.coords[-1][0]) if line else None) # grab last coord, switch order
        )
    )

    # select points inside USB
    .assign(
        # define a truth column
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )
    .query("in_USB == True")

    # re-select columns
    [['identifier','coord']]
)

In [18]:
# combine into single (g)df
intermediate_nodes_df = (
    # concat
    pd.concat([heads_identifiers_gdf, tails_identifiers_gdf], axis=0, ignore_index=True)

    # remove duplicates — this can cause skips in the index
    .drop_duplicates()

    # reset index — this removes skips in the index
    .reset_index()

    # create new columns
    .assign(
        name = '', # blank for now, we just need this column to match the amenity gdf
        node_type = 'intermediate',
        netflow = 0, 
        lat = lambda x: x['coord'].apply(lambda x: x[0] if x else None),
        lon = lambda x: x['coord'].apply(lambda x: x[1] if x else None) 
    )

    # rename columns 
    .rename(columns={'identifier' : 'id_string'})

    # select columns
    [['id_string','name','node_type','netflow','lat','lon']]
)

### Destination nodes: amenities

In [19]:
# pull OSM aminities
# for now we're querying externally via Overpass Turbo
# using parks to start

# open file from overpass turbo
dest_nodes_df = (
    amenities_gdf

    # create new columns
    .assign(
        node_type = 'destination',
        rep_point = lambda x: x['geometry'].representative_point(), # define representative point
        netflow = 0, 
        lat = lambda x: x['rep_point'].apply(lambda point: point.y if point else None), # extract lat
        lon = lambda x: x['rep_point'].apply(lambda point: point.x if point else None) # extract lon
    )

    # rename columns 
    .rename(columns={'id' : 'id_string'})

    # select points inside USB
    .assign(
        # define a truth column
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )
    .query("in_USB == True")

    # reset index — this removes skips in the index
    .reset_index()

    # select columns
    [['id_string','name','node_type','netflow','lat','lon']]
)

In [20]:
# compute the total number of amenities (need for dummy node netflow)
num_amenity_nodes = dest_nodes_df.shape[0]

In [21]:
# scratch
print(num_amenity_nodes)

121


### Sink node

In [22]:
# create sink
new_row = gpd.GeoDataFrame(
    {
        'id_string': 'sink', # name it
        'name': 'sink', 
        'node_type' : 'sink',
        'netflow': -1*total_pop, # this balances the negative flows of the amenities
        'lat': [0], # put it somewhere not in the network
        'lon': [0] # put it somewhere not in the network
    }
)

### Combine into a single DataFrame

In [23]:
# combine into single gdf
nodes_df = pd.concat([origin_nodes_df, intermediate_nodes_df, dest_nodes_df, new_row], ignore_index=True)

## Create arcs

### Arcs from census blocks to bike network

In [24]:
# project nodes to Albers contiguous USA

# project origin nodes
origin_nodes_projected_gdf = (
    # need a gdf
    gpd.GeoDataFrame(
        origin_nodes_df,  # original DataFrame
        geometry=gpd.points_from_xy(origin_nodes_df['lon'], origin_nodes_df['lat']),  # create geometry column
        crs='EPSG:4326'  # define the CRS (WGS84 for latitude/longitude)
    )
    
    # project to USAC AUAC
    .to_crs('ESRI:102003')
    
    # make the index a column so we can reference it later
    .reset_index() 
)

# project intermediate network nodes
intermediate_nodes_projected_gdf = (
    # need a gdf
    gpd.GeoDataFrame(
        intermediate_nodes_df,  # original dataframe
        geometry=gpd.points_from_xy(intermediate_nodes_df['lon'], intermediate_nodes_df['lat']),
        crs='EPSG:4326'  # define the CRS (WGS84 for latitude/longitude)
    )
    
    # project to USAC AUAC
    .to_crs('ESRI:102003')
    
    # make the index a column so we can reference it later
    .reset_index() 
)

In [25]:
# extract coordinates of geometries
origin_coords = np.array([geom.coords[0] for geom in origin_nodes_projected_gdf.geometry])
intermediate_coords = np.array([geom.coords[0] for geom in intermediate_nodes_projected_gdf.geometry])

# compute pairwise distances
oi_distance_matrix = cdist(origin_coords, intermediate_coords) 

In [26]:
# identify (origin,intermediate) pairs within stratified radii

# stack matrix into a DataFrame
oi_distance_df = (
    # convert matrix to df object
    pd.DataFrame(
        oi_distance_matrix,
        index = origin_nodes_projected_gdf.index,  # use census block indices
        columns = intermediate_nodes_projected_gdf.index  # use cycle network node indices
    )
    .stack() # change shape so distance is a column
    .reset_index()  # unstack to have row-column pairs 
    .rename(columns={'level_0': 'row_index', 'level_1': 'col_index', 0: 'distance'})
)

# initialize list to fill with connected pairs (origin, destination, radius)
# use list instead of DataFrame for the sake of memory
oi_index_pairs_list = []

for r in radii:
    # identify idex pairs with distance <= r
    new_oi_pairs_list = (
        oi_distance_df
        .query('distance <= @r')  # filter distances 
        .assign(radius=r) # keep track of radius
        .values.tolist() # convert to list
    )
    
    # concatenate new index pairs and radius to oi_index_pairs_list
    oi_index_pairs_list = oi_index_pairs_list + new_oi_pairs_list 
    
    # determine which origin nodes have been connected
    new_origins_list = list({tup[0] for tup in new_oi_pairs_list})

    # update oi_distance_df to exclude newly connected origin nodes
    oi_distance_df = oi_distance_df[~oi_distance_df['row_index'].isin(new_origins_list)]
    
    # debugging stuff
    #print(f"radius: {r}")
    #print(f"length of new_oi_pairs_list: {len(new_oi_pairs_list)}")
    #print(f"length of oi_index_pairs_list: {len(oi_index_pairs_list)}")
    #print(f"length of new_origins_list: {len(new_origins_list)}")
    #print(f"remaining rows in oi_distance_df: {len(oi_distance_df)}")
    #print('')

In [27]:
# use oi_index_pairs_list to create a new gdf of edges 

# construct a DataFrame
origin_to_intermediate_arcs_df = (
    # convert to DataFrame
    pd.DataFrame(
        oi_index_pairs_list, 
        columns=['row_index','col_index','distance','radius']
    )
    
    # merge with original node data 
    .merge(origin_nodes_projected_gdf, left_on='row_index', right_on='index') # merge with census data
    .merge(intermediate_nodes_projected_gdf, left_on='col_index', right_on='index') # merge with cycle data
)

# construct a GeoDataFrame
origin_to_intermediate_arcs_gdf = (
    gpd.GeoDataFrame(
        origin_to_intermediate_arcs_df, # original dataframe
        geometry=origin_to_intermediate_arcs_df.apply(lambda row: LineString([row['geometry_x'], row['geometry_y']]), axis=1),
        crs='ESRI:102003' # define the CRS 
    )

    # project back to classic CRS 
    .to_crs('EPSG:4326')
    
    # create new columns
    .assign(
        arc_type = 'origin_to_intermediate',
        in_H = 0, # all low stress
        in_H2 = 0, # all mono-directional
        dist = 0, # all 0 distance
        tail_id = lambda x: x['id_string_x'],
        head_id = lambda x: x['id_string_y']
    )

    # rename columns 
    .rename(columns={
        #'id_string_x' : 'tail_id',
        'lat_x' : 'tail_lat',
        'lon_x' : 'tail_lon',
        #'id_string_y' : 'head_id',
        'lat_y' : 'head_lat',
        'lon_y' : 'head_lon',
        #'radius' : 'cxn_radius'
    })
    
    # select columns
    #[['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','cxn_radius','in_H','in_H2','dist','geometry']]
    [['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

### Bike network arcs

In [28]:
# bike network
# a note on arcs OSM, PFB direction

# arc (x,y) has tail x and head y
# OSM arcs drawn in direction (from, to) so tail = from and head = to
# PFB has from-to edges (INTERSECTI,INTERSE_01), and to-from edges (INTERSE_01,INTERSECTI)

In [29]:
# gather from-to rows
pfb_ft_gdf = ( 
    pfb_gdf
    
    # select rows from PFB that have From-To data
    .query('FT_SEG_STR.isnull() == False')
    
    # change data types, create new columns
    .assign(
        arc_type = 'intermediate',
        geometry = lambda x: (x['geometry'].to_crs("EPSG:4326")), # convert to classic CRS 
        tail_id = lambda x: x['INTERSECTI'],
        tail_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[0][1], x.coords[0][0]) if x else None) # grab first coord, switch order
        ),
        tail_lat = lambda x: x['tail_coord'].apply(lambda x: x[0] if x else None), 
        tail_lon = lambda x: x['tail_coord'].apply(lambda x: x[1] if x else None),
        head_id = lambda x: x['INTERSE_01'],
        head_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[-1][1], x.coords[-1][0]) if x else None) # grab last coord, switch order
        ),
        head_lat = lambda x: x['head_coord'].apply(lambda x: x[0] if x else None),
        head_lon = lambda x: x['head_coord'].apply(lambda x: x[1] if x else None),
        in_H = lambda x: np.where(x['FT_SEG_STR'] == 1, 0, 1), # if pfb stress == 1, then not in H, else in H
        in_H2 = lambda x: np.where(
            (x['ONE_WAY'].isnull()) & (x['FT_SEG_STR'] > 1), 
            1, # if ONE_WAY is null and high stress, then bi-directional
            0 # else 
        ),
        dist = (
            pfb_gdf.to_crs("ESRI:102003") # project for distance calculation
            .geometry.length
        ),
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )

    # select rows inside USB
    #.query("in_USB == True")

    # select columns
    [['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

In [30]:
# gather to-from rows
pfb_tf_gdf = (
    pfb_gdf
    
    # select rows from PFB that have To-From data
    .query('TF_SEG_STR.isnull() == False')
    
    # change data types, create new columns
    .assign(
        arc_type = 'intermediate',
        geometry = lambda x: (
            x['geometry']
            .to_crs("EPSG:4326") # convert to classic CRS
            .reverse() # reverse order of LineString since To-From is backwards 
        ),   
        tail_id = lambda x: x['INTERSE_01'],
        tail_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[0][1], x.coords[0][0]) if x else None) # grab first coord, switch order
        ),
        tail_lat = lambda x: x['tail_coord'].apply(lambda x: x[0] if x else None), 
        tail_lon = lambda x: x['tail_coord'].apply(lambda x: x[1] if x else None),
        head_id = lambda x: x['INTERSECTI'],
        head_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[-1][1], x.coords[-1][0]) if x else None) # grab last coord, switch order
        ),
        head_lat = lambda x: x['head_coord'].apply(lambda x: x[0] if x else None),
        head_lon = lambda x: x['head_coord'].apply(lambda x: x[1] if x else None),
        in_H = lambda x: np.where(x['TF_SEG_STR'] == 1, 0, 1), # if pfb stress == 1, then not in H, else in H
        in_H2 = lambda x: np.where(
            (x['ONE_WAY'].isnull()) & (x['TF_SEG_STR'] > 1), 
            1, # if ONE_WAY is null and high stress, then bi-directional
            0 # else 
        ),
        dist = (
            pfb_gdf.to_crs("ESRI:102003") # project for distance calculation
            .geometry.length
        ),
        in_USB=lambda df: df['geometry'].apply(lambda point: city_boundary.contains(point))
    )

    # select rows inside USB
    #.query("in_USB == True")
    
    # select columns
    [['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

In [31]:
# create a list of bike network nodes 
intermediate_id_list = intermediate_nodes_df['id_string'].tolist()

In [32]:
# combine From-To and To-From
intermediate_arcs_gdf = (
    pd.concat([pfb_ft_gdf, pfb_tf_gdf], axis=0, ignore_index=True)
    # only keep arcs that are incident to nodes in the USB
    .query("tail_id in @intermediate_id_list")
    .query("head_id in @intermediate_id_list")
)

### Arcs from bike network to amenities

In [33]:
# project nodes to Albers contiguous USA
# note that we already made intermediate_nodes_projected_gdf in the first arcs subsetction

# project destination nodes
dest_nodes_projected_gdf = (
    # need a gdf
    gpd.GeoDataFrame(
        dest_nodes_df,  # original DataFrame
        geometry=gpd.points_from_xy(dest_nodes_df['lon'], dest_nodes_df['lat']),  # create geometry column
        crs='EPSG:4326'  # define the CRS (WGS84 for latitude/longitude)
    )
    
    # project to USAC AUAC
    .to_crs('ESRI:102003')
    
    # make the index a column so we can reference it later
    .reset_index() 
)

In [34]:
# extract coordinates of geometries
#intermediate_coords = np.array([geom.coords[0] for geom in intermediate_nodes_projected_gdf.geometry])
dest_coords = np.array([geom.coords[0] for geom in dest_nodes_projected_gdf.geometry])

# compute pairwise distances
id_distance_matrix = cdist(intermediate_coords, dest_coords) 

In [35]:
# identify (intermediate,destination) pairs within stratified radii

# stack matrix into a DataFrame
id_distance_df = (
    # convert matrix to df object
    pd.DataFrame(
        id_distance_matrix,
        index = intermediate_nodes_projected_gdf.index,  # use cycle network indices
        columns = dest_nodes_projected_gdf.index  # use amenity indices
    )
    .stack() # change shape so distance is a column
    .reset_index()  # unstack to have row-column pairs 
    .rename(columns={'level_0': 'row_index', 'level_1': 'col_index', 0: 'distance'})
)

# initialize list to fill with connected pairs (origin, destination, radius)
# use list instead of DataFrame for the sake of memory
id_index_pairs_list = []

for r in radii:
    # identify idex pairs with distance <= r
    new_id_pairs_list = (
        id_distance_df
        .query('distance <= @r')  # filter distances 
        .assign(radius=r) # keep track of radius
        .values.tolist() # convert to list
    )
    
    # concatenate new index pairs and radius to oi_index_pairs_list
    id_index_pairs_list = id_index_pairs_list + new_id_pairs_list 
    
    # determine which origin nodes have been connected
    new_dest_list = list({tup[1] for tup in new_id_pairs_list})

    # update oi_distance_df to exclude newly connected origin nodes
    id_distance_df = id_distance_df[~id_distance_df['col_index'].isin(new_dest_list)]
    # currently not enough data is removed from id_distance_df
    # many parks have connections at multiple strata
    
    # debugging stuff
    #print(f"radius: {r}")
    #print(f"length of new_id_pairs_list: {len(new_id_pairs_list)}")
    #print(f"length of id_index_pairs_list: {len(id_index_pairs_list)}")
    #print(f"new_dest_list head: {new_dest_list[0:10]}")
    #print(f"remaining rows in id_distance_df: {len(id_distance_df)}")
    #print('')

In [36]:
# use id_index_pairs_list to create a new gdf of edges 

# construct a DataFrame
intermediate_to_dest_arcs_df = (
    # convert to DataFrame
    pd.DataFrame(
        id_index_pairs_list, 
        columns=['row_index','col_index','distance','radius']
    )
    
    # merge with original node data 
    .merge(intermediate_nodes_projected_gdf, left_on='row_index', right_on='index') # merge with cycle data
    .merge(dest_nodes_projected_gdf, left_on='col_index', right_on='index') # merge with amenity data
)

# construct a GeoDataFrame
intermediate_to_dest_arcs_gdf = (
    gpd.GeoDataFrame(
        intermediate_to_dest_arcs_df, # original dataframe
        geometry=intermediate_to_dest_arcs_df.apply(lambda row: LineString([row['geometry_x'], row['geometry_y']]), axis=1),
        crs='ESRI:102003' # define the CRS 
    )

    # project back to classic CRS 
    .to_crs('EPSG:4326')
    
    # create new columns
    .assign(
        arc_type = 'intermediate_to_destination',
        in_H = 0, # all low stress
        in_H2 = 0, # all mono-directional
        dist = 0, # all 0 distance
        tail_id = lambda x: x['id_string_x'],
        head_id = lambda x: x['id_string_y'],
    )

    # rename columns 
    .rename(columns={
        #'id_string_x' : 'tail_id',
        'lat_x' : 'tail_lat',
        'lon_x' : 'tail_lon',
        #'id_string_y' : 'head_id',
        'lat_y' : 'head_lat',
        'lon_y' : 'head_lon',
        #'radius' : 'cxn_radius'
    })
    
    # select columns
    #[['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','cxn_radius','in_H','in_H2','dist','geometry']]
    [['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

### Arcs from amenities to sink

In [37]:
# create a gdf with arcs from each amenity to the dummy node 

dest_to_sink_arcs_gdf = (
    # initialize gdf
    gpd.GeoDataFrame(
        dest_nodes_df,  # original DataFrame
        geometry=dest_nodes_df.apply(lambda row: LineString([(row['lon'], row['lat']), (0, 0)]), axis=1),
        crs='EPSG:4326'  # define the CRS (WGS84 for latitude/longitude)
    )
    # rename columns
    .rename(columns={
        #'id_string' : 'tail_id',
        'lat' : 'tail_lat', 
        'lon' : 'tail_lon'
    })

    # create columns
    .assign(
        tail_id = lambda x: x['id_string'],
        head_id = 'sink',
        head_lat = 0,
        head_lon = 0,
        arc_type = 'amenity_to_sink',
        in_H = 0, # all low stress
        in_H2 = 0, # all mono-directional
        dist = 0 # all 0 distance
    )

    # select columns
    [['tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

### Combine into a single GeoDataFrame

In [38]:
# combine into single gdf
arcs_gdf = (
    pd.concat(
    [origin_to_intermediate_arcs_gdf, intermediate_arcs_gdf, intermediate_to_dest_arcs_gdf, dest_to_sink_arcs_gdf],
    ignore_index=True
    )
    .assign(arc_id = lambda x: list(zip(x['tail_id'],x['head_id'])))
    [['arc_id','tail_id','tail_lat','tail_lon','head_id','head_lat','head_lon','arc_type','in_H','in_H2','dist','geometry']]
)

## Create solver objects

In [39]:
# node-related lists
nodes = nodes_df['id_string'].tolist()
node_to_netflow = (
    nodes_df
    .set_index('id_string')['netflow']  # use id_string as keys, netflow as values
    .to_dict()  # convert to dictionary
)

In [40]:
# arc-related lists
arcs = arcs_gdf['arc_id'].tolist()
high_stress_arcs = arcs_gdf.query("in_H==1")['arc_id'].tolist()

arc_to_dist = (
    arcs_gdf
    .set_index('arc_id')['dist']  # use id_string as keys, dist as values
    .to_dict()  # convert to dictionary
)

high_stress_arc_to_cost = (
    arcs_gdf
    .query("in_H==1")
    .assign(cost=lambda x: cost_per_meter * x['dist'])
    .set_index(['tail_id', 'head_id'])['cost']  # use (tail_id, head_id) as keys
    .to_dict()  # convert to dictionary
)

In [41]:
# dictionaries of incident edges
node_to_incoming_arcs = defaultdict(set)
node_to_outgoing_arcs = defaultdict(set)

for x,y in arcs_gdf.arc_id: 
    node_to_incoming_arcs[y].add((x,y))
    node_to_outgoing_arcs[x].add((x,y))

In [42]:
# scratch 
# debugging solver error re bidihi edges

# first problem index pair
# (76993.0, 76040.0)
# look for these edges in bidihi and associated data structures
# dist_ list
# arcs
# arcs_gdf

# bidi_high_stress_arcs
#if (76993.0, 76040.0) in bidi_high_stress_arcs:
#    print('oui')
#else:
#    print('non')

# not in bidi_high_stress_arcs
# reflected by arcs_gdf H2=0

In [43]:
# scratch 
# debugging solver error re bidihi edges 

arcs_gdf.query("tail_id == 76993").query("head_id == 76040")
# something fishy is going on here
# it appears that this arc shows up in ft and tf (double check)
# but one of these versions has H2=1 and the other has H2=0

Unnamed: 0,arc_id,tail_id,tail_lat,tail_lon,head_id,head_lat,head_lon,arc_type,in_H,in_H2,dist,geometry
56741,"(76993.0, 76040.0)",76993.0,38.059879,-84.465461,76040.0,38.059846,-84.46556,intermediate,0,0,9.320275,"LINESTRING (-84.46546 38.05988, -84.46556 38.0..."


In [44]:
# scratch 
# debugging solver error re bidihi edges 

# are there are one ways for cars and bikes that are high stress? 
debug_gdf = (
    pfb_gdf
    #.query("ONE_WAY_CA.isnull() == False") # one way for cars
    #.query("ONE_WAY.isnull() == False") # one way for bikes
    #.query("FT_SEG_STR > 1") # high stress in the from-to direction (only one to-from instance)
    .query("FT_SEG_STR.isnull() == False")
    .query("TF_SEG_STR.isnull() == False")
    .query("FT_SEG_STR != TF_SEG_STR") # stress levels different
    .assign(
        dist = (
            pfb_gdf.to_crs("ESRI:102003") # project for distance calculation
            .geometry.length
        )
    )
    # min of directional stress == 1
    #[['NAME','FT_SEG_STR','TF_SEG_STR','geometry','dist']]
    .groupby(['NAME'])
    .agg(
        total_distance=('dist', 'sum')
    )
    .reset_index()
)
debug_gdf#.to_file('data/exports/stress_mismatch.shp')

Unnamed: 0,NAME,total_distance
0,Bryan Avenue,122.315252
1,East Fourth Street,87.838215
2,Hilltop Avenue,78.469943


In [45]:
summarized_df = (
    pfb_gdf
    #.query("ONE_WAY_CA == 'YES'")
    #.query("ONE_WAY != 'ft'")
    #[['ONEWAY','ONE_WAY','NAME','dist']]
    .assign(
        dist = (
            pfb_gdf.to_crs("ESRI:102003") # project for distance calculation
            .geometry.length
        )
    )
)
summarized_df#.to_file('data/exports/one_way_deduct.shp')
# why is this returning a totally NaN df?

Unnamed: 0,ROAD_ID,OSM_ID,TAG_ID,NAME,INTERSECTI,INTERSE_01,ONEWAY,TDG_ID,FUNCTIONAL,PATH_ID,...,FT_PARK,TF_PARK,FT_SEG_STR,FT_INT_STR,TF_SEG_STR,TF_INT_STR,XWALK,JOB_ID,geometry,dist
0,1.0,16169228.0,110,Lin Wal Road,5374.0,8763.0,NO,9b076314-493f-41ca-868c-9f75954f0b49,residential,,...,,,1.0,1,1.0,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (722138.333 4217491.548, 722062.723...",106.466072
1,2.0,288211647.0,112,,7894.0,23999.0,NO,da95d307-4c4f-4cc9-908a-fece94bd8ddd,path,1.0,...,,,1.0,1,1.0,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (719263.420 4211239.645, 719277.308...",19.797888
2,3.0,258150215.0,112,,44442.0,22981.0,YES,bf3de0fd-fd27-4bab-9f65-e35f5384401f,path,659.0,...,,,1.0,3,,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (719937.392 4205928.867, 719939.506...",21.663028
3,65.0,16097102.0,108,Hutchison Road,124.0,48.0,UNKNOWN,30f02070-dcdd-4d1a-b326-b70678128d4e,secondary,,...,,,3.0,1,3.0,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (731970.025 4225135.487, 731977.494...",50.542355
4,67.0,16097102.0,108,Hutchison Road,38746.0,49.0,UNKNOWN,22d4c8d5-2d14-4ef1-a167-0d8f3d062f9a,secondary,,...,,,3.0,1,3.0,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (732528.350 4224784.227, 732544.819...",123.310326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51585,2349.0,42090057.0,105,,40493.0,2122.0,YES,83e50462-d084-4d8f-8b07-2ad455e298f6,trunk_link,,...,,,3.0,1,,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (723422.616 4213644.103, 723428.744...",126.876702
51586,11961.0,42090058.0,105,,20492.0,20494.0,YES,e44777fb-562f-4e01-b1fd-c56d067cc658,trunk_link,,...,,,3.0,1,,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (723408.229 4213616.876, 723400.123...",9.532647
51587,12979.0,42090058.0,105,,20494.0,20495.0,YES,55228151-9afe-4bc5-88ad-0f4f705bda38,trunk_link,,...,,,3.0,1,,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (723400.123 4213621.963, 723389.489...",11.693574
51588,22942.0,42090058.0,105,,2292.0,20492.0,YES,d7b0fa7c-a98a-4ac9-9afb-794954af2a20,trunk_link,,...,,,3.0,1,,1,,5a751b59-ecc5-4924-b7bc-9002321614c0,"LINESTRING (723421.525 4213605.712, 723418.519...",17.340624


# Solver

In [46]:
# define budget, use 15% of total cost 
budget = 0.15*sum(high_stress_arc_to_cost.values())
#budget/ (10**6) # comes out to about $129 million

# factor by which a low stress path would need to exceed a high stress route in order for someone to choose the shorter high stress route. 
# equiv: someone would be willing f times further in order to stay on a low stress path
f = 1.25 
# see https://transweb.sjsu.edu/sites/default/files/1005-low-stress-bicycling-network-connectivity.pdf
# page 3

In [47]:
# set up the model
model = pyo.ConcreteModel()

# variables
model.x = pyo.Var(arcs, domain=pyo.NonNegativeIntegers)
model.y = pyo.Var(high_stress_arcs, domain=pyo.Binary)
model.z = pyo.Var(high_stress_arcs, domain=pyo.Binary)

# objective function
def obj_rule(model):
    return (sum(arc_to_dist[i,j]*model.x[i,j] for i,j in arcs) 
            + sum((f-1)*arc_to_dist[i,j]*model.z[i,j] for i,j in high_stress_arcs)
           )
    
model.obj = pyo.Objective(rule=obj_rule,sense=pyo.minimize)

# constraints
## flow balance
def flow_balance_rule(model,node):
    return (
        sum(model.x[node,j] for node,j in node_to_outgoing_arcs[node]) - sum(model.x[i,node] for i,node in node_to_incoming_arcs[node]) == node_to_netflow[node]
    )

model.flow_balance = pyo.Constraint(nodes,rule=flow_balance_rule)

## use only low stress arcs or upgraded high stress arcs, or incur a penalty
def low_stress_rule(model,i,j): 
    return model.z[i,j] >= model.x[i,j] - total_pop*model.y[i,j]  

model.low_stress = pyo.Constraint(high_stress_arcs,rule=low_stress_rule)

## upgrade bidirectional high stress arcs in pairs
## temporarily suspending this constraint
#def bidirectional_upgrade_rule(model,i,j):
#    return model.y[i,j] <= model.y[j,i]

#model.bidirectional_upgrade = pyo.Constraint(bidi_high_stress_arcs,rule=bidirectional_upgrade_rule)

## stay on budget
def budget_rule(model): 
    return sum(high_stress_arc_to_cost[i,j]*model.y[i,j] for i,j in high_stress_arcs) <= budget 

model.budget = pyo.Constraint(rule=budget_rule)

In [48]:
# run the solver for the stress-free model
solver_name = 'gurobi'
solver = pyo.SolverFactory(solver_name)
solver.options['TimeLimit'] = 15*60
solver.options['MIPGap'] = 0.01
results = solver.solve(model, tee=True)

Set parameter Username
Set parameter LicenseID to value 2612524
Academic license - for non-commercial use only - expires 2026-01-21
Read LP format model from file /var/folders/d7/x2vjzx3d2qd85_gll3w99lyr0000gn/T/tmpw81i72vb.pyomo.lp
Reading time = 0.15 seconds
x1: 51223 rows, 116585 columns, 232918 nonzeros
Set parameter TimeLimit to value 900
Set parameter MIPGap to value 0.01
Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (mac64[arm] - Darwin 23.5.0 23F79)

CPU model: Apple M3
Thread count: 8 physical cores, 8 logical processors, using up to 8 threads

Non-default parameters:
TimeLimit  900
MIPGap  0.01

Optimize a model with 51223 rows, 116585 columns and 232918 nonzeros
Model fingerprint: 0x3087cf34
Variable types: 0 continuous, 116585 integer (25430 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+06]
  Objective range  [1e-01, 2e+03]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+08]
Presolve removed 24901 rows and 49170 columns
Presolve time: 0.30

In [None]:
# save solution

# save as an LP file (human-readable)
#model.write("model1.lp", io_options={"symbolic_solver_labels": True})

# save as an MPS file (compact, used by solvers)
#model.write("model1.mps")

In [73]:
# save the solution as JSON
solution_data = {
    "Objective Value": model.obj(),
    "Variables": {v.name: v.value for v in model.component_data_objects(Var)}
}

#with open("solution1.json", "w") as f:
#    json.dump(solution_data, f, indent=4)

# Analyze solution

In [61]:
# what was the objective value? 
# distance travelled (meters) + penalty
model.obj()

148501913.3635659

In [62]:
# objective (distance + penalty)/person (meters)
model.obj() / total_pop

1049.5873327648383

In [None]:
# what was the objective value of the LP relaxation?

In [55]:
# which arcs were used?
nonzero_flow = [(s,pyo.value(model.x[s])) for s in arcs if pyo.value(model.x[s])>=1]

In [57]:
len(nonzero_flow)/len(arcs)

0.15026030628104384

In [51]:
# which arcs were upgraded?
upgraded = [s for s in high_stress_arcs if pyo.value(model.x[s])>=1]

In [75]:
len(upgraded)

2694

In [121]:
# create a version of intermediate_arcs_gdf that has OSM identifiers etc
# copy intermediate arcs setup

# gather from-to rows
pfb_ft_df = ( 
    pfb_gdf
    
    # select rows from PFB that have From-To data
    .query('FT_SEG_STR.isnull() == False')
    
    # change data types, create new columns
    .assign(
        geometry = lambda x: (x['geometry'].to_crs("EPSG:4326")), # convert to classic CRS 
        tail_id = lambda x: x['INTERSECTI'],
        tail_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[0][1], x.coords[0][0]) if x else None) # grab first coord, switch order
        ),
        tail_lat = lambda x: x['tail_coord'].apply(lambda x: x[0] if x else None), 
        tail_lon = lambda x: x['tail_coord'].apply(lambda x: x[1] if x else None),
        head_id = lambda x: x['INTERSE_01'],
        head_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[-1][1], x.coords[-1][0]) if x else None) # grab last coord, switch order
        ),
        head_lat = lambda x: x['head_coord'].apply(lambda x: x[0] if x else None),
        head_lon = lambda x: x['head_coord'].apply(lambda x: x[1] if x else None),
    )
)

# gather to-from rows
pfb_tf_df = (
    pfb_gdf
    
    # select rows from PFB that have To-From data
    .query('TF_SEG_STR.isnull() == False')
    
    # change data types, create new columns
    .assign(
        geometry = lambda x: (
            x['geometry']
            .to_crs("EPSG:4326") # convert to classic CRS
            .reverse() # reverse order of LineString since To-From is backwards 
        ),   
        tail_id = lambda x: x['INTERSE_01'],
        tail_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[0][1], x.coords[0][0]) if x else None) # grab first coord, switch order
        ),
        tail_lat = lambda x: x['tail_coord'].apply(lambda x: x[0] if x else None), 
        tail_lon = lambda x: x['tail_coord'].apply(lambda x: x[1] if x else None),
        head_id = lambda x: x['INTERSECTI'],
        head_coord = lambda x: (x['geometry'].apply(
            lambda x: (x.coords[-1][1], x.coords[-1][0]) if x else None) # grab last coord, switch order
        ),
        head_lat = lambda x: x['head_coord'].apply(lambda x: x[0] if x else None),
        head_lon = lambda x: x['head_coord'].apply(lambda x: x[1] if x else None),
    )
)

# combine from-to and to-from
pfb_arcs_df = (
    pd.concat([pfb_ft_df, pfb_tf_df], axis=0, ignore_index=True)
    .assign(arc_id = lambda x: list(zip(x['tail_id'],x['head_id'])))
    [['arc_id','ROAD_ID','OSM_ID','NAME','geometry']]
)

In [122]:
pfb_arcs_df

Unnamed: 0,arc_id,ROAD_ID,OSM_ID,NAME,geometry
0,"(5374.0, 8763.0)",1.0,1.616923e+07,Lin Wal Road,"LINESTRING (-84.46739 38.07794, -84.46823 38.0..."
1,"(7894.0, 23999.0)",2.0,2.882116e+08,,"LINESTRING (-84.50205 38.02235, -84.50189 38.0..."
2,"(44442.0, 22981.0)",3.0,2.581502e+08,,"LINESTRING (-84.49601 37.97436, -84.49599 37.9..."
3,"(124.0, 48.0)",65.0,1.609710e+07,Hutchison Road,"LINESTRING (-84.35292 38.14429, -84.35284 38.1..."
4,"(38746.0, 49.0)",67.0,1.609710e+07,Hutchison Road,"LINESTRING (-84.34667 38.14098, -84.34649 38.1..."
...,...,...,...,...,...
95596,"(44861.0, 44862.0)",118903.0,1.115225e+09,,"LINESTRING (-84.53035 37.99353, -84.53040 37.9..."
95597,"(44863.0, 44864.0)",118904.0,1.115225e+09,,"LINESTRING (-84.53011 37.99345, -84.53020 37.9..."
95598,"(4671.0, 99002.0)",123343.0,1.116326e+09,,"LINESTRING (-84.49963 37.99835, -84.49965 37.9..."
95599,"(24502.0, 24501.0)",27730.0,3.077454e+08,,"LINESTRING (-84.54438 38.02054, -84.54425 38.0..."


In [125]:
# recover the arcs_gdf data for the upgraded arcs
upgraded_df = (
    pd.DataFrame(upgraded, columns=['tail', 'head'])
    .assign(arc_id=lambda x: list(zip(x['tail'], x['head'])))
    .merge(arcs_gdf, left_on='arc_id', right_on='arc_id', how='left')
    .query("in_H == 1") # this should remove duplicate arc_ids from the merge
    .merge(pfb_arcs_df, left_on='geometry', right_on='geometry', how='left') # road names etc
)
upgraded_gdf = gpd.GeoDataFrame(upgraded_df, geometry='geometry')
upgraded_gdf

Unnamed: 0,tail,head,arc_id_x,tail_id,tail_lat,tail_lon,head_id,head_lat,head_lon,arc_type,in_H,in_H2,dist,geometry,arc_id_y,ROAD_ID,OSM_ID,NAME
0,20707.0,2943.0,"(20707.0, 2943.0)",20707.0,38.019148,-84.521282,2943.0,38.019218,-84.521478,intermediate,1,1,18.777107,"LINESTRING (-84.52128 38.01915, -84.52148 38.0...","(20707.0, 2943.0)",3263.0,16161096.0,Rosemont Garden
1,6705.0,7852.0,"(6705.0, 7852.0)",6705.0,38.002115,-84.548479,7852.0,38.002039,-84.550382,intermediate,1,1,165.739939,"LINESTRING (-84.54848 38.00212, -84.55038 38.0...","(6705.0, 7852.0)",8975.0,16165383.0,Wellington Way
2,44903.0,8313.0,"(44903.0, 8313.0)",44903.0,37.987876,-84.555703,8313.0,37.987904,-84.555786,intermediate,1,1,7.859293,"LINESTRING (-84.55570 37.98788, -84.55579 37.9...","(44903.0, 8313.0)",9494.0,16169132.0,Cromwell Way
3,45227.0,10206.0,"(45227.0, 10206.0)",45227.0,38.008308,-84.495813,10206.0,38.009442,-84.495565,intermediate,1,0,129.146590,"LINESTRING (-84.49581 38.00831, -84.49580 38.0...","(45227.0, 10206.0)",11515.0,339674733.0,Tates Creek Road
4,19312.0,18487.0,"(19312.0, 18487.0)",19312.0,37.963221,-84.389447,18487.0,37.963313,-84.389612,intermediate,1,0,17.722472,"LINESTRING (-84.38945 37.96322, -84.38961 37.9...","(19312.0, 18487.0)",20706.0,175936717.0,Athens Boonesboro Road
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2689,5941.0,30403.0,"(5941.0, 30403.0)",5941.0,38.034784,-84.540181,30403.0,38.035714,-84.539655,intermediate,1,1,113.840998,"LINESTRING (-84.54018 38.03478, -84.53966 38.0...","(5941.0, 30403.0)",6723.0,16166829.0,Beacon Hill Road
2690,8897.0,6012.0,"(8897.0, 6012.0)",8897.0,37.978743,-84.510105,6012.0,37.979197,-84.509769,intermediate,1,1,58.676593,"LINESTRING (-84.51010 37.97874, -84.50977 37.9...","(8897.0, 6012.0)",10157.0,16166869.0,Camelot Drive
2691,4191.0,8897.0,"(4191.0, 8897.0)",4191.0,37.977576,-84.510635,8897.0,37.978743,-84.510105,intermediate,1,1,140.107058,"LINESTRING (-84.51063 37.97758, -84.51056 37.9...","(4191.0, 8897.0)",4722.0,16166869.0,Camelot Drive
2692,9272.0,46025.0,"(9272.0, 46025.0)",9272.0,38.057281,-84.481532,46025.0,38.057131,-84.481748,intermediate,1,1,25.233551,"LINESTRING (-84.48153 38.05728, -84.48175 38.0...","(9272.0, 46025.0)",10560.0,301602138.0,North Limestone


In [127]:
# export upgraded arcs
(
    upgraded_gdf
    .drop(columns=['arc_id_x','arc_id_y']) # can't have a tuple in a .shp
    #.to_file('upgraded_arcs.shp')
)

In [112]:
# scratch
# export the duplicates
(
    arcs_gdf[arcs_gdf.duplicated(keep=False)]
    #.drop(columns=['arc_id']) # can't have a tuple in a .shp
    #.to_file('duplicate_arcs.shp')
)

Unnamed: 0,arc_id,tail_id,tail_lat,tail_lon,head_id,head_lat,head_lon,arc_type,in_H,in_H2,dist,geometry
44930,"(88460.0, 88461.0)",88460.0,37.957148,-84.500103,88461.0,37.95717,-84.500063,intermediate,0,0,4.271484,"LINESTRING (-84.50010 37.95715, -84.50006 37.9..."
44989,"(88461.0, 88460.0)",88461.0,37.95717,-84.500063,88460.0,37.957148,-84.500103,intermediate,0,0,4.271484,"LINESTRING (-84.50006 37.95717, -84.50010 37.9..."
82602,"(88461.0, 88460.0)",88461.0,37.95717,-84.500063,88460.0,37.957148,-84.500103,intermediate,0,0,4.271484,"LINESTRING (-84.50006 37.95717, -84.50010 37.9..."
82661,"(88460.0, 88461.0)",88460.0,37.957148,-84.500103,88461.0,37.95717,-84.500063,intermediate,0,0,4.271484,"LINESTRING (-84.50010 37.95715, -84.50006 37.9..."


In [None]:
# woah why does arcs_gdf have almost 900 duplicate arc_ids? and 2 outright duplicates
# okay the two outright duplicates are of a 5 meter path in a park — nbd
# and the arc_id duplicates occur whenever there are two ways to loop around (common in nbdhs)
# so the duplicates in arcs_gdf appear when merging, but we can just filter out the low-stress ones

# note that each duplicate pair has one high-stress arc and one low-stress arc
# upgraded_df has no duplicates before merging with arcs_gdf