In [1]:
import pandas as pd
import geopandas as gp
import shapely
from shapely.geometry import Point, LineString
from shapely.ops import transform
import pyproj
from collections import Counter

In [2]:
from lrs_tools import gp_lrs

In [3]:
import logging
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG) # Set the debug level here
fileHandler = logging.FileHandler(f'test.log', mode='w')
log.addHandler(fileHandler)

In [4]:
import importlib
importlib.reload(gp_lrs)

<module 'lrs_tools.gp_lrs' from 'c:\\Users\\daniel.fourquet\\Documents\\Tasks\\TMC Conflation 2025\\NPMRDS\\lrs_tools\\gp_lrs.py'>

In [5]:
# Load lrs to an LRS object
lrs_path = r'C:\Users\daniel.fourquet\Documents\Tasks\TMC Conflation 2025\Data\LRS_MASTER_RICHMOND_SINGLEPART.shp'

# Filter to only include NB and EB routes, excluding PA and Interstates.  Exclude ramps
lrs = gp_lrs.LRS(lrs_path, filter=True, ramps=0)

# Load lrs overlap to an LRS object
lrs_overlap_path = r'C:\Users\daniel.fourquet\Documents\Tasks\TMC Conflation 2025\Data\LRS_OVERLAP_RICHMOND.shp'

# Filter to only include NB and EB routes, excluding PA and Interstates.  Exclude ramps
lrs_overlap = gp_lrs.LRS(lrs_overlap_path, filter=True, ramps=0)


In [6]:
# Load TMCs to a GeoDataFrame
tmcs_path = r'C:\Users\daniel.fourquet\Documents\Tasks\TMC Conflation 2025\NPMRDS\data\NPMRDS_medium_test.shp'
tmcs = gp.read_file(tmcs_path)
tmcs = tmcs[['Tmc', 'RoadNumber', 'RoadName', 'TmcLinear', 'geometry']]
tmcs = tmcs.to_crs(epsg=3968)

# Calculate begin, end, and mid-points
tmcs['begin_point'] = shapely.get_point(tmcs['geometry'], 0)
tmcs['end_point'] = shapely.get_point(tmcs['geometry'], -1)
def get_midpoint(geom):
    return geom.interpolate(0.5, normalized=True)
tmcs['mid_point'] = tmcs.geometry.apply(get_midpoint)

# Get TMCs dissolved by lineartmc
linear_tmcs = tmcs.dissolve(['TmcLinear', 'RoadName']).reset_index()
linear_tmcs['geometry'] = shapely.line_merge(linear_tmcs.geometry)
linear_tmcs = linear_tmcs.explode()
def interpolate(geom, pct):
    return geom.interpolate(pct, normalized=True)
linear_tmcs['begin_point'] = linear_tmcs.geometry.apply(interpolate, pct=0)
linear_tmcs['mid_point'] = linear_tmcs.geometry.apply(interpolate, pct=0.5)
linear_tmcs['end_point'] = linear_tmcs.geometry.apply(interpolate, pct=1)


  if s.type.startswith("Multi") or s.type == "GeometryCollection":


In [7]:
# Get most common routes near linear tmcs.  Only return results with one match
def match_routes__begin_mid_end_points(tmc, distance=15, point_threshold=3, lrs=lrs):
    log.debug(f'\n{"="*10}')
    log.debug(f'\n{tmc["Tmc"]} - Match Routes by Points - Distance: {distance}')
    
    points = (tmc['begin_point'], tmc['mid_point'], tmc['end_point'])
    
    log.debug('points:')
    for point in points:
        log.debug(f'\t{point}')

    routes = Counter()
    for point in points:
        nearby_routes = gp_lrs.get_nearby_routes(point, distance, lrs)
        routes.update(nearby_routes)

    # Check for no matches
    if len(routes) == 0:
        log.debug('\tNo matches.  Returning None')
        return None

    log.debug(f'\tRoutes:  {routes}')

    # If the number of points that match with a single route is equal
    # to the point_threshold, it is likely a match.  Otherwise a  more
    # precise method is needed.
    most_common_count = routes.most_common(1)[0][1]
    if most_common_count < point_threshold:
        log.debug('\tMost common below threshold.  Returning None')
        return None
    
    most_common_routes = routes.most_common()

    # Find the maximum count
    max_count = most_common_routes[0][1]

    # Filter for elements with the maximum count
    result = [element for element, count in most_common_routes if count == max_count]

    if len(result) == 1:
        log.debug(f'Returning {result[0]}')
        return result[0]
    
    return None


In [8]:

# Get most common routes
log.debug(f'===\nLinear TMC IDs - match_routes__begin_mid_end_points\n===')
linear_tmcs['rte_nm_match'] = linear_tmcs.apply(match_routes__begin_mid_end_points, axis=1)

# Remove null values.  These will need to be matched at the tmc level
linear_tmcs = linear_tmcs.loc[linear_tmcs['rte_nm_match'].notnull()].copy()

# Reduce to only needed columns to join back to the tmc dataframe
linear_tmcs = linear_tmcs.reset_index()[['TmcLinear', 'RoadName', 'rte_nm_match']].drop_duplicates()

# Remove any linear tmc id that appears more than once
linear_tmcs = linear_tmcs[~linear_tmcs.duplicated(['TmcLinear', 'RoadName'], keep=False)]

In [9]:
# Join tmcs and linear_tmcs by TmcLinear
tmcs = tmcs.merge(linear_tmcs, how='left', on=['TmcLinear', 'RoadName'])

In [10]:
print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 68


In [11]:
# Match by TMC, but only on tmcs where rte_nm_match is null
tmcs_no_match = tmcs.loc[tmcs['rte_nm_match'].isnull()].copy()

log.debug(f'===\TMCs - match_routes__begin_mid_end_points\n===')
tmcs_no_match['rte_nm_match'] = tmcs_no_match.apply(match_routes__begin_mid_end_points, axis=1)
tmcs_no_match = tmcs_no_match[['Tmc', 'rte_nm_match']]

# Join back to main tmcs
tmcs = tmcs.merge(tmcs_no_match, how='left', on='Tmc', suffixes=('', '_new')).fillna(tmcs_no_match)
tmcs.loc[tmcs['rte_nm_match'].isnull(), 'rte_nm_match'] = tmcs['rte_nm_match_new']
tmcs.drop(columns='rte_nm_match_new', axis=1, inplace=True)

print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 21


In [12]:
# Match by TMC again with a lower search radius, but only on tmcs where rte_nm_match is null
tmcs_no_match = tmcs.loc[tmcs['rte_nm_match'].isnull()].copy()

log.debug(f'===\TMCs - match_routes__begin_mid_end_points - low search radius\n===')
tmcs_no_match['rte_nm_match'] = tmcs_no_match.apply(match_routes__begin_mid_end_points, distance=5, axis=1)
tmcs_no_match = tmcs_no_match[['Tmc', 'rte_nm_match']]

# Join back to main tmcs
tmcs = tmcs.merge(tmcs_no_match, how='left', on='Tmc', suffixes=('', '_new')).fillna(tmcs_no_match)
tmcs.loc[tmcs['rte_nm_match'].isnull(), 'rte_nm_match'] = tmcs['rte_nm_match_new']
tmcs.drop(columns='rte_nm_match_new', axis=1, inplace=True)

print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 20


In [13]:
# Match by TMC again with a higher search radius, but only on tmcs where rte_nm_match is null
tmcs_no_match = tmcs.loc[tmcs['rte_nm_match'].isnull()].copy()

log.debug(f'===\TMCs - match_routes__begin_mid_end_points - low search radius\n===')
tmcs_no_match['rte_nm_match'] = tmcs_no_match.apply(match_routes__begin_mid_end_points, distance=25, axis=1)
tmcs_no_match = tmcs_no_match[['Tmc', 'rte_nm_match']]

# Join back to main tmcs
tmcs = tmcs.merge(tmcs_no_match, how='left', on='Tmc', suffixes=('', '_new')).fillna(tmcs_no_match)
tmcs.loc[tmcs['rte_nm_match'].isnull(), 'rte_nm_match'] = tmcs['rte_nm_match_new']
tmcs.drop(columns='rte_nm_match_new', axis=1, inplace=True)

print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 12


In [14]:
# Match by TMC again, but using the overlap lrs instead
tmcs_no_match = tmcs.loc[tmcs['rte_nm_match'].isnull()].copy()

log.debug(f'===\TMCs - match_routes__begin_mid_end_points - overlap LRS\n===')
tmcs_no_match['rte_nm_match'] = tmcs_no_match.apply(match_routes__begin_mid_end_points, distance=25, lrs=lrs_overlap, axis=1)
tmcs_no_match = tmcs_no_match[['Tmc', 'rte_nm_match']]

# Join back to main tmcs
tmcs = tmcs.merge(tmcs_no_match, how='left', on='Tmc', suffixes=('', '_new')).fillna(tmcs_no_match)
tmcs.loc[tmcs['rte_nm_match'].isnull(), 'rte_nm_match'] = tmcs['rte_nm_match_new']
tmcs.drop(columns='rte_nm_match_new', axis=1, inplace=True)

print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 11


In [15]:
def match_routes__line_segments(record, frequency=5, detailed_search=False):
    log.debug(f'\n{"="*10}')
    log.debug(f'\n{record["Tmc"]} - Match Routes by Segments')

    # Break input geometry into a list of individual line segments
    coords = record.geometry.coords
    segments = []
    for i in range(len(coords)):
        begin_point = coords[i-1]
        end_point = coords[i]
        segment = LineString((begin_point, end_point))
        segments.append(segment)

    if len(segments[1::frequency]) < frequency:
        frequency = 2  # Ensures that shorter segments get enough sample points

    if detailed_search:
        frequency = 1  # If detailed search, then all segments should be checked

    #  Reduce list to every {frequency} segment to improve processing time
    segments = segments[1::frequency]  # Ignore first segment because it contains begin and end point
    log.debug(f'\t{len(segments)} test segments')

    # For each segment, find matching nearby routes
    routes = []
    for segment in segments:
        nearby_routes = gp_lrs.get_nearby_route_by_segments(segment, 15, lrs)
        routes.extend(nearby_routes)
        
    # Check for no matches
    if len(routes) == 0:
        log.debug('\tNo matches.  Returning None')
        return None

    log.debug(f'\tAll Routes:  {routes}')

    # If only one segment tested and only one result, return the result
    if len(routes) == 1 and len(segments) == 1:
        return routes[0]

    # Keeps elements in a list that have a consecutive duplicate.
    route_list = []
    i = 0
    while i < len(routes) - 1:
        if routes[i] == routes[i + 1]:
            route_list.extend([routes[i], routes[i + 1]])
            i += 2  # Skip the next element since it's already included
        else:
            i += 1

    route_list = list(set(route_list))

    # If no matches found and this is not a detailed search, try again
    # but include all segments
    if len(route_list) == 0 and detailed_search == False:
        return match_routes__line_segments(record, detailed_search=True)

    log.debug(f'\tReturning Routes:  {route_list}')

    return ','.join(route_list)

In [28]:
null_filter = (tmcs['rte_nm_match'].isnull())
# null_filter = (tmcs['Tmc'] == '110N18081')
tmcs.loc[null_filter, 'rte_nm_match'] = tmcs.loc[null_filter].apply(match_routes__line_segments, axis=1)

print(f"Remaining null values: {len(tmcs.loc[tmcs['rte_nm_match'].isnull()])}")

Remaining null values: 0


In [29]:
# Records with more than one rte_nm in the rte_nm_match field should get new records

# Identify rows with multiple values
tmcs['multiple_values'] = tmcs['rte_nm_match'].str.contains(',')

# Explode the multiple values
tmcs_exploded = tmcs.assign(rte_nm_match=tmcs['rte_nm_match'].str.split(',')).explode('rte_nm_match')

In [30]:
print(len(tmcs), len(tmcs_exploded))

494 499
