In [19]:
## Run when initialise the code
from general_functions import *

import pandas as pd
from pandas import DataFrame
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, MultiPolygon,LineString, MultiLineString
from shapely.ops import linemerge, unary_union

import osmnx as ox
from osmnx import io
import networkx as nx

from sklearn.cluster import DBSCAN

import math
from math import log2
import warnings
from tqdm import tqdm
from itertools import groupby,combinations
from collections import defaultdict
import pickle
import numpy as np


pjr_loc = os.path.dirname(os.getcwd())
warnings.filterwarnings(action='ignore')
project_crs = 'epsg:3857'

In [2]:
## Step: Download, process, and save OSM data as GeoPackage


# Loop over place(s) of interest
for place in ['Santa Barbara, California']:  # or ['Tel Aviv', ...]
    
    # Apply custom tag filters if processing Tel Aviv
    if place == 'Tel Aviv':
        useful_tags_path = ['name:en', 'highway', 'length', 'bearing', 'tunnel', 'junction']
        ox.utils.config(useful_tags_way=useful_tags_path)

    # Create preprocessing object and folder
    my_preprocessing = Preprocessing(place)
    data_folder = my_preprocessing.create_folder()

    # Download OSM graph for the place
    graph = ox.graph_from_place(place, network_type='all')
    print('✔️ Finished downloading OSM data')

    # Add edge bearings (useful for angle analysis, roundabouts, etc.)
    graph = ox.bearing.add_edge_bearings(graph)

    # Project the graph to a specific CRS for analysis
    graph_pro = ox.projection.project_graph(graph, to_crs=project_crs)

    # Save the projected graph to a GeoPackage file
    io.save_graph_geopackage(
        graph_pro,
        filepath=f'{data_folder}/osm_data.gpkg',
        encoding='utf-8',
        directed=False
    )

# Re-initialize preprocessing for subsequent use
my_preprocessing = Preprocessing(place)
data_folder = my_preprocessing.create_folder()

# Load and filter the edges from the saved GeoPackage
gdf_edges = my_preprocessing.first_filtering()
gdf_edges.to_file(f'{data_folder}/edges.shp')

Data folder: C:\Users\18059\OneDrive - ariel.ac.il\Current_research\SOD\Code\places/Santa_Barbara__California
✔️ Finished downloading OSM data
Data folder: C:\Users\18059\OneDrive - ariel.ac.il\Current_research\SOD\Code\places/Santa_Barbara__California


In [13]:
#  Extracts street-to-street connections based on shared nodes.
# ----------------------------------------------------------------
# STEP 1: Build a networkx Graph from the GeoDataFrame edges
# ----------------------------------------------------------------
G = nx.Graph()
for idx, row in gdf_edges.iterrows():
    G.add_edge(row['u'], row['v'], name=row['name'])

# ----------------------------------------------------------------
# STEP 2: Map each node to the set of street names that pass through it
# ----------------------------------------------------------------
node_to_streets = defaultdict(set)
for idx, row in gdf_edges.iterrows():
    node_to_streets[row['u']].add(row['name'])
    node_to_streets[row['v']].add(row['name'])

# ----------------------------------------------------------------
# STEP 3: Build street-to-street connection dictionary with node IDs
# ----------------------------------------------------------------
# Format: street_connections_with_nodes[street1][street2] = {node1, node2, ...}
street_connections_with_nodes = defaultdict(lambda: defaultdict(set))


for node, streets in node_to_streets.items():
    streets = list(streets)
    for i in range(len(streets)):
        for j in range(len(streets)):
            if i != j:
                street_connections_with_nodes[streets[i]][streets[j]].add(node)

# ----------------------------------------------------------------
# STEP 4: Build node ID → coordinate dictionary
# ----------------------------------------------------------------
node_id_to_coords = {}

for node in set(G.nodes()):
    node_geom = gdf_edges.loc[(gdf_edges['u'] == node) | (gdf_edges['v'] == node)].iloc[0]
    if node == node_geom['u']:
        node_id_to_coords[node] = node_geom.geometry.coords[0]
    else:
        node_id_to_coords[node] = node_geom.geometry.coords[-1]

# ----------------------------------------------------------------
# STEP 5: Create a new dictionary to hold connection counts
# Format: street_connection_counts[street1][street2] = number of shared nodes
# ----------------------------------------------------------------
node_id_to_coords = {}
street_connection_counts = defaultdict(lambda: defaultdict(int))

for street, connected_dict in street_connections_with_nodes.items():
    for connected_street, node_ids in connected_dict.items():
        street_connection_counts[street][connected_street] = len(node_ids)
street_connection_counts

defaultdict(<function __main__.<lambda>()>,
            {'Meadows Lane': defaultdict(int, {'Las Positas Road': 1}),
             'Las Positas Road': defaultdict(int,
                         {'Meadows Lane': 1,
                          'Positas Place': 1,
                          'San Roque Road': 1,
                          'State Street': 1,
                          'Calle Real': 1,
                          'Modoc Road': 1,
                          'Baldwin Road': 1,
                          'McCaw Avenue': 1,
                          'San Onofre Road': 1,
                          'Alegria Road': 1,
                          'Peregrina Road': 1,
                          'Veronica Springs Road': 1,
                          'Portesuello Avenue': 1,
                          'Stanley Drive': 1,
                          'Tallant Road': 1,
                          'Jerry Harwin Parkway': 1,
                          'Richelle Lane': 1}),
             'Positas Place': defaultd

In [38]:
street_connection_counts['Rosario Drive']

defaultdict(int,
            {'Paseo Redondo': 1, 'San Martin Way': 2, 'Primavera Road': 2})

Unnamed: 0,street,connected_to,num_connections
1804,APS Turnout,Alameda Padre Serra,2
1805,APS Turnout,Dover Road,1
2001,Abigail Lane,Cedar Lane,1
2000,Abigail Lane,Rosemary Lane,1
3068,Adair Drive,San Remo Drive,1
...,...,...,...
3179,Wyola Road,Samarkand Drive,1
3176,Wyola Road,Stanley Drive,1
580,Yankee Farm Road,Braemar Drive,2
578,Yankee Farm Road,Cliff Drive,1


In [43]:
# Merge roundabout geometries, identify which streets intersect them, and extract roundabout center points for further spatial analysis.


# ------------------------------------------------------------
# STEP 1: Merge roundabout edges into clean geometries
# - Union geometries to dissolve boundaries
# - Linemerge to connect continuous segments
# - Export merged geometries as shapefile
# ------------------------------------------------------------
roundabout_edges = my_preprocessing.round_about  # GeoDataFrame with roundabout segments

# Merge all geometries into a MultiLineString
merged_geom = unary_union(roundabout_edges.geometry)

# Connect continuous line segments
connected_lines = linemerge(merged_geom)

# Normalize to list of LineStrings
if isinstance(connected_lines, LineString):
    lines_list = [connected_lines]
elif isinstance(connected_lines, MultiLineString):
    lines_list = list(connected_lines.geoms)
else:
    lines_list = []

# Save merged roundabout geometries for inspection
gdf_merged = gpd.GeoDataFrame(geometry=lines_list, crs=roundabout_edges.crs)
gdf_merged.to_file(f'{data_folder}/roundabout.shp')

# ------------------------------------------------------------
# STEP 2: Create a buffer around merged roundabouts and find intersecting street edges
# ------------------------------------------------------------
# Buffer each merged roundabout line (e.g., 5 meters)
roundabout_buffer = gdf_merged.copy()
roundabout_buffer['geometry'] = roundabout_buffer.geometry.buffer(5)

# Perform spatial join: find street edges that intersect the roundabout buffer
intersections = gpd.sjoin(gdf_edges, roundabout_buffer, how='inner', predicate='intersects')

# ------------------------------------------------------------
# STEP 3: Build dictionary mapping roundabout index to connected street names
# ------------------------------------------------------------
roundabout_to_streets = defaultdict(set)

for _, row in intersections.iterrows():
    roundabout_idx = row['index_right']  # index of the matched roundabout in gdf_merged
    street_name = row['name']
    roundabout_to_streets[roundabout_idx].add(street_name)

# Save the original roundabout edge segments for reference
roundabout_edges.to_file(f'{data_folder}/roundabout_edges.shp')

# ------------------------------------------------------------
# STEP 4: Calculate center point for each roundabout polygon buffer
# - Buffer again with clean style, then extract centroid
# ------------------------------------------------------------
# Buffer with styling for rounded shapes, then get centroids
roundabout_centers = roundabout_buffer['geometry'].buffer(distance=1, cap_style=1, join_style=1).centroid

# Save as GeoDataFrame
gdf_roundabouts = gpd.GeoDataFrame(geometry=roundabout_centers, crs=roundabout_edges.crs)
gdf_roundabouts.to_file(f'{data_folder}/center_roundabout.shp')



In [44]:
# This code performs the main part of the simplification 
# --- Helper Functions ---

def check_parallelism(to_translate: GeoDataFrame) -> bool:
    """
    Checks whether a group of line segments contain any parallel segments
    by offsetting each line and checking buffer intersections.
    """
    my_buffer = to_translate['geometry'].buffer(cap_style=2, distance=30, join_style=3)
    to_translate['geometry_right'] = to_translate['geometry'].apply(lambda x: x.parallel_offset(35, 'right'))
    to_translate['geometry_left'] = to_translate['geometry'].apply(lambda x: x.parallel_offset(35, 'left'))

    def is_parallel(my_s_join: GeoDataFrame, the_buffer, geo_field: str):
        my_s_join['geometry'] = my_s_join[geo_field]
        sjoin = my_s_join.sjoin(GeoDataFrame(geometry=the_buffer, crs=project_crs), how='inner')
        sjoin = sjoin[sjoin.index != sjoin['index_right']]
        for _, row in sjoin.iterrows():
            overlay = gpd.overlay(
                GeoDataFrame([row], crs=project_crs),
                GeoDataFrame(geometry=[the_buffer[row['index_right']]], crs=project_crs),
                how='intersection')
            if (overlay.length / row.geometry.length).iloc[0] * 100 > 10:
                return True
        return False

    return is_parallel(to_translate, my_buffer, 'geometry_right') or is_parallel(to_translate, my_buffer, 'geometry_left')

def circular_distance(angle1, angle2):
    """Compute minimum circular angle difference between two angles."""
    diff = np.abs(angle1 - angle2) % 180
    return np.minimum(diff, 180 - diff)

def add_more_pnts_to_new_lines(pnt_f, pnt_l, line_pnts, lngth_chck, test_poly):
    """
    Recursively add points along a line if they're more than 10m from existing road segments.
    """
    dist = pnt_f.distance(pnt_l)
    x0, y0 = pnt_f.x, pnt_f.y
    bearing = math.atan2(pnt_l.x - x0, pnt_l.y - y0)
    if bearing < 0:
        bearing += 2 * math.pi
    loops = int(dist / lngth_chck)

    for step in range(1, loops):
        x_new = x0 + lngth_chck * step * math.sin(bearing)
        y_new = y0 + lngth_chck * step * math.cos(bearing)
        new_point = Point(x_new, y_new)
        nearest = GeoDataFrame(geometry=[new_point], crs=project_crs).sjoin_nearest(test_poly, distance_col='dis').iloc[0]
        if nearest['dis'] > 10:
            line = data.loc[nearest['index_right']]['geometry']
            projected = line.interpolate(line.project(new_point))
            if projected.distance(pnt_f) < 10:
                continue
            line_pnts.append(projected)
            return add_more_pnts_to_new_lines(projected, pnt_l, line_pnts, lngth_chck, test_poly)
    return line_pnts

def create_center_line(one_poly):
    """
    Construct a center line through a polygon by identifying its farthest endpoints
    and interpolating new points as needed based on angular continuity.
    """
    lines = data.sjoin(GeoDataFrame(geometry=[one_poly], crs=project_crs)).drop(columns='index_right')
    endpoints = []
    lines['geometry'].apply(lambda line: endpoints.extend([Point(line.coords[0]), Point(line.coords[-1])]))
    combos = list(combinations(endpoints, 2))

    df = DataFrame({
        'point_1': [a for a, _ in combos],
        'point_2': [b for _, b in combos],
    })
    df['dist'] = df.apply(lambda x: x['point_1'].distance(x['point_2']), axis=1)
    dx = df['point_2'].apply(lambda p: p.x) - df['point_1'].apply(lambda p: p.x)
    dy = df['point_2'].apply(lambda p: p.y) - df['point_1'].apply(lambda p: p.y)
    df['angle'] = np.degrees(np.arctan2(dy, dx)) % 180
    avg_angle = lines['angle'].mean()
    df['ratio'] = df['dist'] / df['dist'].max() + 0.5 * np.abs(df['angle'] - avg_angle) / np.abs(df['angle'] - avg_angle).max()

    pnt_f, pnt_l = df.sort_values(by='ratio', ascending=False).iloc[0][['point_1', 'point_2']]
    angle_range = lines['angle'].max() - lines['angle'].min()

    if angle_range < 1:
        new_line_pts = [pnt_f]
    else:
        step = 8.5 if angle_range > 100 else 75 - log2(angle_range) * 10
        new_line_pts = add_more_pnts_to_new_lines(pnt_f, pnt_l, [pnt_f], step, lines)
    new_line_pts.append(pnt_l)
    return new_line_pts

def update_df_with_center_line(new_line, is_simplified=0, group_name=-1):
    """Append a new line feature to the output dictionary."""
    dic_final['name'].append(name)
    dic_final['geometry'].append(new_line)
    dic_final['highway'].append(data.iloc[0]['highway'])
    dic_final['bearing'].append(data['angle'].mean())
    dic_final['group'].append(group_name)
    dic_final['is_simplified'].append(is_simplified)

# --- Main logic ---
dic_final = {'name': [], 'geometry': [], 'highway': [], 'bearing': [], 'group': [], 'is_simplified': []}
df_pro = gdf_edges
grouped = df_pro.groupby('name')

with tqdm(total=len(grouped)) as pbar:
    for name, group_df in grouped:
        pbar.update(1)
        group_df = group_df.dropna(subset=['angle'])
        if len(group_df) < 2:
            data = group_df
            _ = group_df['geometry'].apply(lambda geom: update_df_with_center_line(geom))
            continue

        angles = group_df['angle'].to_numpy()
        dists = np.array([[circular_distance(a1, a2) for a2 in angles] for a1 in angles])
        dbscan = DBSCAN(eps=10, min_samples=2, metric='precomputed')
        group_df['group'] = dbscan.fit_predict(dists)

        if (group_df['group'] == -1).all():
            data = group_df
            _ = group_df['geometry'].apply(lambda geom: update_df_with_center_line(geom))
            continue

        for group_id, sub_group in group_df.groupby('group'):
            data = sub_group
            if group_id == -1:
                _ = data['geometry'].apply(lambda geom: update_df_with_center_line(geom))
                continue
            if check_parallelism(data.copy()):
                min_polylines = len(data) / 15
                condition = (data['highway'].isin(['service', 'unclassified'])) & (
                    data.groupby('highway')['highway'].transform('count') <= min_polylines)
                data = data[~condition]

                buffers = data.buffer(cap_style=3, distance=30, join_style=3)
                unified = buffers.unary_union

                if isinstance(unified, MultiPolygon):
                    for poly in unified.geoms:
                        center_pts = create_center_line(poly)
                        update_df_with_center_line(LineString(center_pts), 1, group_id)
                else:
                    center_pts = create_center_line(unified)
                    update_df_with_center_line(LineString(center_pts), 1, group_id)
            else:
                _ = data['geometry'].apply(lambda geom: update_df_with_center_line(geom))

# Finalize and export
print(f'number_of_parallel: {sum(dic_final["is_simplified"])}')
print('create new files')
new_network = GeoDataFrame(dic_final, crs=project_crs)
new_network['length'] = new_network.length
new_network.to_file(f'{data_folder}/simp.shp')


100%|██████████| 832/832 [00:42<00:00, 19.65it/s]


number_of_parallel: 66
create new files


In [46]:
# The code filters out short, weakly connected street segments that don't intersect other lines at their endpoints and cleans up the geometry for a simplified street network.

# Copy the simplified network
gdf = new_network.copy()

# ----------------------------------------------------------------
# Step 1: Precompute useful attributes
# ----------------------------------------------------------------

# Count how many times each street name appears
gdf['name_count'] = gdf['name'].map(gdf['name'].value_counts())

# Count how many times each endpoint appears (used for degree check)
endpoint_counts = defaultdict(int)
for _, row in gdf.iterrows():
    coords = list(row.geometry.coords)
    endpoint_counts[coords[0]] += 1
    endpoint_counts[coords[-1]] += 1

# ----------------------------------------------------------------
# Step 2: Keep only lines from streets that are connected to roundabouts
# ----------------------------------------------------------------
connected_streets = {name for street_set in roundabout_to_streets.values() for name in street_set}
gdf_connected = gdf[gdf['name'].isin(connected_streets)].copy()

# ----------------------------------------------------------------
# Step 3: Identify candidate short terminal lines
# ----------------------------------------------------------------
candidates = gdf_connected[
    (gdf_connected['length'] < 100) &
    (gdf_connected['name_count'] > 1) &
    (gdf_connected.geometry.apply(lambda geom: endpoint_counts[geom.coords[0]] == 1 and
                                                 endpoint_counts[geom.coords[-1]] == 1))
].copy()

# ----------------------------------------------------------------
# Step 4: Check intersections of candidates with other edges
# ----------------------------------------------------------------
intersections = gpd.sjoin(candidates, gdf, how='inner', predicate='intersects')
intersections = intersections[intersections.index != intersections['index_right']]  # exclude self-intersections

# ----------------------------------------------------------------
# Step 5: Determine which candidates should be removed
# ----------------------------------------------------------------
lines_to_remove = set()

for idx, row in candidates.iterrows():
    temp_line = row.geometry
    start = Point(temp_line.coords[0])
    end = Point(temp_line.coords[-1])

    # All intersections involving this line
    matching = intersections.loc[intersections.index == idx]

    # If there are no intersections at all, remove the line
    if matching.empty:
        lines_to_remove.add(idx)
        continue

    # Check whether any intersection point lies exactly at start or end
    valid_intersection = False
    for other_idx in matching['index_right']:
        other_line = gdf.loc[other_idx].geometry
        inter = temp_line.intersection(other_line)

        if inter.is_empty:
            continue

        # Ensure we're working with a list of Points
        inter_points = [inter] if isinstance(inter, Point) else list(inter.geoms)

        # Check if any intersecting point matches start or end
        if any(pt.equals(start) or pt.equals(end) for pt in inter_points):
            valid_intersection = True
            break

    if not valid_intersection:
        lines_to_remove.add(idx)

# ----------------------------------------------------------------
# Step 6: Remove flagged lines and clean up geometries
# ----------------------------------------------------------------
gdf_simplified = gdf.drop(index=lines_to_remove).copy()

# Remove consecutive duplicate coordinates
gdf_simplified['geometry'] = gdf_simplified['geometry'].apply(
    lambda geom: LineString([pt for pt, _ in groupby(geom.coords)]) if len(set(geom.coords)) > 1 else None
)

# Drop invalid geometries (empty or single-point lines)
gdf_simplified = gdf_simplified[gdf_simplified['geometry'].notnull()].copy()





In [47]:
# The code adjusts street geometries to explicitly connect roundabout points by inserting them into the closest street segments

# --- Parameters ---
TOLERANCE = 20  # Max distance (in meters) to consider attaching to start or end
updated_geoms = {}
gdf_simplified_fix1 = gdf_simplified.copy()

# --- Step 1: Iterate through roundabouts and their connected streets ---
for ridx, streets in roundabout_to_streets.items():
    roundabout_point = gdf_roundabouts.loc[ridx].geometry

    for street in streets:
        # Get all edges for the current street
        street_edges = gdf_simplified_fix1[gdf_simplified_fix1['name'] == street]

        # Find edge(s) closest to the roundabout
        min_dist = street_edges.geometry.distance(roundabout_point).min()
        closest_edges = street_edges[street_edges.geometry.distance(roundabout_point) == min_dist]

        for edge_idx, edge_row in closest_edges.iterrows():
            # Use updated geometry if already modified
            line = updated_geoms.pop(edge_idx, edge_row.geometry)

            coords = list(line.coords)
            nearest_on_line = line.interpolate(line.project(roundabout_point))

            # Compute distances to start and end
            dist_to_start = nearest_on_line.distance(Point(coords[0]))
            dist_to_end = nearest_on_line.distance(Point(coords[-1]))

            # --- Step 2: Modify the line geometry based on proximity ---
            if dist_to_start > TOLERANCE and dist_to_end > TOLERANCE:
                # Insert roundabout into interior of the line
                insert_idx = np.argmin([Point(c).distance(nearest_on_line) for c in coords])
                if insert_idx == 0:
                    new_coords = [roundabout_point.coords[0]] + coords
                elif insert_idx == len(coords) - 1:
                    new_coords = coords[:-1] + [roundabout_point.coords[0]] + [coords[-1]]
                else:
                    new_coords = coords[:insert_idx + 1] + [roundabout_point.coords[0]] + coords[insert_idx + 1:]
            else:
                # Snap roundabout to closest endpoint
                if dist_to_start < dist_to_end:
                    new_coords = [roundabout_point.coords[0]] + coords[1:]
                else:
                    new_coords = coords[:-1] + [roundabout_point.coords[0]]

            # Save updated geometry
            updated_geoms[edge_idx] = LineString(new_coords)

# --- Step 3: Apply updated geometries to the GeoDataFrame ---
for idx, new_geom in updated_geoms.items():
    gdf_simplified_fix1.at[idx, 'geometry'] = new_geom


In [48]:
# This code snaps dead-end street endpoints to nearby roundabouts if they're within a specified distance buffer, improving network connectivity.

# Make a working copy
gdf_simplified_fix2 = gdf_simplified_fix1.copy()

# --- Parameters ---
TOL_BUFFER = 200  # Buffer radius in meters
updated_geoms_buffer = {}

# --- Step 1: Build node appearance count, including roundabout centers ---
node_count = defaultdict(int)

# Count start and end points of all line segments
for _, row in gdf_simplified_fix2.iterrows():
    coords = list(row.geometry.coords)
    node_count[coords[0]] += 1
    node_count[coords[-1]] += 1

# Also count roundabout center points
for pt in gdf_roundabouts.geometry:
    node_count[pt.coords[0]] += 1

# --- Helper: Check if roundabout already in line ---
def is_roundabout_in_line(roundabout_point, line_coords):
    return roundabout_point.coords[0] in line_coords

# --- Step 2: Snap dead-end line endpoints to roundabout if within buffer ---
for ridx, streets in roundabout_to_streets.items():
    roundabout_point = gdf_roundabouts.loc[ridx].geometry
    roundabout_buffer = roundabout_point.buffer(TOL_BUFFER)

    for street in streets:
        street_edges = gdf_simplified_fix2[gdf_simplified_fix2['name'] == street]

        for idx, row in street_edges.iterrows():
            # Use previously updated geometry if exists
            coords = list(updated_geoms_buffer.pop(idx, row.geometry).coords)

            # Only adjust if roundabout not already in the line
            if not is_roundabout_in_line(roundabout_point, coords):
                start, end = Point(coords[0]), Point(coords[-1])

                # Snap start if it's a dead-end and inside buffer
                if node_count[coords[0]] == 1 and roundabout_buffer.contains(start):
                    coords[0] = roundabout_point.coords[0]

                # Snap end if it's a dead-end and inside buffer
                elif node_count[coords[-1]] == 1 and roundabout_buffer.contains(end):
                    coords[-1] = roundabout_point.coords[0]

            # Save updated geometry
            updated_geoms_buffer[idx] = LineString(coords)

# --- Step 3: Apply changes to GeoDataFrame ---
for idx, geom in updated_geoms_buffer.items():
    gdf_simplified_fix2.at[idx, 'geometry'] = geom



In [49]:
# This code removes duplicate street segments (with same endpoints but different geometries) around roundabouts, keeping only the longest one per pair.


# Make a working copy of the dataset
gdf_simplified_fix3 = gdf_simplified_fix2.copy()

# Set to store indices of duplicate edges to remove
duplicate_removal_indices = set()

# --- Helper function: Normalize edge as unordered pair of start/end points ---
def normalize_edge(pt1, pt2):
    return tuple(sorted([tuple(pt1), tuple(pt2)]))

# --- Step 1: Loop through roundabouts and their connected streets ---
for ridx, streets in roundabout_to_streets.items():
    # Collect all edges from the connected streets
    street_edges = gdf_simplified_fix3[gdf_simplified_fix3['name'].isin(streets)]

    # Group edges by normalized endpoint pair (to catch reversed duplicates)
    edge_groups = defaultdict(list)

    for idx, row in street_edges.iterrows():
        coords = list(row.geometry.coords)
        edge_key = normalize_edge(coords[0], coords[-1])
        edge_groups[edge_key].append((idx, row.geometry.length))

    # --- Step 2: Within each group, retain only the longest edge ---
    for edges in edge_groups.values():
        if len(edges) > 1:
            # Sort by length in descending order
            edges_sorted = sorted(edges, key=lambda x: -x[1])
            # Keep the longest (first); mark others for removal
            for edge_idx, _ in edges_sorted[1:]:
                duplicate_removal_indices.add(edge_idx)

# --- Step 3: Remove marked duplicates and export result ---
gdf_simplified_fix3 = gdf_simplified_fix3[~gdf_simplified_fix3.index.isin(duplicate_removal_indices)].copy()
gdf_simplified_fix3.to_file(f'{data_folder}/gdf_fix_ra.shp')



In [50]:
joined = gpd.sjoin(gdf_roundabouts, gdf_simplified_fix3, how='left', predicate='intersects')
from collections import defaultdict
def compare_roundabout_dicts(old_dict, new_dict):
    differences = {}

    all_keys = set(old_dict.keys()).union(new_dict.keys())

    for key in all_keys:
        old = set(old_dict.get(key, []))
        new = set(new_dict.get(key, []))

        if old != new:
            differences[key] = {
                'added': new - old,
                'removed': old - new
            }

    return differences
roundabout_to_streets_new = defaultdict(set)

for ridx, row in joined.iterrows():
    roundabout_idx = row.name  # index of the roundabout
    street_name = row['name']
    if pd.notnull(street_name):
        roundabout_to_streets_new[roundabout_idx].add(street_name)
diffs = compare_roundabout_dicts(roundabout_to_streets, roundabout_to_streets_new)

if not diffs:
    print("✅ Roundabout-street mapping is unchanged.")
else:
    print("❌ Differences found:")
    for ridx, change in diffs.items():
        print(f"\nRoundabout {ridx}:")
        if change['added']:
            print(f"  ➕ Streets added: {sorted(change['added'])}")
        if change['removed']:
            print(f"  ➖ Streets removed: {sorted(change['removed'])}")



✅ Roundabout-street mapping is unchanged.


In [32]:
# Restore Missing Street Connections
# Create a new dictionary to store updated street connections.
gdf_reset = gpd.read_file(f'{data_folder}/gdf_fix_ra.shp').drop(columns=['name_count'])
gdf_reset

Unnamed: 0,name,highway,bearing,group,is_simplif,length,geometry
0,APS Turnout,tertiary_link,99.598300,-1,0,27.227677,"LINESTRING (-13324285.439 4087680.057, -133242..."
1,Abigail Lane,residential,64.556643,-1,0,57.719959,"LINESTRING (-13321780.25 4087117.497, -1332178..."
2,Abigail Lane,residential,64.556643,-1,0,64.433339,"LINESTRING (-13321837.123 4087139.632, -133218..."
3,Adair Drive,residential,9.745865,-1,0,86.147494,"LINESTRING (-13329585.182 4088430.202, -133295..."
4,Alamar Avenue,residential,43.469136,-1,0,94.745217,"LINESTRING (-13327641.655 4089141.722, -133276..."
...,...,...,...,...,...,...,...
4443,Yankee Farm Road,residential,33.074244,-1,0,9.403687,"LINESTRING (-13330399.373 4084028.712, -133304..."
4444,Yankee Farm Road,residential,11.205277,-1,0,119.871449,"LINESTRING (-13330458.038 4083548.234, -133304..."
4445,Yankee Farm Road,residential,11.205277,-1,0,120.020692,"LINESTRING (-13330434.327 4083665.889, -133304..."
4446,Yankee Farm Road,residential,11.205277,-1,0,353.662596,"LINESTRING (-13330404.505 4084020.832, -133304..."


In [33]:


# Step 1: Perform spatial join to find intersecting lines (excluding self-intersections)
intersections = gpd.sjoin(gdf_reset, gdf_reset, how='inner', predicate='intersects')

# Remove self-intersections (same feature)
intersections = intersections[intersections.index != intersections['index_right']].copy()
intersections

Unnamed: 0,name_left,highway_left,bearing_left,group_left,is_simplif_left,length_left,geometry,index_right,name_right,highway_right,bearing_right,group_right,is_simplif_right,length_right
0,APS Turnout,tertiary_link,99.598300,-1,0,27.227677,"LINESTRING (-13324285.439 4087680.057, -133242...",25,Alameda Padre Serra,tertiary,98.611547,0,1,5586.460339
0,APS Turnout,tertiary_link,99.598300,-1,0,27.227677,"LINESTRING (-13324285.439 4087680.057, -133242...",942,Dover Road,residential,86.130943,-1,0,121.046160
1,Abigail Lane,residential,64.556643,-1,0,57.719959,"LINESTRING (-13321780.25 4087117.497, -1332178...",651,Cedar Lane,residential,115.958491,-1,0,61.795798
1,Abigail Lane,residential,64.556643,-1,0,57.719959,"LINESTRING (-13321780.25 4087117.497, -1332178...",653,Cedar Lane,residential,115.958491,-1,0,185.397028
1,Abigail Lane,residential,64.556643,-1,0,57.719959,"LINESTRING (-13321780.25 4087117.497, -1332178...",3117,Rosemary Lane,residential,85.542049,-1,0,190.203237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4445,Yankee Farm Road,residential,11.205277,-1,0,120.020692,"LINESTRING (-13330434.327 4083665.889, -133304...",318,Braemar Drive,service,98.737187,-1,0,122.048280
4445,Yankee Farm Road,residential,11.205277,-1,0,120.020692,"LINESTRING (-13330434.327 4083665.889, -133304...",313,Braemar Drive,service,98.737187,-1,0,33.650551
4446,Yankee Farm Road,residential,11.205277,-1,0,353.662596,"LINESTRING (-13330404.505 4084020.832, -133304...",311,Braemar Drive,service,98.737187,-1,0,172.661893
4446,Yankee Farm Road,residential,11.205277,-1,0,353.662596,"LINESTRING (-13330404.505 4084020.832, -133304...",313,Braemar Drive,service,98.737187,-1,0,33.650551


In [35]:

# Step 2: Initialize connection dictionary
connection_counts = defaultdict(lambda: defaultdict(int))
checked_edge_pairs = set()

# Step 3: Loop through joined pairs and check endpoint conditions
for idx, row in intersections.iterrows():
    edge1_idx = idx
    edge2_idx = row['index_right']
    # Prevent duplicate edge comparisons (unordered)
    edge_pair_key = tuple(sorted([edge1_idx, edge2_idx]))
    if edge_pair_key in checked_edge_pairs:
        continue
    checked_edge_pairs.add(edge_pair_key)
    name1 = row['name_left']
    name2 = row['name_right']
    
    if name1 == name2:
        continue  # same name, skip
    
    geom1 = row['geometry']
    geom2 = gdf_reset.loc[row['index_right']].geometry  # Get second geometry
    pt = geom1.intersection(geom2)

    if pt.is_empty or not isinstance(pt, Point):
        continue  # skip if not a valid intersection point

    # Check endpoint match
    count = 0
    if pt.equals(Point(geom1.coords[0])) or pt.equals(Point(geom1.coords[-1])):
        count += 1
        if pt.equals(Point(geom1.coords[0])) and pt.equals(Point(geom1.coords[-1])):
            count += 1
        elif pt.equals(Point(geom2.coords[0])) and pt.equals(Point(geom2.coords[-1])):
            count += 1
    elif pt.equals(Point(geom2.coords[-1])) or pt.equals(Point(geom2.coords[-1])):
        count += 1
        if pt.equals(Point(geom2.coords[0])) and pt.equals(Point(geom2.coords[-1])):
            count += 1


    if count > 0:
        # Always store street names alphabetically
        street_a, street_b = sorted([name1, name2])
        connection_counts[street_a][street_b] += count
connection_counts


defaultdict(<function __main__.<lambda>()>,
            {'APS Turnout': defaultdict(int, {'Dover Road': 1}),
             'Abigail Lane': defaultdict(int,
                         {'Cedar Lane': 2, 'Rosemary Lane': 2}),
             'Adair Drive': defaultdict(int, {'San Remo Drive': 2}),
             'Alamar Avenue': defaultdict(int,
                         {'Lucinda Lane': 2,
                          'Marilyn Way': 2,
                          'Foothill Road': 2,
                          'Paseo del Descanso': 4,
                          'Paseo del Refugio': 4,
                          'Calle Rosales': 2,
                          'Paseo Tranquillo': 4,
                          'Calle Noguera': 2,
                          'Puesta del Sol': 2,
                          'Ventura Drive': 2,
                          'West Alamar Avenue': 1,
                          'East Alamar Avenue': 2,
                          'Verde Vista Drive': 2,
                          'Miradero Drive'

In [None]:
# for test - count how meny time each node connect between lines
# Step 1: Convert to list of records
node_records = []
for pt_coords, count in node_count.items():
    node_records.append({'geometry': Point(pt_coords), 'count': count})

# Step 2: Create GeoDataFrame
gdf_node_count = gpd.GeoDataFrame(node_records, geometry='geometry', crs=gdf_simplified.crs)
gdf_node_count.to_file(f'{data_folder}/gdf_node_count.shp')