# RideScore DC — LTS from OSM (MVP)

This notebook builds a **Level of Traffic Stress (LTS)** layer for Washington, DC from OpenStreetMap tags and an explicit ruleset. Optionally, it joins DC crash points and computes a composite **RideScore (0–100)**. Outputs a map-ready **GeoJSON** and an inline Folium map with layer toggles.

**Method anchors:** Mekuria–Furth–Nixon LTS (1–4) and public agency adaptations (Montgomery, Fairfax). Thresholds here are **starting points**—we’ll tune with the group. Reference: [Low-Stress Bicycling and Network Connectivity (PDF hosted by NACTO)](https://nacto.org/wp-content/uploads/1005-low-stress-bicycling-network-connectivity.pdf)

In [1]:
# 1) Install + imports

# If running locally (or in Kaggle with Internet on), uncomment:
!pip -q install osmnx geopandas shapely pyproj folium requests rtree

import os, json, math, re, warnings
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import osmnx as ox
import folium
from folium import Element
from folium.plugins import MeasureControl
import requests

# Silence every Python warning for the rest of the session (keeps cell output
# short, but also hides deprecation notices from the geo stack).
warnings.filterwarnings("ignore")
# OSMnx settings: turn the download cache on and console logging off.
ox.settings.use_cache = True
ox.settings.log_console = False

In [2]:
# 2) Config

# --- Study area and coordinate reference system ---
PLACE_NAME = "Washington, District of Columbia, USA"
CRS = "EPSG:4326"

# --- Crash join ---
CRASH_ENABLE = True            # set False to skip crash join
YEARS_BACK = 5                 # crash window, in years
CRASH_BUFFER_METERS = 10       # buffer distance passed to the crash join

# --- Output location ---
OUT_DIR = "."
OUTPUT_GEOJSON = "segments_lts_mvp.geojson"

print(f"Output -> {os.path.join(OUT_DIR, OUTPUT_GEOJSON)}")

Output -> ./segments_lts_mvp.geojson


In [3]:
# 3) AOI polygon

# Geocode the configured place name, then express the result in the notebook CRS.
aoi_geocoded = ox.geocode_to_gdf(PLACE_NAME)
aoi_gdf = aoi_geocoded.to_crs(CRS)

# The first geometry of the geocoding result is used as the area of interest.
aoi = aoi_gdf.geometry.iloc[0]
aoi_gdf

Unnamed: 0,geometry,bbox_west,bbox_south,bbox_east,bbox_north,place_id,osm_type,osm_id,lat,lon,class,type,place_rank,importance,addresstype,name,display_name
0,"POLYGON ((-77.11979 38.93435, -77.11977 38.934...",-77.119795,38.79163,-76.909366,38.995968,394370670,relation,5396194,38.895037,-77.036543,place,city,16,0.803067,city,Washington,"Washington, District of Columbia, United States"


In [None]:
# 4) Pull OSM streets + cycleways

# OSMnx copies only the way tags listed in `ox.settings.useful_tags_way` onto
# graph edges. The facility and lane parsers below read cycleway / bicycle /
# directional-lane tags, so those keys are requested explicitly here; otherwise
# the columns never exist and every street classifies as having no facility.
# NOTE(review): `parking:lane:*` keys can be appended the same way once
# has_parking's treatment of missing (NaN) values has been verified.
EXTRA_WAY_TAGS = [
    "bicycle",
    "cycleway", "cycleway:both", "cycleway:left", "cycleway:right",
    "lanes:forward", "lanes:backward",
]
# dict.fromkeys de-duplicates while keeping order, so re-running the cell is harmless.
ox.settings.useful_tags_way = list(
    dict.fromkeys([*ox.settings.useful_tags_way, *EXTRA_WAY_TAGS])
)

# Highway filter includes cycleways/paths so we don’t lose protected/sidepath facilities
custom_filter = (
    '["highway"~"motorway|trunk|primary|secondary|tertiary|residential|living_street|'
    'unclassified|service|cycleway|path|pedestrian|track"]'
)

G = ox.graph_from_polygon(aoi, custom_filter=custom_filter, retain_all=False, simplify=True)
edges = (
    ox.graph_to_gdfs(G, nodes=False, edges=True, fill_edge_geometry=True)
    .to_crs(CRS)
    # Move the (u, v, key) edge index into columns: gpd.overlay() returns a
    # fresh index, and the segment ids built later read the `u` / `v` columns.
    .reset_index()
)

# clip strictly to AOI
edges = gpd.overlay(edges, aoi_gdf[["geometry"]], how="intersection")
print("OSM edges:", len(edges))
edges.head()

In [None]:
# 5) OSM tag parsers (speed, lanes, facility, parking)

def parse_maxspeed(val):
    """Parse an OSM ``maxspeed`` value into an integer speed in mph.

    Args:
        val: Raw tag value. May be None, NaN, a string such as ``"25 mph"``,
            ``"50 km/h"`` or ``"30"``, or a list/tuple of such values (only
            the first element is used).

    Returns:
        The speed in mph as an int, or None when no number can be extracted.
        Values without a unit are taken as mph (this notebook's convention for
        DC data; note that OSM itself treats unit-less values as km/h).
        km/h values are converted and rounded to the nearest mph.
    """
    if isinstance(val, (list, tuple)):
        # An empty list carries no information; treat it like a missing tag.
        val = val[0] if val else None
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return None

    s = str(val).strip().lower()

    # Bare number or explicit mph. `\s` must be a single backslash inside the
    # raw string; a doubled one would match a literal backslash character.
    m = re.match(r"^([0-9]+)\s*(mph)?$", s)
    if m:
        return int(m.group(1))

    # Metric speeds: "50 km/h", "50kmh", "50 kph".
    m = re.match(r"^([0-9]+)\s*(?:km/?h|kph)$", s)
    if m:
        return int(round(int(m.group(1)) * 0.621371))

    # Last resort: first run of digits anywhere in the value.
    m = re.search(r"([0-9]+)", s)
    return int(m.group(1)) if m else None

def _as_int(value):
    """Convert a raw tag value to a non-negative int, or None if unusable."""
    if isinstance(value, (list, tuple)):
        # Keep the first element, as for other multi-valued tags.
        value = value[0] if value else None
    if value is None:
        return None
    try:
        number = float(str(value).strip())
    except (TypeError, ValueError):
        return None
    # Rejects NaN, infinities, negatives and fractional lane counts.
    if not math.isfinite(number) or number < 0 or not number.is_integer():
        return None
    return int(number)


def parse_lanes(row):
    """Return the total lane count described by a segment's lane tags.

    Args:
        row: Mapping with optional ``lanes``, ``lanes:forward`` and
            ``lanes:backward`` entries (raw OSM tag values; may be None/NaN).

    Returns:
        An int lane count, or None when nothing usable is tagged.
        - Both directional tags present: their sum (falling back to ``lanes``
          if the sum is 0).
        - Otherwise ``lanes`` when it is present, so that a single directional
          tag cannot under-count a road whose total is known.
        - Otherwise the one directional value that is available.
    """
    lanes = _as_int(row.get("lanes"))
    fwd = _as_int(row.get("lanes:forward"))
    bwd = _as_int(row.get("lanes:backward"))

    if fwd is not None and bwd is not None:
        return (fwd + bwd) or lanes
    if lanes is not None:
        return lanes
    if fwd is not None or bwd is not None:
        return ((fwd or 0) + (bwd or 0)) or None
    return None

# Facility classes ordered from least to most protective; index = rank.
_FACILITY_BY_RANK = ("none", "shared", "painted_lane", "buffered_lane", "protected_track")


def _facility_rank(token):
    """Rank a single cycleway tag token (index into _FACILITY_BY_RANK)."""
    if any(k in token for k in ("track", "separate", "buffered_protected")):
        return 4
    # Must be tested before the generic "lane" check: "shared_lane" contains
    # "lane" but describes mixed traffic (sharrows), not a painted bike lane.
    if "shared" in token or "sharrow" in token:
        return 1
    if "buffered" in token:
        return 3
    if "lane" in token:
        return 2
    return 0


def classify_facility(tags):
    """Classify the bicycle facility of a segment from its OSM tags.

    Args:
        tags: Mapping of tag name to raw value. Reads ``highway``, ``bicycle``,
            ``cycleway``, ``cycleway:both``, ``cycleway:left`` and
            ``cycleway:right``; missing keys and NaN values are tolerated.

    Returns:
        One of ``"separated_lane"`` (highway=cycleway), ``"protected_track"``,
        ``"buffered_lane"``, ``"painted_lane"``, ``"shared"`` or ``"none"``.
        When several cycleway tags are present, the most protective facility
        found on any of them is returned.
    """
    hw = str(tags.get("highway", "")).lower()

    if hw == "cycleway":
        return "separated_lane"
    if hw == "path" and str(tags.get("bicycle", "")).lower() in {"designated", "yes"}:
        return "protected_track"

    best = 0
    for key in ("cycleway", "cycleway:both", "cycleway:left", "cycleway:right"):
        raw = str(tags.get(key, "")).lower()
        # Tokenise so list-like or ';'-separated values are judged per value.
        for token in re.findall(r"[a-z_]+", raw):
            best = max(best, _facility_rank(token))
    return _FACILITY_BY_RANK[best]

def has_parking(tags):
    """Report whether any ``parking:lane*`` tag indicates on-street parking.

    Args:
        tags: Mapping of tag name to raw value (values may be None or NaN,
            e.g. when the mapping comes from a DataFrame row).

    Returns:
        True if at least one key starting with ``parking:lane`` carries a
        value that is present and is not an explicit "no parking" value.
    """
    # Values that mean "no parking here"; "" and "nan" cover blank or
    # stringified-missing cells.
    negative = {"", "nan", "no", "none", "no_parking", "no_stopping", "no_standing"}
    for k, v in tags.items():
        if not str(k).startswith("parking:lane"):
            continue
        # A missing cell is absence of information, not evidence of parking.
        if v is None or (isinstance(v, float) and math.isnan(v)):
            continue
        if str(v).strip().lower() not in negative:
            return True
    return False

In [None]:
# 6) Normalize attributes

edges = edges.reset_index(drop=True)


def _normalize_edge(position, edge):
    """Build one normalized segment record from a raw edge row.

    `position` is the row's index label after the reset above and is appended
    to the segment id. Missing lane counts fall back to 1 and missing speed
    limits to 25.
    """
    attrs = edge.to_dict()
    lane_count = parse_lanes(attrs)
    mph = parse_maxspeed(attrs.get("maxspeed"))
    return {
        "segment_id": f"osm-{edge.get('u','')}-{edge.get('v','')}-{position}",
        "highway": str(attrs.get("highway", "")).lower(),
        "num_lanes": 1 if lane_count is None else lane_count,
        "speed_limit": 25 if mph is None else mph,  # conservative fallback
        "bike_facility_type": classify_facility(attrs),
        "parking_presence": has_parking(attrs),
        "geometry": edge.geometry,
    }


norm = [_normalize_edge(position, edge) for position, edge in edges.iterrows()]

gdf = gpd.GeoDataFrame(norm, geometry="geometry", crs=CRS)
print("Segments normalized:", len(gdf))
gdf.head()

In [None]:
# 7) LTS rules (tunable)

def lts_level(facility, speed, lanes, parking=False, highway=""):
    """Assign a Level of Traffic Stress (1 = lowest, 4 = highest) to a segment.

    Args:
        facility: Facility class string (e.g. "protected_track", "painted_lane",
            "shared", "none").
        speed: Speed limit used for the thresholds below.
        lanes: Lane count used for the thresholds below.
        parking: Accepted for interface compatibility; the current rules do
            not read it.
        highway: OSM highway class; None is treated as "".

    Returns:
        An int in 1..4. Unrecognised facility classes get 4.
    """
    road_class = (highway or "").lower()

    # Dedicated / truly separated facilities are always LTS 1.
    if road_class in {"cycleway", "path"} or facility in {"protected_track", "separated_lane"}:
        return 1

    # Painted or buffered lanes: stress rises with the speed band.
    if facility in {"buffered_lane", "painted_lane"}:
        for level, speed_cap in ((2, 25), (3, 30)):
            if speed <= speed_cap and lanes <= 2:
                return level
        return 4

    # Mixed traffic: LTS 2 only on slow, narrow local streets.
    if facility in {"shared", "none"}:
        is_local = road_class in {"residential", "living_street", "service", "unclassified"}
        if speed <= 20 and lanes <= 2 and is_local:
            return 2
        return 3 if (speed <= 30 and lanes <= 2) else 4

    return 4

def _segment_lts(segment):
    """Run the LTS ruleset on one normalized segment row."""
    return lts_level(
        segment.bike_facility_type,
        int(segment.speed_limit),
        int(segment.num_lanes),
        segment.parking_presence,
        segment.highway,
    )


gdf["lts_level"] = gdf.apply(_segment_lts, axis=1)
gdf["lts_level"].value_counts().sort_index()

In [None]:
# 8) Fetch DC crashes (ArcGIS Open Data) - Optional

def try_fetch_dc_crashes(aoi_gdf, years_back=5):
    """Best-effort download of DC crash points, clipped to the AOI.

    Tries each candidate URL in order and uses the first response that is JSON
    and contains a non-empty ``features`` list. Network or parsing failures
    are reported and skipped; they never abort the notebook.

    Args:
        aoi_gdf: GeoDataFrame whose geometry delimits the area of interest.
        years_back: Size of the trailing time window (in 365-day years) applied
            when a report-date column can be identified.

    Returns:
        GeoDataFrame of Point/MultiPoint crash records in ``CRS``; empty (with
        only a geometry column) when no endpoint could be used.
    """
    # ArcGIS open data often exposes a direct GeoJSON; try common endpoints
    urls = [
        "https://opendata.arcgis.com/datasets/DCGIS::crashes-in-dc.geojson",
        "https://opendata.arcgis.com/api/v3/datasets/9c0b8b0673da4a6fa3b3a8bdafbbf7a2_0/downloads/data?format=geojson&spatialRefId=4326"
    ]
    gj = None
    for url in urls:
        try:
            r = requests.get(url, timeout=60)
            if not (r.ok and "json" in r.headers.get("content-type", "").lower()):
                continue
            payload = r.json()
        except (requests.RequestException, ValueError) as exc:
            # Only network/decoding problems are tolerated; anything else is a bug.
            print(f"Crash endpoint unavailable ({type(exc).__name__}): {url}")
            continue
        # An error document can also be valid JSON; require actual features.
        if isinstance(payload, dict) and payload.get("features"):
            gj = payload
            break
    if not gj:
        print("Crash fetch failed -> proceeding without crashes.")
        return gpd.GeoDataFrame(columns=["geometry"], geometry="geometry", crs=CRS)

    crashes = gpd.GeoDataFrame.from_features(gj["features"], crs=CRS)
    # Limit to AOI
    crashes = gpd.overlay(crashes, aoi_gdf[["geometry"]], how="intersection")
    # Try to find a date column and filter last N years
    date_col = None
    for c in crashes.columns:
        cl = c.lower()
        if ("report" in cl and "date" in cl) or cl == "date":
            date_col = c; break
    if date_col is not None:
        raw_dates = crashes[date_col]
        if pd.api.types.is_numeric_dtype(raw_dates):
            # NOTE(review): numeric dates are assumed to be epoch milliseconds
            # (the usual ArcGIS encoding) — confirm against the live schema.
            crashes[date_col] = pd.to_datetime(raw_dates, unit="ms", errors="coerce", utc=True)
        else:
            crashes[date_col] = pd.to_datetime(raw_dates, errors="coerce", utc=True)
        # Timestamp.now(tz="UTC") replaces the deprecated Timestamp.utcnow().
        cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=365*years_back)
        crashes = crashes[crashes[date_col] >= cutoff]

    crashes = crashes[crashes.geometry.notnull() & crashes.geometry.geom_type.isin(["Point","MultiPoint"])]
    print("Crashes retained:", len(crashes))
    return crashes

crashes_gdf = try_fetch_dc_crashes(aoi_gdf, YEARS_BACK) if CRASH_ENABLE else gpd.GeoDataFrame(columns=["geometry"], geometry="geometry", crs=CRS)
crashes_gdf.head(2)

In [None]:
# 9) Join crash counts to segments (buffered spatial join)

def count_crashes_near_segments(segments_gdf, crashes_gdf, buffer_m=10):
    """Count crash points falling within ``buffer_m`` metres of each segment.

    Args:
        segments_gdf: Segment GeoDataFrame (must have a CRS).
        crashes_gdf: Crash point GeoDataFrame (must have a CRS unless empty).
        buffer_m: Buffer radius around each segment, in metres.

    Returns:
        A copy of ``segments_gdf`` (same CRS, same geometry, same index) with
        an integer ``crash_count_5yr`` column. The input frame is not modified.
        A crash inside several buffers is counted once for each of them.
    """
    out = segments_gdf.copy()
    if crashes_gdf.empty or segments_gdf.empty:
        out["crash_count_5yr"] = 0
        return out

    # Buffer in a local UTM zone so `buffer_m` is a true ground distance.
    # Web Mercator (EPSG:3857) stretches lengths away from the equator, which
    # would make the effective buffer noticeably smaller than requested.
    metric_crs = segments_gdf.estimate_utm_crs()

    # Work on geometry-only frames with a positional index, so the join result
    # can be mapped back by row position whatever index the caller uses.
    buffers = gpd.GeoDataFrame(
        geometry=segments_gdf.geometry.to_crs(metric_crs).buffer(buffer_m).reset_index(drop=True)
    )
    points = gpd.GeoDataFrame(
        geometry=crashes_gdf.geometry.to_crs(metric_crs).reset_index(drop=True)
    )

    hits = gpd.sjoin(points, buffers, how="inner", predicate="within")
    per_segment = hits.groupby("index_right").size().reindex(range(len(buffers)), fill_value=0)

    # Attach counts to the untouched original geometry (no reprojection round trip).
    out["crash_count_5yr"] = per_segment.to_numpy().astype(int)
    return out

gdf = count_crashes_near_segments(gdf, crashes_gdf, buffer_m=CRASH_BUFFER_METERS)
gdf["serious_injury_count_5yr"] = 0  # placeholders; populate when schema mapped
gdf["fatal_count_5yr"] = 0
gdf[["crash_count_5yr"]].describe()

In [None]:
# 10) RideScore v1 (0–100)

def p95(values):
    """Return the 95th-percentile value of `values` as an int, never below 1.

    Null entries (per ``pd.notnull``) are ignored. The percentile is picked by
    nearest rank on the sorted data; an input with no usable values yields 1.
    """
    usable = [v for v in values if pd.notnull(v)]
    if len(usable) == 0:
        return 1
    usable.sort()
    rank = int(round(0.95 * (len(usable) - 1)))
    picked = int(usable[rank])
    return picked if picked > 1 else 1

def lts_to_score(lts):
    """Map an LTS level (1–4) to its 0–100 sub-score; other levels score 10."""
    score_by_level = {1: 100, 2: 75, 3: 40, 4: 10}
    level = int(lts)
    return score_by_level[level] if level in score_by_level else 10

# Facility points on a 0–10 scale (higher = more protective facility).
facility_bonus = {
    "protected_track": 10, "separated_lane": 10,
    "buffered_lane": 5, "painted_lane": 3,
    "shared": 0, "none": 0,
}
FACILITY_BONUS_MAX = max(facility_bonus.values())

# Crash counts at or above this value score 0 on the crash sub-score.
P95_CRASH = p95(gdf["crash_count_5yr"])

def crash_inv_score(n):
    """Return a 0–100 sub-score that falls linearly from 100 (no crashes) to 0 (>= P95_CRASH)."""
    share = float(n) / float(P95_CRASH)
    return 100.0 * (1.0 - min(max(share, 0.0), 1.0))

# Weights of the three sub-scores; they sum to 1.0.
W_LTS, W_CRASH, W_FAC = 0.6, 0.3, 0.1

gdf["s_LTS"] = gdf["lts_level"].map(lts_to_score)
gdf["s_crash"] = gdf["crash_count_5yr"].map(crash_inv_score)
# Rescale facility points to 0–100 like the other sub-scores. Left on 0–10, the
# facility term could add at most 1 point and the composite could not exceed 91.
gdf["s_facility"] = (
    gdf["bike_facility_type"].map(facility_bonus).fillna(0) * (100.0 / FACILITY_BONUS_MAX)
)

gdf["ridescore_v1"] = (W_LTS*gdf.s_LTS + W_CRASH*gdf.s_crash + W_FAC*gdf.s_facility).round(1)
gdf[["s_LTS","s_crash","s_facility","ridescore_v1"]].describe().round(1)

In [None]:
# 11) Export GeoJSON

# Columns written to the GeoJSON, in output order.
keep = [
    "segment_id",
    "highway",
    "num_lanes",
    "speed_limit",
    "bike_facility_type",
    "parking_presence",
    "lts_level",
    "crash_count_5yr",
    "serious_injury_count_5yr",
    "fatal_count_5yr",
    "ridescore_v1",
    "geometry",
]
out_path = os.path.join(OUT_DIR, OUTPUT_GEOJSON)

export_gdf = gdf[keep]
export_gdf.to_file(out_path, driver="GeoJSON")
print(f"Wrote: {out_path}")

In [None]:
# 12) Folium map (3 toggles + title) — FULLY OPTIMIZED
import folium
from folium.plugins import MeasureControl
from branca.element import Element
import json

def ramp_blue(v):
    """Pick the blue ramp colour for value `v`.

    Returns the colour of the highest threshold that `v` reaches, or the
    neutral grey "#999" when `v` reaches none of them (e.g. negative or NaN).
    """
    ramp = (
        (100, "#08306b"),
        (75, "#2171b5"),
        (50, "#6baed6"),
        (25, "#c6dbef"),
        (0, "#f7fbff"),
    )
    # Thresholds are listed high-to-low, so the first hit is the answer.
    for threshold, shade in ramp:
        if v >= threshold:
            return shade
    return "#999"

def ramp_green(v):
    """Pick the green ramp colour for value `v`.

    Returns the colour of the highest threshold that `v` reaches, or the
    neutral grey "#999" when `v` reaches none of them (e.g. negative or NaN).
    """
    ramp = (
        (7, "#006d2c"),
        (3, "#31a354"),
        (1, "#a1d99b"),
        (0, "#e5f5e0"),
    )
    # Thresholds are listed high-to-low, so the first hit is the answer.
    for threshold, shade in ramp:
        if v >= threshold:
            return shade
    return "#999"

# Line colour per lane-count bucket; keys are the lane count as a string, with
# "4+" covering four or more lanes.
lane_colors = {"1": "#08519c", "2": "#3182bd", "3": "#6baed6", "4+": "#bdd7e7"}

def extract_coords(geom):
    """Flatten a geometry into a single list of (x, y) coordinate tuples.

    None gives []. Multi-part geometries are concatenated part after part, so
    part boundaries are not preserved. Geometry types not handled explicitly
    fall back to their ``coords`` and then to one representative point; if
    both fail the result is [].
    """
    if geom is None:
        return []

    kind = getattr(geom, 'geom_type', None)

    if kind == 'Point':
        return [(geom.x, geom.y)]
    if kind == 'MultiPoint':
        return [(pt.x, pt.y) for pt in geom.geoms]
    if kind == 'LineString':
        return list(geom.coords)
    if kind == 'MultiLineString':
        flattened = []
        for piece in geom.geoms:
            try:
                piece_coords = list(piece.coords)
            except Exception:
                # Skip parts whose coordinates cannot be read.
                continue
            flattened += piece_coords
        return flattened

    # Any other geometry type: coords first, representative point second.
    try:
        return list(geom.coords)
    except Exception:
        pass
    try:
        anchor = geom.representative_point()
        return [(anchor.x, anchor.y)]
    except Exception:
        return []

print("Building GeoJSON features...")
# Fill missing columns with defaults upfront
gdf_filled = gdf.copy()
gdf_filled['segment_id'] = gdf_filled['segment_id'].fillna('')
gdf_filled['num_lanes'] = gdf_filled['num_lanes'].fillna(1).astype(int)
gdf_filled['speed_limit'] = gdf_filled['speed_limit'].fillna(25).astype(int)
gdf_filled['bike_facility_type'] = gdf_filled['bike_facility_type'].fillna('unknown')
gdf_filled['parking_presence'] = gdf_filled['parking_presence'].fillna(False).astype(bool)
gdf_filled['lts_level'] = gdf_filled['lts_level'].fillna(4).astype(int)
gdf_filled['crash_count_5yr'] = gdf_filled['crash_count_5yr'].fillna(0).astype(int)
gdf_filled['serious_injury_count_5yr'] = gdf_filled['serious_injury_count_5yr'].fillna(0).astype(int)
gdf_filled['fatal_count_5yr'] = gdf_filled['fatal_count_5yr'].fillna(0).astype(int)
gdf_filled['ridescore_v1'] = gdf_filled['ridescore_v1'].fillna(10.0).astype(float)

# Rows without a drawable geometry would only produce empty map features.
drawable = gdf_filled.geometry.notna() & ~gdf_filled.geometry.is_empty
gdf_filled = gdf_filled[drawable]

# Properties exposed to the map layers and tooltips.
property_columns = [
    "segment_id", "num_lanes", "speed_limit", "bike_facility_type",
    "parking_presence", "lts_level", "crash_count_5yr",
    "serious_injury_count_5yr", "fatal_count_5yr", "ridescore_v1",
]

# Serialise through GeoPandas so each feature keeps its real geometry type.
# Writing every geometry as a single LineString would join the separate parts
# of a MultiLineString (e.g. an edge cut by the AOI boundary) with straight
# segments that do not exist on the ground.
geo = json.loads(gdf_filled[property_columns + ["geometry"]].to_json())
features = geo["features"]
print(f"✓ Created {len(features)} GeoJSON features")

print("Initializing map...")
m = folium.Map(location=[38.9072, -77.0369], zoom_start=12, tiles="OpenStreetMap")


def add_segment_layer(fmap, name, style_function, tooltip_fields, show):
    """Add the segment GeoJSON to `fmap` as one toggleable overlay.

    Args:
        fmap: The folium map that receives the layer.
        name: Label shown in the layer control.
        style_function: Callable mapping a GeoJSON feature to a style dict.
        tooltip_fields: Property names shown in the hover tooltip.
        show: Whether the overlay is switched on when the map first loads.

    Returns:
        The FeatureGroup that was added to the map.
    """
    group = folium.FeatureGroup(name=name, show=show)
    folium.GeoJson(
        geo,
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(fields=tooltip_fields),
    ).add_to(group)
    group.add_to(fmap)
    return group


def lane_style(f):
    """Style a feature by lane count; four or more lanes share the "4+" colour."""
    n = f["properties"].get("num_lanes", 1)
    key = "4+" if n >= 4 else str(n)
    return {"color": lane_colors.get(key, "#999"), "weight": 3}


# Only the ranking overlay starts visible. The overlays draw the same lines, so
# with all three switched on the last one added would cover the RideScore
# colours; the other two are enabled from the layer control.
print("Adding Ranking layer...")
fg_rank = add_segment_layer(
    m,
    "Ranking (RideScore 0–100)",
    lambda f: {"color": ramp_blue(f["properties"].get("ridescore_v1", 0)), "weight": 2.5},
    ["segment_id", "ridescore_v1", "lts_level", "bike_facility_type"],
    show=True,
)

print("Adding Crash layer...")
fg_crash = add_segment_layer(
    m,
    "Crash history (5y)",
    lambda f: {"color": ramp_green(f["properties"].get("crash_count_5yr", 0)), "weight": 3},
    ["segment_id", "crash_count_5yr", "serious_injury_count_5yr", "fatal_count_5yr"],
    show=False,
)

print("Adding Lanes layer...")
fg_lanes = add_segment_layer(
    m,
    "Number of car lanes",
    lane_style,
    ["segment_id", "num_lanes", "speed_limit", "bike_facility_type"],
    show=False,
)

print("Finalizing map...")
folium.LayerControl(collapsed=False).add_to(m)
m.add_child(MeasureControl(primary_length_unit='meters', primary_area_unit='sqmeters'))

# Fixed-position title banner rendered on top of the map.
title_html = """
<div style="position: fixed; top: 10px; left: 50%; transform: translateX(-50%);
  z-index: 9999; background: rgba(255,255,255,.95); padding: 8px 12px; border-radius: 8px;
  box-shadow: 0 1px 6px rgba(0,0,0,.2); font: 15px/1.2 system-ui,-apple-system,Segoe UI,Roboto,sans-serif;">
  <strong>RideScore DC — LTS (MVP)</strong>
  <span style="color:#666;"> · toggle ranking, crashes, and car lanes</span>
</div>
"""
m.get_root().html.add_child(Element(title_html))

print("✓ Map rendering complete!")
display(m)
print("map outputted!")