In [1]:
import os
import osmnx as ox
import networkx as nx
import pandas as pd
from osmnx.io import save_graph_geopackage

In [2]:
# Fallback free-flow speeds (km/h) per OSM highway class, used when an edge
# carries no usable `maxspeed` tag.
_DEFAULT_SPEEDS_KPH = {
    "motorway": 100,
    "trunk": 80,
    "primary": 60,
    "secondary": 50,
    "tertiary": 40,
    "residential": 30,
    "service": 20,
}
_FALLBACK_SPEED_KPH = 30        # speed for highway classes missing from the table
_CAPACITY_PER_LANE_VPH = 1800   # vehicles per hour assumed for each lane
_MPH_TO_KPH = 1.609344

# Column order of the edge table; passed explicitly so that an empty graph
# still produces a CSV with a header row.
_EDGE_COLUMNS = [
    "src", "dst", "osm_src", "osm_dst", "length", "highway", "maxspeed",
    "lanes", "oneway", "speed_kph", "free_flow_time_s", "capacity_vph",
]


def _first_value(tag):
    """Return the first entry of a list-valued tag, or the tag itself."""
    if isinstance(tag, (list, tuple)):
        return tag[0] if tag else None
    return tag


def _parse_maxspeed(tag):
    """Parse a maxspeed tag into ``(posted_value, speed_kph)``.

    ``posted_value`` is the integer exactly as written in the tag.
    ``speed_kph`` is that value converted to km/h when the tag ends in
    ``mph``; any other tag is taken to be km/h already. Returns
    ``(None, None)`` when no leading integer can be read (missing tag,
    empty string, or text such as "signals").
    """
    raw = _first_value(tag)
    if raw is None:
        return None, None

    text = str(raw).strip().lower()
    in_mph = text.endswith("mph")
    if in_mph:
        text = text[:-len("mph")]

    tokens = text.split()
    if not tokens:
        return None, None
    try:
        posted = int(tokens[0])
    except ValueError:
        return None, None

    return posted, (posted * _MPH_TO_KPH if in_mph else posted)


def _parse_lanes(tag, default=1):
    """Parse a lanes tag into an int, falling back to ``default``."""
    try:
        return int(str(_first_value(tag)))
    except (ValueError, TypeError):
        return default


def get_map_osm(
    place="",
    location=(-3.7327, -38.5270),
    radius=500.0,
    savedir="../data/",
    city=""
):
    """
    Download a drivable street network from OpenStreetMap and export node
    and edge tables with speed, free-flow time and capacity estimates.

    Parameters
    ----------
    place : str or list of str, optional
        Place query (e.g., "San Francisco, CA") or a list of such queries,
        forwarded to ``ox.graph_from_place``. Used whenever it is non-empty.
    location : tuple of float, optional
        (latitude, longitude) center point, used only if `place` is empty.
    radius : float, optional
        Value passed as ``dist`` to ``ox.graph_from_point`` when `location`
        is used.
    savedir : str, optional
        Output directory; created if missing. Falls back to "data" (with a
        printed warning) when empty.
    city : str, optional
        Base name of the output files. Falls back to "city" (with a printed
        warning) when empty.

    Returns
    -------
    edges_df : pandas.DataFrame
        One row per ordered (src, dst) node pair. When the graph holds
        several parallel edges for the same pair, only the first one
        encountered is kept. `maxspeed` is the number as posted in the tag,
        while `speed_kph` is always in km/h (mph tags are converted).
    nodes_df : pandas.DataFrame
        Node table with a 1-based `idx`, the original `osm_idx`, and
        `lat` / `lon`.

    Notes
    -----
    Writes three files into `savedir`: ``{city}_nodes.csv``,
    ``{city}_edges.csv`` and ``{city}.gpkg``.
    """
    if not city:
        print("⚠️ Warning: No city name provided. using 'city'")
        city = "city"
    if not savedir:
        print("⚠️ Warning: No savedir name provided. using 'data'")
        savedir = "data"

    nfile = os.path.join(savedir, f"{city}_nodes.csv")
    efile = os.path.join(savedir, f"{city}_edges.csv")
    gfile = os.path.join(savedir, f"{city}.gpkg")

    # Every output goes to `savedir`, so this is the only directory needed.
    os.makedirs(savedir, exist_ok=True)

    # Download the graph
    if place:
        G = ox.graph_from_place(place, network_type="drive", simplify=True)
    else:
        G = ox.graph_from_point(location, dist=radius, network_type="drive", simplify=True)

    print(f"\nNumber of nodes: {G.number_of_nodes()}")
    print(f"Number of edges: {G.number_of_edges()}")
    print(f"Is directed: {nx.is_directed(G)}")

    # Node table, plus a map from OSM node id to the compact 1-based index
    # used by the edge table.
    nodes = list(G.nodes(data=True))
    nodes_df = pd.DataFrame({
        'idx': range(1, len(nodes) + 1),
        'osm_idx': [n for n, _ in nodes],
        'lat': [data['y'] for _, data in nodes],
        'lon': [data['x'] for _, data in nodes]
    })
    node_id_map = {n: idx for idx, (n, _) in enumerate(nodes, start=1)}
    nodes_df.to_csv(nfile, index=False)

    # Edge table
    edge_data = []
    seen = set()
    for u, v, data in G.edges(data=True):
        # Keep only the first edge for each ordered node pair.
        if (u, v) in seen:
            continue
        seen.add((u, v))

        length = data.get("length", 0)
        highway = data.get("highway", None)
        oneway = data.get("oneway", False)

        maxspeed_val, posted_kph = _parse_maxspeed(data.get("maxspeed", None))
        if posted_kph:
            speed_kph = posted_kph
        else:
            # No usable posted limit (missing, unparsable or zero): fall
            # back to the highway-class table.
            speed_kph = _DEFAULT_SPEEDS_KPH.get(_first_value(highway), _FALLBACK_SPEED_KPH)

        lanes_val = _parse_lanes(data.get("lanes", None))
        capacity_vph = lanes_val * _CAPACITY_PER_LANE_VPH

        if speed_kph > 0:
            # length divided by speed in m/s (km/h * 1000 / 3600)
            free_flow_time = length / (speed_kph * 1000 / 3600)
        else:
            free_flow_time = None  # non-positive posted speed

        edge_data.append({
            "src": node_id_map[u],
            "dst": node_id_map[v],
            "osm_src": u,
            "osm_dst": v,
            "length": length,
            "highway": highway,
            "maxspeed": maxspeed_val,
            "lanes": lanes_val,
            "oneway": oneway,
            "speed_kph": speed_kph,
            "free_flow_time_s": free_flow_time,
            "capacity_vph": capacity_vph
        })

    edges_df = pd.DataFrame(edge_data, columns=_EDGE_COLUMNS)
    edges_df.to_csv(efile, index=False)

    # Save the full graph as a GeoPackage next to the CSV files.
    save_graph_geopackage(G, filepath=gfile)

    return edges_df, nodes_df

In [26]:
# San Francisco: build the drive network and write its outputs under ../data/.
edges, nodes = get_map_osm(
    city="sanfrancisco",
    savedir="../data/",
    place="San Francisco, California, USA",
)


Number of nodes: 10010
Number of edges: 27591
Is directed: True


In [27]:
# Boston: build the drive network and write its outputs under ../data/.
edges, nodes = get_map_osm(
    city="boston",
    savedir="../data/",
    place="Boston, Massachusetts, USA",
)


Number of nodes: 11330
Number of edges: 25942
Is directed: True


In [28]:
# Manhattan: build the drive network and write its outputs under ../data/.
edges, nodes = get_map_osm(
    city="manhattan",
    savedir="../data/",
    place="Manhattan, New York, USA",
)


Number of nodes: 4607
Number of edges: 9888
Is directed: True


In [4]:
# Place queries for the Bay Area cities; the whole list is handed to
# get_map_osm as a single `place` argument.
bay_area_places = [
    "San Francisco, California, USA",
    "Oakland, California, USA",
    "Berkeley, California, USA",
    "San Jose, California, USA",
    "Fremont, California, USA",
    "Palo Alto, California, USA",
    "Mountain View, California, USA",
]

# Outputs are written under ../data/ with the "bay_area" file prefix.
edges, nodes = get_map_osm(
    city="bay_area",
    savedir="../data/",
    place=bay_area_places,
)


Number of nodes: 20598
Number of edges: 55644
Is directed: True
