In [1]:
import os
import time
import json
import pickle
import osmnx as ox
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [1]:
# Notebook configuration: input graph and output JSON paths used by the cells below.
#
# Alternative, mode-parameterised naming scheme (currently disabled):
# mode = "medium"
# GRAPH_FILE = f"delhi_{mode}_drive.graphml"
# NODE_OUT_FILE = f"node_{mode}.json"
# ROAD_OUT_FILE = f"road_{mode}.json"

# GraphML file loaded with ox.load_graphml in the cells below.
GRAPH_FILE = "vv.graphml"
# JSON file receiving the node-id -> address mapping (also read back to resume a run).
NODE_OUT_FILE = "node_vv.json"
# JSON file receiving the road-name -> OSM-id-list mapping.
# NOTE(review): "rode_vv.json" looks like a typo for "road_vv.json" (the disabled
# scheme above uses "road_{mode}.json") — confirm before renaming, since anything
# that reads this file depends on the exact name.
ROAD_OUT_FILE = "rode_vv.json"

In [4]:
import os
import json
import time
import random
import osmnx as ox
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderUnavailable, GeocoderTimedOut


print("Loading graph...")
G = ox.load_graphml(GRAPH_FILE)
print("Graph loaded:", len(G.nodes), "nodes,", len(G.edges), "edges")

# Resume support: begin with an empty mapping and, when an earlier run left
# NODE_OUT_FILE behind, continue from the entries it already holds.
node_to_address = {}
if os.path.exists(NODE_OUT_FILE):
    with open(NODE_OUT_FILE, "r", encoding="utf-8") as f:
        try:
            node_to_address = json.load(f)
        except json.JSONDecodeError:
            # File exists but is not valid JSON: start over with nothing saved.
            node_to_address = {}

print("Already saved nodes:", len(node_to_address))

# Single Nominatim client shared by every reverse-geocoding call below,
# created with an explicit timeout of 10.
geolocator = Nominatim(user_agent="delhi-router", timeout=10)

def reverse_geocode_node(lat, lon, max_tries=5):
    """
    Reverse geocode a coordinate with retries and exponential backoff.

    Uses the module-level ``geolocator`` to look up ``(lat, lon)`` with
    ``language="en"``.

    Args:
        lat: Latitude passed as the first element of the query point.
        lon: Longitude passed as the second element of the query point.
        max_tries: Maximum number of lookup attempts. With ``0`` (or less)
            no lookup is made and ``None`` is returned.

    Returns:
        The address string of the result, or ``None`` when the service
        returned no result / an empty address, or when every attempt failed.

    Notes:
        The pause between attempts starts at 2.0 seconds and doubles after
        each failure. Timeouts / service-unavailable errors add up to 0.5 s
        of random jitter; any other exception uses the plain delay. No pause
        is made after the final attempt, because nothing is retried after it.
    """
    delay = 2.0
    for attempt in range(1, max_tries + 1):
        try:
            loc = geolocator.reverse((lat, lon), language="en")
            if loc and loc.address:
                return loc.address
            # A successful call with no usable address is a final answer,
            # not an error: do not retry.
            return None
        except (GeocoderTimedOut, GeocoderUnavailable):
            # Transient service problem: back off with a little jitter.
            jitter = random.uniform(0, 0.5)
        except Exception:
            # Unknown error: deliberately best-effort so one bad lookup does
            # not abort the whole run; retried with the plain delay.
            jitter = 0.0

        if attempt == max_tries:
            # Out of attempts: return immediately instead of sleeping first.
            break
        time.sleep(delay + jitter)
        delay *= 2
    return None


# Walk every graph node in order. Each newly resolved address is written to
# disk straight away, so an interrupted run can pick up where it stopped.
all_nodes = list(G.nodes())
total = len(all_nodes)

for i, node_id in enumerate(all_nodes, start=1):
    key = str(node_id)

    if key not in node_to_address:
        lat, lon = G.nodes[node_id]["y"], G.nodes[node_id]["x"]

        addr = reverse_geocode_node(lat, lon)
        # No address came back: store the raw coordinates as the fallback.
        # NOTE(review): the fallback is saved like a real address, so a node
        # that failed here is skipped (not retried) on the next resume.
        addr = f"{lat}, {lon}" if addr is None else addr
        node_to_address[key] = addr

        # Dump to a sibling temp file and swap it into place, so the output
        # file is replaced in one step rather than rewritten in place.
        tmp_file = NODE_OUT_FILE + ".tmp"
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(node_to_address, f, ensure_ascii=False, indent=2)
        os.replace(tmp_file, NODE_OUT_FILE)

        # Pause between requests so Nominatim doesn't block us.
        time.sleep(1.0 + random.uniform(0, 0.3))

        if i % 100 == 0:
            print(f"Saved {len(node_to_address)} nodes so far... ({i}/{total})")
    elif i % 500 == 0:
        # Node was already in the mapping; only report progress occasionally.
        print(f"Progress {i}/{total} (skipping saved)")

print("Done. Total saved nodes:", len(node_to_address))
print("File:", NODE_OUT_FILE)

Loading graph...
Graph loaded: 991 nodes, 1324 edges
Already saved nodes: 441
Saved 517 nodes so far... (100/991)
Saved 747 nodes so far... (500/991)
Saved 858 nodes so far... (700/991)
Done. Total saved nodes: 991
File: node_vv.json


In [5]:
import json
import osmnx as ox

# Load the graph from GRAPH_FILE (same file as the geocoding cell), so this
# cell does not rely on a G left over from an earlier cell.
print("Loading graph...")
G = ox.load_graphml(GRAPH_FILE)
print("Graph loaded:", len(G.nodes), "nodes,", len(G.edges), "edges")

print("Building road name -> OSM ID mapping...")
# road name -> set of integer OSM ids while the edge loop below runs;
# converted to sorted lists before being written to ROAD_OUT_FILE.
road_to_osmids = {}

def normalize_name(name):
    """
    Reduce an edge "name" value to a single string.

    Returns None for None or an empty list, the string form of the first
    element for a non-empty list, and str(name) for anything else.
    """
    if name is None:
        return None
    if not isinstance(name, list):
        return str(name)
    if not name:
        return None
    # Several names on one edge: keep only the first.
    return str(name[0])

def normalize_osmids(osmid):
    """
    Return an edge "osmid" value as a flat list.

    None gives an empty list, a non-list value gives a one-element list, and
    a list is copied with any directly nested lists expanded one level.
    """
    if osmid is None:
        return []
    if not isinstance(osmid, list):
        return [osmid]

    flat = []
    for item in osmid:
        # Nested lists contribute their elements; scalars contribute themselves.
        flat += item if isinstance(item, list) else [item]
    return flat


# Build the road-name -> OSM-id mapping from every edge of the graph, then
# write it to ROAD_OUT_FILE as JSON.
unnamed_counter = 0

for u, v, k, data in G.edges(keys=True, data=True):
    road_name = normalize_name(data.get("name"))

    # If no name exists → create an artificial one
    if not road_name:
        unnamed_counter += 1
        # NOTE(review): "highway" is interpolated as-is; if it can be a list
        # (as "name" apparently can), the list repr ends up in the key.
        highway = data.get("highway", "road")
        road_name = f"unnamed_{highway}_{unnamed_counter}"

    osm_list = normalize_osmids(data.get("osmid"))

    # One id set per road name, created the first time the name is seen.
    ids_for_road = road_to_osmids.setdefault(road_name, set())

    for oid in osm_list:
        if oid is None:
            continue
        try:
            ids_for_road.add(int(oid))
        except (TypeError, ValueError, OverflowError):
            # Best effort: skip ids that cannot be converted to int. Only
            # conversion errors are ignored, so Ctrl-C and genuine bugs are
            # no longer swallowed.
            continue


# Convert sets to sorted lists
road_to_osmids = {name: sorted(ids) for name, ids in road_to_osmids.items()}

with open(ROAD_OUT_FILE, "w", encoding="utf-8") as f:
    json.dump(road_to_osmids, f, ensure_ascii=False, indent=2)

print(f"Saved ALL roads (named + unnamed) to {ROAD_OUT_FILE}")
print("Total roads:", len(road_to_osmids))


Loading graph...
Graph loaded: 991 nodes, 1324 edges
Building road name -> OSM ID mapping...
Saved ALL roads (named + unnamed) to rode_vv.json
Total roads: 333


In [15]:
# import json

# with open("node_small.json", "r", encoding="utf-8") as f:
#     node_map = json.load(f)

# with open("road_small.json", "r", encoding="utf-8") as f:
#     road_map = json.load(f)

# # example:
# print(node_map["58056528"])
# print(road_map["Nelson Mandela Marg"])
