In [3]:
import osmium
import csv
from geopy.distance import geodesic


In [2]:

# Minimum geodesic spacing between consecutive points written for a named way.
# A way node is emitted only if it lies at least this far from the previously
# emitted node of the same way (the first node of a way is always emitted).
MIN_DISTANCE = 20 # meters for sampling way nodes


In [3]:

class NamedOnlyHandler(osmium.SimpleHandler):
    """Export named OSM nodes and sampled points of named ways to a CSV file.

    Each output row is ``lat, lon, name``. Coordinates are written as integer
    microdegrees: degrees multiplied by 1e6 and truncated by ``int()``.

    The output file is opened in ``__init__`` and kept open for the lifetime
    of the handler. ``apply_file`` flushes it after every run so the rows are
    on disk before anything else reads the CSV. Call ``close()`` (or use the
    handler as a context manager) to release the file handle.

    Args:
        outfile: Path of the CSV file to create. An existing file is
            overwritten. The parent directory must already exist.
    """

    def __init__(self, outfile):
        super().__init__()
        # Keep a reference to the file object: without it the buffered tail of
        # the CSV can never be flushed or closed explicitly.
        # UTF-8 is set explicitly because "name" tags may contain non-ASCII
        # text and the platform default encoding is not guaranteed to handle it.
        self._csv_file = open(outfile, "w", newline="", encoding="utf-8")
        self.writer = csv.writer(self._csv_file)
        self.writer.writerow(["lat", "lon", "name"])  # header

    def apply_file(self, *args, **kwargs):
        """Run the base ``apply_file`` and then flush the CSV to disk.

        All arguments are forwarded unchanged. The flush also happens when
        processing raises, so rows written before the error are not lost.
        """
        try:
            return super().apply_file(*args, **kwargs)
        finally:
            self._csv_file.flush()

    def close(self):
        """Flush and close the output file. Safe to call more than once."""
        if not self._csv_file.closed:
            self._csv_file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so any exception from the with-body propagates.
        self.close()

    def node(self, n):
        """Write one row for a node that carries a non-empty ``name`` tag."""
        name = n.tags.get("name")
        if name:
            self.writer.writerow([int(n.location.lat*1e6), int(n.location.lon*1e6), name])

    def way(self, w):
        """Write sampled node positions of a way that carries a ``name`` tag.

        The first node is always written. Every later node is written only if
        it is at least ``MIN_DISTANCE`` meters (geodesic) from the last node
        that was written, so dense geometry is thinned out.

        Reads ``lat``/``lon`` from the way's node references; in this notebook
        the handler is applied with ``locations=True`` to make those available.
        """
        name = w.tags.get("name")
        if not name:
            return

        last_point = None  # (lat, lon) of the most recently written node
        for n in w.nodes:
            point = (n.lat, n.lon)
            if last_point is None or geodesic(last_point, point).meters >= MIN_DISTANCE:
                self.writer.writerow([int(point[0]*1e6), int(point[1]*1e6), name])
                last_point = point

In [4]:

# Create the export handler. Constructing it opens "saved_csv/p7.csv" for
# writing (overwriting any existing file) and writes the CSV header row.
# The "saved_csv" directory must already exist; open() does not create it.
handler = NamedOnlyHandler("saved_csv/p7.csv")


In [5]:
# Stream the Kanpur extract through the handler. locations=True asks pyosmium
# to track node coordinates (kept in a "sparse_mem_array" index) so that the
# way() callback can read lat/lon from each way's node references.
handler.apply_file("kanpur.osm.pbf", locations=True, idx="sparse_mem_array")


In [None]:
# Sort the extracted records by latitude (ascending)

In [5]:

# Sort the exported CSV by latitude (ascending) and write it to a new file.
#
# NOTE(review): the handler earlier in this notebook is constructed with
# "saved_csv/p7.csv", but this cell reads "p7.csv" from the working directory.
# Confirm which path is intended; on a fresh Restart & Run All this cell may
# fail or pick up a stale file.
input_file = "p7.csv"
output_file = "p7_sorted.csv"

rows = []
header = None

# Read the file. The encoding is explicit because place names may contain
# non-ASCII text and the platform default encoding is not guaranteed to be UTF-8.
with open(input_file, newline='', encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader, None)   # first row is the header, not data
    for row in reader:
        if not row:
            continue              # skip blank lines
        lat = int(row[0])         # microdegrees as integer
        lon = int(row[1])
        name = row[2]
        rows.append((lat, lon, name))

# list.sort is stable, so rows with equal latitude keep their file order.
rows.sort(key=lambda record: record[0])

# Write the sorted rows back, preserving the header when there was one.
with open(output_file, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    if header:
        writer.writerow(header)
    writer.writerows(rows)

print(f"Sorted {len(rows)} records by latitude → {output_file}")


Sorted 108553 records by latitude → p7_sorted.csv
