In [None]:
import osmium
import geopandas as gpd
from shapely.geometry import LineString
import os
import time

class HighwayGeometryExtractor(osmium.SimpleHandler):
    """Collect every way carrying a ``highway`` tag as a LineString record.

    The handler reads ``lon``/``lat`` from each way's node references, so node
    locations must already be attached to the ways when ``way()`` is called
    (for example by running an ``osmium.NodeLocationsForWays`` handler first).

    Attributes:
        highways: list of dicts with the keys ``id``, ``highway``, ``surface``,
            ``name`` and ``geometry`` (a shapely ``LineString``), one per
            accepted way.
        count: number of ways with a ``highway`` tag seen so far.
        error_count: number of highway ways that did not yield a record,
            either because fewer than two usable coordinates were found or
            because building the record raised an exception.
        skipped_node_count: number of node references dropped because their
            location was not valid.
        start_time: ``time.time()`` value taken when the handler was created;
            used for the elapsed time in progress messages.
        debug_samples: ids of the first few ways rejected for having fewer
            than two usable coordinates.
    """

    # A progress line is printed every time this many highway ways were seen.
    PROGRESS_INTERVAL = 10000
    # Number of leading highway ways echoed to stdout as a sanity check.
    DEBUG_WAY_LIMIT = 5
    # Maximum number of rejected way ids kept in ``debug_samples``.
    DEBUG_SAMPLE_LIMIT = 3

    def __init__(self):
        super().__init__()
        self.highways = []
        self.count = 0
        self.error_count = 0
        self.skipped_node_count = 0
        self.start_time = time.time()
        self.debug_samples = []

    def way(self, w):
        """Handle one way: record it if it is a highway with >= 2 valid nodes.

        Args:
            w: the way passed in by osmium. Only ``id``, ``tags`` and
                ``nodes`` are read, and nothing derived from ``w`` other than
                plain Python values and the new LineString is kept.
        """
        if 'highway' not in w.tags:
            return

        self.count += 1
        if self.count % self.PROGRESS_INTERVAL == 0:
            elapsed = time.time() - self.start_time
            print(f"Processed {self.count} highways in {elapsed:.2f} seconds")
        if self.count <= self.DEBUG_WAY_LIMIT:
            # Guard the index: a way without any node reference must not
            # abort the whole run just because of a debug message.
            first_ref = w.nodes[0].ref if len(w.nodes) > 0 else None
            print(f"Way {w.id}: {len(w.nodes)} nodes, first node: {first_ref}")

        try:
            coords = []
            for n in w.nodes:
                # Test validity explicitly instead of swallowing whatever
                # reading lon/lat might raise; dropped nodes are counted so
                # that truncated geometries can be noticed afterwards.
                if n.location.valid():
                    coords.append((n.lon, n.lat))
                else:
                    self.skipped_node_count += 1

            if len(coords) >= 2:
                self.highways.append({
                    'id': w.id,
                    'highway': w.tags.get('highway', ''),
                    'surface': w.tags.get('surface', ''),
                    'name': w.tags.get('name', ''),
                    'geometry': LineString(coords)
                })
            else:
                # A LineString needs at least two points.
                if len(self.debug_samples) < self.DEBUG_SAMPLE_LIMIT:
                    self.debug_samples.append(w.id)
                self.error_count += 1
        except Exception:
            # Best effort: one broken way must not stop the extraction.
            self.error_count += 1


: 

In [None]:
# Convert every "*-latest.osm.pbf" extract found in input_dir into a GeoPackage
# holding its highway ways, one output file per extract.
# NOTE(review): both paths are machine-specific absolute paths; consider
# deriving them from a configurable base directory so the notebook can be
# re-run on another machine.
input_dir = r"C:\Users\Arnell\OneDrive - Food and Agriculture Organization\project_work\p0002_primary_forest_support\raw\roads\osm\osm_regional_250521\europe"
output_dir = r"C:\Users\Arnell\OneDrive - Food and Agriculture Organization\project_work\p0002_primary_forest_support\work_in_progress\roads\osm\osm_regional_250521\europe_gpkg"

os.makedirs(output_dir, exist_ok=True)

# sorted() makes the processing order (and therefore the log) reproducible;
# os.listdir() returns entries in arbitrary order.
for file in sorted(os.listdir(input_dir)):
    if not file.endswith("-latest.osm.pbf"):
        continue

    input_path = os.path.join(input_dir, file)
    country_name = file.replace("-latest.osm.pbf", "").replace("-", "_")
    output_path = os.path.join(output_dir, f"{country_name}_highways.gpkg")
    print(f"\nProcessing: {file} -> {output_path}")

    try:
        osm = osmium.io.Reader(input_path)
        try:
            idx = osmium.index.create_map("sparse_mem_array")
            lh = osmium.NodeLocationsForWays(idx)
            # Without this, one node missing from the index makes apply()
            # raise and the whole extract is lost; with it the node is just
            # left without a valid location and the handler can skip it.
            lh.ignore_errors()
            handler = HighwayGeometryExtractor()
            osmium.apply(osm, lh, handler)
        finally:
            # Release the file handle even when reading fails half-way.
            osm.close()

        print(f"Found {len(handler.highways)} highways, Errors: {handler.error_count}")
        if handler.highways:
            gdf = gpd.GeoDataFrame(handler.highways, crs="EPSG:4326")
            gdf.to_file(output_path, driver="GPKG")
            print(f"Saved to {output_path}")
        else:
            print("No valid highways to save.")
    except Exception as e:
        # Best effort: report the failure and continue with the next extract.
        print(f"Error processing {file}: {e}")



Processing: albania-latest.osm.pbf -> C:\Users\Arnell\OneDrive - Food and Agriculture Organization\project_work\p0002_primary_forest_support\work_in_progress\roads\osm\osm_regional_250521\europe_gpkg\albania_highways.gpkg
Way 3192359: 8 nodes, first node: 15492116
Way 3192360: 6 nodes, first node: 15492426
Way 3192369: 16 nodes, first node: 4548140918
Way 3192381: 5 nodes, first node: 4546692293
Way 3192411: 7 nodes, first node: 3524348462
Processed 10000 highways in 1.55 seconds
Processed 20000 highways in 2.65 seconds
Processed 30000 highways in 3.67 seconds
Processed 40000 highways in 4.75 seconds
Processed 50000 highways in 5.80 seconds
Processed 60000 highways in 6.53 seconds
Processed 70000 highways in 7.85 seconds
Processed 80000 highways in 8.87 seconds
Processed 90000 highways in 9.60 seconds
Processed 100000 highways in 10.02 seconds
Processed 110000 highways in 10.62 seconds
Processed 120000 highways in 11.28 seconds
Processed 130000 highways in 12.07 seconds
Processed 1400