# Config


In [20]:
import geopandas as gpd
from shapely.geometry import LineString
from pathlib import Path
import pandas as pd
from config import RAW_DATA_DIR as INPUT
from config import PROCESSED_DATA_DIR as OUTPUT


# Load data

In [21]:
# 1️⃣ Read all GPKG files in order
# Columns every snapshot must provide; files lacking any of them are skipped.
REQUIRED_COLUMNS = ["vehicle_ref", "journey_ref", "stop_ref", "recorded_at", "geometry"]

files = sorted(INPUT.glob("bizkaibus_*.gpkg"))

gdfs = []
for f in files:
    gdf = gpd.read_file(f)
    # Check the schema explicitly instead of wrapping the selection in a bare
    # `except:`, which would also swallow KeyboardInterrupt and unrelated bugs
    # and would not say why the file was rejected.
    missing = [col for col in REQUIRED_COLUMNS if col not in gdf.columns]
    if missing:
        print(f"Problem with {f}: missing columns {missing}")
        continue
    # Keep only essential columns
    gdf = gdf[REQUIRED_COLUMNS]
    gdfs.append(gdf)





Problem with /home/lliebsch/Escritorio/bizkaia_od/data/raw/bizkaibus_trajectories.gpkg


  return ogr_read(


In [22]:
# 2️⃣ Combine all snapshots
# pd.concat fails with an opaque "No objects to concatenate" on an empty list;
# fail early with a message that points at the actual cause instead.
if not gdfs:
    raise ValueError("No snapshot files were loaded; nothing to combine.")
all_vehicles = pd.concat(gdfs, ignore_index=True)
all_vehicles["recorded_at"] = pd.to_datetime(all_vehicles["recorded_at"])

# 3️⃣ Sort by vehicle and time so each vehicle's rows are in chronological order
all_vehicles = all_vehicles.sort_values(["vehicle_ref", "recorded_at"])

# 4️⃣ Convert back to GeoDataFrame
all_vehicles_gdf = gpd.GeoDataFrame(all_vehicles, geometry="geometry", crs="EPSG:4326")


In [23]:
# Display the combined GeoDataFrame to inspect the sorted snapshots
all_vehicles_gdf

Unnamed: 0,vehicle_ref,journey_ref,stop_ref,recorded_at,geometry
0,1101,trp_A3411_806_OP9VIN_62700,4160,2025-11-28 17:20:36+01:00,POINT (-3.00879 43.3773)
249,1101,trp_A3411_806_OP9VIN_62700,4160,2025-11-28 17:22:52+01:00,POINT (-3.00878 43.37729)
498,1101,trp_A3411_806_OP9VIN_62700,4160,2025-11-28 17:25:06+01:00,POINT (-3.00878 43.37731)
748,1101,trp_A3411_806_OP9VIN_62700,4163,2025-11-28 17:27:24+01:00,POINT (-3.01617 43.36856)
999,1101,trp_A3411_806_OP9VIN_62700,93,2025-11-28 17:29:40+01:00,POINT (-3.01606 43.36161)
...,...,...,...,...,...
2744,8683,trp_A3532_1303_OP44VIN_61200,1866,2025-11-28 17:43:11+01:00,POINT (-2.62897 43.39182)
2992,8683,trp_A3532_1303_OP44VIN_61200,1867,2025-11-28 17:45:26+01:00,POINT (-2.62161 43.38496)
3241,8683,trp_A3532_1303_OP44VIN_61200,1870,2025-11-28 17:47:41+01:00,POINT (-2.59244 43.38012)
3489,8683,trp_A3532_1303_OP44VIN_61200,1870,2025-11-28 17:49:58+01:00,POINT (-2.58672 43.38078)


In [19]:
# 5️⃣ Create trajectories per vehicle (LineString)
trajectories = []
for vehicle, group in all_vehicles_gdf.groupby("vehicle_ref"):
    # Drop missing/empty points: a LineString cannot be built from them.
    points = group[group.geometry.notna() & ~group.geometry.is_empty]
    # Order chronologically here instead of relying on an earlier cell having
    # sorted the frame; the stable sort keeps the existing row order for
    # identical timestamps.
    points = points.sort_values("recorded_at", kind="mergesort")
    # Only keep vehicles with at least 2 points
    if len(points) < 2:
        continue
    trajectories.append(
        {
            "vehicle_ref": vehicle,
            # The first observation's journey/stop label the whole trajectory.
            "journey_ref": points["journey_ref"].iloc[0],
            "stop_ref": points["stop_ref"].iloc[0],
            "start_time": points["recorded_at"].min(),
            "end_time": points["recorded_at"].max(),
            "geometry": LineString(points.geometry.tolist()),
        }
    )

# Reuse the CRS of the input points so the lines are never labelled with a
# CRS that differs from the one their coordinates are expressed in.
traj_gdf = gpd.GeoDataFrame(trajectories, geometry="geometry", crs=all_vehicles_gdf.crs)

# 6️⃣ Save trajectories to a single GPKG
output_path = Path(OUTPUT) / "bizkaibus_trajectories.gpkg"
# Make sure the target directory exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
traj_gdf.to_file(output_path, driver="GPKG")

print(f"Created {len(traj_gdf)} vehicle trajectories")

Created 260 vehicle trajectories
