# Config


In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import geopandas as gpd
from datetime import datetime
from google.transit import gtfs_realtime_pb2
import time
from pathlib import Path
import warnings

from realtime.vehicles import load_positions_bus
from config import EXTERNAL_DATA_DIR, PROCESSED_DATA_DIR


In [2]:
# URL of the dynamic (real-time) data feed
BUS_VEHICLES_URL = "https://ctb-siri.s3.eu-south-2.amazonaws.com/bizkaibus-vehicle-positions.xml"
# Namespace mapping for the SIRI XML feed; passed to load_positions_bus below
NS = {"siri": "http://www.siri.org.uk/siri"}

# Location of the static data: a GeoPackage stored under EXTERNAL_DATA_DIR / FOLDER
FOLDER = "Bizkaibus_GPKG"
gpkg_path = EXTERNAL_DATA_DIR.joinpath(FOLDER, "Bizkaibus_GPKG.gpkg")


# Load data

## Get vehicles

In [3]:
# Load the vehicle positions table from the feed URL (columns include lon / lat)
vehicles_raw = load_positions_bus(BUS_VEHICLES_URL, NS)

# Build one point per vehicle from its lon/lat pair and attach it as geometry (EPSG:4326)
vehicle_points = gpd.points_from_xy(vehicles_raw.lon, vehicles_raw.lat)
vehicles = gpd.GeoDataFrame(vehicles_raw, geometry=vehicle_points, crs="EPSG:4326")
vehicles.head()

Test


Unnamed: 0,vehicle_id,line_id,lat,lon,timestamp,mode,geometry
0,1102,A3411,43.368668,-3.01607,2025-12-01 16:31:03+01:00,bus,POINT (-3.01607 43.36867)
1,1511,A3414,43.363857,-3.016624,2025-12-01 16:31:03+01:00,bus,POINT (-3.01662 43.36386)
2,1512,A3414,43.264194,-2.934375,2025-12-01 16:31:03+01:00,bus,POINT (-2.93438 43.26419)
3,1513,A3414,43.328445,-2.992316,2025-12-01 16:31:03+01:00,bus,POINT (-2.99232 43.32844)
4,1515,A3414,43.351044,-3.003369,2025-12-01 16:31:03+01:00,bus,POINT (-3.00337 43.35104)


## Get stops

In [15]:
# Read the stops layer of the GeoPackage and preview its first two rows
stops_layer = "Geralekuak_Paradas"
stops = gpd.read_file(gpkg_path, layer=stops_layer)
stops.head(2)

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry
0,4026,1,ARABA/ÁLAVA,2,AMURRIO,Venta los Aires (Lekamaña),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499127 4762865)
1,4027,1,ARABA/ÁLAVA,2,AMURRIO,Saratxo (eliza/iglesia),"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499120.89 4763439.28)


## Get lines

In [5]:
# List all layer names in the GeoPackage
layers = gpd.list_layers(gpkg_path)["name"]

# Keep every layer except "Geralekuak_Paradas" (the stops layer, read in its own cell)
layers_to_read = [lyr for lyr in layers if lyr != "Geralekuak_Paradas"]
if not layers_to_read:
    # Fail with a clear message instead of the generic error pd.concat raises on an empty list
    raise ValueError(f"No layers other than 'Geralekuak_Paradas' found in {gpkg_path}")

gdfs = []
# Install the warning filter once for the whole loop; catch_warnings restores the
# previous filters when the block exits, so the suppression does not leak to later cells.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message="Non-conformant content for record",
        category=RuntimeWarning,
        module="pyogrio",
    )
    for lyr in layers_to_read:
        gdf = gpd.read_file(gpkg_path, layer=lyr)
        # Remember which layer each row came from
        gdf["layer_name"] = lyr
        gdfs.append(gdf)

# Combine into one GeoDataFrame, tagged with the CRS of the first layer read
lines = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)

# Only the last expression of a cell is displayed, so preview here
lines.head(2)

Unnamed: 0,CodigoLinea,Sentido,DenominacionLinea,CodigoTipoRuta,DenominacionTipoRuta,DenominacionRuta,Fecha,geometry,layer_name
0,A0651,I,BILBAO - BALMASEDA,2,Desvío,Sopuerta-Balmaseda (Errefortzu Zerbitzua/Refue...,2022-09-12 08:02:52+00:00,"MULTILINESTRING ((487407.7 4791248.09, 487294....",Ibilbideak_Rutas_NoPrincipales_Ida
1,A0652,I,LANESTOSA - BALMASEDA,6,Secundaria,Lanestosa-Trucios Turtzioz-Artzentales-Zalla-B...,2024-03-21 11:44:30+00:00,"MULTILINESTRING ((464375.53 4785378.66, 464395...",Ibilbideak_Rutas_NoPrincipales_Ida


# Data management (Code)

## Clean Stops

In [19]:

def extract_line_ids(route_codes):
    """Return the unique line codes found in a ``CodificacionRuta`` value.

    The value is treated as a comma-separated list of entries shaped like
    ``<line code>_<route description>``. For every non-blank entry the text
    before the first underscore is kept (stripped of surrounding whitespace);
    duplicates are dropped while preserving first-seen order.

    Args:
        route_codes: Raw cell value. Anything that is not a string (for
            example a missing value) yields an empty list.

    Returns:
        list[str]: Line codes in order of first appearance.
    """
    if not isinstance(route_codes, str):
        return []
    codes = (
        entry.split("_", 1)[0].strip()
        for entry in route_codes.split(",")
        if entry.strip()
    )
    # dict.fromkeys de-duplicates while keeping insertion order
    return list(dict.fromkeys(codes))


# One list of line codes per stop, derived from the raw route-coding column
stops["line_id"] = stops["CodificacionRuta"].apply(extract_line_ids)

# Show the raw column next to the derived one so the result can be checked by eye
stops[["CodificacionRuta", "line_id"]].head(2)


## Clean lines


In [22]:
# Rename the line-code column to "line_id", the column name also used for stops and vehicles.
# Reassign rather than mutate in place so the cell states its result explicitly.
lines = lines.rename(columns={"CodigoLinea": "line_id"})

# Plots

In [13]:
# TODO: add plots and visualizations (none implemented yet)

# Save results

In [23]:
# Save results and figures
# Build the output directory from FOLDER without overwriting the FOLDER config
# constant, so FOLDER keeps the value gpkg_path was built from.
output_dir = PROCESSED_DATA_DIR / FOLDER.split("_GPKG")[0]
output_dir.mkdir(parents=True, exist_ok=True)
lines.to_file(output_dir / "bizkaibus_lines.gpkg", layer="lines")
# NOTE(review): stops["line_id"] holds Python lists; how the GPKG driver serializes
# list values is not visible here — confirm the written column is usable downstream.
stops.to_file(output_dir / "bizkaibus_stops.gpkg", layer="stops")

# Kept for reference: evaluate the file sizes when exporting as GeoJSON
# vehicles.to_file(output_dir / "bizkaibus_vehicles.geojson", driver="GeoJSON")
# lines.to_file(output_dir / "bizkaibus_lines.geojson", driver="GeoJSON")
# stops.to_file(output_dir / "bizkaibus_stops.geojson", driver="GeoJSON")