# Config


In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import geopandas as gpd
from datetime import datetime
from google.transit import gtfs_realtime_pb2
import time
from pathlib import Path
import warnings

from realtime.vehicles import load_positions_bus
from config import EXTERNAL_DATA_DIR, PROCESSED_DATA_DIR


In [2]:
# URL of the dynamic (real-time) data: Bizkaibus vehicle positions published as XML
BUS_VEHICLES_URL = "https://ctb-siri.s3.eu-south-2.amazonaws.com/bizkaibus-vehicle-positions.xml"
# XML namespace prefix map; handed to load_positions_bus together with the URL
NS = {"siri": "http://www.siri.org.uk/siri"}

# Location of the static data: a GeoPackage holding the stops layer and the route layers
FOLDER = "Bizkaibus_GPKG"
gpkg_path = EXTERNAL_DATA_DIR / FOLDER / "Bizkaibus_GPKG.gpkg"


# Load data

## Get vehicles

In [10]:
# Download the current vehicle positions as a plain table (it carries `lat` / `lon` columns).
positions = load_positions_bus(BUS_VEHICLES_URL, NS)

# Turn the lon/lat pairs into point geometries; the coordinates are tagged as EPSG:4326.
vehicles = gpd.GeoDataFrame(
    positions,
    geometry=gpd.points_from_xy(positions["lon"], positions["lat"]),
    crs="EPSG:4326",
)
vehicles

Unnamed: 0,vehicle_id,lat,lon,timestamp,mode,geometry
0,1102,43.283110,-2.966315,2025-12-01 15:12:02+01:00,bus,POINT (-2.96631 43.28311)
1,1511,43.285572,-2.936608,2025-12-01 15:12:02+01:00,bus,POINT (-2.93661 43.28557)
2,1513,43.348885,-3.000950,2025-12-01 15:12:02+01:00,bus,POINT (-3.00095 43.34888)
3,1515,43.264340,-2.934412,2025-12-01 15:12:02+01:00,bus,POINT (-2.93441 43.26434)
4,1516,43.262646,-2.947980,2025-12-01 15:12:02+01:00,bus,POINT (-2.94798 43.26265)
...,...,...,...,...,...,...
242,8669,43.262380,-2.961767,2025-12-01 15:12:02+01:00,bus,POINT (-2.96177 43.26238)
243,8671,43.301727,-2.639483,2025-12-01 15:12:02+01:00,bus,POINT (-2.63948 43.30173)
244,8673,43.232773,-2.901465,2025-12-01 15:12:02+01:00,bus,POINT (-2.90147 43.23277)
245,8674,43.306416,-2.681340,2025-12-01 15:12:02+01:00,bus,POINT (-2.68134 43.30642)


## Get stations

In [6]:
# Read the stops layer ("Geralekuak_Paradas") of the GeoPackage into a GeoDataFrame
stops = gpd.read_file(gpkg_path, layer="Geralekuak_Paradas")
stops

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry
0,4026,01,ARABA/ÁLAVA,002,AMURRIO,Venta los Aires (Lekamaña),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499127 4762865)
1,4027,01,ARABA/ÁLAVA,002,AMURRIO,Saratxo (eliza/iglesia),"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499120.89 4763439.28)
2,4028,01,ARABA/ÁLAVA,002,AMURRIO,Saratxo (bidegurutzea/cruce),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499448 4764157)
3,4029,01,ARABA/ÁLAVA,002,AMURRIO,Aldaiturriaga,"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499482 4764877)
4,4030,01,ARABA/ÁLAVA,002,AMURRIO,Alturriaga,A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499626 4765620)
...,...,...,...,...,...,...,...,...
2348,2642,48,BIZKAIA,916,USANSOLO,Usansolo,"A3927_Bilbao-Lemoa-Zeanuri (Autopista),A3925_B...",POINT (515094.25 4785116.15)
2349,1832,48,BIZKAIA,916,USANSOLO,Labeaga,A3513_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514592 4785672)
2350,1833,48,BIZKAIA,916,USANSOLO,Ospitalea/Hospital,A3932_Hospital Galdakao Ospitalea-Metro Boluet...,POINT (514690.25 4785588.14)
2351,1834,48,BIZKAIA,916,USANSOLO,Labeaga,A3513_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514632 4785818)


## Get lines

In [7]:
def _read_route_layer(layer_name):
    """Read one GeoPackage layer and tag every row with the layer it came from.

    pyogrio's "Non-conformant content for record" RuntimeWarning is silenced
    for the duration of this single read; the previous warning filters are
    restored as soon as the read finishes.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="Non-conformant content for record",
            category=RuntimeWarning,
            module="pyogrio",
        )
        layer_gdf = gpd.read_file(gpkg_path, layer=layer_name)

    layer_gdf["layer_name"] = layer_name
    return layer_gdf


# Names of every layer stored in the GeoPackage
layers = gpd.list_layers(gpkg_path)["name"]

# Keep everything except the stops layer, which is loaded separately
layers_to_read = [name for name in layers if name != "Geralekuak_Paradas"]

# One GeoDataFrame per remaining layer
gdfs = [_read_route_layer(name) for name in layers_to_read]

# Stack them into a single GeoDataFrame, reusing the CRS of the first layer
lines = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)

lines

Unnamed: 0,CodigoLinea,Sentido,DenominacionLinea,CodigoTipoRuta,DenominacionTipoRuta,DenominacionRuta,Fecha,geometry,layer_name
0,A0651,I,BILBAO - BALMASEDA,2,Desvío,Sopuerta-Balmaseda (Errefortzu Zerbitzua/Refue...,2022-09-12 08:02:52+00:00,"MULTILINESTRING ((487407.7 4791248.09, 487294....",Ibilbideak_Rutas_NoPrincipales_Ida
1,A0652,I,LANESTOSA - BALMASEDA,6,Secundaria,Lanestosa-Trucios Turtzioz-Artzentales-Zalla-B...,2024-03-21 11:44:30+00:00,"MULTILINESTRING ((464375.53 4785378.66, 464395...",Ibilbideak_Rutas_NoPrincipales_Ida
2,A0653,I,TRUCIOS TURTZIOZ - ARTZENTALES,2,Desvío,Trucios Turtzioz-San Miguel-Santa Cruz-Traslaviña,2015-04-09 07:00:05+00:00,"MULTILINESTRING ((479034.76 4792241.76, 479034...",Ibilbideak_Rutas_NoPrincipales_Ida
3,A0654,I,BALMASEDA - Gurutzeta/Cruces - UPV/EHU,3,Prolongación,Balmaseda-Gurutzeta/Cruces-UPV/EHU,2020-03-30 09:53:58+00:00,"MULTILINESTRING ((483923.1 4782284.5, 483931.2...",Ibilbideak_Rutas_NoPrincipales_Ida
4,A0654,I,BALMASEDA - Gurutzeta/Cruces - UPV/EHU,2,Desvío,Balmaseda-Gurutzeta/Cruces (Institutu),2021-11-30 15:36:25+00:00,"MULTILINESTRING ((483923.1 4782284.5, 483931.2...",Ibilbideak_Rutas_NoPrincipales_Ida
...,...,...,...,...,...,...,...,...,...
404,A3928,V,ARTEA - ZEBERIO - UGAO MIRABALLES,1,Principal,Zeberio-Ugao Miraballes,2016-12-30 23:00:00+00:00,"MULTILINESTRING ((507932.41 4780968.93, 507930...",Ibilbideak_Rutas_Principal_Vuelta
405,A3930,V,BILBAO - GALDAKAO (Autopista),1,Principal,Bilbao-Galdakao (Autopista),2022-07-12 09:53:48+00:00,"MULTILINESTRING ((504128.98 4789805.78, 504134...",Ibilbideak_Rutas_Principal_Vuelta
406,A3931,V,GARAI - DURANGO,1,Principal,Garai-Durango,2019-02-06 11:49:42+00:00,"MULTILINESTRING ((529824.51 4779945.15, 529854...",Ibilbideak_Rutas_Principal_Vuelta
407,A3932,V,GALDAKAO - Metro Bolueta,1,Principal,Galdakao-Metro Bolueta,2022-05-03 13:49:43+00:00,"MULTILINESTRING ((512854.17 4786443.14, 512906...",Ibilbideak_Rutas_Principal_Vuelta


# Data management (Code)

## Clean Stations & Stops

In [32]:
# Per-column summary of the stops table (count / unique / top / freq)
stops.describe()

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry
count,2353,2353,2353,2353,2353,2353,2353,2353
unique,2353,4,4,113,121,1392,1116,2353
top,4026,48,BIZKAIA,20,BILBAO,Elexalde,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (499127 4762865)
freq,1,2251,2251,117,117,17,59,1


# Plots

In [3]:
# Plots and visualizations

# Save results

In [11]:
# Save results and figures
# The output folder is the input folder name without its "_GPKG" suffix.
# It is kept in its own variable instead of overwriting FOLDER, so the config
# constant still points at the input data if earlier cells are re-run.
output_dir = PROCESSED_DATA_DIR / FOLDER.split("_GPKG")[0]
output_dir.mkdir(parents=True, exist_ok=True)

# GeoPackage exports keep the layers in their native CRS.
lines.to_file(output_dir / "bizkaibus_lines.gpkg", layer="lines")
stops.to_file(output_dir / "bizkaibus_stops.gpkg", layer="stops")

# Evaluate file sizes as GeoJSON.
# GeoJSON (RFC 7946) expects WGS84 longitude/latitude coordinates, while the
# stops and lines hold projected coordinates, so every layer is reprojected to
# EPSG:4326 before export (a no-op for vehicles, which already use it).
geojson_exports = {
    "bizkaibus_vehicles": vehicles,
    "bizkaibus_lines": lines,
    "bizkaibus_stops": stops,
}
for stem, layer_gdf in geojson_exports.items():
    layer_gdf.to_crs("EPSG:4326").to_file(output_dir / f"{stem}.geojson", driver="GeoJSON")