# Config


In [64]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import geopandas as gpd
from datetime import datetime
from google.transit import gtfs_realtime_pb2
import time
from pathlib import Path
import warnings
import ast

from realtime.vehicles import load_positions_bus
from config import EXTERNAL_DATA_DIR, PROCESSED_DATA_DIR


In [65]:
# Dynamic data: URL of the vehicle-positions XML feed and the XML namespace
# mapping handed to the loader together with it.
NS = {"siri": "http://www.siri.org.uk/siri"}
BUS_VEHICLES_URL = "https://ctb-siri.s3.eu-south-2.amazonaws.com/bizkaibus-vehicle-positions.xml"

# Static data: the GeoPackage sits in a folder of the same base name
# inside the external-data directory.
FOLDER = "Bizkaibus_GPKG"
gpkg_path = EXTERNAL_DATA_DIR.joinpath(FOLDER, "Bizkaibus_GPKG.gpkg")


# Load data

## Get vehicles

In [66]:
# Fetch the current vehicle positions, then attach point geometries built
# from the lon/lat columns (WGS84, EPSG:4326).
vehicle_positions = load_positions_bus(BUS_VEHICLES_URL, NS)
vehicle_points = gpd.points_from_xy(vehicle_positions["lon"], vehicle_positions["lat"])
vehicles = gpd.GeoDataFrame(vehicle_positions, geometry=vehicle_points, crs="EPSG:4326")
vehicles.head()

Test


Unnamed: 0,vehicle_id,line_id,lat,lon,timestamp,mode,geometry
0,1102,A3411,43.30268,-2.977018,2025-12-01 17:53:56+01:00,bus,POINT (-2.97702 43.30268)
1,1511,A3414,43.267544,-2.933164,2025-12-01 17:53:56+01:00,bus,POINT (-2.93316 43.26754)
2,1512,A3414,43.363712,-3.016427,2025-12-01 17:53:56+01:00,bus,POINT (-3.01643 43.36371)
3,1513,A3414,43.344643,-2.998001,2025-12-01 17:53:56+01:00,bus,POINT (-2.998 43.34464)
4,1515,A3414,43.26553,-2.933868,2025-12-01 17:53:56+01:00,bus,POINT (-2.93387 43.26553)


## Get stops

In [67]:
# The stops live in their own layer of the GeoPackage; preview the first two rows.
stops_layer = "Geralekuak_Paradas"
stops = gpd.read_file(filename=gpkg_path, layer=stops_layer)
stops.head(n=2)

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry
0,4026,1,ARABA/ÁLAVA,2,AMURRIO,Venta los Aires (Lekamaña),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499127 4762865)
1,4027,1,ARABA/ÁLAVA,2,AMURRIO,Saratxo (eliza/iglesia),"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499120.89 4763439.28)


## Get lines

In [68]:
# List all layer names in the GeoPackage
layers = gpd.list_layers(gpkg_path)["name"]

# Every layer except the stops layer ("Geralekuak_Paradas") is read as route geometry
layers_to_read = [lyr for lyr in layers if lyr != "Geralekuak_Paradas"]

gdfs = []
for lyr in layers_to_read:
    # Silence only pyogrio's "Non-conformant content for record" RuntimeWarning,
    # and only while this layer is being read.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="Non-conformant content for record",
            category=RuntimeWarning,
            module="pyogrio",
        )
        gdf = gpd.read_file(gpkg_path, layer=lyr)

    # Remember which layer each row came from once the layers are merged
    gdf["layer_name"] = lyr
    gdfs.append(gdf)

# Fail with a clear message instead of an opaque concat error when the
# GeoPackage holds no route layers.
if not gdfs:
    raise ValueError(f"No route layers found in {gpkg_path}")

# Combine into one GeoDataFrame, carrying over the CRS of the first layer read
lines = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=gdfs[0].crs)

lines.head(2)

Unnamed: 0,CodigoLinea,Sentido,DenominacionLinea,CodigoTipoRuta,DenominacionTipoRuta,DenominacionRuta,Fecha,geometry,layer_name
0,A0651,I,BILBAO - BALMASEDA,2,Desvío,Sopuerta-Balmaseda (Errefortzu Zerbitzua/Refue...,2022-09-12 08:02:52+00:00,"MULTILINESTRING ((487407.7 4791248.09, 487294....",Ibilbideak_Rutas_NoPrincipales_Ida
1,A0652,I,LANESTOSA - BALMASEDA,6,Secundaria,Lanestosa-Trucios Turtzioz-Artzentales-Zalla-B...,2024-03-21 11:44:30+00:00,"MULTILINESTRING ((464375.53 4785378.66, 464395...",Ibilbideak_Rutas_NoPrincipales_Ida


# Data management (Code)

## Clean Stops

In [69]:
def _route_codes_to_line_ids(parts):
    """Return the unique line IDs found in a list of route-code strings.

    The line ID of a route code is the text before its first "_", stripped of
    surrounding whitespace. Blank entries are skipped and first-seen order is
    preserved.
    """
    seen = {}
    for part in parts:
        if not part.strip():
            continue
        seen[str(part.split("_", 1)[0].strip())] = None
    return list(seen)


print(stops.shape)

# Step 1: parse the comma-separated route codes into a list of line IDs per stop
route_parts = stops["CodificacionRuta"].fillna("").str.split(",")
stops["line_id"] = route_parts.apply(_route_codes_to_line_ids)

# Step 2: one row per stop-line pair; a stop whose list is empty keeps a single
# row with a missing line_id (pandas explode semantics)
stops_exploded = stops.explode("line_id").reset_index(drop=True)
print(stops_exploded.shape)


(2353, 8)
(5251, 9)


In [70]:
# Inspect the exploded table: one row per stop-line pair, with the parsed code in `line_id`.
stops_exploded

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry,line_id
0,4026,01,ARABA/ÁLAVA,002,AMURRIO,Venta los Aires (Lekamaña),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499127 4762865),A3641
1,4027,01,ARABA/ÁLAVA,002,AMURRIO,Saratxo (eliza/iglesia),"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499120.89 4763439.28),A3641
2,4028,01,ARABA/ÁLAVA,002,AMURRIO,Saratxo (bidegurutzea/cruce),A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499448 4764157),A3641
3,4029,01,ARABA/ÁLAVA,002,AMURRIO,Aldaiturriaga,"A3641_Urduña Orduña-Arrigorriaga,A3641_Urduña ...",POINT (499482 4764877),A3641
4,4030,01,ARABA/ÁLAVA,002,AMURRIO,Alturriaga,A3641_Urduña Orduña-Hospital Galdakao Ospitale...,POINT (499626 4765620),A3641
...,...,...,...,...,...,...,...,...,...
5246,1922,48,BIZKAIA,916,USANSOLO,Ospitalea/Hospital,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514677.26 4785617.14),A3631
5247,1922,48,BIZKAIA,916,USANSOLO,Ospitalea/Hospital,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514677.26 4785617.14),A3917
5248,1922,48,BIZKAIA,916,USANSOLO,Ospitalea/Hospital,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514677.26 4785617.14),A3912
5249,1922,48,BIZKAIA,916,USANSOLO,Ospitalea/Hospital,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (514677.26 4785617.14),A3915


## Clean lines


In [71]:
# Rename the route-code column to `line_id`, the same column name the stops
# table uses. Rebinding (rather than `inplace=True`) avoids mutating the frame
# in place across cells.
lines = lines.rename(columns={"CodigoLinea": "line_id"})
lines["line_id"].dtype

dtype('O')

# Plots

In [72]:
# Plots and visualizations

# Save results

In [73]:
# Save results and figures.
# NOTE: FOLDER is rebound here to the text before "_GPKG"; re-run the config
# cell to restore the original value.
FOLDER = FOLDER.partition("_GPKG")[0]
output_dir = PROCESSED_DATA_DIR / FOLDER
output_dir.mkdir(parents=True, exist_ok=True)

# Lines go to their own GeoPackage; the stops file stores the exploded
# (one row per stop-line pair) table rather than the original `stops`.
lines.to_file(output_dir / "bizkaibus_lines.gpkg", layer="lines")
# stops.to_file(PROCESSED_DATA_DIR / FOLDER / "bizkaibus_stops.gpkg", layer="stops")
stops_exploded.to_file(output_dir / "bizkaibus_stops.gpkg", layer="stops")

# Evaluate file sizes as GeoJSON
# vehicles.to_file(PROCESSED_DATA_DIR / FOLDER / "bizkaibus_vehicles.geojson", driver="GeoJSON")
# lines.to_file(PROCESSED_DATA_DIR / FOLDER / "bizkaibus_lines.geojson", driver="GeoJSON")
# stops.to_file(PROCESSED_DATA_DIR / FOLDER / "bizkaibus_stops.geojson", driver="GeoJSON")