# Introducción
Explorar el formato de los datos de Bizkaibus (autobús): posiciones de vehículos en tiempo real, paradas y líneas.

# Config


In [27]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import geopandas as gpd
from datetime import datetime
from google.transit import gtfs_realtime_pb2
import time
from pathlib import Path
import warnings

from realtime.vehicles import load_positions_bus
from config import EXTERNAL_DATA_DIR, PROCESSED_DATA_DIR


In [23]:
# URL of the dynamic vehicle-positions feed (an XML document); passed to
# load_positions_bus below.
BUS_VEHICLES_URL = "https://ctb-siri.s3.eu-south-2.amazonaws.com/bizkaibus-vehicle-positions.xml"
# Prefix -> URI mapping for the SIRI XML namespace; passed to load_positions_bus
# together with the URL.
NS = {"siri": "http://www.siri.org.uk/siri"}

# Location of the static data: a GeoPackage stored under EXTERNAL_DATA_DIR / FOLDER.
FOLDER = "Bizkaibus_GPKG"
gpkg_path = EXTERNAL_DATA_DIR / FOLDER / "Bizkaibus_GPKG.gpkg"


# Load data

## Get vehicles

In [5]:
# Fetch the current vehicle positions from the Bizkaibus feed.
# The resulting frame holds bus positions (its `mode` column reads "bus"), so it
# is named accordingly.
df_bus = load_positions_bus(BUS_VEHICLES_URL, NS)
# Backward-compatible alias: this frame used to be called `df_metro`, which was
# misleading because it contains bus data, not metro data.
df_metro = df_bus
df_bus

Unnamed: 0,vehicle_id,lat,lon,timestamp,mode
0,1102,43.271378,-2.941909,2025-12-01 10:51:35+01:00,bus
1,1512,43.271460,-2.931730,2025-12-01 10:51:35+01:00,bus
2,1513,43.363750,-3.016452,2025-12-01 10:51:35+01:00,bus
3,1515,43.264260,-2.934424,2025-12-01 10:51:35+01:00,bus
4,1516,43.262733,-2.947942,2025-12-01 10:51:35+01:00,bus
...,...,...,...,...,...
232,8673,43.396255,-2.698034,2025-12-01 10:51:35+01:00,bus
233,8674,43.334470,-2.676568,2025-12-01 10:51:35+01:00,bus
234,8681,43.313667,-2.675975,2025-12-01 10:51:35+01:00,bus
235,8683,43.317554,-2.675326,2025-12-01 10:51:35+01:00,bus


## Get stops

In [None]:
# Load the stops layer ("Geralekuak_Paradas") from the static GeoPackage.
stops = gpd.read_file(
    gpkg_path,
    layer="Geralekuak_Paradas",
)

## Get lines

In [25]:
# Names of every layer stored in the GeoPackage.
layers = gpd.list_layers(gpkg_path)["name"]

# Keep every layer except the stops layer, which is loaded separately.
layers_to_read = [name for name in layers if name != "Geralekuak_Paradas"]

gdfs = []
# Install the warning filter once for the whole loop: pyogrio emits a
# RuntimeWarning about non-conformant records while reading, which is silenced
# here; every other warning still surfaces.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message="Non-conformant content for record",
        category=RuntimeWarning,
        module="pyogrio",
    )
    for lyr in layers_to_read:
        # Tag each row with the layer it came from so the origin survives the concat.
        gdf = gpd.read_file(gpkg_path, layer=lyr).assign(layer_name=lyr)
        gdfs.append(gdf)

# Stack all layers into a single GeoDataFrame, using the CRS of the first layer.
lines = gpd.GeoDataFrame(
    pd.concat(gdfs, ignore_index=True),
    crs=gdfs[0].crs,
)

# Data management (Code)

## Clean Stations & Stops

In [32]:
# Summary of the stops layer (count / unique / top / freq per column), used to
# inspect the data before cleaning.
stops.describe()

Unnamed: 0,CodigoReducidoParada,CodigoProvincia,DescripcionProvincia,CodigoMunicipio,DescripcionMunicipio,Denominacion,CodificacionRuta,geometry
count,2353,2353,2353,2353,2353,2353,2353,2353
unique,2353,4,4,113,121,1392,1116,2353
top,4026,48,BIZKAIA,20,BILBAO,Elexalde,A3523_Bilbao-Hospital Galdakao Ospitalea-Gerni...,POINT (499127 4762865)
freq,1,2251,2251,117,117,17,59,1


# Plots

In [3]:
# Plots and visualizations

# Save results

In [None]:
# Save results and figures.
# Build the output directory from FOLDER without rebinding the config constant:
# reassigning FOLDER here would leave the kernel in a state that no longer
# matches the config cell. The "_GPKG" suffix is dropped from the folder name.
output_dir = PROCESSED_DATA_DIR / FOLDER.split("_GPKG")[0]
output_dir.mkdir(parents=True, exist_ok=True)

# Write lines and stops to separate GeoPackage files.
lines.to_file(output_dir / "bizkaibus_lines.gpkg", layer="lines")
stops.to_file(output_dir / "bizkaibus_stops.gpkg", layer="stops")