# Config

In [None]:
import time
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests

from config import DATA_DIR

In [28]:
# Location of the Bizkaibus vehicle-positions XML document that is downloaded below.
URL = (
    "https://ctb-siri.s3.eu-south-2.amazonaws.com/"
    "bizkaibus-vehicle-positions.xml"
)
# Prefix -> namespace URI map handed to the ElementTree find*/findtext lookups.
ns = dict(siri="http://www.siri.org.uk/siri")


# Load data

In [23]:
# -----------------------------------------------------
# 1. Download XML
# -----------------------------------------------------
# Bound the request: requests applies no timeout by default, so a stalled
# connection would otherwise block this cell (and the kernel) forever.
response = requests.get(URL, timeout=(5, 30))  # (connect, read) seconds
# Fail loudly on 4xx/5xx instead of handing an error body to the XML parser.
response.raise_for_status()
# Keep the raw bytes so the XML parser can honour the document's own encoding.
xml_content = response.content

In [None]:
# -----------------------------------------------------
# 2. Parse XML
# -----------------------------------------------------
root = ET.fromstring(xml_content)

# Every lookup below uses the "siri:" prefix, which is resolved through the
# `ns` prefix -> URI map.

# One dict per usable vehicle record; turned into a DataFrame in the next step.
data = []

for activity in root.findall(".//siri:VehicleActivity", ns):

    mvj = activity.find(".//siri:MonitoredVehicleJourney", ns)
    if mvj is None:
        continue

    # Coordinates
    loc = mvj.find(".//siri:VehicleLocation", ns)
    if loc is None:
        continue

    lat = loc.find("siri:Latitude", ns)
    lon = loc.find("siri:Longitude", ns)

    if lat is None or lon is None:
        continue

    # An element can be present but empty (``.text`` is None -> TypeError) or
    # hold non-numeric text (ValueError). Skip such records, just like records
    # with missing elements, instead of aborting the whole parse.
    try:
        lat_value = float(lat.text)
        lon_value = float(lon.text)
    except (TypeError, ValueError):
        continue

    # Both may be None when the element is absent; the record is kept anyway.
    vehicle_id = mvj.findtext("siri:VehicleRef", default=None, namespaces=ns)
    timestamp = activity.findtext("siri:RecordedAtTime", default=None, namespaces=ns)

    data.append({
        "vehicle_id": vehicle_id,
        "timestamp": timestamp,
        "lat": lat_value,
        "lon": lon_value
    })

In [25]:
# 3. Convert to DataFrame
# Name the columns explicitly: with an empty `data` list a bare
# pd.DataFrame(data) has no columns at all, and `df.lon` below would raise
# AttributeError. With explicit columns an empty feed yields an empty frame.
df = pd.DataFrame(data, columns=["vehicle_id", "timestamp", "lat", "lon"])

# 4. Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(
    df,
    # points_from_xy takes x first, then y: longitude, then latitude.
    geometry=gpd.points_from_xy(df.lon, df.lat),
    crs="EPSG:4326"
)

print(gdf.head())
print("\nTotal records:", len(gdf))

  vehicle_id                  timestamp        lat       lon  \
0       1511  2025-11-28T17:02:37+01:00  43.316246 -2.968602   
1       1513  2025-11-28T17:02:37+01:00  43.261780 -2.948374   
2       1515  2025-11-28T17:02:37+01:00  43.331030 -2.970186   
3       1516  2025-11-28T17:02:37+01:00  43.329320 -2.997089   
4       1534  2025-11-28T17:02:37+01:00  43.265137 -2.945476   

                    geometry  
0   POINT (-2.9686 43.31625)  
1  POINT (-2.94837 43.26178)  
2  POINT (-2.97019 43.33103)  
3  POINT (-2.99709 43.32932)  
4  POINT (-2.94548 43.26514)  

Total records: 245


# Visualize


In [26]:
# Local wall-clock time of the export, used to tag the output file name.
# NOTE: the tag holds only hour-minute-second (no date), so an export made at
# the same second on another day writes to the same file name.
TIME = datetime.now()
hour_minute_second = TIME.strftime("%H-%M-%S")

# Create the target directory up front; writing into a missing directory fails.
Path(DATA_DIR).mkdir(parents=True, exist_ok=True)

# State the driver explicitly rather than relying on inference from ".gpkg".
gdf.to_file(f"{DATA_DIR}/buses-{hour_minute_second}.gpkg", driver="GPKG")