In [1]:
import pandas as pd
import requests
import json
import os
import io
import py7zr
import zipfile
import csv
import tempfile
from datetime import datetime
from pathlib import Path
from itertools import islice
from collections import defaultdict
from dotenv import load_dotenv
import shutil
import copy
from google.transit import gtfs_realtime_pb2
from google.protobuf.message import DecodeError
import logging

# Load variables from a local .env file into the process environment so that
# the os.getenv() lookups for the API keys can find them.
load_dotenv()

True

In [2]:
# API credentials are read from the environment; either value is None when
# the corresponding variable is not set.
KODA_KEY = os.environ.get("API_KODA_KEY")
# Key for the GTFS Regional Realtime API.
GTFS_RT_KEY = os.environ.get("API_GTFS_RT_KEY")

In [3]:
#host_api = "https://api.koda.trafiklab.se/KoDa/api/v2"

#base_history = "gtfs-rt"
#base_reference = "gtfs-static"

#operator = "sl"

#type_of_api = "TripUpdates"

#url_history = host_api + base_history + operator + type_of_api
#url_reference = host_api + base_reference + operator

#call_date = "2025-03-15"

#params = {
 #   "date": call_date, 
 #   "key": KODA_KEY
#}

In [14]:
def call_koda_api(base_url, date, operator = "sl", endpoint=""):
    """Send a GET request to the KoDa v2 API and return the raw response.

    The URL is built as
    ``https://api.koda.trafiklab.se/KoDa/api/v2/{base_url}/{operator}[/{endpoint}]``
    and the request carries ``date`` and ``key`` as query parameters.

    Args:
        base_url: Path segment placed right after ``/api/v2/``.
        date: Value sent as the ``date`` query parameter.
        operator: Operator path segment; defaults to ``"sl"``.
        endpoint: Optional trailing path segment. An empty string or ``None``
            means the URL stops after the operator.

    Returns:
        The ``requests`` response object, returned as-is. The HTTP status is
        NOT checked here; callers must inspect it before using the body.

    Raises:
        RuntimeError: If the module-level ``KODA_KEY`` is unset or empty.
        Exceptions raised by ``requests.get`` (e.g. on timeout) propagate.
    """
    # requests silently drops query parameters whose value is None, so a
    # missing key would otherwise go out as an unauthenticated request and
    # only fail later with a confusing server-side error.
    if not KODA_KEY:
        raise RuntimeError(
            "KODA_KEY is not set: define API_KODA_KEY in the environment or .env file"
        )

    api_url = f"https://api.koda.trafiklab.se/KoDa/api/v2/{base_url}/{operator}"
    if endpoint:
        api_url = f"{api_url}/{endpoint}"

    params = {
        "date": date,
        "key": KODA_KEY
    }

    response = requests.get(api_url, params=params, timeout=20)

    # Lightweight trace of the call: URL (without the key), status, body size.
    print(api_url)
    print(response)
    print(len(response.content), "BYTES")

    return response

def call_koda_history_api(date):
    """Fetch the "gtfs-rt" TripUpdates data for ``date`` via call_koda_api.

    The operator is left at call_koda_api's default; the response object is
    returned unchanged.
    """
    return call_koda_api("gtfs-rt", date, endpoint="TripUpdates")

def call_koda_reference_api(date):
    """Fetch the "gtfs-static" data for ``date`` via call_koda_api.

    No endpoint segment is used and the operator is left at call_koda_api's
    default; the response object is returned unchanged.
    """
    return call_koda_api("gtfs-static", date)

In [15]:
# Download the archived GTFS-RT TripUpdates for 2025-03-15. The response body
# is large (about 41 MB in the recorded run) and is held entirely in memory.
r_history = call_koda_history_api("2025-03-15")

https://api.koda.trafiklab.se/KoDa/api/v2/gtfs-rt/sl/TripUpdates
<Response [200]>
41448769 BYTES


In [6]:
# Download the GTFS static (reference) data for the same date as the
# TripUpdates history request.
r_reference = call_koda_reference_api("2025-03-15")

https://api.koda.trafiklab.se/KoDa/api/v2/gtfs-static/sl
<Response [200]>
52438156 BYTES


In [24]:
def read_koda_history_day(request, items_by_batch=400):
    """Decode every ``.pb`` member of a 7z archive into GTFS-Realtime entities.

    The archive bytes are taken from ``request.content``. Members whose name
    ends with ``.pb`` are extracted to a temporary directory in batches,
    parsed as ``FeedMessage`` and every entity is copied into one flat list.
    Each extracted file is deleted as soon as it has been parsed, and the
    temporary directory is always removed before returning or raising.

    If extracting a whole batch fails, the batch is retried one member at a
    time so that only the members that really cannot be extracted are lost.

    Args:
        request: Object whose ``content`` attribute holds the bytes of a 7z
            archive.
        items_by_batch: Number of archive members extracted per pass; must be
            at least 1.

    Returns:
        A tuple ``(history_entities, bad_files)`` where ``history_entities``
        is the list of copied feed entities and ``bad_files`` is a list of
        ``(member_name, reason)`` pairs for members that could not be
        extracted, read or decoded.

    Raises:
        ValueError: If ``items_by_batch`` is smaller than 1.
        Errors raised while opening or listing the archive propagate.
    """
    if items_by_batch < 1:
        raise ValueError(f"items_by_batch must be >= 1, got {items_by_batch!r}")

    # basicConfig has no effect when the root logger is already configured,
    # so repeated calls of this function do not stack handlers.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)s | %(message)s",
        datefmt="%H:%M:%S"
    )

    logger = logging.getLogger("gtfs")

    archive_bytes = io.BytesIO(request.content)

    history_entities = []
    bad_files = []

    BATCH = items_by_batch

    # One message object is reused for every file; entities are deep-copied
    # out of it before the next parse overwrites its content.
    feed = gtfs_realtime_pb2.FeedMessage()

    def extract_members(names):
        # The archive is reopened from the start of the buffer for each
        # extraction pass.
        archive_bytes.seek(0)
        with py7zr.SevenZipFile(archive_bytes, mode="r") as z:
            z.extract(path=tmpdir, targets=names)

    # 1) list the candidate members
    archive_bytes.seek(0)
    with py7zr.SevenZipFile(archive_bytes, mode="r") as z:
        candidates = [n for n in z.getnames() if n.lower().endswith(".pb")]

    print("Nb fichiers .pb:", len(candidates))

    # Temporary files
    tmpdir = tempfile.mkdtemp(prefix="koda_")
    tmp = Path(tmpdir)

    try:
        # 2) extract + parse, batch by batch
        for i in range(0, len(candidates), BATCH):
            batch = candidates[i:i+BATCH]

            logger.info(
                "Batch %d–%d / %d (%.1f%%)",
                i + 1,
                min(i + BATCH, len(candidates)),
                len(candidates),
                100 * (i + len(batch)) / len(candidates)
            )

            try:
                extract_members(batch)
                extracted = batch
            except Exception as batch_error:
                # If the batch fails, fall back to one-by-one extraction for
                # this batch only, and keep the members that succeed so they
                # are still parsed below.
                logger.warning(
                    "Batch extraction failed (%r); retrying file by file",
                    batch_error,
                )
                extracted = []
                for name in batch:
                    try:
                        extract_members([name])
                    except Exception as e2:
                        bad_files.append((name, f"ExtractError: {e2!r}"))
                    else:
                        extracted.append(name)

            for name in extracted:
                p = tmp / name
                try:
                    raw = p.read_bytes()
                    feed.Clear()
                    try:
                        feed.ParseFromString(raw)
                    except DecodeError as de:
                        bad_files.append((name, f"DecodeError: {de!r}"))
                        continue

                    for entity in feed.entity:
                        history_entities.append(copy.deepcopy(entity))

                except Exception as e:
                    bad_files.append((name, f"Read/ParseError: {e!r}"))
                finally:
                    # Free disk space as soon as a file has been handled.
                    p.unlink(missing_ok=True)
    finally:
        # Remove the temporary directory even if an unexpected error escapes.
        shutil.rmtree(tmpdir, ignore_errors=True)

    print("✅ Total entités enregistrées:", len(history_entities))
    print("⚠️ Fichiers ignorés:", len(bad_files))
    print("Exemples:", bad_files[:5])

    return history_entities, bad_files

In [25]:
# Decode the whole day of TripUpdates, extracting 500 .pb files per batch.
# bad_files receives (name, reason) pairs for files that could not be used.
history_entities, bad_files = read_koda_history_day(r_history, 500)

20:03:22 | INFO | Batch 1–500 / 6031 (8.3%)


Nb fichiers .pb: 6031


20:03:24 | INFO | Batch 501–1000 / 6031 (16.6%)
20:03:26 | INFO | Batch 1001–1500 / 6031 (24.9%)
20:03:28 | INFO | Batch 1501–2000 / 6031 (33.2%)
20:03:32 | INFO | Batch 2001–2500 / 6031 (41.5%)
20:03:37 | INFO | Batch 2501–3000 / 6031 (49.7%)
20:03:45 | INFO | Batch 3001–3500 / 6031 (58.0%)
20:03:54 | INFO | Batch 3501–4000 / 6031 (66.3%)
20:04:07 | INFO | Batch 4001–4500 / 6031 (74.6%)
20:04:19 | INFO | Batch 4501–5000 / 6031 (82.9%)
20:04:28 | INFO | Batch 5001–5500 / 6031 (91.2%)
20:04:39 | INFO | Batch 5501–6000 / 6031 (99.5%)
20:04:48 | INFO | Batch 6001–6031 / 6031 (100.0%)


✅ Total entités enregistrées: 3113034
⚠️ Fichiers ignorés: 0
Exemples: []


In [26]:
# Print the text form of the first 200 decoded entities as a quick visual check.
for e in islice(history_entities, 200):
    print(e)

id: "14010516897425752"
trip_update {
  trip {
    trip_id: "14010000685561305"
    start_date: "20250314"
    schedule_relationship: SCHEDULED
  }
  vehicle {
    id: "9031001003003885"
  }
  stop_time_update {
    stop_sequence: 1
    stop_id: "9022001041441002"
    arrival {
      delay: -131
      time: 1741983709
      uncertainty: 0
    }
    departure {
      delay: 9018
      time: 1741992858
      uncertainty: 0
    }
  }
  timestamp: 1741992858
}

id: "14010516479723124"
trip_update {
  trip {
    trip_id: "14010000668271329"
    start_date: "20250314"
    schedule_relationship: SCHEDULED
  }
  vehicle {
    id: "9031001004302576"
  }
  stop_time_update {
    stop_sequence: 25
    stop_id: "9022001006071004"
    arrival {
      delay: -65
      time: 1741992535
      uncertainty: 0
    }
    departure {
      delay: 9
      time: 1741992609
      uncertainty: 0
    }
  }
  stop_time_update {
    stop_sequence: 26
    stop_id: "9022001006081002"
    arrival {
      delay: -66
