In [5]:
import os
import gzip
import zipfile
from io import BytesIO
from datetime import date as dt_date
import tempfile

import requests
import pandas as pd
import py7zr
from dotenv import load_dotenv
from google.transit import gtfs_realtime_pb2


# ----------------------------
# 0) ENV / KEYS
# ----------------------------
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

KODA_KEY = os.getenv("API_KODA_KEY")
GTFS_RT_KEY = os.getenv("API_GTFS_RT_KEY")  # GTFS Regional Realtime key
GTFS_REGIONAL_STATIC_KEY = os.getenv("GTFS_REGIONAL_STATIC_KEY")  # GTFS Regional Static key

# Fail fast, naming the environment variables that are actually read above
# (the message used to list names that os.getenv() never looks up).
_missing_env = [
    name
    for name, value in (
        ("API_KODA_KEY", KODA_KEY),
        ("API_GTFS_RT_KEY", GTFS_RT_KEY),
        ("GTFS_REGIONAL_STATIC_KEY", GTFS_REGIONAL_STATIC_KEY),
    )
    if not value
]
if _missing_env:
    raise RuntimeError(
        "Clés manquantes. Vérifie ton .env:\n"
        + "".join(f"{name}=...\n" for name in _missing_env)
    )


# ----------------------------
# 1) Helpers: decode / parse
# ----------------------------
def _maybe_decompress_7z_to_pb(raw: bytes) -> bytes:
    """Return the content of a ``.pb`` file when *raw* is a 7z archive.

    The archive is extracted to a temporary directory with ``extractall``
    (kept for compatibility with older py7zr versions) and the bytes of one
    extracted ``.pb`` file are returned. Only ONE file is returned: if the
    archive holds several ``.pb`` files, the one whose extracted path sorts
    first is used and the others are ignored.

    Args:
        raw: Payload that may or may not be a 7z archive.

    Returns:
        *raw* unchanged when it does not start with the 7z signature,
        otherwise the bytes of the selected ``.pb`` file.

    Raises:
        ValueError: The archive contains no file ending in ``.pb``.
    """
    # Match the full 6-byte 7z signature; testing only b"7z" would send any
    # payload that merely starts with those two ASCII letters to py7zr.
    if not raw.startswith(b"7z\xbc\xaf\x27\x1c"):
        return raw

    with tempfile.TemporaryDirectory() as tmpdir:
        with py7zr.SevenZipFile(BytesIO(raw), mode="r") as archive:
            archive.extractall(path=tmpdir)

        # os.walk() order is filesystem-dependent, so sort the candidates to
        # make the selected file deterministic.
        pb_paths = sorted(
            os.path.join(root, fn)
            for root, _, files in os.walk(tmpdir)
            for fn in files
            if fn.endswith(".pb")
        )
        if not pb_paths:
            raise ValueError("Archive 7z reçue, mais aucun fichier .pb trouvé après extraction.")

        # Read before leaving the outer ``with``: the directory is deleted on exit.
        with open(pb_paths[0], "rb") as f:
            return f.read()


def _maybe_decompress_gzip(raw: bytes) -> bytes:
    """Gunzip *raw* when it starts with the gzip magic number, else return it as is."""
    # A slice never raises on short input, so this also covers len(raw) < 2.
    looks_gzipped = raw[:2] == b"\x1f\x8b"
    return gzip.decompress(raw) if looks_gzipped else raw


def parse_gtfs_rt_any(content: bytes) -> gtfs_realtime_pb2.FeedMessage:
    """Parse a GTFS-RT protobuf payload that may be wrapped in 7z and/or gzip.

    The payload goes through the 7z unwrapping helper first and the gzip one
    second, so the accepted inputs are: a raw protobuf, a gzipped protobuf,
    or a 7z archive containing a (possibly gzipped) ``.pb`` file.

    Args:
        content: Response body as received from the API.

    Returns:
        The parsed ``FeedMessage``.

    Raises:
        ValueError: The unwrapped bytes could not be parsed as a
            ``FeedMessage``; the original parser error is attached as
            ``__cause__``. Errors raised by the decompression helpers are
            not caught here and propagate unchanged.
    """
    raw = _maybe_decompress_gzip(_maybe_decompress_7z_to_pb(content))

    feed = gtfs_realtime_pb2.FeedMessage()
    try:
        feed.ParseFromString(raw)
    except Exception as e:
        # Show the first bytes: they usually reveal what was actually received.
        snippet = raw[:32]
        raise ValueError(
            "Impossible de parser GTFS-RT. Le contenu n'est peut-être pas du protobuf.\n"
            f"Début bytes: {snippet}\n"
            f"Erreur: {repr(e)}"
        ) from e
    return feed


def build_trip_to_route_short_name_from_static_zip(zip_bytes: bytes) -> pd.DataFrame:
    """Build a ``trip_id`` -> route lookup table from a static GTFS ZIP.

    ``trips.txt`` is left-joined with ``routes.txt`` on ``route_id``. All
    values are read as strings.

    Args:
        zip_bytes: Content of a GTFS static ZIP holding ``trips.txt`` and
            ``routes.txt`` at the archive root.

    Returns:
        DataFrame indexed by ``trip_id`` with columns ``route_id``,
        ``direction_id``, ``route_short_name`` and ``route_long_name``.
        A column that the feed does not provide is filled with ``None``.

    Raises:
        ValueError: ``trips.txt`` or ``routes.txt`` is missing from the ZIP.
        KeyError: ``trip_id``/``route_id`` is missing from ``trips.txt`` or
            ``route_id`` is missing from ``routes.txt``.
    """
    with zipfile.ZipFile(BytesIO(zip_bytes)) as archive:
        members = archive.namelist()
        if "trips.txt" not in members or "routes.txt" not in members:
            raise ValueError(f"ZIP static GTFS ne contient pas trips.txt/routes.txt. Contenu: {members[:20]}")

        # Close each member handle as soon as it has been parsed.
        with archive.open("trips.txt") as fh:
            trips = pd.read_csv(fh, dtype=str)
        with archive.open("routes.txt") as fh:
            routes = pd.read_csv(fh, dtype=str)

    # These columns are not mandatory in GTFS; create them empty when absent
    # so the column selection below cannot fail on a valid feed.
    optional_columns = (
        (trips, ("direction_id",)),
        (routes, ("route_short_name", "route_long_name")),
    )
    for frame, names in optional_columns:
        for name in names:
            if name not in frame.columns:
                frame[name] = None

    mapping = trips[["trip_id", "route_id", "direction_id"]].merge(
        routes[["route_id", "route_short_name", "route_long_name"]],
        on="route_id",
        how="left",
    )

    return mapping.set_index("trip_id")


def feed_tripupdates_to_df(feed: gtfs_realtime_pb2.FeedMessage, trip_map: pd.DataFrame) -> pd.DataFrame:
    """Flatten the trip updates of a GTFS-RT feed into one row per ``trip_update``.

    Each row is enriched with ``route_id``, ``route_short_name`` and
    ``direction_id`` looked up in *trip_map* by ``trip_id``. Trips that are
    absent from *trip_map* get ``None`` for those three fields. (Rows can be
    exploded per ``stop_time_update`` afterwards if per-stop data is needed.)

    Args:
        feed: Parsed feed; entities without a ``trip_update`` are skipped.
        trip_map: Frame indexed by ``trip_id``. If a ``trip_id`` appears
            several times in the index, its first row is used.

    Returns:
        DataFrame with columns ``trip_id``, ``start_date``, ``route_id``,
        ``route_short_name``, ``direction_id`` and ``n_stop_updates``. The
        columns are present even when the feed contains no trip update.
    """
    columns = [
        "trip_id",
        "start_date",
        "route_id",
        "route_short_name",
        "direction_id",
        "n_stop_updates",
    ]
    updates = [ent.trip_update for ent in feed.entity if ent.HasField("trip_update")]

    # Build a plain dict restricted to the trips present in the feed: one
    # vectorised filter instead of a DataFrame ``.loc`` lookup per entity.
    wanted_ids = list({tu.trip.trip_id for tu in updates})
    subset = trip_map.loc[trip_map.index.isin(wanted_ids)]
    # ``to_dict(orient="index")`` needs a unique index; a duplicated trip_id
    # would otherwise also make ``.loc`` return a frame instead of a row.
    subset = subset.loc[~subset.index.duplicated(keep="first")]
    lookup = subset.to_dict(orient="index")

    rows = []
    for tu in updates:
        tid = tu.trip.trip_id
        info = lookup.get(tid, {})
        rows.append(
            {
                "trip_id": tid,
                # An unset protobuf string reads as "", reported here as None.
                "start_date": tu.trip.start_date or None,
                "route_id": info.get("route_id"),
                "route_short_name": info.get("route_short_name"),
                "direction_id": info.get("direction_id"),
                "n_stop_updates": len(tu.stop_time_update),
            }
        )

    # Passing ``columns`` keeps the schema stable when ``rows`` is empty.
    return pd.DataFrame(rows, columns=columns)


# ----------------------------
# 2) KODA: historique du jour
# ----------------------------
def koda_day_with_route_short_name(operator: str, day: str) -> pd.DataFrame:
    """Fetch KoDa trip updates for *day* and enrich them with route names.

    Downloads the KoDa GTFS-RT TripUpdates payload and the KoDa static GTFS
    ZIP for the same operator and date, then joins them through ``trip_id``.

    Args:
        operator: Operator code inserted in the KoDa URLs.
        day: Date sent as the ``date`` query parameter.

    Returns:
        The trip-update DataFrame with two extra columns: ``source`` (always
        ``"koda"``) and ``day`` (the *day* argument).

    Raises:
        requests.HTTPError: Either request returned an error status.
    """
    # Realtime payload first (may be 7z, gzip or raw protobuf); it is parsed
    # before the larger static download is attempted.
    rt_response = requests.get(
        f"https://api.koda.trafiklab.se/KoDa/api/v2/gtfs-rt/{operator}/TripUpdates",
        params={"date": day, "key": KODA_KEY},
        timeout=120,
    )
    rt_response.raise_for_status()
    feed = parse_gtfs_rt_any(rt_response.content)

    # Static GTFS ZIP for the same day.
    static_response = requests.get(
        f"https://api.koda.trafiklab.se/KoDa/api/v2/gtfs-static/{operator}",
        params={"date": day, "key": KODA_KEY},
        timeout=180,
    )
    static_response.raise_for_status()
    trip_map = build_trip_to_route_short_name_from_static_zip(static_response.content)

    enriched = feed_tripupdates_to_df(feed, trip_map)
    return enriched.assign(source="koda", day=day)


# ----------------------------
# 3) Realtime: live du moment
# ----------------------------
def realtime_now_with_route_short_name(operator: str) -> pd.DataFrame:
    """Fetch the current GTFS-RT trip updates and enrich them with route names.

    Downloads the live TripUpdates feed and the current static GTFS ZIP for
    *operator*, then joins them through ``trip_id``.

    Args:
        operator: Operator code inserted in the download URLs.

    Returns:
        The trip-update DataFrame with two extra columns: ``source`` (always
        ``"realtime"``) and ``day`` (today's local date as a string).

    Raises:
        requests.HTTPError: Either request returned an error status.
    """
    # Live feed first; it is parsed before the static download is attempted.
    live_response = requests.get(
        f"https://opendata.samtrafiken.se/gtfs-rt/{operator}/TripUpdates.pb",
        params={"key": GTFS_RT_KEY},
        timeout=60,
    )
    live_response.raise_for_status()
    feed = parse_gtfs_rt_any(live_response.content)

    # Current static GTFS ZIP.
    static_response = requests.get(
        f"https://opendata.samtrafiken.se/gtfs/{operator}/{operator}.zip",
        params={"key": GTFS_REGIONAL_STATIC_KEY},
        timeout=240,
    )
    static_response.raise_for_status()
    trip_map = build_trip_to_route_short_name_from_static_zip(static_response.content)

    enriched = feed_tripupdates_to_df(feed, trip_map)
    return enriched.assign(source="realtime", day=str(dt_date.today()))


# ----------------------------
# 4) DEMO / EXÉCUTION
# ----------------------------
# Demo parameters.
operator = "sl"
day = "2025-09-04"
TARGET_LINE = "1"  # public line number used for the example filter below

# Historical data for the chosen day.
df_koda = koda_day_with_route_short_name(operator, day)
print("KoDa df shape:", df_koda.shape)
print(df_koda.head(10))

# Live data at the time the cell runs.
df_rt = realtime_now_with_route_short_name(operator)
print("Realtime df shape:", df_rt.shape)
print(df_rt.head(10))

# Distinct public line numbers seen in each source.
koda_lines = df_koda["route_short_name"]
print("KoDa route_short_name sample:", sorted(set(koda_lines.dropna())))
rt_lines = df_rt["route_short_name"]
print("Realtime route_short_name sample:", sorted(set(rt_lines.dropna())))

# Example: number of trip updates on one line in each source.
print("KoDa TARGET_LINE:", TARGET_LINE, koda_lines.eq(TARGET_LINE).sum())
print("RT   TARGET_LINE:", TARGET_LINE, rt_lines.eq(TARGET_LINE).sum())


KoDa df shape: (53, 8)
             trip_id start_date          route_id route_short_name  \
0                      20250904              None             None   
1  14010000675625909   20250904  9011001059200000              592   
2  14010000670441697   20250904  9011001059100000              591   
3  14010000698869168   20250904  9011001079100000              791   
4  14010000698124406   20250904  9011001089300000              893   
5  14010000670441768   20250904  9011001059100000              591   
6  14010000673374206   20250904  9011001059300000              593   
7  14010000699240035   20250904  9011001089200000              892   
8  14010000695935850   20250904  9011001079100000              791   
9  14010000675701096   20250904  9011001059800000              598   

  direction_id  n_stop_updates source         day  
0         None              19   koda  2025-09-04  
1            1              10   koda  2025-09-04  
2            0               7   koda  2025-09-04 