In [None]:
import csv
import io
import json
import os
import tempfile
import zipfile
from collections import defaultdict
from datetime import datetime
from itertools import islice
from pathlib import Path

import pandas as pd
import py7zr
import requests
from dotenv import load_dotenv

# If you use GTFS Realtime protobufs, uncomment:
# from google.transit import gtfs_realtime_pb2

# Pull variables from a local .env file into the process environment
load_dotenv()

# API keys read from the environment; each is None when its variable is not set
GTFS_REGIONAL_RT_KEY = os.environ.get("GTFS_REGIONAL_RT_KEY")  # GTFS Regional Realtime key
GTFS_REGIONAL_STATIC_KEY = os.environ.get("GTFS_REGIONAL_STATIC_KEY")  # GTFS Regional Static key

In [None]:
import requests
import gzip
from google.transit import gtfs_realtime_pb2

# Example endpoint: adapt it to your own feed
url_rt = "https://exemple.com/gtfs-rt"
params_rt = {}  # e.g. {"key": "value"}

# HTTP request; raise on an HTTP error status
request_rt = requests.get(url_rt, params=params_rt, timeout=20)
request_rt.raise_for_status()

# Raw response body, decompressed only when it really is gzip (magic bytes 1f 8b)
raw = request_rt.content
raw = gzip.decompress(raw) if raw.startswith(b"\x1f\x8b") else raw

# Parse the bytes as a GTFS-Realtime FeedMessage
feed_rt = gtfs_realtime_pb2.FeedMessage()
feed_rt.ParseFromString(raw)

print("Entities:", len(feed_rt.entity))


In [None]:
import pandas as pd

def _optional(message, field_name):
    """Return ``message.<field_name>`` when it is explicitly set, else ``None``.

    Reading an unset optional protobuf field yields its default (0 for the
    numeric fields used here), which cannot be told apart from a real 0.
    ``HasField`` is used to distinguish the two cases. Calling this on an unset
    sub-message (e.g. ``stu.arrival`` when no arrival was sent) is safe: none of
    its fields are set, so ``None`` is returned.
    """
    return getattr(message, field_name) if message.HasField(field_name) else None


# Fixed column list so df_rt keeps its schema even when the feed has no TripUpdate
RT_COLUMNS = [
    "entity_id",
    "trip_id",
    "start_date",
    "schedule_relationship",
    "vehicle_id",
    "stop_sequence",
    "stop_id",
    "stop_arrival_delay",
    "stop_arrival_time",
    "stop_uncertainty",
    "departure_delay",
    "departure_time",
    "departure_uncertainty",
    "timestamp",
]

rows = []

for e in feed_rt.entity:
    # Only TripUpdate entities are kept
    if not e.HasField("trip_update"):
        continue

    tu = e.trip_update
    tr = tu.trip

    # One output row per stop_time_update of the trip
    for stu in tu.stop_time_update:
        row = {
            "entity_id": e.id,
            "trip_id": tr.trip_id,
            "start_date": tr.start_date,
            "schedule_relationship": tr.schedule_relationship,  # enum value; to be checked against the data
            "vehicle_id": tu.vehicle.id if tu.HasField("vehicle") else None,

            "stop_sequence": stu.stop_sequence,
            "stop_id": stu.stop_id,

            # Each sub-field is checked individually: an arrival/departure that
            # carries only `time` must not be reported as a delay of 0.
            "stop_arrival_delay": _optional(stu.arrival, "delay"),
            "stop_arrival_time": _optional(stu.arrival, "time"),
            "stop_uncertainty": _optional(stu.arrival, "uncertainty"),

            "departure_delay": _optional(stu.departure, "delay"),
            "departure_time": _optional(stu.departure, "time"),
            "departure_uncertainty": _optional(stu.departure, "uncertainty"),

            # A timestamp of 0 is treated as missing
            "timestamp": tu.timestamp if tu.timestamp else None,
        }

        rows.append(row)

# Passing `columns` guarantees the expected columns exist even when `rows` is empty
df_rt = pd.DataFrame(rows, columns=RT_COLUMNS)


In [None]:
import io
import zipfile

# Open the zip archive in memory and list the entries it contains
# NOTE(review): request_static is not assigned in any visible cell; confirm it is defined upstream
archive_bytes = io.BytesIO(request_static.content)

with zipfile.ZipFile(archive_bytes, mode="r") as z:
    names = [info.filename for info in z.infolist()]

print("Total entrées:", len(names))
print("Exemples:", names[:10])


In [None]:
# Give the join key the same pandas string dtype on both sides
# NOTE(review): df_trips is not created in any visible cell; confirm it is loaded upstream
df_rt["trip_id"] = df_rt["trip_id"].astype(pd.StringDtype())
df_trips["trip_id"] = df_trips["trip_id"].astype(pd.StringDtype())

# Left join GTFS-RT rows with the static trips table on trip_id
df_merge_on_trip = pd.merge(df_rt, df_trips, how="left", on="trip_id")

print(df_merge_on_trip.head())


In [None]:
# Left join with the routes table on route_id
# NOTE(review): df_routes is not created in any visible cell; confirm it is loaded upstream
df_merge_on_route = pd.merge(df_merge_on_trip, df_routes, how="left", on="route_id")

print(df_merge_on_route.head())


In [None]:
# Non-null counts per column for each route short name, sorted by the entity_id count (largest first)
(
    df_merge_on_route
    .groupby("route_short_name")
    .count()
    .sort_values("entity_id", ascending=False)
)


In [None]:
# Keep only the rows whose route_type equals 700
# NOTE(review): 700 is presumably the extended GTFS route type for bus service; confirm against the feed
# .copy() makes df_bus an independent frame, so later column assignments on it
# do not raise SettingWithCopyWarning.
df_bus = df_merge_on_route[df_merge_on_route["route_type"] == 700].copy()
df_bus

In [None]:
import io
import zipfile
import pandas as pd

# Load stops.txt from the in-memory zip archive
# NOTE(review): request_static is not assigned in any visible cell; confirm it is defined upstream
archive_bytes = io.BytesIO(request_static.content)

with zipfile.ZipFile(archive_bytes, "r") as z:
    with z.open("stops.txt") as f:
        # stop_id is an identifier, not a number: read it as a string so that
        # type inference cannot drop leading zeros or turn it into a float,
        # either of which would break the later join on stop_id.
        df_stops = pd.read_csv(f, dtype={"stop_id": "string"})

print(df_stops.head(20))


In [None]:
# Give the join key the same pandas string dtype on both sides.
# astype() returns new frames, so nothing is assigned into df_bus (a filtered
# slice of the merged table), which avoids SettingWithCopyWarning.
df_bus = df_bus.astype({"stop_id": "string"})
df_stops = df_stops.astype({"stop_id": "string"})

# Left join with the stops table on stop_id
df_global = df_bus.merge(
    df_stops,
    on="stop_id",
    how="left"
)

# sample(n) raises ValueError when n exceeds the number of rows, so cap it
print(df_global.sample(min(20, len(df_global))))


In [None]:
# Drop the columns that are not needed in the final table
df_final = df_global.drop(
    [
        "route_type",
        "route_desc",
        "stop_name",
        "location_type",
        "route_long_name",
        "parent_station",
        "platform_code",
    ],
    axis="columns",
)

print(df_final.head())


In [None]:
# Distinct route short names present in the final table
bus_numbers = df_final.loc[:, "route_short_name"].unique()
print(bus_numbers)