### Data Loader

In [22]:
import requests
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict

In [225]:
def get_gtfs_rt_data(gtfs_rt_url: str, timeout: float = 30.0) -> dict:
    """
    Fetches a GTFS-RT feed from the given URL and returns it as a dictionary.
    Args:
        gtfs_rt_url (str): The URL to fetch the GTFS-RT data from.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.0.
    Returns:
        dict: The FeedMessage parsed from the response body, converted to a
            dictionary with MessageToDict.
    Raises:
        ValueError: If an error occurs while fetching the GTFS-RT data
            (connection failure, timeout or HTTP error status).
    """
    # Get response. Without a timeout, requests may wait forever on a stalled
    # connection, so one is always passed.
    try:
        response = requests.get(gtfs_rt_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Surface the failure as the documented ValueError instead of letting
        # a None response crash later with an unrelated AttributeError.
        raise ValueError(
            f"Failed to fetch GTFS-RT data from {gtfs_rt_url}: {e}"
        ) from e

    # Parse GTFS-RT data (errors raised by the protobuf parser propagate
    # unchanged; they are not caught here).
    feed = gtfs_realtime_pb2.FeedMessage()
    feed.ParseFromString(response.content)

    # Convert FeedMessage to dictionary
    return MessageToDict(feed)

# Download and decode the trip-updates feed at the URL below.
# NOTE: this performs a network request every time the cell is executed.
feed_dict = get_gtfs_rt_data(gtfs_rt_url="https://proxy.transport.data.gouv.fr/resource/sncf-tgv-gtfs-rt-trip-updates")

### Transformer

In [227]:
def transform_feed(feed_dict: dict) -> tuple:
    """
    Flattens a GTFS-RT trip-updates feed dictionary into trip rows and
    stop-time rows.

    The input dictionary is left untouched, so the same feed can be
    transformed more than once. Fields that are absent from the feed are
    tolerated: missing values become None and missing delays become 0.
    Entities that carry no 'tripUpdate' are skipped.

    Args:
        feed_dict (dict): Feed dictionary holding an 'entity' list whose items
            contain a 'tripUpdate' with 'trip', 'timestamp' and
            'stopTimeUpdate' keys. The 'header' key, if present, is ignored.
    Returns:
        tuple: A pair (all_trip_data, all_stop_times_data).
            all_trip_data is a list of dicts with keys 'trip_id',
            'departure_date' and 'departure_time'.
            all_stop_times_data is a list of dicts with keys 'trip_id',
            'stop_id', 'arrival_time', 'departure_time', 'delay_arrival',
            'delay_departure' and 'update_time'.
    """
    all_trip_data = []
    all_stop_times_data = []

    # Read the entity list without deleting or rebinding anything on the
    # caller's dictionary; a feed with no entities yields two empty lists.
    for entity in feed_dict.get('entity', []):
        trip_update = entity.get('tripUpdate')
        if trip_update is None:
            # Not a trip update (nothing to extract from this entity).
            continue

        trip = trip_update.get('trip', {})
        trip_id = trip.get('tripId')
        updated_at = trip_update.get('timestamp')

        all_trip_data.append({
            'trip_id': trip_id,
            'departure_date': trip.get('startDate'),
            'departure_time': trip.get('startTime'),
        })

        for stop_time_update in trip_update.get('stopTimeUpdate', []):
            # Either event may be missing, and each event may carry only a
            # time or only a delay.
            arrival = stop_time_update.get('arrival', {})
            departure = stop_time_update.get('departure', {})

            all_stop_times_data.append({
                'trip_id': trip_id,
                'stop_id': stop_time_update.get('stopId'),
                'arrival_time': arrival.get('time'),
                'departure_time': departure.get('time'),
                # Delay divided by 60 and truncated toward zero
                # (GTFS-RT delays are in seconds, so this gives whole minutes).
                'delay_arrival': int(arrival.get('delay', 0) / 60),
                'delay_departure': int(departure.get('delay', 0) / 60),
                'update_time': updated_at,
            })

    return all_trip_data, all_stop_times_data

# Split the downloaded feed into per-trip rows and per-stop-time rows.
all_trip_data, all_stop_times_data = transform_feed(feed_dict)