In [1]:
import pandas as pd
from typing import Union

# Lift the column display limit so wide DataFrames are rendered with every column
# instead of being truncated in cell outputs.
pd.set_option('display.max_columns', None)

In [9]:
def get_trip_informations(
    circulation_nb: int,
    date: int,
    gtfs_dir: str = "./gtfs/export_gtfs_voyages",
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """
    Get trip information based on the circulation number and date.

    Reads trips.txt, routes.txt, calendar_dates.txt, stop_times.txt and stops.txt
    from ``gtfs_dir``, joins them for the requested circulation and date, and
    replaces the numeric GTFS codes with readable labels.

    Parameters:
    - circulation_nb (int): The circulation number, compared with the
      ``trip_headsign`` column of trips.txt.
    - date (int): The date in the format YYYYMMDD, compared with the ``date``
      column of calendar_dates.txt.
    - gtfs_dir (str): Directory containing the GTFS text files. Defaults to the
      location that used to be hard-coded.

    Returns:
    - tuple[pd.DataFrame, pd.DataFrame]: ``(summary, merged_df)``.
      ``summary`` holds a single row indexed by "Circulation" (date, transport,
      route name, first/last stop with their times, and the served stops joined
      in one string). ``merged_df`` holds one row per stop time of the trip,
      ordered by ``stop_sequence``.

    Raises:
    - ValueError: If no trip matches ``circulation_nb``, if nothing is left
      after joining routes and calendar dates for ``date``, or if the trip has
      no stop times / stops.
    """
    from pathlib import Path  # local import: keeps the notebook's import cell untouched

    gtfs_path = Path(gtfs_dir)

    # Numeric GTFS codes -> labels shown in the resulting frames.
    direction_id_to_str: dict = {0: "Aller", 1: "Retour"}
    route_types_id_to_str: dict = {0: "Tramway", 1: "Metro", 2: "Train", 3: "Bus", 4: "Ferry", 5: "Cable car", 6: "Gondola", 7: "Funicular", 11: "Trolleybus"}
    exception_type_to_str: dict = {1: "Desservi", 2: "Supp"}
    pickup_type_to_str: dict = {0: "regular", 1: "no pickup", 2: "ask authorization"}
    dropoff_type_to_str: dict = {0: "regular", 1: "no dropoff", 2: "ask authorization"}
    location_type_to_str: dict = {0: "Arret", 1: "Gare"}
    # (pickup_type, drop_off_type) -> kind of stop; any other combination is "special".
    stop_type_by_flags: dict = {(0, 1): "Origin", (0, 0): "Stop", (1, 0): "Destination"}

    # Get the trip information related to the circulation number
    trips = pd.read_csv(gtfs_path / "trips.txt")
    trips = trips[trips["trip_headsign"] == circulation_nb].drop(columns=["shape_id", "trip_headsign"])
    if trips.empty:
        # Fail early with a clear message instead of an IndexError further down.
        raise ValueError(f"No trip found for circulation number {circulation_nb!r}.")
    # assign() returns a new frame, so the filtered slice is never mutated
    # (avoids SettingWithCopyWarning).
    trips = trips.assign(direction_id=trips["direction_id"].map(direction_id_to_str))

    # Get the route information related to the (first matching) trip
    routes = pd.read_csv(gtfs_path / "routes.txt")
    routes = routes[routes["route_id"] == trips["route_id"].iloc[0]].drop(
        columns=["route_desc", "route_url", "route_color", "route_text_color", "agency_id"]
    )
    routes = routes.assign(route_type=routes["route_type"].map(route_types_id_to_str))

    merged_df = pd.merge(trips, routes, on="route_id", how="inner")

    # Get the service information related to the trip (date, exception type)
    calendar_dates = pd.read_csv(gtfs_path / "calendar_dates.txt")
    calendar_dates = calendar_dates[
        calendar_dates["service_id"].isin(merged_df["service_id"])
        & (calendar_dates["date"] == date)
    ]
    calendar_dates = calendar_dates.assign(
        exception_type=calendar_dates["exception_type"].map(exception_type_to_str)
    )

    merged_df = pd.merge(merged_df, calendar_dates, on="service_id", how="inner")
    if merged_df.empty:
        raise ValueError(
            f"No route/service entry found for circulation {circulation_nb!r} on date {date!r}."
        )

    # Get the stop times related to the trip. They are sorted by stop_sequence
    # so the first/last rows really are the departure and the arrival, whatever
    # the row order in stop_times.txt.
    stop_times = pd.read_csv(gtfs_path / "stop_times.txt")
    stop_times = (
        stop_times[stop_times["trip_id"].isin(merged_df["trip_id"])]
        .drop(columns=["stop_headsign", "shape_dist_traveled"])
        .sort_values(["trip_id", "stop_sequence"], kind="stable")
    )
    # The stop type is derived from the numeric flags, so it is computed before
    # those flags are replaced by their labels.
    stop_flags = zip(stop_times["pickup_type"], stop_times["drop_off_type"])
    stop_times = stop_times.assign(
        stop_type=[stop_type_by_flags.get(flags, "special") for flags in stop_flags],
        pickup_type=stop_times["pickup_type"].map(pickup_type_to_str),
        drop_off_type=stop_times["drop_off_type"].map(dropoff_type_to_str),
    )

    merged_df = pd.merge(merged_df, stop_times, on="trip_id", how="inner")

    # Get the stops information related to the trip
    stops = pd.read_csv(gtfs_path / "stops.txt")
    stops = stops[stops["stop_id"].isin(merged_df["stop_id"])].drop(
        columns=["stop_desc", "zone_id", "stop_url"]
    )
    stops = stops.assign(location_type=stops["location_type"].map(location_type_to_str))

    merged_df = pd.merge(merged_df, stops, on="stop_id", how="inner")
    if merged_df.empty:
        raise ValueError(
            f"No stop times found for circulation {circulation_nb!r} on date {date!r}."
        )

    # Transform date to datetime and drop the join keys that are no longer needed
    merged_df = merged_df.assign(
        date=pd.to_datetime(merged_df["date"], format="%Y%m%d")
    ).drop(columns=["route_id", "service_id", "trip_id", "stop_id", "route_short_name", "parent_station"])

    # Only the rows flagged "Desservi" are listed in the summary string.
    served = merged_df[merged_df["exception_type"] == "Desservi"]
    stops_with_times = ", ".join(
        f"{name} à {arrival}"
        for name, arrival in zip(served["stop_name"], served["arrival_time"])
    )

    first_stop = merged_df.iloc[0]
    last_stop = merged_df.iloc[-1]
    train_info: dict = {
        "Circulation": circulation_nb,
        "Date": first_stop["date"],
        "Transport": first_stop["route_type"],
        "Trajet": first_stop["route_long_name"],
        "Gare de départ": first_stop["stop_name"],
        "Heure de départ": first_stop["departure_time"],
        "Gare d'arrivée": last_stop["stop_name"],
        "Heure d'arrivée": last_stop["arrival_time"],
        # Wrapped in a list so the DataFrame constructor gets a length (one row).
        "Arrêts": [stops_with_times],
    }

    df = pd.DataFrame(train_info).set_index("Circulation")

    return df, merged_df


In [10]:
# Look up circulation 7389 on 2024-08-03 (date passed as YYYYMMDD); the function
# returns a summary frame and the detailed merged frame, and the summary is displayed.
df, merged_df = get_trip_informations(7389, 20240803)
df

Unnamed: 0_level_0,Date,Transport,Trajet,Gare de départ,Heure de départ,Gare d'arrivée,Heure d'arrivée,Arrêts
Circulation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7389,2024-08-03,Train,Paris - Dunkerque par Bassin Minier,Paris Gare du Nord,19:51:00,Dunkerque,22:04:00,"Paris Gare du Nord à 19:51:00, Arras à 20:40:0..."


In [11]:
# Display the detailed frame returned by get_trip_informations next to the summary.
merged_df

Unnamed: 0,direction_id,block_id,route_long_name,route_type,date,exception_type,arrival_time,departure_time,stop_sequence,pickup_type,drop_off_type,stop_type,stop_name,stop_lat,stop_lon,location_type
0,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,19:51:00,19:51:00,0,regular,no dropoff,Origin,Paris Gare du Nord,48.880136,2.354851,Arret
1,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,20:40:00,20:50:00,1,no pickup,regular,Destination,Arras,50.286811,2.780159,Arret
2,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,21:02:00,21:05:00,2,no pickup,regular,Destination,Lens,50.427716,2.825984,Arret
3,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,21:16:00,21:19:00,3,no pickup,regular,Destination,Béthune,50.521091,2.642,Arret
4,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,21:39:00,21:42:00,4,no pickup,regular,Destination,Hazebrouck,50.724924,2.541463,Arret
5,Aller,1578,Paris - Dunkerque par Bassin Minier,Train,2024-08-03,Desservi,22:04:00,22:04:00,5,no pickup,regular,Destination,Dunkerque,51.031248,2.368515,Arret


In [12]:
import folium

# Collect the (lat, lon) pair of every stop once; the pairs are reused both for
# the markers and for the line joining the stops.
coordinates = merged_df[['stop_lat', 'stop_lon']].to_numpy().tolist()

# Centre the map on the mean latitude and longitude of the stops
center_lat = merged_df['stop_lat'].mean()
center_lon = merged_df['stop_lon'].mean()
mymap = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Drop one marker per stop, using the stop name as both popup and tooltip
for (lat, lon), stop_name in zip(coordinates, merged_df['stop_name']):
    marker = folium.Marker(location=[lat, lon], popup=stop_name, tooltip=stop_name)
    marker.add_to(mymap)

# Connect the stops, in row order, with a blue line
route_line = folium.PolyLine(locations=coordinates, color='blue')
route_line.add_to(mymap)

# Write the map to an HTML file
mymap.save('map.html')

# Leave the map as the last expression so the notebook renders it
mymap