In [1]:
# import

import pandas as pd
import plotly.express as px
import pyproj
import plotly.graph_objects as go
from itertools import product
from collections import defaultdict

import plotly.io as pio
pio.renderers.default="vscode+pdf"
# pio.templates["custom"] = pio.templates["plotly_white"]
# pio.templates["custom"]["layout"]["font"] = {"size": 15}
# pio.templates.default = "custom"

In [2]:
# Load the data: routing outputs of the baseline and GPE scenarios, plus the GTFS routes table.
# The eqasim_*.csv files are read with ";" as separator; trips tables keep only mode == "pt".

eqasim_csv = dict(sep=";")

baseline_legs = pd.read_csv("baseline_output/routing_pt_june/eqasim_legs.csv", **eqasim_csv)
baseline_pt = pd.read_csv("baseline_output/routing_pt_june/eqasim_pt.csv", **eqasim_csv)
baseline_trips = pd.read_csv("baseline_output/routing_pt_june/eqasim_trips.csv", **eqasim_csv)
baseline_trips = baseline_trips.loc[baseline_trips["mode"].eq("pt")]

gpe_legs = pd.read_csv("gpe_output/routing_pt_june/eqasim_legs.csv", **eqasim_csv)
gpe_pt = pd.read_csv("gpe_output/routing_pt_june/eqasim_pt.csv", **eqasim_csv)
gpe_trips = pd.read_csv("gpe_output/routing_pt_june/eqasim_trips.csv", **eqasim_csv)
gpe_trips = gpe_trips.loc[gpe_trips["mode"].eq("pt")]

routes = pd.read_csv("../implementation_gtfs/GTFS_versions/GTFS_completed/routes.txt")

In [3]:
# Check that both scenarios really describe the same trips: pair the trips on
# (person_id, person_trip_id) and compare origin, destination and departure time.

same_col = ["person_id", "person_trip_id", "origin_x", "origin_y", "destination_x", "destination_y", "departure_time", "travel_time"]

# Outer join: a trip present in only one scenario gets NaN on the other side,
# which compares unequal below and is therefore reported as a difference.
merged_data = pd.merge(
    baseline_trips[same_col],
    gpe_trips[same_col],
    how="outer",
    on=["person_id", "person_trip_id"],
    suffixes=("_baseline", "_gpe"),
)

# One boolean "<col>_match" column per compared attribute (travel_time is not compared).
for col in ["origin_x", "origin_y", "destination_x", "destination_y", "departure_time"]:
    merged_data[f"{col}_match"] = merged_data[f"{col}_baseline"].eq(merged_data[f"{col}_gpe"])

match_flags = [name for name in merged_data.columns if name.endswith("_match")]
every_attribute_matches = merged_data[match_flags].all(axis=1)
diff_rows = merged_data.loc[~every_attribute_matches]

print("Différences détectées :", len(diff_rows))
print(f"Cela représente {(len(diff_rows)/len(merged_data)*100):.2f}%")


Différences détectées : 378
Cela représente 0.16%


In [4]:
# Keep only the trips whose compared attributes all match in both scenarios, then
# compute the travel-time difference (baseline - GPE): positive means GPE is faster.
match_cols = [c for c in merged_data.columns if c.endswith("_match")]

# .copy() gives an independent frame: the column assignments below would otherwise
# write into a filtered slice of the previous merged_data (SettingWithCopyWarning).
merged_data = merged_data.loc[merged_data[match_cols].all(axis=1)].copy()
merged_data["travel_time_diff"] = merged_data["travel_time_baseline"] - merged_data["travel_time_gpe"]
# Difference expressed as a percentage of the baseline travel time.
merged_data["travel_time_diff_percent"] = (merged_data["travel_time_diff"] / merged_data["travel_time_baseline"]) * 100

# Number of trips per outcome (rows with a NaN difference fall in none of the three).
travel_time_diff = merged_data["travel_time_diff"]
labels = ['Pas de changement', 'Gain de temps', 'Perte de temps']
counts = [
    int((travel_time_diff == 0).sum()),
    int((travel_time_diff > 0).sum()),
    int((travel_time_diff < 0).sum()),
]

df_pie = pd.DataFrame({
    "Type de changement": labels,
    "Nombre de trajets": counts
})

fig = px.pie(
    df_pie,
    names="Type de changement",
    values="Nombre de trajets",
    title="Répartition des gains et pertes de temps",
    color="Type de changement",
    color_discrete_map={
        "Pas de changement": "#636EFA",
        "Gain de temps": "#00CC96",
        "Perte de temps": "#EF553B"
    },
)

fig.update_traces(textposition='inside', textinfo='percent+label')
fig.write_image("outputs/plots/routing/repartition_gain_perte_temps.png", width = 1000) 
fig.show()

In [5]:
# Trips with a positive (baseline - GPE) travel-time difference, and their summary statistics.
is_gain = merged_data["travel_time_diff"] > 0
positif = merged_data[is_gain]

positif.loc[:, ["travel_time_diff", "travel_time_diff_percent"]].describe()

Unnamed: 0,travel_time_diff,travel_time_diff_percent
count,16094.0,16094.0
mean,765.969927,19.714077
std,569.720009,12.012196
min,1.0,0.022242
25%,360.0,10.573572
50%,673.0,18.166285
75%,1022.0,27.115157
max,17340.0,77.03243


In [6]:
# Histogram of the positive travel-time differences, with the x values divided by 60.
gain_over_60 = positif["travel_time_diff"] / 60

hist_options = dict(
    nbins=75,
    title="Distribution des différences de temps de trajet (baseline - GPE)",
    labels={"x": "Différence de temps (minutes)", "count": "Nombre de trajets"},
    color_discrete_sequence=["#636EFA"],
)
fig = px.histogram(positif, x=gain_over_60, **hist_options)

fig.show()
fig.write_image("outputs/plots/routing/distribution_temps_trajet.png", width = 1000) 

In [7]:
# Histogram of the positive travel-time differences, as a percentage of the baseline time.
hist_options = dict(
    x='travel_time_diff_percent',
    nbins=20,
    title="Distribution des différences de temps de trajet en pourcentage (baseline - GPE)",
    labels={"travel_time_diff_percent": "Différence de temps (%)", "count": "Nombre de trajets"},
    color_discrete_sequence=["#636EFA"],
)
fig = px.histogram(positif, **hist_options)

# Save the histogram, then display it
fig.write_image("outputs/plots/routing/distribution_temps_trajet_pourcentage.png", width = 1000) 
fig.show()

In [8]:
# Trips with a negative (baseline - GPE) travel-time difference, and their summary statistics.
is_loss = merged_data["travel_time_diff"] < 0
negatif = merged_data[is_loss]

negatif.loc[:, ["travel_time_diff", "travel_time_diff_percent"]].describe()

Unnamed: 0,travel_time_diff,travel_time_diff_percent
count,746.0,746.0
mean,-256.160858,-7.640292
std,214.40162,7.549193
min,-1320.0,-59.164969
25%,-378.5,-10.842055
50%,-216.5,-5.552151
75%,-90.5,-2.262083
max,-1.0,-0.017671


In [9]:
# Histogram of the negative travel-time differences, with the x values divided by 60.
loss_over_60 = negatif["travel_time_diff"] / 60

hist_options = dict(
    nbins=25,
    title="Distribution des différences de temps de trajet (baseline - GPE)",
    labels={"x": "Différence de temps (minutes)", "count": "Nombre de trajets"},
    color_discrete_sequence=["#636EFA"],
)
fig = px.histogram(negatif, x=loss_over_60, **hist_options)

fig.write_image("outputs/plots/routing/distrib_temps_trajet_negatif.png", width = 1000) 
fig.show()

In [10]:
# Histogram of the negative travel-time differences, as a percentage of the baseline time.
hist_options = dict(
    x='travel_time_diff_percent',
    nbins=20,
    title="Distribution des différences de temps de trajet en pourcentage (baseline - GPE)",
    labels={"travel_time_diff_percent": "Différence de temps (%)", "count": "Nombre de trajets"},
    color_discrete_sequence=["#636EFA"],
)
fig = px.histogram(negatif, **hist_options)

fig.write_image("outputs/plots/routing/distrib_temps_trajet_pourcentage_negatif.png", width = 1000) 
fig.show()

In [11]:
# For the trips with a negative time difference, count the rows of the pt tables
# per trip in each scenario and compare the two counts (baseline - GPE).
negatif_trip_ids = negatif[["person_id", "person_trip_id"]]

negatif_baseline_pt = pd.merge(
    baseline_pt,
    negatif_trip_ids,
    how="inner",
    on=["person_id", "person_trip_id"],
)
negatif_gpe_pt = pd.merge(
    gpe_pt,
    negatif_trip_ids,
    how="inner",
    on=["person_id", "person_trip_id"],
)

# Number of pt rows per trip in each scenario.
baseline_counts = (
    negatif_baseline_pt
    .groupby(["person_id", "person_trip_id"])
    .size()
    .rename("count_baseline")
    .reset_index()
)
gpe_counts = (
    negatif_gpe_pt
    .groupby(["person_id", "person_trip_id"])
    .size()
    .rename("count_gpe")
    .reset_index()
)

# Outer join: a trip missing on one side gets NaN there, hence a NaN count_diff.
merged_counts = baseline_counts.merge(gpe_counts, how="outer", on=["person_id", "person_trip_id"])
merged_counts["count_diff"] = merged_counts["count_baseline"] - merged_counts["count_gpe"]

merged_counts["count_diff"].describe()


count    746.000000
mean       0.895442
std        0.520511
min       -1.000000
25%        1.000000
50%        1.000000
75%        1.000000
max        3.000000
Name: count_diff, dtype: float64

In [12]:
# Histogram of the per-trip difference in pt row counts (baseline - GPE).
hist_options = dict(
    x="count_diff",
    nbins=50,
    title="Distribution des différences du nombre de correspondance (baseline - GPE)",
    labels={"count_diff": "Différence de correspondance", "count": "Nombre de trajets"},
    color_discrete_sequence=["#636EFA"],
)
fig = px.histogram(merged_counts, **hist_options)

fig.write_image("outputs/plots/routing/distrib_nb_corresp_neg.png", width = 1000) 
fig.show()

In [19]:


# Attach the count difference to each trip with a negative time difference,
# then draw the relative time difference as one box per count difference.
time_loss = negatif[["person_id", "person_trip_id", "travel_time_diff_percent"]]
count_change = merged_counts[["person_id", "person_trip_id", "count_diff"]]

merged_df = time_loss.merge(
    count_change,
    how="inner",
    on=["person_id", "person_trip_id"],
)

axis_labels = {
    "count_diff": "Différence de correspondances (baseline - GPE)",
    "travel_time_diff_percent": "Différence de temps de trajet (%)"
}

# Passing points="all" would also draw the individual points ("outliers" or False are the other options).
fig = px.box(
    merged_df,
    x="count_diff",
    y="travel_time_diff_percent",
    title="Variation du temps de trajet perdu (%) selon la différence de correspondances",
    labels=axis_labels,
    color_discrete_sequence=["#636EFA"],
)

fig.write_image("outputs/plots/routing/boxplot_temps_vs_corresp.png", width=1000)
fig.show()


In [20]:
# Show the slower trips that also have more pt rows with GPE than in the baseline
# (count_diff < 0).
merged_counts_neg = merged_counts[merged_counts["count_diff"] < 0]

# A trip is identified by (person_id, person_trip_id). Filtering on person_id alone
# would also return the other slower trips of the same person, so match on the pair.
trip_keys = ["person_id", "person_trip_id"]
neg_count_trips = pd.MultiIndex.from_frame(merged_counts_neg[trip_keys])
negatif[pd.MultiIndex.from_frame(negatif[trip_keys]).isin(neg_count_trips)]

Unnamed: 0,person_id,person_trip_id,origin_x_baseline,origin_y_baseline,destination_x_baseline,destination_y_baseline,departure_time_baseline,travel_time_baseline,origin_x_gpe,origin_y_gpe,...,destination_y_gpe,departure_time_gpe,travel_time_gpe,origin_x_match,origin_y_match,destination_x_match,destination_y_match,departure_time_match,travel_time_diff,travel_time_diff_percent
15486,10706679_0,0,661356.52283,6856710.0,653247.25602,6860221.0,26525.0,2495.0,661356.52283,6856710.0,...,6860221.0,26525.0,2534.0,True,True,True,True,True,-39.0,-1.563126
77204,2577977_1,0,673752.418872,6881721.0,678962.03424,6863505.0,50029.0,6289.0,673752.418872,6881721.0,...,6863505.0,50029.0,6737.0,True,True,True,True,True,-448.0,-7.123549
94434,3577538_0,0,615560.006903,6873832.0,621182.941295,6878146.0,35177.0,3645.0,615560.006903,6873832.0,...,6878146.0,35177.0,3836.0,True,True,True,True,True,-191.0,-5.240055


In [21]:
# Count how often each transit line appears in the pt tables of the trips whose
# travel time differs between scenarios, then aggregate those counts by GTFS route type.
merged_trips_change = merged_data.loc[merged_data["travel_time_diff"] != 0]
changed_trip_ids = merged_trips_change[["person_id", "person_trip_id"]]

# pt rows of the changed trips, per scenario.
baseline_pt_change = pd.merge(
    baseline_pt,
    changed_trip_ids,
    how="inner",
    on=["person_id", "person_trip_id"],
)
gpe_pt_change = pd.merge(
    gpe_pt,
    changed_trip_ids,
    how="inner",
    on=["person_id", "person_trip_id"],
)

# Occurrences of each transit_line_id, exposed under the column name route_id.
line_baseline = (
    baseline_pt_change["transit_line_id"]
    .value_counts()
    .rename_axis("route_id")
    .reset_index(name="count_baseline")
)
line_gpe = (
    gpe_pt_change["transit_line_id"]
    .value_counts()
    .rename_axis("route_id")
    .reset_index(name="count_gpe")
)

# A line used in a single scenario gets a count of 0 in the other one.
line_counts = pd.merge(line_baseline, line_gpe, how="outer", on=["route_id"]).fillna(0)

# Inner join: lines without an entry in the routes table are dropped here.
line_counts = pd.merge(
    line_counts,
    routes[["route_id", "route_short_name", "route_type"]],
    how="inner",
    on=["route_id"],
)

# Route types outside this mapping become NaN after .map().
route_type_labels = {
    0: "Tramway",
    1: "Subway",
    2: "Rail",
    3: "Bus"
}
line_counts["route_type"] = line_counts["route_type"].map(route_type_labels)

counts_by_type = line_counts.groupby("route_type")[["count_baseline", "count_gpe"]].sum()
route_type_counts = counts_by_type.reset_index()

In [22]:
# Grouped bars: baseline vs GPE counts for each route type.
scenario_columns = ["count_baseline", "count_gpe"]

fig = px.histogram(
    route_type_counts,
    x="route_type",
    y=scenario_columns,
    barmode="group",
    title="Nombre de trajets par mode de transport en commun",
    labels={"route_type":"Mode de transport"},
)
fig.show()

fig.write_image("outputs/plots/routing/nb_trajet_transport_commun.png", width = 1000) 

In [23]:
# One grouped bar chart per route type: baseline vs GPE counts for each line of that type.
for route_type in line_counts["route_type"].unique():
    lines_of_type = line_counts.loc[line_counts["route_type"] == route_type]
    chart_title = f"Nombre de trajets par ligne, Mode de transport : {route_type}"
    fig = px.histogram(
        lines_of_type,
        x="route_short_name",
        y=["count_baseline", "count_gpe"],
        barmode="group",
        title=chart_title,
        labels={"route_short_name":"Nom de la ligne"},
    )
    fig.show()

In [24]:
# Prepare the pt rows of the changed trips for the Sankey diagram: keep a few columns,
# attach route_short_name / route_type from the routes table, and replace the line name
# by a generic mode label ("bus", "tram", "rail", "subway"), except for the lines listed
# in `new`, which keep their own name on the GPE side.
cols = ["person_id", "person_trip_id", "leg_index", "transit_line_id", "transit_mode"]
# Same columns once transit_line_id has been renamed to route_id.
renamed_cols = ["route_id" if c == "transit_line_id" else c for c in cols]

# Subway lines that keep their own short name in the GPE scenario.
new = ["15", "16", "17", "18"]

# route_type code -> generic label written into route_short_name.
SUBWAY_TYPE = 1
generic_labels = {3: "bus", 0: "tram", 2: "rail", SUBWAY_TYPE: "subway"}


def _label_pt_legs(pt_legs, keep_lines=()):
    """Return `pt_legs` reduced to `renamed_cols`, joined with the routes table and relabelled.

    pt_legs    -- frame with the columns of `cols` (first run of this cell) or of
                  `renamed_cols` (frame already produced by this function).
    keep_lines -- route_short_name values of subway lines that must not be replaced
                  by the generic "subway" label.

    Accepting an already-processed frame makes the cell safe to re-run: the columns
    added by the previous run are dropped and joined again instead of raising a
    KeyError on the missing transit_line_id column.
    """
    if "transit_line_id" in pt_legs.columns:
        legs = pt_legs[cols].rename(columns={"transit_line_id": "route_id"})
    else:
        legs = pt_legs[renamed_cols]

    # Inner join: rows whose route_id is absent from the routes table are dropped.
    legs = legs.merge(
        routes[["route_id", "route_short_name", "route_type"]],
        on=["route_id"],
        how="inner",
    )

    # Decide which subway rows to protect before any label is overwritten.
    protected = (legs["route_type"] == SUBWAY_TYPE) & legs["route_short_name"].isin(keep_lines)
    for route_type, label in generic_labels.items():
        legs.loc[(legs["route_type"] == route_type) & ~protected, "route_short_name"] = label
    return legs


baseline_pt_change = _label_pt_legs(baseline_pt_change)
gpe_pt_change = _label_pt_legs(gpe_pt_change, keep_lines=new)

In [25]:
# Build the Sankey nodes and links. For every changed trip, the legs shared by both
# scenarios at the start and at the end of the trip are ignored; every remaining
# baseline line is linked to every remaining GPE line, and the trip's total weight
# of 1 is split evenly between those links.
#
# NOTE(review): legs are compared position by position in the row order of the
# frames; this presumably follows leg_index, confirm before relying on it.
trip_keys = ["person_id", "person_trip_id"]

sankey_nodes = []                  # node labels: "<line>_s" (baseline side) / "<line>_t" (GPE side)
node_index = {}                    # node label -> position in sankey_nodes
link_weights = defaultdict(float)  # (source index, target index) -> accumulated weight
vue_gpe = set()                    # GPE lines that already have a node
vue_baseline = set()               # baseline lines that already have a node

# Group the GPE legs once instead of re-filtering the whole frame for every trip.
gpe_legs_by_trip = {trip: legs for trip, legs in gpe_pt_change.groupby(trip_keys)}

for trip, baseline_boucle in baseline_pt_change.groupby(trip_keys):
    gpe_boucle = gpe_legs_by_trip.get(trip)
    if gpe_boucle is None:
        # No GPE leg for this trip: nothing to link to.
        continue

    baseline_ids = baseline_boucle["route_id"].tolist()
    gpe_ids = gpe_boucle["route_id"].tolist()
    min_len = min(len(baseline_ids), len(gpe_ids))

    # Length of the common prefix (same route_id at the same position).
    prefix_len = 0
    while prefix_len < min_len and baseline_ids[prefix_len] == gpe_ids[prefix_len]:
        prefix_len += 1

    # Length of the common suffix, never overlapping the prefix.
    suffix_len = 0
    while (
        suffix_len < min_len - prefix_len
        and baseline_ids[-1 - suffix_len] == gpe_ids[-1 - suffix_len]
    ):
        suffix_len += 1

    # Lines of the differing middle part of each itinerary.
    baseline_lines = baseline_boucle["route_short_name"].tolist()[prefix_len:len(baseline_ids) - suffix_len]
    gpe_lines = gpe_boucle["route_short_name"].tolist()[prefix_len:len(gpe_ids) - suffix_len]

    links = list(product(baseline_lines, gpe_lines))
    if not links:
        # One side has no differing leg: the trip adds no link, so do not
        # register nodes that would stay unconnected in the diagram.
        continue
    weight = 1 / len(links)

    # dict.fromkeys de-duplicates while keeping first-seen order, so node numbering
    # is reproducible from one run to the next (set iteration order is not).
    for el in dict.fromkeys(baseline_lines):
        if el not in vue_baseline:
            vue_baseline.add(el)
            node_index[el + "_s"] = len(sankey_nodes)
            sankey_nodes.append(el + "_s")

    for el in dict.fromkeys(gpe_lines):
        if el not in vue_gpe:
            vue_gpe.add(el)
            node_index[el + "_t"] = len(sankey_nodes)
            sankey_nodes.append(el + "_t")

    for src, tgt in links:
        link_weights[(node_index[src + "_s"], node_index[tgt + "_t"])] += weight

In [26]:
# Flatten link_weights ({(source, target): value}) into the three parallel lists
# passed to the Sankey trace; all three follow the dict's iteration order.
sankey_sources = [source for source, target in link_weights]
sankey_targets = [target for source, target in link_weights]
sankey_values = list(link_weights.values())

In [27]:
# Draw the Sankey diagram from the node labels and the (source, target, value) lists.
node_style = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=sankey_nodes,
)
link_data = dict(
    source=sankey_sources,
    target=sankey_targets,
    value=sankey_values,
)

fig = go.Figure(data=go.Sankey(node=node_style, link=link_data))

fig.update_layout(title_text="Comparaison des trajets", font_size=10)
fig.write_image("outputs/plots/routing/sankey.png", width = 1000) 
fig.show()

In [28]:
# Inspect the trips whose travel-time difference is at least 10800
# (3 h if times are in seconds, as the /3600 conversions below suggest).
is_anomaly = positif["travel_time_diff"] >= 10800
anomalies = positif.loc[is_anomaly].copy()

# Convert the baseline origin / destination coordinates from EPSG:2154 to EPSG:4326;
# always_xy=True means inputs are (x, y) and outputs are (lon, lat).
transformer = pyproj.Transformer.from_crs("EPSG:2154", "EPSG:4326", always_xy=True)

origin_lon, origin_lat = transformer.transform(
    anomalies['origin_x_baseline'].values,
    anomalies['origin_y_baseline'].values,
)
destination_lon, destination_lat = transformer.transform(
    anomalies['destination_x_baseline'].values,
    anomalies['destination_y_baseline'].values,
)

anomalies['origin_lat'], anomalies['origin_lon'] = origin_lat, origin_lon
anomalies['destination_lat'], anomalies['destination_lon'] = destination_lat, destination_lon

# Divide both time columns by 3600 (safe to modify: anomalies is a copy).
for time_col in ('departure_time_baseline', 'travel_time_baseline'):
    anomalies[time_col] = anomalies[time_col] / 3600

shown_columns = ['departure_time_baseline', 'travel_time_baseline','origin_lat', 'origin_lon', 'destination_lat', 'destination_lon']
print(anomalies[shown_columns])

        departure_time_baseline  travel_time_baseline  origin_lat  origin_lon  \
93416                 17.333611              6.252778   48.912761    1.912530   
132936                18.672500              4.904444   48.736501    2.311565   

        destination_lat  destination_lon  
93416         49.055642         1.766952  
132936        48.426306         2.167623  
