In [88]:
# import

import pandas as pd
import plotly.express as px
import pyproj
import plotly.graph_objects as go
from itertools import product
from collections import defaultdict

import plotly.io as pio
# Default plotly renderer: "+" combines several renderers, so each figure is
# emitted both for the VS Code viewer and as a PDF.
pio.renderers.default="vscode+pdf"
# Optional custom template (currently disabled): plotly_white with font size 15.
# pio.templates["custom"] = pio.templates["plotly_white"]
# pio.templates["custom"]["layout"]["font"] = {"size": 15}
# pio.templates.default = "custom"

In [89]:
# Load the eqasim outputs (semicolon-separated CSV files) of both scenarios.
# NOTE(review): the "baseline" tables are read from gpe_output/navette and the
# "gpe" tables from gpe_output/with_shuttle -- confirm these are the two
# scenarios that are meant to be compared.

def _read_eqasim(path):
    """Read one semicolon-separated eqasim output table."""
    return pd.read_csv(path, sep=";")


def _pt_only(trips):
    """Keep only the trips whose main mode is public transport."""
    return trips[trips["mode"].eq("pt")]


baseline_legs = _read_eqasim("gpe_output/navette/eqasim_legs.csv")
baseline_pt = _read_eqasim("gpe_output/navette/eqasim_pt.csv")
baseline_trips = _pt_only(_read_eqasim("gpe_output/navette/eqasim_trips.csv"))

gpe_legs = _read_eqasim("gpe_output/with_shuttle/eqasim_legs.csv")
gpe_pt = _read_eqasim("gpe_output/with_shuttle/eqasim_pt.csv")
gpe_trips = _pt_only(_read_eqasim("gpe_output/with_shuttle/eqasim_trips.csv"))

# GTFS routes table (comma-separated), used later to name and classify lines.
routes = pd.read_csv("../implementation_gtfs/GTFS_versions/GTFS_completed/routes.txt")

In [90]:
# Check that both scenarios really describe the same trips.

same_col = ["person_id", "person_trip_id", "origin_x", "origin_y", "destination_x", "destination_y", "departure_time", "travel_time"]

# Outer join on the trip key: a trip present in only one scenario gets NaN on
# the other side and is therefore reported as a difference below.
merged_data = pd.merge(
    baseline_trips[same_col],
    gpe_trips[same_col],
    how="outer",
    on=["person_id","person_trip_id"],
    suffixes=("_baseline", "_gpe"),
)

# One boolean "<column>_match" flag per attribute that must be identical.
for col in ["origin_x", "origin_y", "destination_x", "destination_y", "departure_time"]:
    merged_data[f"{col}_match"] = merged_data[f"{col}_baseline"].eq(merged_data[f"{col}_gpe"])

# Rows for which at least one attribute differs between the scenarios.
match_flags = [c for c in merged_data.columns if c.endswith("_match")]
every_flag_true = merged_data[match_flags].all(axis=1)
diff_rows = merged_data.loc[~every_flag_true]

print("Différences détectées :", len(diff_rows))
print(f"Cela représente {(len(diff_rows)/len(merged_data)*100):.2f}%")


Différences détectées : 895
Cela représente 0.37%


In [91]:
# Keep only the trips whose origin, destination and departure time are
# identical in both scenarios. The explicit .copy() makes the filtered frame
# independent of the unfiltered one, so the column assignments below do not
# raise pandas' SettingWithCopyWarning.
match_cols = [c for c in merged_data.columns if c.endswith("_match")]
merged_data = merged_data.loc[merged_data[match_cols].all(axis=1)].copy()

# Positive difference = the trip is shorter in the GPE scenario (time gained).
merged_data["travel_time_diff"] = merged_data["travel_time_baseline"] - merged_data["travel_time_gpe"]

# Difference relative to the baseline travel time, in percent. A zero baseline
# travel time is masked to NaN so the ratio is NaN rather than +/-inf, which
# would otherwise distort describe() and the histograms.
baseline_time = merged_data["travel_time_baseline"].where(merged_data["travel_time_baseline"] != 0)
merged_data["travel_time_diff_percent"] = merged_data["travel_time_diff"] / baseline_time * 100

# Number of trips per outcome, in the same order as `labels`.
labels = ['Pas de changement', 'Gain de temps', 'Perte de temps']
time_diff = merged_data["travel_time_diff"]
counts = [
    int((time_diff == 0).sum()),
    int((time_diff > 0).sum()),
    int((time_diff < 0).sum())
]

df_pie = pd.DataFrame({
    "Type de changement": labels,
    "Nombre de trajets": counts
})

# Pie chart of the share of unchanged / faster / slower trips.
fig = px.pie(
    df_pie,
    names="Type de changement",
    values="Nombre de trajets",
    title="Répartition des gains et pertes de temps",
    color="Type de changement",
    color_discrete_map={
        "Pas de changement": "#636EFA",
        "Gain de temps": "#00CC96",
        "Perte de temps": "#EF553B"
    },
)

fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [92]:
# Trips that are faster in the GPE scenario (strictly positive difference).
gained_time = merged_data["travel_time_diff"].gt(0)
positif = merged_data[gained_time]

# Summary statistics of the absolute (seconds) and relative (%) gains.
positif[["travel_time_diff", "travel_time_diff_percent"]].describe()

Unnamed: 0,travel_time_diff,travel_time_diff_percent
count,0.0,0.0
mean,,
std,,
min,,
25%,,
50%,,
75%,,
max,,


In [94]:
# Histogram of the time gained per trip, converted from seconds to minutes.
gain_minutes = positif["travel_time_diff"]/60

fig = px.histogram(
    positif,
    x=gain_minutes,
    nbins=75,
    color_discrete_sequence=["#636EFA"],
    labels={"x": "Différence de temps (minutes)", "count": "Nombre de trajets"},
    title="Distribution des différences de temps de trajet (baseline - GPE)",
)

fig.show()

In [95]:

# Histogram of the time gained per trip, relative to the baseline travel time.
percent_hist_options = {
    "x": 'travel_time_diff_percent',
    "nbins": 20,
    "title": "Distribution des différences de temps de trajet en pourcentage (baseline - GPE)",
    "labels": {"travel_time_diff_percent": "Différence de temps (%)", "count": "Nombre de trajets"},
    "color_discrete_sequence": ["#636EFA"],
}
fig = px.histogram(positif, **percent_hist_options)

# Display the histogram
fig.show()

In [96]:
# Trips that are slower in the GPE scenario (strictly negative difference).
lost_time = merged_data["travel_time_diff"].lt(0)
negatif = merged_data[lost_time]

# Summary statistics of the absolute (seconds) and relative (%) losses.
negatif[["travel_time_diff", "travel_time_diff_percent"]].describe()

Unnamed: 0,travel_time_diff,travel_time_diff_percent
count,905.0,905.0
mean,-2345.953591,-80.280921
std,5396.598873,208.102007
min,-54360.0,-2773.469388
25%,-1800.0,-60.200669
50%,-1020.0,-34.52244
75%,-720.0,-21.649234
max,-128.0,-3.386243


In [110]:
# Extreme outliers: trips that lose more than 50000 seconds in the GPE scenario.
negatif.loc[negatif["travel_time_diff"].lt(-50000)]

Unnamed: 0,person_id,person_trip_id,origin_x_baseline,origin_y_baseline,destination_x_baseline,destination_y_baseline,departure_time_baseline,travel_time_baseline,origin_x_gpe,origin_y_gpe,...,destination_y_gpe,departure_time_gpe,travel_time_gpe,origin_x_match,origin_y_match,destination_x_match,destination_y_match,departure_time_match,travel_time_diff,travel_time_diff_percent
18143,10826051_0,0,656708.952128,6855934.0,654157.458676,6852993.0,23750.0,1960.0,656708.952128,6855934.0,...,6852993.0,23750.0,56320.0,True,True,True,True,True,-54360.0,-2773.469388


In [115]:
# GPE public-transport legs of the extreme outlier trips (loss > 50000 s).
# A trip is identified by (person_id, person_trip_id): matching on person_id
# alone would also return the legs of that person's other trips.
outlier_trips = negatif.loc[negatif["travel_time_diff"] < -50000, ["person_id", "person_trip_id"]]
outlier_keys = pd.MultiIndex.from_frame(outlier_trips)
gpe_pt[pd.MultiIndex.from_frame(gpe_pt[["person_id", "person_trip_id"]]).isin(outlier_keys)]

Unnamed: 0,person_id,person_trip_id,leg_index,access_stop_id,egress_stop_id,transit_line_id,transit_route_id,departure_id,access_area_id,egress_area_id,transit_mode
7274,10826051_0,0,1,IDFM:37277.link:138835,IDFM:7848.link:292859,IDFM:C01193,IDFM:RATP:156307-C01193-COU_RATP_5112356_25428...,IDFM:RATP:156307-C01193-COU_RATP_5112356_25428...,IDFM:70235,IDFM:70174,bus
397572,10826051_0,0,3,IDFM:7848.link:292859,IDFM:7849.link:205281,IDFM:C01160,IDFM:RATP:175356-C01160-COU_RATP_5121144_32551...,IDFM:RATP:175356-C01160-COU_RATP_5121144_32551...,IDFM:70174,IDFM:69994,bus


In [114]:
# Baseline legs of the extreme outlier trips (loss > 50000 s).
# A trip is identified by (person_id, person_trip_id): matching on person_id
# alone would also return the legs of that person's other trips.
outlier_trips = negatif.loc[negatif["travel_time_diff"] < -50000, ["person_id", "person_trip_id"]]
outlier_keys = pd.MultiIndex.from_frame(outlier_trips)
baseline_legs[pd.MultiIndex.from_frame(baseline_legs[["person_id", "person_trip_id"]]).isin(outlier_keys)]

Unnamed: 0,person_id,person_trip_id,leg_index,origin_x,origin_y,destination_x,destination_y,departure_time,travel_time,vehicle_distance,routed_distance,mode,euclidean_distance,origin_link_id,destination_link_id
17129,10826051_0,0,0,656708.952128,6855934.0,656722.959496,6855243.0,23750.0,760.0,0.0,911.408694,walk,690.722184,221309,138835
21539,10826051_0,0,1,656722.959496,6855243.0,654498.397638,6854679.0,24510.0,630.0,2252.209711,2252.209711,pt,2294.957243,138835,292859
21577,10826051_0,0,2,654498.397638,6854679.0,654498.397638,6854679.0,25140.0,0.0,0.0,0.0,walk,0.0,292859,292859
25084,10826051_0,0,3,654498.397638,6854679.0,654103.422777,6853077.0,25140.0,420.0,2545.420848,2545.420848,pt,1650.123259,292859,205281
26397,10826051_0,0,4,654103.422777,6853077.0,654157.458676,6852993.0,25560.0,150.0,0.0,178.877878,walk,99.691536,205281,567428


In [97]:
# Histogram of the time lost per trip, converted from seconds to minutes.
loss_minutes = negatif["travel_time_diff"]/60

fig = px.histogram(
    negatif,
    x=loss_minutes,
    nbins=25,
    color_discrete_sequence=["#636EFA"],
    labels={"x": "Différence de temps (minutes)", "count": "Nombre de trajets"},
    title="Distribution des différences de temps de trajet (baseline - GPE)",
)

fig.show()

In [98]:
# Histogram of the time lost per trip, relative to the baseline travel time.
percent_hist_options = {
    "x": 'travel_time_diff_percent',
    "nbins": 20,
    "title": "Distribution des différences de temps de trajet en pourcentage (baseline - GPE)",
    "labels": {"travel_time_diff_percent": "Différence de temps (%)", "count": "Nombre de trajets"},
    "color_discrete_sequence": ["#636EFA"],
}
fig = px.histogram(negatif, **percent_hist_options)

fig.show()

In [99]:
# Public-transport legs, in each scenario, of the trips that got slower.
negatif_baseline_pt = baseline_pt.merge(
    negatif[["person_id","person_trip_id"]],
    on=["person_id", "person_trip_id"],
    how="inner",
)

negatif_gpe_pt = gpe_pt.merge(
    negatif[["person_id","person_trip_id"]],
    on=["person_id", "person_trip_id"],
    how="inner",
)

# Number of public-transport legs per trip in each scenario.
baseline_counts = negatif_baseline_pt.groupby(["person_id", "person_trip_id"]).size().reset_index(name="count_baseline")
gpe_counts = negatif_gpe_pt.groupby(["person_id", "person_trip_id"]).size().reset_index(name="count_gpe")

merged_counts = pd.merge(baseline_counts, gpe_counts, on=["person_id", "person_trip_id"], how="outer")

# The outer join leaves NaN for a trip that has no row in one of the two PT
# tables. That trip has zero PT legs there, so count it as 0: otherwise its
# difference is NaN and it silently drops out of the statistics below.
count_cols = ["count_baseline", "count_gpe"]
merged_counts[count_cols] = merged_counts[count_cols].fillna(0).astype(int)

# Positive = fewer PT legs in the GPE scenario than in the baseline.
merged_counts["count_diff"] = merged_counts["count_baseline"] - merged_counts["count_gpe"]

merged_counts["count_diff"].describe()


count    905.0
mean       0.0
std        0.0
min        0.0
25%        0.0
50%        0.0
75%        0.0
max        0.0
Name: count_diff, dtype: float64

In [100]:


# Histogram of the per-trip difference in number of PT legs (baseline - GPE).
count_hist_options = {
    "x": "count_diff",
    "nbins": 50,
    "title": "Distribution des différences de fréquence (baseline - GPE)",
    "labels": {"count_diff": "Différence de fréquence", "count": "Nombre de trajets"},
    "color_discrete_sequence": ["#636EFA"],
}
fig = px.histogram(merged_counts, **count_hist_options)

fig.show()

In [101]:
# Slower trips that use more PT legs in the GPE scenario than in the baseline.
merged_counts_neg = merged_counts[merged_counts["count_diff"] <0]

# Select on the full trip key (person_id, person_trip_id): matching on
# person_id alone would also return the other trips of the same person.
more_legs_keys = pd.MultiIndex.from_frame(merged_counts_neg[["person_id", "person_trip_id"]])
negatif[pd.MultiIndex.from_frame(negatif[["person_id", "person_trip_id"]]).isin(more_legs_keys)]

Unnamed: 0,person_id,person_trip_id,origin_x_baseline,origin_y_baseline,destination_x_baseline,destination_y_baseline,departure_time_baseline,travel_time_baseline,origin_x_gpe,origin_y_gpe,...,destination_y_gpe,departure_time_gpe,travel_time_gpe,origin_x_match,origin_y_match,destination_x_match,destination_y_match,departure_time_match,travel_time_diff,travel_time_diff_percent


In [102]:
# Trips whose travel time differs between the two scenarios.
merged_trips_change = merged_data.loc[merged_data["travel_time_diff"].ne(0)]
changed_trip_keys = merged_trips_change[["person_id", "person_trip_id"]]

# Public-transport legs of those trips, in each scenario.
baseline_pt_change = pd.merge(
    baseline_pt,
    changed_trip_keys,
    how="inner",
    on=["person_id","person_trip_id"],
)

gpe_pt_change = pd.merge(
    gpe_pt,
    changed_trip_keys,
    how="inner",
    on=["person_id","person_trip_id"],
)

# Number of PT legs per transit line in each scenario.
line_baseline = (
    baseline_pt_change["transit_line_id"]
    .value_counts()
    .rename_axis("route_id")
    .reset_index(name="count_baseline")
)

line_gpe = (
    gpe_pt_change["transit_line_id"]
    .value_counts()
    .rename_axis("route_id")
    .reset_index(name="count_gpe")
)

# Side-by-side counts (0 when a line is used in only one scenario), enriched
# with the GTFS short name and route type. The inner join drops any line that
# is missing from the routes table.
line_counts = (
    line_baseline
    .merge(line_gpe, how="outer", on=["route_id"])
    .fillna(0)
    .merge(
        routes[["route_id", "route_short_name", "route_type"]],
        how="inner",
        on=["route_id"],
    )
)

# Readable labels for the GTFS route types handled here; any other route type
# maps to NaN and is therefore left out of the per-mode totals.
route_type_labels = {
    0: "Tramway",
    1: "Subway",
    2: "Rail",
    3: "Bus"
}

line_counts["route_type"] = line_counts["route_type"].map(route_type_labels)

# Total number of PT legs per transport mode in each scenario.
route_type_counts = line_counts.groupby("route_type", as_index=False)[["count_baseline", "count_gpe"]].sum()

In [103]:
# Grouped bars: baseline vs GPE usage, aggregated per transport mode.
# The data and the x axis are per mode (route_type), so the title says
# "par mode de transport" instead of the former, misleading "par ligne".
fig = px.histogram(
    route_type_counts,
    x="route_type",
    y = ["count_baseline", "count_gpe"],
    title="Nombre de trajets par mode de transport",
    labels={"route_type":"Mode de transport"},
    barmode="group"
)
fig.show()

In [104]:
# One grouped bar chart per transport mode: baseline vs GPE usage of each line.
# dropna(): a route type without a label is NaN in this column; NaN never
# compares equal to itself, so it would only produce an empty figure titled "nan".
for route_type in line_counts["route_type"].dropna().unique():
    fig = px.histogram(
        line_counts[line_counts.route_type == route_type],
        x="route_short_name",
        y = ["count_baseline", "count_gpe"],
        title=f"Nombre de trajets par ligne, Mode de transport : {route_type}",
        labels={"route_short_name":"Nom de la ligne"},
        barmode="group"
    )
    fig.show()

In [105]:
# Reduce the PT legs of the changed trips to the columns needed for the
# itinerary comparison and replace most line names by a generic mode label.
cols = ["person_id", "person_trip_id", "leg_index", "transit_line_id", "transit_mode"]

# Subway lines that keep their own short name in the GPE scenario
# (presumably the new Grand Paris Express lines -- confirm).
new = ["15", "16", "17", "18"]

# GTFS route_type -> generic label written into route_short_name.
generic_mode_labels = {0: "tram", 1: "subway", 2: "rail", 3: "bus"}


def _label_pt_legs(pt_legs, keep_subway_names=()):
    """Return the legs with route metadata and generic mode labels.

    Parameters
    ----------
    pt_legs : DataFrame of PT legs holding the columns listed in `cols`, with
        the line id stored either as "transit_line_id" or, when the cell is
        re-run on its own output, as "route_id".
    keep_subway_names : short names of subway lines (route_type 1) that keep
        their own name instead of the generic "subway" label.

    Returns
    -------
    DataFrame with the columns of `cols` (line id renamed to "route_id") plus
    "route_short_name" and "route_type" from `routes`. Legs whose line is not
    in `routes` are dropped by the inner join. Route types without a generic
    label keep their original short name.
    """
    # Rename first, then select: this makes the cell re-runnable, because on a
    # second run the frame already holds "route_id" (the rename is a no-op) and
    # the stale route_short_name / route_type columns are discarded before
    # being joined again.
    leg_cols = ["route_id" if c == "transit_line_id" else c for c in cols]
    legs = (
        pt_legs
        .rename(columns={"transit_line_id": "route_id"})[leg_cols]
        .merge(
            routes[["route_id", "route_short_name", "route_type"]],
            on=["route_id"],
            how="inner",
        )
    )

    generic = legs["route_type"].map(generic_mode_labels)
    kept = legs["route_type"].eq(1) & legs["route_short_name"].isin(list(keep_subway_names))
    # Keep the original name where no generic label exists or the line is
    # explicitly preserved; use the generic label everywhere else.
    legs["route_short_name"] = legs["route_short_name"].where(generic.isna() | kept, generic)
    return legs


# Baseline: every subway line becomes "subway". GPE: the lines in `new` stay
# individually visible.
baseline_pt_change = _label_pt_legs(baseline_pt_change)
gpe_pt_change = _label_pt_legs(gpe_pt_change, keep_subway_names=new)

In [106]:
# Build the Sankey nodes and weighted links describing which baseline lines
# are replaced by which GPE lines, trip by trip.
#
# For each trip, the legs shared at the start and at the end of both
# itineraries are ignored. Every (baseline line, GPE line) pair of the
# remaining legs receives an equal share of the trip, so each trip that has
# links contributes a total weight of 1.
sankey_nodes = []                   # node labels: "<name>_s" (baseline) / "<name>_t" (GPE)
link_weights = defaultdict(float)   # (source node index, target node index) -> weight
vue_gpe = set()                     # GPE line names that already have a node
vue_baseline = set()                # baseline line names that already have a node
node_index = {}                     # node label -> position in sankey_nodes


def _common_affixes(left, right):
    """Return (prefix_len, suffix_len) shared by two sequences.

    The suffix is only searched in what remains after the common prefix, so
    prefix_len + suffix_len never exceeds the length of the shorter sequence.
    """
    shortest = min(len(left), len(right))
    prefix_len = 0
    while prefix_len < shortest and left[prefix_len] == right[prefix_len]:
        prefix_len += 1
    suffix_len = 0
    while (suffix_len < shortest - prefix_len
           and left[-1 - suffix_len] == right[-1 - suffix_len]):
        suffix_len += 1
    return prefix_len, suffix_len


def _register_nodes(names, seen, suffix):
    """Append a node for each not-yet-seen name, in order of appearance."""
    # dict.fromkeys de-duplicates while keeping the leg order; iterating a set
    # would make the node order depend on string hashing, which varies from
    # one Python process to the next.
    for name in dict.fromkeys(names):
        if name not in seen:
            node_index[name + suffix] = len(sankey_nodes)
            sankey_nodes.append(name + suffix)
            seen.add(name)


trip_keys = ["person_id", "person_trip_id"]

# Index the GPE legs by trip once, instead of re-filtering the whole frame for
# every trip. Legs are ordered by leg_index because the prefix/suffix
# comparison below is positional.
gpe_legs_by_trip = {
    key: legs.sort_values("leg_index", kind="stable")
    for key, legs in gpe_pt_change.groupby(trip_keys)
}
no_gpe_legs = gpe_pt_change.iloc[0:0]  # used when a trip has no GPE PT leg

for trip_key, baseline_boucle in baseline_pt_change.groupby(trip_keys):
    baseline_boucle = baseline_boucle.sort_values("leg_index", kind="stable")
    gpe_boucle = gpe_legs_by_trip.get(trip_key, no_gpe_legs)

    prefix_len, suffix_len = _common_affixes(
        baseline_boucle["route_id"].tolist(),
        gpe_boucle["route_id"].tolist(),
    )

    # Legs that differ between the two itineraries, by display name.
    baseline_names = baseline_boucle["route_short_name"].tolist()
    gpe_names = gpe_boucle["route_short_name"].tolist()
    baseline_lines = baseline_names[prefix_len:len(baseline_names) - suffix_len]
    gpe_lines = gpe_names[prefix_len:len(gpe_names) - suffix_len]

    links = list(product(baseline_lines, gpe_lines))
    weight = 1/len(links) if links else 0

    _register_nodes(baseline_lines, vue_baseline, "_s")
    _register_nodes(gpe_lines, vue_gpe, "_t")

    for src, tgt in links:
        link_weights[(node_index[src+"_s"], node_index[tgt+"_t"])] += weight

In [107]:
# Flatten the {(source, target): weight} mapping into the three parallel lists
# expected by the Sankey trace (same iteration order for all three).
sankey_sources = [source for source, _ in link_weights.keys()]
sankey_targets = [target for _, target in link_weights.keys()]
sankey_values = list(link_weights.values())

In [108]:
# Sankey diagram: baseline lines on the source side, GPE lines on the target side.
sankey_node_style = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": sankey_nodes,
}
sankey_link_data = {
    "source": sankey_sources,
    "target": sankey_targets,
    "value": sankey_values,
}

sankey_trace = go.Sankey(node=sankey_node_style, link=sankey_link_data)
fig = go.Figure(sankey_trace)

fig.update_layout(title_text="Comparaison des trajets", font_size=10)
fig.show()

In [109]:
# Trips that gain at least 10800 s (3 hours) in the GPE scenario.
anomalies = positif.loc[positif["travel_time_diff"].ge(10800)].copy()

# Reproject the trip ends from EPSG:2154 to EPSG:4326; with always_xy=True the
# transformer takes (x, y) and returns (lon, lat).
transformer = pyproj.Transformer.from_crs("EPSG:2154", "EPSG:4326", always_xy=True)

origin_lon, origin_lat = transformer.transform(
    anomalies['origin_x_baseline'].values,
    anomalies['origin_y_baseline'].values,
)
destination_lon, destination_lat = transformer.transform(
    anomalies['destination_x_baseline'].values,
    anomalies['destination_y_baseline'].values,
)

anomalies = anomalies.assign(
    origin_lat=origin_lat,
    origin_lon=origin_lon,
    destination_lat=destination_lat,
    destination_lon=destination_lon,
)

# Divide the baseline departure and travel times by 3600 (seconds -> hours).
anomalies['departure_time_baseline'] = anomalies['departure_time_baseline'] / 3600
anomalies['travel_time_baseline'] = anomalies['travel_time_baseline'] / 3600

print(anomalies[['departure_time_baseline', 'travel_time_baseline','origin_lat', 'origin_lon', 'destination_lat', 'destination_lon']])

Empty DataFrame
Columns: [departure_time_baseline, travel_time_baseline, origin_lat, origin_lon, destination_lat, destination_lon]
Index: []
