In [1]:
import pandas as pd
from datetime import datetime, timedelta
# Turn on pandas copy-on-write: a frame derived from another one (e.g. via .loc)
# behaves as an independent copy when a column is assigned on it.
pd.options.mode.copy_on_write = True 

In [2]:
# Read the train stop-times file
liste_stop_times_ter = pd.read_csv("./data_cleaned/stop_times_ter_full.csv", sep=",")

In [3]:
# Cast the "date" column to datetime64[ns]: the search code below compares it
# with datetime objects and formats it through the .dt accessor.
liste_stop_times_ter = liste_stop_times_ter.astype({"date": "datetime64[ns]"})

In [4]:
# Display the column dtypes to check that "date" is now datetime64[ns]
liste_stop_times_ter.dtypes

Unnamed: 0                      int64
trip_id                        object
arrival_time                   object
departure_time                 object
stop_id                        object
stop_sequence                   int64
stop_headsign                 float64
pickup_type                     int64
drop_off_type                   int64
shape_dist_traveled           float64
city                           object
date                   datetime64[ns]
dtype: object

In [5]:
def check_destination(destination, layer):
    """Tell whether at least one row of ``layer`` has ``destination`` as its city.

    Parameters
    ----------
    destination : value compared with the ``"city"`` column.
    layer : pandas.DataFrame with a ``"city"`` column.

    Returns
    -------
    bool
        True when a matching row exists, False otherwise.
    """
    city_matches = layer["city"] == destination
    return bool(city_matches.any())

def path_back_propagation(layers, destination):
    """Rebuild the itinerary that reaches ``destination`` by walking the layers backwards.

    Starting from the last layer, the first row whose ``"city"`` equals the
    city being looked for is taken, its ``"current_path"`` is recorded, and the
    city stored in its ``"previous_path"`` (second ``;``-separated field)
    becomes the city to look for in the layer before. The walk stops at the
    first row whose ``"previous_path"`` is the empty string.

    Parameters
    ----------
    layers : list of pandas.DataFrame
        Search layers, oldest first. Each needs the columns ``"city"``,
        ``"current_path"`` and ``"previous_path"``.
    destination : value compared with the ``"city"`` column of the last layer.

    Returns
    -------
    list of str or str
        The ``"current_path"`` values ordered from the first layer to the
        last, or the string ``"error"`` when the chain cannot be followed
        (city missing from a layer, no layers at all, or layers exhausted
        before reaching a row with an empty ``"previous_path"``).
    """
    trail = []
    city_to_fetch = destination
    for layer in reversed(layers):
        matches = layer.loc[layer["city"] == city_to_fetch]
        if matches.empty:
            return "error"
        hop = matches.iloc[0]
        trail.append(hop["current_path"])
        if hop["previous_path"] == "":
            # Reached a row with no predecessor: the trail is complete.
            return trail[::-1]
        city_to_fetch = hop["previous_path"].split(";")[1]
    # Layers ran out before a starting row was found; report it the same way
    # as the other failure instead of silently returning None.
    return "error"


def filter_direction_trip(data, namesList, start_date=False):
    """Keep, for every trip, the stops from the first one located in ``namesList`` onwards.

    Rows are ordered by ``trip_id`` then ``stop_sequence``; within each trip,
    rows preceding the first stop whose city is in ``namesList`` are dropped,
    and trips that never stop in one of those cities disappear entirely.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``"trip_id"``, ``"stop_sequence"`` and ``"city"``.
    namesList : container of city names
        Cities where a trip may be boarded (tested with ``in``).
    start_date : optional
        Kept for backward compatibility only. No time filtering is applied,
        so this argument has no influence on the result.

    Returns
    -------
    pandas.DataFrame
        One row per kept stop, rebuilt from ``DataFrame.itertuples`` records:
        it therefore carries an extra ``"Index"`` column holding the original
        index label, and a fresh positional index. When nothing matches, an
        empty frame with ``"Index"`` plus the input columns is returned so
        that callers can still select columns by name.
    """
    ordered = data.sort_values(by=["trip_id", "stop_sequence"])

    kept_rows = []
    current_trip = ""
    boarded = False
    for row in ordered.itertuples():
        if row.trip_id != current_trip:
            # New trip: nothing has been boarded yet.
            current_trip = row.trip_id
            boarded = False

        if row.city in namesList:
            boarded = True

        if boarded:
            kept_rows.append(row)

    if not kept_rows:
        # pd.DataFrame([]) would have no columns at all, which breaks any
        # caller that sorts or filters on a column afterwards.
        return pd.DataFrame(columns=["Index", *ordered.columns])
    return pd.DataFrame(kept_rows)

def shortest_path(depart, destination, start_date, max_searches=100):
    """Search an itinerary from ``depart`` to ``destination``, one layer per train taken.

    The stop times of the 24 hours following ``start_date`` are explored
    layer by layer: the first layer holds the stops reachable with a train
    calling at ``depart``; each following layer holds the stops reachable by
    boarding, in a city of the previous layer, a train that was not part of
    that previous layer. As soon as a layer contains ``destination`` the
    itinerary is rebuilt with ``path_back_propagation`` and printed.

    NOTE: connections are matched on the city only. Nothing here checks that
    a train leaves after the previous one has arrived, and
    ``filter_direction_trip`` does not filter on time either, so the printed
    itinerary is not guaranteed to be chronologically feasible.

    Parameters
    ----------
    depart : str
        Departure city, compared with the ``"city"`` column.
    destination : str
        Arrival city, compared with the ``"city"`` column.
    start_date : str
        Beginning of the search window, formatted ``"%Y-%m-%d %H:%M:%S"``.
    max_searches : int, default 100
        Maximum number of additional layers to build.

    Returns
    -------
    str
        ``"destination trouver"`` when a layer contains the destination,
        ``"not found: no more trips to explore"`` when a layer comes back
        empty, otherwise ``"not found excedeed <max_searches> search"``.
    """
    date_format = "%d/%m/%Y, %H:%M:%S"

    def finalize(layer):
        # Keep one row per stop (the one with the smallest date) and tag it
        # with the "trip;city;date" string used to print the itinerary.
        layer = layer.sort_values(by="date").drop_duplicates(subset=["stop_id"])
        layer["current_path"] = (
            layer["trip_id"] + ";" + layer["city"] + ";" + layer["date"].dt.strftime(date_format)
        )
        return layer

    start_date = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
    end_date = start_date + timedelta(days=1)
    date_mask = (liste_stop_times_ter["date"] > start_date) & (liste_stop_times_ter["date"] < end_date)

    # Restrict the dataset to the window being searched
    period_stop_time = liste_stop_times_ter.loc[date_mask]

    # First layer: every trip calling at the departure city, from that stop onwards
    trips_id = set(period_stop_time.loc[period_stop_time["city"] == depart, "trip_id"])
    layer = period_stop_time.loc[period_stop_time["trip_id"].isin(trips_id)]
    layer = filter_direction_trip(layer.assign(previous_path=""), [depart], start_date)
    if layer.empty:
        # No train calls at the departure city in the window.
        return "not found: no more trips to explore"
    layers = [finalize(layer)]

    for search in range(max_searches + 1):
        if check_destination(destination, layers[-1]):
            print(path_back_propagation(layers, destination))
            return "destination trouver"
        if search == max_searches:
            # The last layer built has been checked; stop expanding.
            break

        # Stops without a city cannot serve as transfer points.
        frontier_cities = layers[-1]["city"].dropna().unique()
        filtered_data = period_stop_time.loc[period_stop_time["city"].isin(frontier_cities)]

        # For each trip, the transfer is the first frontier stop along the
        # trip, i.e. the stop from which filter_direction_trip keeps its rows.
        boarding = (
            filtered_data
            .sort_values(by=["trip_id", "stop_sequence"])
            .drop_duplicates(subset=["trip_id"], keep="first")
        )
        transfer_by_trip = dict(zip(
            boarding["trip_id"],
            boarding["trip_id"] + ";" + boarding["city"].astype(str) + ";"
            + boarding["date"].dt.strftime(date_format),
        ))

        trips_id = set(filtered_data["trip_id"]) - set(layers[-1]["trip_id"])
        layer = period_stop_time.loc[period_stop_time["trip_id"].isin(trips_id)]
        layer = layer.assign(previous_path=layer["trip_id"].map(transfer_by_trip))
        layer = filter_direction_trip(layer, frontier_cities)
        if layer.empty:
            # Dead end: no new train can be boarded from the current layer.
            return "not found: no more trips to explore"

        layers.append(finalize(layer))

    return f"not found excedeed {max_searches} search"

In [7]:
# Example search from METZERAL to L'HERMITAGE; the start date must be
# formatted as "YYYY-MM-DD HH:MM:SS".
depart = "METZERAL"
destination = "L'HERMITAGE"
shortest_path(depart, destination, "2025-01-28 05:00:00")


['OCESN831501F3729725:2025-01-22T00:42:25Z;COLMAR;28/01/2025, 06:29:00', 'OCESN96201F3725715:2025-01-22T00:42:25Z;MULHOUSE;28/01/2025, 06:14:00', 'OCESN839550F3798183:2025-01-22T00:42:25Z;PARIS;28/01/2025, 09:46:00', 'OCESN862507F3752489:2025-01-22T00:42:25Z;LE MANS;28/01/2025, 08:50:00', 'OCESN857691F3532276:2025-01-22T00:42:25Z;RENNES;28/01/2025, 08:19:00', "OCESN855505F3526583:2025-01-22T00:42:25Z;L'HERMITAGE;28/01/2025, 06:12:00"]


'destination trouver'