In [1]:
import pandas as pd
from datetime import datetime, timedelta
pd.options.mode.copy_on_write = True 

In [15]:
# Lit le fichier des temps d'arrêts des trains
liste_stop_times_ter = pd.read_csv("./data_cleaned/stop_times_ter_full.csv", sep=",")

# Lit le fichier des temps d'arrêts des trains sans les dates
liste_stop_times = pd.read_csv("./data_cleaned/stop_times_ter.csv", sep=",")

In [16]:
liste_stop_times_ter = liste_stop_times_ter.astype({"date": "datetime64[ns]"})

In [87]:
def check_destination(destination, layer):
    return len(layer.loc[layer["city"] == destination]) > 0

def path_back_propagation(layers, destination):
    paths = []
    city_to_fetch = destination
    for layer in reversed(layers):
        data = layer.loc[layer["city"] == city_to_fetch]
        if len(data) == 0:
            return "error"
        data = data.iloc[0]
        paths.append(data['current_path'])
        if data["previous_path"] == "":
            return paths[::-1]
        city_to_fetch = data["previous_path"].split(';')[1]

def format_paths(paths, depart):
    final_paths = []
    trips_id = []

    for path in paths:
        trips_id.append(path.split(";")[0])

    trips = liste_stop_times[liste_stop_times['trip_id'].isin(trips_id)].sort_values(by=["trip_id", "stop_sequence"])
    trip_id = ""
    trips_dict = {}

    for trip in trips.itertuples():
        if trip_id != trip.trip_id:
            trip_id = trip.trip_id
            trips_dict[trip_id] = [trip]
        else:
            trips_dict[trip_id].append(trip)

    for path in paths:
        destination = path.split(";")[1]
        date = path.split(";")[-1].split(',')[0]
        
        tmp_dict = {
            "depart": depart,
            "depart_arrival_time": "",
            "depart_departure_time": "",
            "destination": destination,
            "destination_arrival_time": "",
            "destination_departure_time": "",
            "trip": [],
            "date": date
        }
        trip_id = path.split(";")[0]

        for trip in trips_dict[trip_id]:
            if trip.city == depart:
                tmp_dict["depart_arrival_time"] = trip.arrival_time 
                tmp_dict["depart_departure_time"] = trip.departure_time 
    
            if trip.city == destination:
                tmp_dict["destination_arrival_time"] = trip.arrival_time 
                tmp_dict["destination_departure_time"] = trip.departure_time 
                depart = destination
            tmp_dict["trip"].append({
                "city": trip.city,
                "arrival_time": trip.arrival_time,
                "departure_time": trip.departure_time
            })

        final_paths.append(tmp_dict)
        
    return final_paths
    
def filter_direction_trip(data, namesList, start_date=False):
    find_name = False
    trip_id = ""
    end_date = ""
    default_value = False
    rows = []

    if start_date != False:
        end_date = start_date + timedelta(days=1)
        default_value = True

    data = data.sort_values(by=["trip_id", "stop_sequence"])

    for row in data.itertuples():
        if not default_value:
            date_str = row.previous_path.split(';')[-1]
            date = datetime.strptime(date_str, "%d/%m/%Y, %H:%M:%S")
            start_date = date + timedelta(minutes=5)
            end_date = start_date + timedelta(days=1)

        if row.trip_id != trip_id:
            trip_id = row.trip_id
            find_name = False

        if row.city in namesList :#and (row.date >= start_date and row.date <= end_date):
            find_name = True

        if find_name:
            rows.append(row)
 
    return pd.DataFrame(rows)

def shortest_path(depart, destination, start_date):
    layers = []
    not_founded = 0 # vérifie qu'on a pas trouvé un chemin vers l'arrivée

    start_date = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
    end_date = start_date + timedelta(days=1)
    date_mask = (liste_stop_times_ter["date"] > start_date) & (liste_stop_times_ter["date"] < end_date)

    #Raccourci le jeu de donnée nécessaire pour la recherche pour une date spécifique
    period_stop_time = liste_stop_times_ter.loc[date_mask]

    trips_id = set(period_stop_time.loc[(period_stop_time["city"] == depart)]["trip_id"])
    layer = period_stop_time.loc[(period_stop_time["trip_id"].isin(trips_id))]
    
    layer["previous_path"] = ""
    layer = filter_direction_trip(layer, [depart], start_date)  
    layer = layer.sort_values(by="date").drop_duplicates(subset=["stop_id"])

    
    layer["current_path"] = layer["trip_id"] + ";" + layer["city"] + ";" + layer["date"].dt.strftime("%d/%m/%Y, %H:%M:%S")

    layers.append(layer)

    while not_founded < 100:
        if check_destination(destination, layers[-1]):
            paths = path_back_propagation(layers, destination)
            return format_paths(paths, depart)
        
        filtered_data = period_stop_time.loc[(period_stop_time["city"].isin(layers[-1]["city"].unique()))]

        paths = {}
        for row in filtered_data.itertuples():
            paths[row.trip_id] = row.trip_id + ";" + str(row.city) + ";" + row.date.strftime("%d/%m/%Y, %H:%M:%S")
        
        trips_id = set(filtered_data["trip_id"]) - set(layers[-1]["trip_id"])
        layer = period_stop_time.loc[(period_stop_time["trip_id"].isin(trips_id))]
        
        layer["previous_path"] = [paths[data.trip_id] for data in layer.itertuples()]
        layer = filter_direction_trip(layer, layers[-1]["city"].unique())

        layer = layer.sort_values(by="date").drop_duplicates(subset=["stop_id"])
        
        
        layer["current_path"] = layer["trip_id"] + ";" + layer["city"] + ";" + layer["date"].dt.strftime("%d/%m/%Y, %H:%M:%S")
        layer.to_csv("data_cleaned/test.csv")
        layers.append(layer)
        
        not_founded += 1
    return "not found excedeed 100 search"

In [105]:
def get_path(depart, destination, etapes='', start_date=''):
    depart = depart.upper()
    destination = destination.upper()
    etapes = etapes.upper()
    if start_date == "":
        start_date = datetime.now() + timedelta(minutes=2)
        start_date = start_date.strftime("%Y-%m-%d %H:%M:%S")
    if etapes == '':
        return shortest_path(depart, destination, start_date)
    else:
        paths = []
        for etape in etapes.split(","):
            paths += shortest_path(depart, etape, start_date)
            depart = etape
            date_str = paths[-1]["date"] + " " + paths[-1]["destination_arrival_time"]
            start_date = datetime.strptime(str(date_str), "%d/%m/%Y %H:%M:%S") + timedelta(minutes=5)
            start_date = start_date.strftime("%Y-%m-%d %H:%M:%S")
        paths += shortest_path(depart, destination, start_date)
        return paths

In [106]:
depart = "METZERAL"
destination = "L'HERMITAGE"
get_path(depart, destination, etapes="Lyon,Grenoble")


[{'depart': 'METZERAL',
  'depart_arrival_time': '20:53:00',
  'depart_departure_time': '20:53:00',
  'destination': 'COLMAR',
  'destination_arrival_time': '20:53:00',
  'destination_departure_time': '20:53:00',
  'trip': [{'city': 'METZERAL',
    'arrival_time': '20:19:00',
    'departure_time': '20:19:00'},
   {'city': 'MUHLBACH-SUR-MUNSTER',
    'arrival_time': '20:21:00',
    'departure_time': '20:22:00'},
   {'city': 'BREITENBACH-HAUT-RHIN',
    'arrival_time': '20:24:00',
    'departure_time': '20:24:00'},
   {'city': 'MUNSTER',
    'arrival_time': '20:27:00',
    'departure_time': '20:28:00'},
   {'city': 'MUNSTER',
    'arrival_time': '20:30:00',
    'departure_time': '20:31:00'},
   {'city': 'GRIESBACH-AU-VAL',
    'arrival_time': '20:33:00',
    'departure_time': '20:33:00'},
   {'city': 'WIHR-AU-VAL',
    'arrival_time': '20:36:00',
    'departure_time': '20:36:00'},
   {'city': 'TURCKHEIM',
    'arrival_time': '20:42:00',
    'departure_time': '20:44:00'},
   {'city': 'WIN