In [1]:
'''
The leg is structured as follows:
(trip_id, departure_node, departure_time, arrival_node, arrival_time, route_id, service_id)

trip_id = leg.iloc[0]
departure_node = leg[1]
departure_time = leg[2]
arrival_node = leg[3]
arrival_time = leg[4]
route_id = leg[5]
service_id = leg[6]
'''

'\nThe leg is structured as follows:\n(trip_id, departure_node, departure_time, arrival_node, arrival_time, route_id, service_id)\n\ntrip_id = leg.iloc[0]\ndeparture_node = leg[1]\ndeparture_time = leg[2]\narrival_node = leg[3]\narrival_time = leg[4]\nroute_id = leg[5]\nservice_id = leg[6]\n'

In [2]:
from datetime import datetime, timedelta
import pandas as pd
import math
import scipy.stats as stats
from data_preparation import prepare_data,import_data
pd.set_option('display.max_colwidth', None)

In [3]:
agency_df, stops_df, routes_df, trips_df, stop_times_df, calendar_df,calendar_dates_df = import_data()
legs_df = prepare_data(stops_df,trips_df,stop_times_df)

In [4]:
print(legs_df.head())

0                             (1.TA.1-1-j24-1.1.H, Bruck/Mur Bahnhof, 01:30:00, Kapfenberg Bahnhof, 01:34:00, 1-1-j24-1, TA+a4530)
1                     (1.TA.1-1-j24-1.1.H, Kapfenberg Bahnhof, 01:35:00, Kapfenberg Fachhochschule, 01:37:00, 1-1-j24-1, TA+a4530)
2          (1.TA.1-1-j24-1.1.H, Kapfenberg Fachhochschule, 01:38:00, St.Marein-St.Lorenzen Bahnhof, 01:41:00, 1-1-j24-1, TA+a4530)
3    (1.TA.1-1-j24-1.1.H, St.Marein-St.Lorenzen Bahnhof, 01:41:00, Allerheiligen-Mürzhofen Bahnhof, 01:44:00, 1-1-j24-1, TA+a4530)
4                 (1.TA.1-1-j24-1.1.H, Allerheiligen-Mürzhofen Bahnhof, 01:44:00, Kindberg Bahnhof, 01:47:00, 1-1-j24-1, TA+a4530)
Name: leg, dtype: object


In [5]:
# Function to calculate probability of successful transfer between two subsequent legs
def calculate_transfer_probability(prev_leg: pd.Series, next_leg: pd.Series) -> float:
    if prev_leg[5] == next_leg[5]:
        return 1
    else:
        prev_arrival_time = datetime.strptime(prev_leg[4], "%H:%M:%S")
        next_departure_time = datetime.strptime(next_leg[2], "%H:%M:%S")
        
        # Calculate transfer time in minutes
        transfer_time = (next_departure_time - prev_arrival_time).total_seconds() / 60
        if transfer_time < 2:
            return 0
        #If tripId = next tripId , then probability = 1 since the traveler remains on the same line
        else:
            return min(stats.gamma.cdf(transfer_time, a=2, scale=2),0.95)
            #return 0.5


In [6]:
stats.gamma.cdf(100, a=2, scale=2)

1.0

In [7]:
# i hope it is cumulative distribution
def calculate_cumulative_probability(itinerary) -> list[float]:
    cumulative_probabilities = [1] 
    for i in range(len(itinerary) - 1):
        prev_leg = itinerary[i]
        next_leg = itinerary[i+1]

        transfer_prob = calculate_transfer_probability(prev_leg, next_leg)
        cumulative_probabilities.append(transfer_prob)
    return cumulative_probabilities
  

In [8]:
# if there is a chance to have all succesful transfers, is the arrival time in time budget
def calculate_arrival_probability(itinerary, start_time, time_budget) -> int:
    # Check if all previous transfers were successful
    if math.prod(calculate_cumulative_probability(itinerary)) > 0:  # Only proceed if all transfers are successful( naka can be possibly made)
        # Calculate the actual arrival time at the final leg
        destination_leg = itinerary[-1]
        destination_arrival_time = destination_leg[4]
        
        start_time = datetime.strptime(start_time, "%H:%M:%S")
        destination_arrival_time = datetime.strptime(destination_arrival_time, "%H:%M:%S")

        total_travel_time = destination_arrival_time - start_time

        # Check if total travel time is within the budget
        if total_travel_time <= time_budget:
            return 1  # Probability of 1 if arrival is within the time budget
        else:
            return 0  # Probability of 0 if arrival is beyond the time budget
    else:
        return 0  # Probability of 0 if any prior transfer was unsuccessful

In [9]:
def primary_itinerary_reliability(itinerary, start_time, time_budget) -> float:
    
    cumulative_probabilities = calculate_cumulative_probability(itinerary)
    product_of_probabilities= math.prod(cumulative_probabilities)
    arrival_probability = calculate_arrival_probability(itinerary,start_time,time_budget)

    reliability = arrival_probability * product_of_probabilities
    return reliability

To change this

In [10]:
#im not sure how the "backup" will look like (like is it starting from the origin or from the missed transfer point)
# backup = (leg of the prim it where is transfer, [sequence of legs of backup starting from the next after transfer], reliability, arrival time)
#backup_legs = [list of backups excluding prim]
def backup_itinerary_reliability(itinerary, backup:tuple, start_time, time_budget) -> float:
    
    #change the format
    backup_itinerary = backup[0]
    arrival_probability = calculate_arrival_probability(backup_itinerary, start_time, time_budget)
    cumulative_probabilities = calculate_cumulative_probability(backup_itinerary)
    product_of_probabilities= math.prod(cumulative_probabilities)
    


    initial_transfer_prob = 1 # Default value in case no missed transfer is identified
    
    #but what if backup not to the last leg????
    for idx, leg in enumerate(itinerary[:-1]):  # Exclude last item since we're accessing i+1
        #basically comparing if it is the same leg
        if leg[3] == backup_itinerary[0][1]:
            prev_leg = itinerary[idx]
            missed_leg = itinerary[idx + 1]
            initial_transfer_prob = calculate_transfer_probability(prev_leg,missed_leg)

            primary_itinerary_rel_before_transfer = primary_itinerary_reliability(itinerary[:idx+1],start_time,time_budget)
            #print("init_tr",initial_transfer_prob)
            break
    
    
    
    backup_reliability = arrival_probability * product_of_probabilities * (1 - initial_transfer_prob)* primary_itinerary_rel_before_transfer
    
    return backup_reliability


In [11]:
#Reliability of a complete itinerary 
#Backups will be set of backups of prim itineary
# backup itineary= (leg of the prim it where is transfer, [sequence of legs of backup starting from the next after transfer], reliability, arrival time)
def itinerary_reliability(itinerary : list[pd.Series],Backups : list[tuple], start_time:str, time_budget: timedelta) -> float:
    primary_reliability = primary_itinerary_reliability(itinerary,start_time,time_budget)
    #print("prim",primary_reliability)
    if primary_reliability > 0 :
        added_reliability = 0
        for backup,reliability in Backups:
            #this will be fucked up, as we dont know yet how to make backups ( like on what format)
            backup_reliability = backup_itinerary_reliability(itinerary,backup,start_time,time_budget)
            added_reliability += backup_reliability
        #added_reliability = min(added_reliability, 1-primary_reliability)
        complete_reliability = primary_reliability + added_reliability
        return complete_reliability
    else:
        return 0.0

In [12]:
Bak = [(([('72.TA.10-A12-j24-1.18.R', 'Wiener Neustadt Hauptbahnhof', '15:30:00', 'Wien Meidling', '15:55:00', '10-A12-j24-1', 'TA+n4'), ('72.TA.10-A12-j24-1.18.R', 'Wien Meidling', '15:57:00', 'Wien Hauptbahnhof', '16:02:00', '10-A12-j24-1', 'TA+n4'), ('72.TA.10-A12-j24-1.18.R', 'Wien Hauptbahnhof', '16:12:00', 'Flughafen Wien Bahnhof', '16:27:00', '10-A12-j24-1', 'TA+n4')], timedelta(seconds=7020)), 0.13588822540043322), 
       (([('65.TA.4-CX9-W-j24-1.31.R', 'Wien Meidling', '16:04:00', 'Wien Hauptbahnhof', '16:08:00', '4-CX9-W-j24-1', 'TA+o2'), ('72.TA.10-A12-j24-1.18.R', 'Wien Hauptbahnhof', '16:12:00', 'Flughafen Wien Bahnhof', '16:27:00', '10-A12-j24-1', 'TA+n4')], timedelta(seconds=7020)), 0.2411650997112305), 
       (([('72.TA.10-A12-j24-1.18.R', 'Wien Hauptbahnhof', '16:12:00', 'Flughafen Wien Bahnhof', '16:27:00', '10-A12-j24-1', 'TA+n4')], timedelta(seconds=7020)), 0.1991482734714558)]

In [13]:
a = [('150.TA.20-SV5-L-j24-1.13.H', 'Schattendorf Kirchengasse', '14:43:00', 'Loipersbach-Schattendorf Bhf.', '14:48:00', '20-SV5-L-j24-1', 'TA+byk20'), ('42.TA.3-R93-A-j24-1.7.R', 'Loipersbach-Schattendorf Bhf.', '14:54:00', 'Marz-Rohrbach Bahnhof', '14:58:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Marz-Rohrbach Bahnhof', '14:59:00', 'Mattersburg Bahnhof', '15:01:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Mattersburg Bahnhof', '15:02:00', 'Mattersburg Nord Bahnhof', '15:03:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Mattersburg Nord Bahnhof', '15:04:00', 'Wiesen-Sigleß Bahnhof', '15:07:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Wiesen-Sigleß Bahnhof', '15:07:00', 'Bad Sauerbrunn Bahnhof', '15:11:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Bad Sauerbrunn Bahnhof', '15:13:00', 'Neudörfl Bahnhof', '15:16:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Neudörfl Bahnhof', '15:17:00', 'Katzelsdorf Bahnhof', '15:20:00', '3-R93-A-j24-1', 'TA+3j030'), ('42.TA.3-R93-A-j24-1.7.R', 'Katzelsdorf Bahnhof', '15:20:00', 'Wiener Neustadt Hauptbahnhof', '15:25:00', '3-R93-A-j24-1', 'TA+3j030'), ('30.TA.10-A12-j24-1.7.R', 'Wiener Neustadt Hauptbahnhof', '15:27:00', 'Wien Meidling', '15:55:00', '10-A12-j24-1', 'TA+o2p10'), ('45.TA.2-RX2-W-j24-1.27.R', 'Wien Meidling', '15:59:00', 'Wien Matzleinsdorfer Platz', '16:01:00', '2-RX2-W-j24-1', 'TA+n4'), ('45.TA.2-RX2-W-j24-1.27.R', 'Wien Matzleinsdorfer Platz', '16:02:00', 'Wien Hauptbahnhof', '16:03:00', '2-RX2-W-j24-1', 'TA+n4'), ('124.TA.10-A3-j24-1.29.H', 'Wien Hauptbahnhof', '16:09:00', 'Flughafen Wien Bahnhof', '16:24:00', '10-A3-j24-1', 'TA+7m520')]
res = itinerary_reliability(a,Bak,"14:30:00", timedelta(hours=2,minutes=10))
print(res)

0.7659684640644566


In [14]:
def get_available_service_ids(start_date):
    # Convert start_date to datetime and then to integer-like string (YYYYMMDD) without dashes
    start_date_datetime = datetime.strptime(start_date, "%Y-%m-%d")
    start_date_str = start_date_datetime.strftime("%Y%m%d")
    weekday = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"][start_date_datetime.weekday()]

    available_service_ids = []
    expetion_1 = []


    # Step 2: Check for regular service in calendar
    for _, service in calendar_df.iterrows():
        service_id = service["service_id"]
        # Check if date is within the start_date and end_date range
        if int(service["start_date"]) <= int(start_date_str) <= int(service["end_date"]):
            # Check if the service operates on this weekday
            if service[weekday] == 1:
                if service_id not in available_service_ids:
                    available_service_ids.append(service_id)
                    
                    
    # Step 1: Check for exceptions in calendar_dates
    exceptions = calendar_dates_df[calendar_dates_df["date"] == int(start_date_str)]
    
    # Go through each service_id to check for exceptions
    for _, exception in exceptions.iterrows():
        service_id = exception["service_id"]
        if exception["exception_type"] == 2:  # Service is added as an exception
            if service_id not in available_service_ids:
                available_service_ids.append(service_id)
                
        elif exception["exception_type"] == 1:  # Service is removed as an exception
            if service_id in available_service_ids:
                available_service_ids.remove(service_id)
                

    return available_service_ids

In [15]:
get_available_service_ids("2024-12-12")

['TA',
 'TA+00730',
 'TA+00b00',
 'TA+01720',
 'TA+01m20',
 'TA+01w20',
 'TA+02430',
 'TA+02t20',
 'TA+02x20',
 'TA+04030',
 'TA+05j20',
 'TA+08g20',
 'TA+0a230',
 'TA+0a720',
 'TA+0as10',
 'TA+0b700',
 'TA+0br00',
 'TA+0bv20',
 'TA+0C',
 'TA+0c130',
 'TA+0f130',
 'TA+0f730',
 'TA+0fj10',
 'TA+0g630',
 'TA+0g730',
 'TA+0gm10',
 'TA+0h730',
 'TA+0ha20',
 'TA+0i920',
 'TA+0k620',
 'TA+0k630',
 'TA+0k730',
 'TA+0l130',
 'TA+0l730',
 'TA+0m730',
 'TA+0n630',
 'TA+0n730',
 'TA+0nj20',
 'TA+0o130',
 'TA+0o730',
 'TA+0q130',
 'TA+0q630',
 'TA+0rt00',
 'TA+0rw20',
 'TA+0s730',
 'TA+0t130',
 'TA+0t730',
 'TA+0t900',
 'TA+0un00',
 'TA+0vq20',
 'TA+0w630',
 'TA+0x330',
 'TA+0x720',
 'TA+0x730',
 'TA+0xn20',
 'TA+0y130',
 'TA+0yk20',
 'TA+0z630',
 'TA+10b00',
 'TA+11k20',
 'TA+12230',
 'TA+12u20',
 'TA+13j00',
 'TA+13x20',
 'TA+15v20',
 'TA+15y20',
 'TA+16820',
 'TA+16z20',
 'TA+18g20',
 'TA+19430',
 'TA+19730',
 'TA+1a230',
 'TA+1ac10',
 'TA+1as10',
 'TA+1bp20',
 'TA+1br00',
 'TA+1c130',
 'TA+1c4

In [16]:
def filter_network(start_time,start_date,time_budget):
    available_sevices = get_available_service_ids(start_date)

    filtered_network= []
    start_time = datetime.strptime(start_time, "%H:%M:%S")
    end_time = start_time + time_budget
    print(end_time)
    for row in legs_df:
        leg_departure_time = datetime.strptime(row[2],"%H:%M:%S")
        leg_arrival_time = datetime.strptime(row[4],"%H:%M:%S")
        if leg_departure_time >= start_time and leg_departure_time<= end_time and leg_arrival_time <= end_time:
            service_id = row[6]
            if service_id in available_sevices:
                filtered_network.append(row)
    print("Network",len(filtered_network))
    return filtered_network

In [17]:
#not sure if here we should add time dependancy
def search_adjecent_legs(node_id,arrival_time,filtered_legs):
    arrival_time = datetime.strptime(arrival_time, "%H:%M:%S")
    adjecent_legs = []
    for i in range(0,len(filtered_legs)):
        leg = filtered_legs[i]
        leg_departure_time = datetime.strptime(leg[2],"%H:%M:%S")
        if leg[1] == node_id and leg_departure_time >= arrival_time :
            adjecent_legs.append(leg)
    #print("ADJ",len(adjecent_legs))
    return adjecent_legs

In [18]:
# need to add to arrival probability
def travel_time(itinerary, start_time):
    destination_leg = itinerary[-1]
    destination_arrival_time = destination_leg[4]
        
    start_time = datetime.strptime(start_time, "%H:%M:%S")
    destination_arrival_time = datetime.strptime(destination_arrival_time, "%H:%M:%S")

    total_travel_time = destination_arrival_time - start_time
    return total_travel_time

In [19]:
def is_transfer(itinerary) -> bool:
    prev_leg = itinerary[-2]
    next_leg = itinerary[-1]
    if prev_leg[5] == next_leg[5]:
        return False
    else:
        return True

In [20]:
def find_min_index(LIST:list):
    """Find the index of the itinerary with the shortest duration."""
    #adjust for bachup
    min_index = 0
    min_value =  LIST[0][1] 
    for i in range(1, len(LIST)):
        current_value = LIST[i][1]
        if current_value < min_value:
            min_value = current_value
            min_index = i

    return min_index

In [21]:
def update_visited_stops(visited_stops, leg):
    """Update the visited stops dictionary with a new leg."""
    destination_stop = leg[1]
    arrival_time = datetime.strptime(leg[4], "%H:%M:%S")
    trip_id = leg[5]
    visited_stops[destination_stop] = (arrival_time, trip_id)

In [22]:
def check_and_update_mrib(shortest_path, MRIB_reliability, MRIB, start_time, time_budget):
    """Check and update the most reliable path (MRIB) if applicable."""
    Backups = shortest_path[4][:]
    rel = itinerary_reliability(shortest_path[0], Backups, start_time, time_budget)
    if rel > MRIB_reliability:
        print("New most reliable path found")
        MRIB_reliability = rel
        MRIB = shortest_path
    return MRIB_reliability, MRIB

In [23]:
def can_visit_leg(leg, visited_stops, shortest_path_trip_id, destination_node):


    arrival_stop = leg[3]  # Get destination stop ID
    arrival_time = datetime.strptime(leg[4], "%H:%M:%S")  # Arrival time
    trip_id = leg[5]  # Assuming trip_id is at index 0
    if destination_node != arrival_stop:
        if arrival_stop in visited_stops:
            visited_time, visited_trip_id = visited_stops[arrival_stop]
            if trip_id == shortest_path_trip_id:
                if arrival_time <= visited_time:
                    visited_stops[arrival_stop] = (arrival_time, trip_id)
                return True
            else : 
                if arrival_time > visited_time:
                    return False
                elif arrival_time == visited_time and trip_id == visited_trip_id:
                    return True
                elif arrival_time == visited_time:
                    return False
                else:
                    visited_stops[arrival_stop] = (arrival_time, trip_id)
                    return True

        else:
            visited_stops[arrival_stop] = (arrival_time, trip_id)
            return True
    return True

In [24]:
def origin_node_filtering(origin_node: int, start_time: str, time_budget: timedelta, legs: list):
    """
    Filters legs to ensure departure from the origin occurs within the first 25% of the time budget.

    Parameters:
    origin_node (int): The origin stop/node.
    start_time (str): The starting time in "%H:%M:%S" format.
    time_budget (timedelta): The total allowed time budget.
    legs (list): The list of available legs to filter.

    Returns:
    list: Filtered legs meeting the departure condition.
    """
    
    # Calculate the 20% time limit from the time budget
    max_departure_time = datetime.strptime(start_time, "%H:%M:%S") + time_budget * 0.15

    # Filter legs based on departure time
    filtered_legs = []
    for leg in legs:
        departure_time = datetime.strptime(leg[2], "%H:%M:%S")  # Assuming departure time is stored in this location
        if departure_time <= max_departure_time:
            filtered_legs.append(leg)

    return filtered_legs


In [25]:
def transform_route_info(MRIB,MRIB_reliability,Backups):
    primary_itinerary = MRIB  # Primary itinerary (first item in best_result_fast)
    reliability = MRIB_reliability  # Reliability score
    duration = MRIB[1]  # Duration of the trip
    arrival_time = primary_itinerary[-1][4]  # Arrival time

    
    # Initialize the grouped_routes list
    grouped_routes = []
    last_route = None  # To track the previous route for grouping

    # Step 1: Process the primary itinerary
    for i in range(len(primary_itinerary)):
        current_stop = primary_itinerary[i][1]
        route_id = primary_itinerary[i][5]
        departure_time = primary_itinerary[i][2]
        arrival_time = primary_itinerary[i][4]
        next_stop = primary_itinerary[i][3]

        # Grouping the routes based on the route ID
        if route_id == last_route:
            grouped_routes[-1]["stops"].append((next_stop, arrival_time))
        else:
            grouped_routes.append({
                "route_id": route_id,
                "start_stop": current_stop,
                "departure_time": departure_time,
                "stops": [(next_stop, arrival_time)]
            })
        last_route = route_id

    # Step 2: Print the grouped routes for the primary itinerary
    for segment in grouped_routes:
        start = segment["start_stop"]
        dep_time = segment["departure_time"]
        route = segment["route_id"]
        stops = " → ".join([f"{stop} (Ankunft: {arr})" for stop, arr in segment["stops"]])
        print(f"  🚆 {start} (Abfahrt: {dep_time}) → {stops} mit Linie {route}")

    # Step 3: Print additional details
    print(f"\n🎯 Endstation: {primary_itinerary[-1][3]} (Ankunft: {arrival_time})")
    print(f"🔹 Gesamt-Zuverlässigkeit der Route: {reliability:.2f}\n")
    
    # Step 4: Process and print the backup routes
    if Backups:
        print("🔄 Backups:")
        for backup in Backups:
            backup_path = backup[0][0]
            backup_reliability = backup[1]
            grouped_backup_routes = []
            last_backup_route = None
            transfer_point = backup_path[0][1]

            print(f"\nBackup from {transfer_point} :")
            for i in range(len(backup_path)):
                current_stop = backup_path[i][1]
                route_id = backup_path[i][5]
                departure_time = backup_path[i][2]
                arrival_time = backup_path[i][4]
                next_stop = backup_path[i][3]

                # Group backup routes by route ID
                if route_id == last_backup_route:
                    grouped_backup_routes[-1]["stops"].append((next_stop, arrival_time))
                else:
                    grouped_backup_routes.append({
                        "route_id": route_id,
                        "start_stop": current_stop,
                        "departure_time": departure_time,
                        "stops": [(next_stop, arrival_time)]
                    })
                last_backup_route = route_id

            # Print backup route segments
            for segment in grouped_backup_routes:
                start = segment["start_stop"]
                dep_time = segment["departure_time"]
                route = segment["route_id"]
                stops = " → ".join([f"{stop} (Ankunft: {arr})" for stop, arr in segment["stops"]])
                print(f"  🚆 {start} (Abfahrt: {dep_time}) → {stops} mit Linie {route}")

            print(f"🔹 Gesamt-Zuverlässigkeit der Backup-Route: {backup_reliability:.2f}\n")
            


In [26]:
def find_primary_path(origin_node: int,destination_node: int, start_time : str, time_budget: timedelta, filtered_legs):
    
    '''Initial setup'''
    MRIB_reliability = 0
    LISTofTRIPS = []
    LISTofCompletedTRIPS = []
    visited_stops = {}
    new_time_budget = time_budget
    n = 0

    visited_stops[origin_node] = (datetime.strptime(start_time, "%H:%M:%S"), "")


    ''' Search initial adjacent legs'''
    adjecent_legs = search_adjecent_legs(origin_node,start_time,filtered_legs)
    filtered_adj_legs = origin_node_filtering(origin_node,start_time,time_budget,adjecent_legs)
    

    if origin_node == "Wien Hauptbahnhof" :
        
        filtered_adj_legs = [
            leg for leg in filtered_adj_legs 
            if can_visit_leg(leg, visited_stops, "",destination_node)
        ]
                
        filtered_adj_legs = [
            leg for leg in adjecent_legs 
            if can_visit_leg(leg, visited_stops,"",destination_node)
        ]
        
        
    for leg in filtered_adj_legs:
        itinerary  = [leg]
        duration = travel_time(itinerary,start_time)
        reliability = primary_itinerary_reliability(itinerary,start_time,time_budget)
        if reliability > 0 and timedelta(seconds=0) < duration <= time_budget:
            LISTofTRIPS.append([itinerary,duration])
        #update_visited_stops(visited_stops, leg)
    #print("visited stops", visited_stops)
    #print(len(LISTofTRIPS))
    
    '''Main Loop'''
    while LISTofTRIPS:
        #n +=1 #just count
        #print("New iteration", n)

        visited_stops_n = visited_stops.copy()
        passed_stops_n = []

        # Find the shortest itinerary(no, min function is not working)
        min_index = find_min_index(LISTofTRIPS)
        shortest_path = LISTofTRIPS.pop(min_index)
        

        tail = shortest_path[0][-1] # last leg of trip

        # Check if destination is reached
        if tail[3] == destination_node:
            if len(LISTofCompletedTRIPS) == 0:
                new_time_budget = min(1.2*shortest_path[1],time_budget)
                print("newTB",new_time_budget)
            # list of completed trips
            #reliability = primary_itinerary_reliability(shortest_path[0],start_time,time_budget)
            #if reliability >= MRIB_reliability:
            print("dest_reached")
            LISTofCompletedTRIPS.append(shortest_path)
            #continue
            
            if len(LISTofCompletedTRIPS) <= 15:
                continue
            else:
                break
            

        for leg in shortest_path[0]:
            update_visited_stops(visited_stops_n, leg)
            destination_stop = leg[3]
            passed_stops_n.append(destination_stop)
        
        # Explore connections from the current tail
        #print("Search for the next connection")
        #LIST_NEXT = []
        #set of all legs adjacent to tail leg m
        next_legs = search_adjecent_legs(tail[3],tail[4],filtered_legs)
        #that was some idea, but bad idea
        #next_legs_filtered = filter_adjecent_legs(next_legs,tail.iloc[0])
        #another idea of keeping track of visited nodes
        
        
        # no passed stops!
        next_legs = [leg for leg in next_legs if leg[3] not in passed_stops_n]

        next_legs = [
            leg for leg in next_legs 
            if leg[5] == tail[5] or 
            (datetime.strptime(leg[2], "%H:%M:%S") >= (datetime.strptime(tail[4], "%H:%M:%S") + timedelta(minutes=2)) 
            and datetime.strptime(leg[2], "%H:%M:%S") <=(datetime.strptime(tail[4], "%H:%M:%S") + timedelta(minutes=20)))
        ]
        
        #print("before filter",len(next_legs))
        next_legs = [
            leg for leg in next_legs 
            if can_visit_leg(leg, visited_stops_n, tail[5],destination_node)
        ]
        #print("1st filter",len(next_legs))
        next_legs = [
            leg for leg in next_legs 
            if can_visit_leg(leg, visited_stops_n,tail[5],destination_node)
            ]
        
        
        
        for leg in next_legs:
            itinerary = shortest_path[0] + [leg] #combine previous legs and adjecent
            duration = travel_time(itinerary,start_time)
            reliability = primary_itinerary_reliability(itinerary,start_time,new_time_budget)
            if reliability > 0 and timedelta(seconds=0) < duration <= new_time_budget:
                LISTofTRIPS.append([itinerary,duration])
            #update_visited_stops(visited_stops_n, leg)
    print("Primary_path_found")
    return LISTofCompletedTRIPS, visited_stops
       

In [27]:
def backup_search(shortest_next_itinerary,destination_node,start_time,time_budget,filtered_legs,visited_stops):

    #Initial Setup
    MRB_reliability = 0
    MRB = None
    LIST_Backups = []
    new_time_budget = time_budget
    
    visited_stops_b = visited_stops.copy()
    transfer_leg = shortest_next_itinerary[-2]
    primary_itinerary = shortest_next_itinerary[:-1]
    missed_leg_departure = datetime.strptime(shortest_next_itinerary[-1][2], "%H:%M:%S")

    passed_stops_b =[]
    for leg in primary_itinerary:
        update_visited_stops(visited_stops_b,leg)
        destination_stop = leg[3]
        passed_stops_b.append(destination_stop)

    transfer_point = transfer_leg[3]


    adjecent_legs = search_adjecent_legs(transfer_point,primary_itinerary[-1][2],filtered_legs)

    adjecent_legs = [leg for leg in adjecent_legs if leg[3] not in passed_stops_b]

    adjecent_legs = [
            leg for leg in adjecent_legs 
            if datetime.strptime(leg[2], "%H:%M:%S") >= (missed_leg_departure + timedelta(minutes=2)) 
        ]
    
    adjecent_legs = [
        leg for leg in adjecent_legs 
        if can_visit_leg(leg, visited_stops_b, "",destination_node)
    ]
            
    adjecent_legs = [
        leg for leg in adjecent_legs 
        if can_visit_leg(leg, visited_stops_b,"",destination_node)
    ]


    for leg in adjecent_legs:
        backup_legs = [leg]
        b_duration = travel_time(backup_legs,start_time)
        backup_full = (backup_legs,b_duration)
        # maybe compare to MRIB rel
        if timedelta(seconds=0) < b_duration <= time_budget:
            LIST_Backups.append(backup_full)
            
        #update_visited_stops(visited_stops_b, b_tail)
    

    #Backup loop
    while len(LIST_Backups) > 0:  
        min_index_b = 0
        min_value_b =  LIST_Backups[0][1]# durarion 
        for i in range(1, len(LIST_Backups)):
            current_value_b = LIST_Backups[i][1]
            if current_value_b < min_value_b:
                min_value_b = current_value_b
                min_index_b = i
    	
        shortest_backup = LIST_Backups.pop(min_index_b)
        #print("Shortest backup",shortest_backup)
        
        
        for leg in shortest_backup[0]:
            destination_stop = leg[3]
            passed_stops_b.append(destination_stop)

        b_tail = shortest_backup[0][-1]

        if b_tail[3] == destination_node:
            if MRB == None:
                new_time_budget = min(1.2*shortest_backup[1],time_budget)
            rel = backup_itinerary_reliability(shortest_next_itinerary,shortest_backup,start_time,time_budget)
            if round(rel, 4) > MRB_reliability:
                MRB_reliability = round(rel, 4)
                MRB = shortest_backup
                #print("dest reached, MRB upd",MRB_reliability)
        else:
            
            next_legs_b = search_adjecent_legs(b_tail[3],b_tail[4],filtered_legs)
            
            next_legs_b = [leg for leg in next_legs_b if leg[3] not in passed_stops_b]
            
            next_legs_b = [
            leg for leg in next_legs_b 
            if leg[5] == b_tail[5] or datetime.strptime(leg[2], "%H:%M:%S") >= (datetime.strptime(b_tail[4], "%H:%M:%S") + timedelta(minutes=2))
        ]
            next_legs_b = [
                leg for leg in next_legs_b 
                if can_visit_leg(leg, visited_stops_b, b_tail[5],destination_node)
            ]
            
            next_legs_b = [
                leg for leg in next_legs_b 
                if can_visit_leg(leg, visited_stops_b, b_tail[5],destination_node)
            ]


            for leg in next_legs_b:
                backup_legs = shortest_backup[0][:]
                backup_legs.append(leg)
                b_duration = travel_time(backup_legs,start_time)
                backup_full = (backup_legs,b_duration)
                b_reliability = backup_itinerary_reliability(shortest_next_itinerary,backup_full,start_time,new_time_budget)
                if timedelta(seconds=0) < b_duration <= new_time_budget and round(b_reliability,4) >MRB_reliability:
                    LIST_Backups.append(backup_full)
                   
                #else:
                    #print("Backup Reliability 0 -2")
    
    # end of while len(LIST_Backups) > 0 loop       
    #print("MRB", MRB)
    return MRB,MRB_reliability
                 
            
# end of while len(LIST_Backups) > 0 loop     

In [28]:
def find_path(origin_node,destination_node,start_datetime,time_budget):
    MRIB_reliability = 0
    MRIB = None
    MRIB_Backups = None
    start_date, start_time = start_datetime.split()
    transfer_dict = {}
    filtered_legs = filter_network(start_time,start_date,time_budget)
    completed_primary_trips, visited_stops = find_primary_path(origin_node,destination_node,start_time,time_budget,filtered_legs)
    print(completed_primary_trips)
    for trips in completed_primary_trips:
        primary_itinerary = trips[0]  # Extract the primary itinerary
        growing_itinerary = []  # Start with an empty list for the growing itinerary
        Backups = []
        
        print("Prim_it", primary_itinerary)
        for leg in primary_itinerary:
            growing_itinerary.append(leg)  # Add the current leg to the growing itinerary
            
            # Only start checking for transfers once there are at least two legs
            if len(growing_itinerary) > 1:

                if is_transfer(growing_itinerary):
                    transfer = (
                        growing_itinerary[-2][3],
                        growing_itinerary[-2][4],
                        growing_itinerary[-1][1],
                        growing_itinerary[-1][2])
                    
                    MRB_reliability = 0
                    MRB = None
                    if transfer not in transfer_dict.keys():
                        MRB, MRB_reliability = backup_search(growing_itinerary,destination_node,start_time,time_budget,filtered_legs,visited_stops)
                        transfer_dict[transfer] = MRB,MRB_reliability
                    else:
                        MRB,MRB_reliability = transfer_dict.get(transfer)
                    print("MRB",MRB)
                    if MRB: 
                        Backups.append((MRB,MRB_reliability))

        reliability = itinerary_reliability(primary_itinerary,Backups,start_time,time_budget)
        if round(reliability, 4) > MRIB_reliability:
            MRIB_reliability = round(reliability, 4)
            MRIB = primary_itinerary
            MRIB_Backups = Backups
    print(transfer_dict)
    return MRIB_reliability, MRIB, MRIB_Backups

In [29]:
origin_node = "Bruck/Mur Bahnhof"  #"Bischofshofen Bahnhof" #"Wien Blumental" #"Laa/Thaya Bahnhof" #"Schattendorf Kirchengasse" #"Gmunden Bahnhof" # for now the departure node is 1
destination_node = "Kitzbühel Bahnhof" #"Laa/Thaya Bahnhof" #"Enns Bahnhof" #"Wien Rennweg" #"Wien Leopoldau" #Prinzersdorf Bahnhof"
start_time = "2024-12-13 10:00:00"
time_budget = timedelta(hours=10, minutes = 20)

MRIB_reliability, MRIB, Backups = find_path(origin_node,destination_node,start_time,time_budget)

1900-01-01 20:20:00
Network 90855


KeyboardInterrupt: 

In [72]:
print(MRIB_reliability, MRIB)
for i in Backups:
    print(i)


0.6233 [('137.TA.20-SV5-L-j24-1.13.H', 'Schattendorf Kirchengasse', '14:43:00', 'Loipersbach-Schattendorf Bhf.', '14:48:00', '20-SV5-L-j24-1', 'TA+byk20#1'), ('42.TA.3-R93-A-j24-1.7.R', 'Loipersbach-Schattendorf Bhf.', '14:54:00', 'Marz-Rohrbach Bahnhof', '14:58:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Marz-Rohrbach Bahnhof', '14:59:00', 'Mattersburg Bahnhof', '15:01:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Mattersburg Bahnhof', '15:02:00', 'Mattersburg Nord Bahnhof', '15:03:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Mattersburg Nord Bahnhof', '15:04:00', 'Wiesen-Sigleß Bahnhof', '15:07:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Wiesen-Sigleß Bahnhof', '15:07:00', 'Bad Sauerbrunn Bahnhof', '15:11:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Bad Sauerbrunn Bahnhof', '15:13:00', 'Neudörfl Bahnhof', '15:16:00', '3-R93-A-j24-1', 'TA+cs730'), ('42.TA.3-R93-A-j24-1.7.R', 'Neudörfl Bahnh

In [73]:
a =pr

NameError: name 'pr' is not defined

In [76]:
print(transform_route_info(MRIB,MRIB_reliability,Backups))

  🚆 Flughafen Wien Bahnhof (Abfahrt: 14:33:00) → Wien Hauptbahnhof (Ankunft: 14:48:00) mit Linie 10-A1-j24-1
  🚆 Wien Hauptbahnhof (Abfahrt: 14:50:00) → Wien Matzleinsdorfer Platz (Ankunft: 14:52:00) → Wien Meidling (Ankunft: 14:55:00) mit Linie 1-S3-W-j24-1
  🚆 Wien Meidling (Abfahrt: 15:02:00) → Baden Bahnhof (Ankunft: 15:14:00) → Wiener Neustadt Hauptbahnhof (Ankunft: 15:27:00) mit Linie 4-CX9-W-j24-1
  🚆 Wiener Neustadt Hauptbahnhof (Abfahrt: 15:31:00) → Mattersburg Bahnhof (Ankunft: 15:44:00) → Marz-Rohrbach Bahnhof (Ankunft: 15:47:00) → Loipersbach-Schattendorf Bhf. (Ankunft: 15:53:00) mit Linie 2-R93-A-j24-1
  🚆 Loipersbach-Schattendorf Bhf. (Abfahrt: 16:00:00) → Schattendorf Kirchengasse (Ankunft: 16:06:00) mit Linie 20-SV5-L-j24-1

🎯 Endstation: Schattendorf Kirchengasse (Ankunft: 16:06:00)
🔹 Gesamt-Zuverlässigkeit der Route: 0.49

🔄 Backups:

Backup from Wien Hauptbahnhof :
  🚆 Wien Hauptbahnhof (Abfahrt: 14:53:00) → Wien Meidling (Ankunft: 14:58:00) → Baden Bahnhof (Ankunft:

🚆 Laa/Thaya Bahnhof (Abfahrt: 09:09:00) → Kottingneusiedl Bahnhof (Ankunft: 09:13:00) → Staatz Bahnhof (Ankunft: 09:15:00) → Enzersdorf bei Staatz Bahnhof (Ankunft: 09:19:00) → Frättingsdorf Bahnhof (Ankunft: 09:23:00) → Hörersdorf Bahnhof (Ankunft: 09:27:00) → Siebenhirten (NÖ) Haltestelle (Ankunft: 09:29:00) → Mistelbach Stadt (Ankunft: 09:34:00) → Mistelbach Bahnhof (Ankunft: 09:36:00) → Wolkersdorf Bahnhof (Ankunft: 09:58:00) → Obersdorf Bahnhof (Ankunft: 10:03:00) → Wien Leopoldau (Ankunft: 10:13:00) mit Linie 58.TA.2-RX2-W-j24-1.31.H 
🚆 Wien Leopoldau (Abfahrt: 10:20:00) → Wien Siemensstraße (Ankunft: 10:22:00) → Wien Floridsdorf (Ankunft: 10:26:00) mit Linie 368.TA.1-S1-W-j24-1.121.H 
🚆 Wien Floridsdorf (Abfahrt: 10:30:00) → Wien Handelskai (Ankunft: 10:32:00) → Wien Traisengasse (Ankunft: 10:35:00) → Wien Praterstern (Ankunft: 10:37:00) → Wien Mitte-Landstraße (Ankunft: 10:41:00) → Wien Rennweg (Ankunft: 10:45:00) → Wien Quartier Belvedere (Ankunft: 10:47:00) → Wien Hauptbahnhof (Ankunft: 10:49:00) mit Linie 365.TA.1-S3-W-j24-1.105.H 
🎯 Endstation: Wien Hauptbahnhof (Ankunft: 10:49:00) 🔹 Gesamt-Zuverlässigkeit der Route: 0.94 
🔄 Backups: Backup from Wien Leopoldau : 
🚆 Wien Leopoldau (Abfahrt: 10:29:00) → Wien Siemensstraße (Ankunft: 10:31:00) → Wien Floridsdorf (Ankunft: 10:35:00) mit Linie 187.TA.1-S7-W-j24-1.39.H 
🚆 Wien Floridsdorf (Abfahrt: 10:39:00) → Wien Handelskai (Ankunft: 10:41:00) → Wien Traisengasse (Ankunft: 10:44:00) → Wien Praterstern (Ankunft: 10:46:00) → Wien Mitte-Landstraße (Ankunft: 10:50:00) → Wien Rennweg (Ankunft: 10:54:00) → Wien Quartier Belvedere (Ankunft: 10:56:00) → Wien Hauptbahnhof (Ankunft: 10:58:00) mit Linie 393.TA.2-RX1-W-j24-1.157.H 
🔹 Gesamt-Zuverlässigkeit der Backup-Route: 0.08 

Backup from Wien Floridsdorf : 
🚆 Wien Floridsdorf (Abfahrt: 10:33:00) → Wien Handelskai (Ankunft: 10:35:00) → Wien Traisengasse (Ankunft: 10:38:00) → Wien Praterstern (Ankunft: 10:40:00) → Wien Mitte-Landstraße (Ankunft: 10:44:00) → Wien Rennweg (Ankunft: 10:48:00) → Wien Quartier Belvedere (Ankunft: 10:50:00) → Wien Hauptbahnhof (Ankunft: 10:52:00) mit Linie 215.TA.4-CX9-W-j24-1.102.H 
🔹 Gesamt-Zuverlässigkeit der Backup-Route: 0.35