In [10]:
import pandas as pd
from collections import defaultdict

def generate_dynamic_route_graph(data, start_stop, start_time, target_stop=None):
    graph = defaultdict(list)
    paths = set()
    path_times = {}
    
    def build_line_path(stop, time, trip_identifier):
        next_stops = data[
            (data['TRIP_IDENTIFIER'] == trip_identifier) &
            (data['PLANNED_ARRIVAL'] > time)
        ].sort_values('PLANNED_ARRIVAL')
        
        transfers = data[
            (data['STOP_ID'] == stop) &
            (data['PLANNED_ARRIVAL'] > time) &
            (data['TRIP_IDENTIFIER'] != trip_identifier)
        ].sort_values('PLANNED_ARRIVAL')
        
        if not next_stops.empty:
            next_stop_data = next_stops.iloc[0]
            next_stop = next_stop_data['STOP_ID']
            next_time = next_stop_data['PLANNED_ARRIVAL']
            
            transition = (1, next_stop, next_time, trip_identifier)
            if transition not in graph[stop]:
                graph[stop].append(transition)
                build_line_path(next_stop, next_time, trip_identifier)
        
        for _, transfer in transfers.iterrows():
            new_trip_identifier = transfer['TRIP_IDENTIFIER']
            new_time = transfer['PLANNED_ARRIVAL']
            
            transition = (0, stop, new_time, new_trip_identifier)
            if transition not in graph[stop]:
                graph[stop].append(transition)
                build_line_path(stop, new_time, new_trip_identifier)
    
    def find_path(stop, time, trip_identifier, path=None, times=None, made_transfer=False):
        if path is None:
            path = []
        if times is None:
            times = []
            
        current_path = path + [f"{trip_identifier}'{stop}'"]
        current_times = times + [time]
        
        if stop == target_stop:
            path_str = '-'.join(current_path)
            paths.add(path_str)
            path_times[path_str] = current_times
            return
            
        for is_same_line, next_stop, next_time, next_trip_identifier in graph[stop]:
            if next_time <= time:
                continue
                
            if is_same_line == 0:
                if not made_transfer:
                    find_path(next_stop, next_time, next_trip_identifier, current_path, current_times, True)
            elif is_same_line == 1 and trip_identifier == next_trip_identifier:
                find_path(next_stop, next_time, next_trip_identifier, current_path, current_times, made_transfer)
    
    initial_routes = data[
        (data['STOP_ID'] == start_stop) & 
        (data['PLANNED_ARRIVAL'] >= start_time)
    ].sort_values('PLANNED_ARRIVAL')
    
    if not initial_routes.empty:
        initial_route = initial_routes.iloc[0]
        initial_trip_identifier = initial_route['TRIP_IDENTIFIER']
        initial_time = initial_route['PLANNED_ARRIVAL']
        
        build_line_path(start_stop, initial_time, initial_trip_identifier)
        find_path(start_stop, initial_time, initial_trip_identifier)
    
    formatted_graph = "Graph:\n"
    for stop in sorted(graph.keys()):
        formatted_graph += f"'{stop}': {graph[stop]}\n"
    
    # Format paths with times
    formatted_paths = []
    for path in sorted(list(paths)):
        times = path_times[path]
        stops = path.split('-')
        path_with_times = ' -> '.join([f"{stop}({time})" for stop, time in zip(stops, times)])
        formatted_paths.append(path_with_times)
    
    return formatted_graph, formatted_paths

# Test verisi
data = pd.DataFrame({
    'STOP_ID': ['A', 'B', 'C', 'D', 'E', 'B', 'C', 'D', 'E'],
    'TRIP_IDENTIFIER': ['T1', 'T1', 'T1', 'T1', 'T1', 'T2', 'T2', 'T2', 'T2'],
    'PLANNED_ARRIVAL': [800, 820, 840, 860, 880, 900, 920, 940, 960],
    'LINE_ID': ['L1', 'L1', 'L1', 'L1', 'L1', 'L2', 'L2', 'L2', 'L2']
})

start_stop = 'A'
start_time = 800
target_stop = 'E'
graph, paths = generate_dynamic_route_graph(data, start_stop, start_time, target_stop)
print(graph)
print("\nPossible Paths:")
for path in paths:
    print(path)

Graph:
'A': [(1, 'B', 820, 'T1')]
'B': [(1, 'C', 840, 'T1'), (0, 'B', 900, 'T2'), (1, 'C', 920, 'T2')]
'C': [(1, 'D', 860, 'T1'), (0, 'C', 920, 'T2'), (1, 'D', 940, 'T2')]
'D': [(1, 'E', 880, 'T1'), (0, 'D', 940, 'T2'), (1, 'E', 960, 'T2')]
'E': [(0, 'E', 960, 'T2')]


Possible Paths:
T1'A'(800) -> T1'B'(820) -> T1'C'(840) -> T1'D'(860) -> T1'E'(880)
T1'A'(800) -> T1'B'(820) -> T1'C'(840) -> T1'D'(860) -> T2'D'(940) -> T2'E'(960)
T1'A'(800) -> T1'B'(820) -> T1'C'(840) -> T2'C'(920) -> T2'D'(940) -> T2'E'(960)
T1'A'(800) -> T1'B'(820) -> T2'B'(900) -> T2'C'(920) -> T2'D'(940) -> T2'E'(960)


In [4]:
import duckdb
import pandas as pd

# Connect to the database
connection = duckdb.connect("transport_data.db", read_only=False)

In [5]:
#Filter 1 month data
query = f""" 
SELECT *
FROM services 
WHERE PRODUCT_ID='Zug' 
AND ARRIVAL_PREDICTION_STATUS='REAL' 
AND DEPARTURE_PREDICTION_STATUS='REAL' 
AND strftime('%m',OPERATING_DAY)='10'
"""
df_Zug = connection.execute(query).df()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [6]:
#select one day 
df_filtered=df_Zug[df_Zug['OPERATING_DAY']=='2024-10-02']

In [7]:
#create time column
df_filtered['time']=pd.to_datetime(df_filtered['DEPARTURE_TIME']).dt.time

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['time']=pd.to_datetime(df_filtered['DEPARTURE_TIME']).dt.time


In [8]:
#Filter related time interval
mask = (df_filtered['time'] >= pd.to_datetime('09:00').time()) & (df_filtered['time'] <= pd.to_datetime('11:00').time())
df_filtered = df_filtered[mask]

In [12]:
data =  df_filtered[['BPUIC', 'TRIP_IDENTIFIER', 'time', 'LINE_ID']].rename(columns={
   'BPUIC': 'STOP_ID',
   'time': 'PLANNED_ARRIVAL'
}).sort_values(['TRIP_IDENTIFIER','PLANNED_ARRIVAL'])
# Test
start_stop = 8503000
start_time = pd.to_datetime('09:00').time()
target_stop =8500218
graph, paths = generate_dynamic_route_graph(data, start_stop, start_time, target_stop)
print(graph)
print("\nPossible Paths:")
print('\n'.join(paths))

Graph:
'8500010': [(1, 8500136, datetime.time(10, 40), 'ch:1:sjyid:100001:17334-001')]
'8500020': [(0, 8500020, datetime.time(10, 54), 'ch:1:sjyid:100001:17239-002'), (1, 8500021, datetime.time(10, 58), 'ch:1:sjyid:100001:17239-002'), (1, 8500010, datetime.time(10, 37), 'ch:1:sjyid:100001:17334-001'), (0, 8500020, datetime.time(10, 24), 'ch:1:sjyid:100001:17137-001'), (1, 8500021, datetime.time(10, 28), 'ch:1:sjyid:100001:17137-001'), (0, 8500020, datetime.time(10, 37), 'ch:1:sjyid:100001:17335-001'), (1, 8500021, datetime.time(10, 41), 'ch:1:sjyid:100001:17335-001'), (0, 8500020, datetime.time(10, 49), 'ch:1:sjyid:100001:17336-001'), (0, 8500020, datetime.time(10, 34), 'ch:1:sjyid:100001:17136-001')]
'8500021': [(1, 8500020, datetime.time(10, 49), 'ch:1:sjyid:100001:17336-001'), (1, 8517131, datetime.time(11, 0), 'ch:1:sjyid:100001:17239-002'), (0, 8500021, datetime.time(10, 58), 'ch:1:sjyid:100001:17239-002'), (1, 8500020, datetime.time(10, 21), 'ch:1:sjyid:100001:17334-001'), (1, 85