In [None]:
import pandas as pd

# Load all input tables: the cleaned routes table plus the trips,
# stop-times, stops and calendar-dates tables from the trains data folder.
# Each path is read with pandas.read_csv and its default settings, in the
# order listed; the results are bound to the names on the left in that order.
routes, trips, stop_times, stops, calendar_dates = map(pd.read_csv, (
    'cleaned_data/routes_cleaned.csv',
    'trains_data/trips_all_trains.txt',
    'trains_data/stop_times_all_trains.txt',
    'trains_data/stops_all_trains.txt',
    'trains_data/calendar_dates_all_trains.txt',
))

In [None]:
import networkx as nx

# Build a directed station graph: every consecutive pair of stations on a
# route becomes an edge. `route_long_name` is split on '|' and each piece is
# stripped of surrounding whitespace to obtain the station names.
G = nx.DiGraph()

# Walk only the two columns that are needed instead of DataFrame.iterrows(),
# which materialises a full Series object for every row.
for route_id, route_long_name in zip(routes['route_id'], routes['route_long_name']):
    route = [station.strip() for station in route_long_name.split('|')]
    # NOTE: a DiGraph stores a single edge per ordered station pair, so when
    # several routes share the same leg the attributes written by the last
    # row processed replace the earlier ones.
    for from_station, to_station in zip(route, route[1:]):
        G.add_edge(from_station, to_station,
                   route_long_name=route_long_name,
                   # Keep the route id on the edge so a path can later be
                   # grouped into per-route legs.
                   route_id=route_id)

# Find the shortest path (fewest hops: no edge weight is passed).
user_origin = 'Lille'
user_destination = 'Paris'

# Always bind `shortest_path`, even when the lookup fails. Otherwise code
# that reads it afterwards raises NameError on a fresh kernel, or silently
# reuses a stale path left over from a previous run. An empty list means
# "no path available".
shortest_path = []
if user_origin in G and user_destination in G:
    try:
        shortest_path = nx.shortest_path(
            G, source=user_origin, target=user_destination)
    except nx.NetworkXNoPath:
        print(f'No path found from {user_origin} to {user_destination}.')
else:
    print(f'Either source {user_origin} or target {user_destination} is not in the graph.')


In [None]:
# Collapse the hop-by-hop shortest path into legs: a leg is a maximal run of
# consecutive hops whose edges carry the same route_id.

# State of the leg currently being accumulated.
current_route_id = None
first_station = None
last_station = None

# Results.
optimizaed_shortest_path = []  # stations where a leg starts or ends, in order, no repeats
route_ids = []                 # route id of every recorded leg
itinerary = []                 # one {'start_station', 'end_station'} dict per leg


def _record_leg(start, end, route_id):
    """Print one finished leg and append it to the result lists."""
    print('Train route:', start, '->',
          end, ', Route ID:', route_id)
    route_ids.append(route_id)
    # The end of a leg is a change-over (or final) station; list it once.
    if end not in optimizaed_shortest_path:
        optimizaed_shortest_path.append(end)
    itinerary.append({'start_station': start, 'end_station': end})


# Visit each hop (pair of adjacent stations) along the path.
for hop_start, hop_end in zip(shortest_path, shortest_path[1:]):
    hop_route_id = G.get_edge_data(hop_start, hop_end)['route_id']

    # Same route as the running leg: just extend it.
    if hop_route_id == current_route_id:
        last_station = hop_end
        continue

    # Route changed: close the running leg (if any) before starting a new one.
    if first_station is not None:
        _record_leg(first_station, last_station, current_route_id)

    current_route_id = hop_route_id
    first_station, last_station = hop_start, hop_end
    # The start of the new leg is also listed once.
    if first_station not in optimizaed_shortest_path:
        optimizaed_shortest_path.append(first_station)

# The loop only closes a leg when the route changes, so the final leg is
# still open here and has to be recorded explicitly.
if first_station is not None:
    _record_leg(first_station, last_station, current_route_id)

# Summary: the change-over stations joined by " => ", then the list of legs.
print('Shortest Path:', ' => '.join(optimizaed_shortest_path))

print('Itinerary:', itinerary)
