# In [1]:
import pandas as pd
import networkx as nx
from datetime import datetime, timedelta
import heapq


# Function to load a schedule dataset
def load_schedule(schedule_path):
    """Read a schedule CSV and normalise its time and station-code columns.

    Args:
        schedule_path: Anything accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame in which 'Arrival time' and 'Departure time'
        hold ``datetime.time`` values (parsed as HH:MM:SS after removing
        surrounding single quotes) and 'station Code' has its leading and
        trailing whitespace removed.
    """
    timetable = pd.read_csv(schedule_path)
    for column in ('Arrival time', 'Departure time'):
        # The raw values are wrapped in single quotes, e.g. '10:05:00'.
        unquoted = timetable[column].str.strip("'")
        timetable[column] = pd.to_datetime(unquoted, format='%H:%M:%S').dt.time
    timetable['station Code'] = timetable['station Code'].str.strip()
    return timetable

# Function to create a directed graph from the schedule data
def create_graph(schedule_data):
    """Build a directed station graph from a train schedule.

    Every pair of consecutive rows that share the same 'Train No.' becomes
    an edge from the earlier row's station to the later row's station.

    Edge attributes:
        train_no:    'Train No.' of the later row, surrounding quotes removed.
        from_islno:  'islno' of the earlier row.
        to_islno:    'islno' of the later row.
        dep_time:    departure from the earlier station, as a datetime on
                     1900-01-01 (the ``strptime`` default date).
        arr_time:    arrival at the later station; moved one day forward
                     when it would otherwise precede ``dep_time``.
        travel_time: ``arr_time - dep_time`` in seconds.
        distance:    'Distance' value of the later row.

    Args:
        schedule_data: DataFrame with the columns 'Train No.',
            'station Code', 'islno', 'Arrival time', 'Departure time'
            (objects providing ``strftime``) and 'Distance'.

    Returns:
        networkx.DiGraph whose nodes are station codes.

    NOTE: a DiGraph holds a single edge per ordered station pair, so when
    several rows produce the same (from, to) pair the attributes of the
    last one processed replace the earlier ones.
    """
    G = nx.DiGraph()

    def add_edge(train_no, from_station, to_station, from_islno, to_islno, dep_time, arr_time, distance):
        dep_time = datetime.strptime(dep_time, '%H:%M:%S')
        arr_time = datetime.strptime(arr_time, '%H:%M:%S')
        # An arrival clock time before the departure means the ride
        # crossed midnight.
        if arr_time < dep_time:
            arr_time += timedelta(days=1)
        travel_time = (arr_time - dep_time).total_seconds()
        G.add_edge(from_station, to_station, train_no=train_no, from_islno=from_islno, to_islno=to_islno,
                   dep_time=dep_time, arr_time=arr_time, travel_time=travel_time, distance=distance)

    # Remember the previous row while iterating instead of indexing with
    # ``iloc[idx - 1]``: the labels yielded by iterrows() only coincide with
    # positions for a default RangeIndex.
    prev_row = None
    for _, row in schedule_data.iterrows():
        if prev_row is not None and row['Train No.'] == prev_row['Train No.']:
            add_edge(row['Train No.'].strip("'"), prev_row['station Code'], row['station Code'],
                     prev_row['islno'], row['islno'],
                     prev_row['Departure time'].strftime('%H:%M:%S'), row['Arrival time'].strftime('%H:%M:%S'),
                     row['Distance'])
        prev_row = row
    return G



# Helper functions for cost calculations
def calculate_stops(path):
    """Return the number of hops in *path*: one fewer than its station count."""
    station_count = len(path)
    return station_count - 1

def calculate_travel_time(path, graph=None):
    """Sum the 'travel_time' edge attribute along *path*.

    Args:
        path: Sequence of station codes; consecutive entries must be joined
            by an edge in the graph.
        graph: Mapping such that ``graph[u][v]`` yields the attribute dict
            of edge u -> v (a networkx graph qualifies). Defaults to the
            module-level ``G`` so existing one-argument calls keep working.

    Returns:
        Total of the edges' 'travel_time' values (seconds, as stored by
        create_graph); 0 for a path with fewer than two stations.
    """
    if graph is None:
        graph = G
    return sum(graph[origin][destination]['travel_time']
               for origin, destination in zip(path, path[1:]))

def calculate_price(path, graph=None):
    """Compute the ticket price of riding *path*.

    Each uninterrupted run of edges served by the same train costs one
    unit per edge, capped at 10 units per run; the price of the path is the
    sum over all runs.

    Args:
        path: Sequence of station codes; consecutive entries must be joined
            by an edge in the graph.
        graph: Mapping such that ``graph[u][v]`` yields the attribute dict
            of edge u -> v (a networkx graph qualifies). Defaults to the
            module-level ``G`` so existing one-argument calls keep working.

    Returns:
        The total price as an int; 0 for a path with fewer than two
        stations.
    """
    if graph is None:
        graph = G
    total_price = 0
    current_train = None
    stop_count = 0
    for origin, destination in zip(path, path[1:]):
        train_no = graph[origin][destination]['train_no']
        if train_no == current_train:
            stop_count += 1
            continue
        # Train change: settle the fare of the train just left.
        if current_train is not None:
            total_price += min(10, stop_count)
        current_train = train_no
        stop_count = 1
    # Fare of the last train ridden (stop_count is 0 when there are no edges).
    return total_price + min(10, stop_count)

def calculate_arrival_time(path, departure_time, graph=None):
    """Simulate riding *path* and report when its last station is reached.

    For every edge the traveller takes the next departure at the edge's
    scheduled time of day that is not earlier than the current time
    (waiting until the following day if it has already passed) and then
    rides for the edge's 'travel_time' seconds.

    Args:
        path: Sequence of station codes; consecutive entries must be joined
            by an edge in the graph.
        departure_time: datetime at which the traveller is ready at
            ``path[0]``.
        graph: Mapping such that ``graph[u][v]`` yields the attribute dict
            of edge u -> v (a networkx graph qualifies). Defaults to the
            module-level ``G`` so existing two-argument calls keep working.

    Returns:
        Tuple ``(arrival, days_count)``: the arrival datetime and the
        number of calendar days between ``departure_time`` and it. A path
        with fewer than two stations yields ``(departure_time, 0)``.
    """
    if len(path) < 2:
        return departure_time, 0
    if graph is None:
        graph = G

    current_time = departure_time
    for origin, destination in zip(path, path[1:]):
        edge_data = graph[origin][destination]
        # Place the scheduled time of day on the current calendar day ...
        dep_time = datetime.combine(current_time.date(), edge_data['dep_time'].time())
        # ... and wait for tomorrow's run if today's has already left.
        if current_time > dep_time:
            dep_time += timedelta(days=1)
        # 'travel_time' is arrival minus departure (midnight wrap included),
        # so this lands exactly on the scheduled arrival clock time.
        current_time = dep_time + timedelta(seconds=edge_data['travel_time'])

    # Count days from the calendar dates so that rides which themselves
    # cross midnight are included, not only overnight waits.
    days_count = (current_time.date() - departure_time.date()).days
    return current_time, days_count

# Read the routing problems to solve from problems.csv
problems_path = 'problems.csv'
problems_data = pd.read_csv(problems_path)

# Lookup from schedule name to the CSV file holding that schedule
# (each name currently maps to a file of the same name)
schedule_paths = {name: name for name in ('schedule.csv', 'mini-schedule.csv')}

# Adjusted Dijkstra's algorithm for better path finding
def dijkstra(source, target, cost_function, departure_time=None,
             min_leg_duration=timedelta(minutes=15)):
    """Search the module-level graph ``G`` for a cheap path source -> target.

    Args:
        source: Station code to start from.
        target: Station code to reach.
        cost_function: How a path is scored:
            'stops'       - number of edges on the path,
            'traveltime'  - sum of the edges' 'travel_time',
            'price'       - ``calculate_price`` of the path,
            'arrivaltime' - arrival instant, expressed as seconds relative
                            to 1970-01-01.
            Any other value leaves the cost at 0 for every edge.
        departure_time: datetime at which the traveller is ready at
            *source*; only used by 'arrivaltime'.
        min_leg_duration: 'arrivaltime' only - a leg is skipped when
            waiting plus riding takes less than this. The default keeps the
            previously hard-coded 15 minutes.

    Returns:
        ``(path, cost)`` with *path* the list of station codes from
        *source* to *target*, or ``(None, float('inf'))`` when the queue
        runs empty before *target* is reached.
    """
    # Heap entries: (cost so far, station, time reached, path before station)
    queue = [(0, source, departure_time, [])]
    visited = set()

    while queue:
        cost, node, current_time, path = heapq.heappop(queue)
        if node in visited:
            continue
        path = path + [node]
        visited.add(node)
        if node == target:
            return path, cost
        for neighbor in G.neighbors(node):
            if neighbor in visited:
                continue
            edge_data = G[node][neighbor]
            new_cost = cost
            new_time = current_time
            if cost_function == 'stops':
                new_cost = calculate_stops(path + [neighbor])
            elif cost_function == 'traveltime':
                new_cost += edge_data['travel_time']
            elif cost_function == 'price':
                new_cost = calculate_price(path + [neighbor])
            elif cost_function == 'arrivaltime':
                # current_time is already the time reached at `node`, so only
                # the new leg is simulated. Replaying the whole path from
                # this time would count the earlier legs a second time.
                new_time, _ = calculate_arrival_time([node, neighbor], current_time)
                # NOTE(review): this threshold now applies to every leg
                # (wait + ride); confirm that is the intended rule.
                if new_time - current_time < min_leg_duration:
                    continue
                new_cost = (new_time - datetime(1970, 1, 1)).total_seconds()  # Calculate timestamp relative to epoch
            heapq.heappush(queue, (new_cost, neighbor, new_time, path))
    return None, float('inf')

# Helper function to format the connection properly by merging consecutive segments of the same train
def format_connection(path, graph=None):
    """Render *path* as a connection string, one part per train ridden.

    Consecutive edges served by the same train are merged into one part of
    the form ``"<train_no> : <from_islno> -> <to_islno>"``, where
    ``from_islno`` comes from the first edge of the run and ``to_islno``
    from its last edge. Parts are joined with ``" ; "``.

    Args:
        path: Sequence of station codes; consecutive entries must be joined
            by an edge in the graph.
        graph: Mapping such that ``graph[u][v]`` yields the attribute dict
            of edge u -> v (a networkx graph qualifies). Defaults to the
            module-level ``G`` so existing one-argument calls keep working.

    Returns:
        The formatted connection; an empty string when the path has fewer
        than two stations.
    """
    if graph is None:
        graph = G

    # Each leg is [train_no, boarding islno, alighting islno].
    legs = []
    for origin, destination in zip(path, path[1:]):
        edge_data = graph[origin][destination]
        if legs and legs[-1][0] == edge_data['train_no']:
            # Still on the same train: only the alighting point moves on.
            legs[-1][2] = edge_data['to_islno']
        else:
            legs.append([edge_data['train_no'], edge_data['from_islno'], edge_data['to_islno']])

    return ' ; '.join(f"{train_no} : {from_islno} -> {to_islno}"
                      for train_no, from_islno, to_islno in legs)

# Solve every problem and collect one result row per problem
solutions = []
graph_cache = {}  # schedule name -> graph already built for it
for _, row in problems_data.iterrows():
    schedule_file = row['Schedule']
    # Parse each schedule and build its graph only once; many problems
    # refer to the same schedule file.
    if schedule_file not in graph_cache:
        schedule_data = load_schedule(schedule_paths[schedule_file])
        graph_cache[schedule_file] = create_graph(schedule_data)
    # dijkstra() and the path helpers read the module-level G.
    G = graph_cache[schedule_file]

    from_station = row['FromStation']
    to_station = row['ToStation']
    cost_function = row['CostFunction']

    # 'arrivaltime HH:MM:SS' carries the departure time after the keyword.
    departure_time = None
    if cost_function.startswith('arrivaltime'):
        departure_time_str = cost_function.split()[1]
        departure_time = datetime.strptime(departure_time_str, '%H:%M:%S')
        path, cost = dijkstra(from_station, to_station, 'arrivaltime', departure_time)
    else:
        path, cost = dijkstra(from_station, to_station, cost_function)

    if path is None:
        solutions.append({'ProblemNo': row['ProblemNo'], 'Connection': 'No path found', 'Cost': float('inf')})
        continue

    if departure_time is not None:
        # Report the arrival as DD:HH:MM:SS; the day part is zero-padded to
        # two digits so that counts of 10 or more stay well-formed.
        arrival_time, days_count = calculate_arrival_time(path, departure_time)
        cost = f'{days_count:02d}:{arrival_time:%H:%M:%S}'
    solutions.append({'ProblemNo': row['ProblemNo'], 'Connection': format_connection(path), 'Cost': cost})

# Create a DataFrame for the solutions and save to CSV
solutions_df = pd.DataFrame(solutions)
solutions_df.to_csv('solutions.csv', index=False)