Enter:
- working hours
- preference for overnight stays
- fixed appointments

In [387]:
from src.routing import create_nodes_dataframe, custom_clustering, plot_refined_clusters, assign_weekdays_to_clusters, plot_ind_route, plot_all_cluster_routes, create_data_model
import pandas as pd

from collections import Counter

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

# Cluster counts handed to custom_clustering below.
# NOTE(review): "large" presumably means multi-day (overnight) clusters and
# "small" single-day ones — confirm against src.routing.
num_large_clusters = 1
num_small_clusters = 3

In [388]:
# Build the node table and the matching travel-time matrix for this run.
nodes_df, time_matrix = create_nodes_dataframe(
    num_nodes=100,
    min_work_days=5,
    home_node_id=0,
    visiting_interval_min=10,
    visiting_interval_max=30,
    max_last_visit=20,
    frac_fixed_app=0.2,
)

In [389]:
# Cluster the nodes on the raw travel-time values; the result is consumed
# below as a mapping of cluster label -> node ids.
clusters = custom_clustering(
    time_matrix.values,
    nodes_df,
    num_small_clusters=num_small_clusters,
    num_large_clusters=num_large_clusters,
    overnight_factor=1.3,
    precision=30,
)

In [390]:
# Invert the cluster -> nodes mapping so every node id points at its cluster
# label, then attach that label to the node table.
node_to_cluster = dict(
    (member, label)
    for label, members in clusters.items()
    for member in members
)
nodes_df['cluster'] = nodes_df['node_id'].map(node_to_cluster)

In [391]:
# Let the routing helper assign weekdays to the clusters; the returned frame
# replaces nodes_df.
# NOTE(review): presumably this is what adds the 'visit_day' column read in
# the next cell — confirm in src.routing.
nodes_df = assign_weekdays_to_clusters(nodes_df)

The cell below will fail if opening hours contain lunch breaks (it expects exactly one open/close pair per day).

In [392]:
# Turn each node's visit-day collection into an immutable, hashable frozenset.
nodes_df['visit_day'] = nodes_df['visit_day'].apply(frozenset)

def adjust_opening_hours(row):
    """Convert a node's opening hours on its visit days to minutes.

    Args:
        row: Mapping-like row with ``'visit_day'`` (collection of days) and
            ``'opening_hours'`` (day -> ``(open_time, close_time)``, both
            exposing ``.hour`` and ``.minute``).

    Returns:
        Dict of ``int(day) -> (open_minutes, close_minutes)`` for every visit
        day that has opening hours. When there are exactly two visit days,
        the later one is shifted by 1440 minutes (one full day).
    """
    days = row['visit_day']
    hours_by_day = row['opening_hours']
    is_two_day_visit = len(days) == 2

    windows = {}
    for day in days:
        if day not in hours_by_day:
            continue
        # Exactly one (open, close) pair per day is expected here; a lunch
        # break (more entries) makes this unpacking fail.
        opens_at, closes_at = hours_by_day[day]
        # Only the later day of a two-day visit is moved onto "day two".
        offset = 1440 if is_two_day_visit and max(days) == day else 0
        windows[int(day)] = (
            opens_at.hour * 60 + opens_at.minute + offset,
            closes_at.hour * 60 + closes_at.minute + offset,
        )
    return windows

# Row-wise: store each node's day -> (open, close) windows, expressed in minutes.
nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_opening_hours, axis=1)

Use `margin` (the minutes of tolerance added before and after a fixed appointment) as a hyperparameter.

In [393]:
margin = 5  # default tolerance, in minutes, added before and after a fixed appointment


def time_to_minutes(t):
    """Return the time of day ``t`` as minutes after midnight.

    Seconds are kept as a fractional part of a minute.
    """
    return t.hour * 60 + t.minute + t.second / 60


def adjust_hours(row, margin_minutes=None):
    """Merge a node's fixed appointment into its opening-hour windows.

    Args:
        row: Mapping-like row with ``'adjusted_opening_hours'`` (day ->
            ``(open_minutes, close_minutes)``) and ``'fixed_appointment'``
            (falsy, or ``(day, start_time, end_time)``).
        margin_minutes: Tolerance added on both sides of the appointment.
            Defaults to the module-level ``margin`` so that
            ``nodes_df.apply(adjust_hours, axis=1)`` keeps working unchanged.

    Returns:
        List of ``[open_minutes, close_minutes]`` pairs in the dict's
        insertion order. If the appointment day already has opening hours,
        the appointment window becomes the *only* window; otherwise it is
        added as an extra window.
    """
    if margin_minutes is None:
        margin_minutes = margin

    # Work on a copy: adding the appointment day must not alter the dict that
    # is still stored in the caller's row / DataFrame.
    opening_hours = row['adjusted_opening_hours'].copy()
    appointment = row['fixed_appointment']
    if appointment:
        day, start_time, end_time = appointment
        start_minutes = int(time_to_minutes(start_time) - margin_minutes)
        end_minutes = int(time_to_minutes(end_time) + margin_minutes)

        # Highest day key present in the opening hours (None when empty).
        max_day_key = max(opening_hours) if opening_hours else None

        # An appointment on the last (highest) day is moved one day ahead.
        # NOTE(review): this also shifts single-day nodes whose only day is
        # not 0, whereas adjust_opening_hours shifts only the later day of a
        # two-day visit — confirm this asymmetry is intended.
        if day == max_day_key and day != 0:
            start_minutes += 1440  # 24 hours in minutes
            end_minutes += 1440

        if day in opening_hours:
            # The fixed appointment replaces every other window of the node.
            opening_hours = {day: (start_minutes, end_minutes)}
        else:
            # Appointment on a day without opening hours: add it as a window.
            opening_hours[day] = (start_minutes, end_minutes)

    # Flatten to a list of [open, close] pairs.
    return [[window[0], window[1]] for window in opening_hours.values()]

# Applying the function: each node's day -> window dict is replaced by the
# plain list of [open, close] minute pairs returned by adjust_hours.
nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_hours, axis=1)
# forbidden_starts: 0 followed by every window's closing minute.
nodes_df['forbidden_starts'] = nodes_df['adjusted_opening_hours'].apply(lambda x: [0] + [i[1] for i in x])
# forbidden_ends: every window's opening minute followed by the fixed horizon 4880.
# NOTE(review): the two lists presumably pair up element-wise as the gaps
# between windows, which requires the windows to be in chronological order.
# NOTE(review): 4880 is not a multiple of 1440 (2 days = 2880, 3 days = 4320)
# — confirm the intended horizon.
nodes_df['forbidden_ends'] = nodes_df['adjusted_opening_hours'].apply(lambda x: [i[0] for i in x] + [4880])
# potentially ensure that all forbidden ends/starts are of equal length
# [len(i) for i in nodes_df['forbidden_ends']].count(2), [len(i) for i in nodes_df['forbidden_ends']].count(3)

In [394]:
# Keep the part of the cluster label before the first underscore.
# NOTE(review): assumes labels look like '<size>_<suffix>' — confirm against
# what custom_clustering produces.
nodes_df['cluster_size'] = nodes_df['cluster'].str.split('_').str[0]

In [395]:
def define_clusters(dataframe):
    """Normalise ``visit_day`` to tuples and number the distinct day sets.

    The frame is modified in place (callers rely on this) and also returned.

    * ``visit_day`` becomes a tuple of days: sets/frozensets are sorted so
      equal day sets always yield the same tuple, lists/tuples keep their
      order, and any other value is wrapped as a 1-tuple. Because tuples are
      passed through unchanged, running the function twice is harmless.
    * ``new_clusters`` holds an integer code per distinct ``visit_day``.

    Args:
        dataframe: Frame with a ``'visit_day'`` column.

    Returns:
        The same ``dataframe`` object.
    """
    def _as_day_tuple(value):
        # Sets have no order of their own; sort for a canonical tuple.
        if isinstance(value, (set, frozenset)):
            return tuple(sorted(value))
        if isinstance(value, (list, tuple)):
            return tuple(value)
        return (value,)

    dataframe['visit_day'] = dataframe['visit_day'].apply(_as_day_tuple)
    # Factorise on the string form, as before, to get one code per day tuple.
    dataframe['new_clusters'] = dataframe['visit_day'].astype(str).factorize()[0]
    return dataframe

# Apply the function to the DataFrame.
# define_clusters modifies its argument in place and returns it, so
# clustered_df and nodes_df refer to the same object afterwards.
clustered_df = define_clusters(nodes_df)

# Convert the DataFrame to the required dictionary format for plotting:
# new_clusters code -> list of node ids in that cluster.
refined_clusters = clustered_df.groupby('new_clusters')['node_id'].apply(list).to_dict()

In [396]:
# Row describing the depot / home node (node_id 0).
depot_node_data = nodes_df[nodes_df['node_id'] == 0].iloc[0]  # Assuming there is always a row for node 0 in the original DataFrame

# Columns the routing step reads from each per-group frame.
route_columns = ['node_id', 'priority', 'adjusted_opening_hours', 'cluster_size', 'visit_day', 'on_site_time']

# One frame per distinct visit_day, keyed by that visit_day value.
result_dfs = {}
for index, group in nodes_df.groupby('visit_day'):
    # The routing model below uses position 0 of each group as the depot, so
    # the depot row has to come first — being somewhere in the group is not
    # enough. Rebuild the group only when that is not already the case.
    if group['node_id'].values[0] != 0:
        others = group[group['node_id'] != 0]
        group = pd.concat([pd.DataFrame([depot_node_data]), others], ignore_index=True)
    # Now group is guaranteed to start with the depot node
    result_dfs[index] = group[route_columns]

- Allow dropping a node at a penalty (larger for high-priority nodes [incl. distance to home]; dropping is impossible for priority-1 nodes, and nodes with fixed appointments have priority 1).
- Add non-consecutive time-window restrictions per node.
- Ensure that routes end where they started.

In [397]:
solutions = {}  # NOTE(review): nothing in the visible cells ever stores into this dict
working_hours = [8 * 60, 18 * 60]  # [start, end] in minutes after midnight (08:00-18:00); only referenced by commented-out code below
span_cost_coefficient = 20000 # adjust — only referenced by a commented-out line below
slack = 20000 # adjust — upper bound for slack / waiting time passed to AddDimension
route_lists = {}  # visit_day key -> [(node_id, time), ...] for every group that was solved

def create_data_model(sub_nodes_df, sub_time_matrix):
    """Bundle the inputs of one routing sub-problem into a dict.

    Args:
        sub_nodes_df: Frame with the columns ``'adjusted_opening_hours'``,
            ``'priority'`` and ``'on_site_time'``, one row per node.
        sub_time_matrix: Travel-time matrix for exactly those nodes; stored
            as given.

    Returns:
        Dict with the keys ``time_matrix``, ``windows``, ``priorities``,
        ``num_vehicles`` (always 1), ``on_site_time`` and ``depot`` (always 0).
    """
    return {
        'time_matrix': sub_time_matrix,
        'windows': sub_nodes_df['adjusted_opening_hours'].tolist(),
        'priorities': sub_nodes_df['priority'].tolist(),
        'num_vehicles': 1,
        'on_site_time': sub_nodes_df['on_site_time'].tolist(),
        'depot': 0,
    }

def _count_visit_days(visit_day):
    """Return the number of weekdays described by a ``visit_day`` group key.

    ``define_clusters`` may produce either a tuple of days or a 1-tuple that
    wraps a whole collection of days, so one level of wrapping is removed
    before counting. Anything that is not a collection counts as one day.
    """
    collection_types = (set, frozenset, list, tuple)
    days = visit_day
    if isinstance(days, tuple) and len(days) == 1 and isinstance(days[0], collection_types):
        days = days[0]
    return len(days) if isinstance(days, collection_types) else 1


def return_route_and_times(solution, manager, routing, original_node_ids):
    """Returns the route along with the start times at each node.

    Args:
        solution: Assignment returned by ``routing.SolveWithParameters``.
        manager: Index manager of the solved model.
        routing: The solved routing model; must own a 'total_time' dimension.
        original_node_ids: Original node id for every position of the
            sub-problem (position 0 is the depot).

    Returns:
        List of ``(original_node_id, time)`` tuples for vehicle 0, from the
        start node to the end node inclusive, where ``time`` is the minimum
        of the node's cumulative 'total_time' variable in the solution.
    """
    time_dimension = routing.GetDimensionOrDie('total_time')  # Make sure this matches the dimension name used
    route_with_times = []
    index = routing.Start(0)  # Start at the depot.
    while not routing.IsEnd(index):
        original_node_id = original_node_ids[manager.IndexToNode(index)]  # Map back to original node ID
        start_time = solution.Min(time_dimension.CumulVar(index))
        route_with_times.append((original_node_id, start_time))
        index = solution.Value(routing.NextVar(index))
    # Add the final node
    route_with_times.append((original_node_ids[manager.IndexToNode(index)], solution.Min(time_dimension.CumulVar(index))))
    return route_with_times


# Solve one single-vehicle routing problem per visit_day group. `manager`,
# `routing` and `solution` deliberately stay module-level names: the cell that
# reports dropped nodes reads them after the loop.
for key, sub_nodes_df in result_dfs.items():
    # `key` is the group's visit_day. Row 0 of the frame can be the prepended
    # depot row, whose own visit_day says nothing about this group, so the
    # number of days is taken from the key instead.
    max_travel_time = 10000 if _count_visit_days(key) == 1 else 20000 # adjust
    nodes = sub_nodes_df['node_id'].tolist()
    sub_time_matrix = time_matrix.loc[nodes, nodes].values.tolist()
    sub_time_matrix = [[int(x) for x in row] for row in sub_time_matrix]  # the solver needs integer transit values
    data = create_data_model(sub_nodes_df, sub_time_matrix)
    manager = pywrapcp.RoutingIndexManager(len(data["time_matrix"]), data["num_vehicles"], data["depot"])
    routing = pywrapcp.RoutingModel(manager)

    def time_callback(from_index, to_index):
        """Travel time from -> to plus the on-site time spent at the from node."""
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data["time_matrix"][from_node][to_node] + data['on_site_time'][from_node]

    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    dimension_name = "total_time"
    routing.AddDimension(
        transit_callback_index,
        slack,  # upper bound for slack / waiting time
        max_travel_time,  # upper bound for vehicle maximum travel time
        True,  # start cumul to zero
        dimension_name
    )
    time_dimension = routing.GetDimensionOrDie(dimension_name)
    # time_dimension.SetGlobalSpanCostCoefficient(span_cost_coefficient)

    # for location_index, priority in enumerate(data['priorities']):
    #     index = manager.NodeToIndex(location_index)
    #     if index == 0:
    #         continue
    #     else:
    #         routing.AddDisjunction([index], int(round((priority*100)**2/100, 0)))
    # Every non-depot node may be skipped at a flat penalty of 100.
    for location_index in range(1, len(data['priorities'])):
        routing.AddDisjunction([manager.NodeToIndex(location_index)], 100)

    # for location_index, windows in enumerate(data['windows']):
    #     index = manager.NodeToIndex(location_index)
    #     time_dimension.CumulVar(index).SetRange(0, 20000) # adjust
    #     days = len(windows)
    #     start_time = 0
    #     end_time = days * 1440

    #     # remove start to windows[0][0] and windows[-1][1] to end
    #     latest_start = max(start_time, windows[0][0])
    #     time_dimension.CumulVar(index).RemoveInterval(start_time, windows[0][0])
    #     time_dimension.CumulVar(index).RemoveInterval(start_time, working_hours[0])

    #     earliest_end = min(end_time, windows[-1][1])
    #     time_dimension.CumulVar(index).RemoveInterval(windows[-1][1], end_time)
    #     time_dimension.CumulVar(index).RemoveInterval(windows[-1][1], working_hours[1])

    #     # remove time between days
    #     if days > 1:
    #         for day in range(1, days):
    #             time_dimension.CumulVar(index).RemoveInterval(windows[day-1][1], windows[day][0])
    #             time_dimension.CumulVar(index).RemoveInterval(working_hours[1] + 1440 * (day-1), working_hours[0] + 1440 * day)
    # The opening-hour windows are not enforced yet: every node may be reached
    # at any time within the dimension's range.
    for location_index, windows in enumerate(data['windows']):
        index = manager.NodeToIndex(location_index)
        time_dimension.CumulVar(index).SetRange(0, max_travel_time)

    # Instantiate route start and end times to produce feasible times
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.Start(0)))
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.End(0)))

    # Setting first solution heuristic
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # search_parameters.local_search_metaheuristic = (
    #     routing_enums_pb2.LocalSearchMetaheuristic.SIMULATED_ANNEALING)
    search_parameters.time_limit.seconds = 60
    search_parameters.log_search = False

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

    if solution:
        route_lists[key] = return_route_and_times(solution, manager, routing, nodes)
    else:
        print("NoSolution")

# Show the route of the second solved group (raises IndexError with fewer than two routes).
route_lists[list(route_lists)[1]]

[(0, 0),
 (65, 69),
 (75, 115),
 (79, 195),
 (11, 238),
 (7, 284),
 (95, 337),
 (64, 387),
 (81, 438),
 (40, 498),
 (1, 562),
 (20, 624),
 (13, 673),
 (18, 715),
 (16, 772),
 (96, 836),
 (42, 888),
 (4, 955),
 (63, 1021),
 (92, 1076),
 (0, 1148)]

In [398]:
# Report the nodes the solver skipped. `routing`, `manager` and `solution` are
# the module-level names left behind by the last iteration of the solve loop,
# so only that last group is inspected.
# NOTE: the reported values are positions within that group's sub-problem
# (manager.IndexToNode), not original node ids.
dropped = []
if solution:
    dropped = [
        manager.IndexToNode(node)
        for node in range(routing.Size())
        if not (routing.IsStart(node) or routing.IsEnd(node))
        # A node whose successor is itself is not part of any route.
        and solution.Value(routing.NextVar(node)) == node
    ]
    print(f"dropped: {dropped}")
else:
    # The last solve produced no assignment; there is nothing to inspect.
    print("dropped: n/a (no solution for the last group)")

dropped: []


- Print more info (opening hours, fixed appointments, priority, decisions [removing a node, moving a node to another day, priority, etc.]).
- Use data to fine-tune hyperparameters.

Possibly:
- Store state and, if routes could be found for all sub-problems, iteratively add nodes based on node priority and distance to the root node (nodes further away should be more likely to be included).
- Test discrete priorities (must be visited this week; must be visited next week; ...).
- Compare routes with and without overnight stays / large clusters.

In [399]:
# Plot the visit_day-based clusters (new_clusters code -> node ids) with node 0 as home.
plot_refined_clusters(refined_clusters, nodes_df, home_node_id=0)

In [400]:
# Plot every solved route; route_lists maps a visit_day key to [(node_id, time), ...].
plot_all_cluster_routes(route_lists, nodes_df)