Enter:
- working hours
- preference for overnight stays
- fixed appointments

In [328]:
from src.routing import create_nodes_dataframe, custom_clustering, plot_refined_clusters, assign_weekdays_to_clusters, plot_ind_route, plot_all_cluster_routes, create_data_model, plot_all_nodes_with_angles
import pandas as pd
import numpy as np
from collections import Counter
import concurrent.futures

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

num_large_clusters = 1
num_small_clusters = 0

In [329]:
nodes_df, time_matrix = create_nodes_dataframe(num_nodes=15, min_work_days=5, home_node_id=0, visiting_interval_min=10, visiting_interval_max=30, max_last_visit=20, frac_fixed_app=.05)

In [330]:
# plot_all_nodes_with_angles(nodes_df)

- in rare cases the below will make trouble because there are two large gaps and a cluster is entirely contained within the second largest leading to size = nan

In [331]:
clusters = custom_clustering(time_matrix.values, nodes_df, num_small_clusters=num_small_clusters, num_large_clusters=num_large_clusters, overnight_factor=1.3, precision=30, verbose=False)

In [332]:
node_to_cluster = {node: cluster for cluster, nodes in clusters.items() for node in nodes}
nodes_df['cluster'] = nodes_df['node_id'].map(node_to_cluster)

In [333]:
nodes_df = assign_weekdays_to_clusters(nodes_df)

- the below will Fail if there are lunch breaks

In [334]:
nodes_df['visit_day'] = nodes_df['visit_day'].apply(frozenset)

def adjust_opening_hours(row):
    visit_days = row['visit_day']
    opening_hours = row['opening_hours']
    adjusted_hours = {}
    for day in visit_days:
        if day in opening_hours:
            open_time, close_time = opening_hours[day]
            adjusted_open = open_time.hour * 60 + open_time.minute
            adjusted_close = close_time.hour * 60 + close_time.minute
            if len(visit_days) == 2 and max(visit_days) == day: # fail here if lunch break
                adjusted_open += 1440
                adjusted_close += 1440
            adjusted_hours[int(day)] = (adjusted_open, adjusted_close)
    return adjusted_hours

nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_opening_hours, axis=1)

- use margin as hyperparameter

In [335]:
margin = 5

def time_to_minutes(t):
    return t.hour * 60 + t.minute + t.second / 60

def adjust_hours(row):
    opening_hours = row['adjusted_opening_hours']
    appointment = row['fixed_appointment']
    if appointment:
        day, start_time, end_time = appointment
        start_minutes = int(time_to_minutes(start_time) - margin)
        end_minutes = int(time_to_minutes(end_time) + margin)

        # Get the highest day key in the adjusted_opening_hours
        max_day_key = max(opening_hours.keys()) if opening_hours else None
        
        # Check if the appointment is on the last (highest) day
        if (day == max_day_key) & (day != 0):
            start_minutes += 1440  # Add 24 hours in minutes
            end_minutes += 1440

        # Adjust opening hours for the appointment day
        if day in opening_hours:
            opening_hours = {day: (start_minutes, end_minutes)}
        else:
            # Add new day if it does not exist
            opening_hours[day] = (start_minutes, end_minutes)
    # make opening_hours a list of tuples
    opening_hours = [[v[0], v[1]] for _, v in opening_hours.items()]
    return opening_hours

# Applying the function
nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_hours, axis=1)
nodes_df['forbidden_starts'] = nodes_df['adjusted_opening_hours'].apply(lambda x: [0] + [i[1] for i in x])
nodes_df['forbidden_ends'] = nodes_df['adjusted_opening_hours'].apply(lambda x: [i[0] for i in x] + [4880])
# potentially ensure that all forbidden ends/starts are of equal length
# [len(i) for i in nodes_df['forbidden_ends']].count(2), [len(i) for i in nodes_df['forbidden_ends']].count(3)

In [336]:
nodes_df['cluster_size'] = nodes_df['cluster'].str.split('_').str[0]

In [337]:
def define_clusters(dataframe):
    dataframe['visit_day'] = dataframe['visit_day'].apply(lambda x: tuple(x) if isinstance(x, list) else (x,))
    dataframe['new_clusters'] = dataframe['visit_day'].astype(str).factorize()[0]
    return dataframe

# Apply the function to the DataFrame
clustered_df = define_clusters(nodes_df)

# Convert the DataFrame to the required dictionary format for plotting
refined_clusters = clustered_df.groupby('new_clusters')['node_id'].apply(list).to_dict()

In [338]:
depot_node_data = nodes_df[nodes_df['node_id'] == 0].iloc[0]  # Assuming there is always a row for node 0 in the original DataFrame

result_dfs = {}
for index, group in nodes_df.groupby('visit_day'):
    # Check if depot node is in the current group
    if 0 not in group['node_id'].values:
        # Append depot node data to the group
        group = pd.concat([pd.DataFrame([depot_node_data]), group], ignore_index=True)
    # Now group is guaranteed to include the depot node
    result_dfs[index] = group[['node_id', 'priority', 'adjusted_opening_hours', 'cluster_size', 'visit_day', 'on_site_time']]

- nodes with fixed appointments should have prio 1
- add margin to not arrive before closing - amount of time staying
- would require changes for a 3 day trip

In [340]:
solutions = {}
working_hours = [8 * 60, 18 * 60]
span_cost_coefficient = 20000 # adjust
slack = 20000 # adjust
penalty_factor = 300000
route_lists = {}

def create_data_model(sub_nodes_df, sub_time_matrix):
    """Stores the data for the problem."""
    data = {}
    data['time_matrix'] = sub_time_matrix
    data['windows'] = sub_nodes_df['adjusted_opening_hours'].tolist()
    data['priorities'] = sub_nodes_df['priority'].tolist()
    data['num_vehicles'] = 1
    data['on_site_time'] = sub_nodes_df['on_site_time'].tolist()
    data['depot'] = 0
    return data

def return_route_and_times(solution, manager, routing, original_node_ids):
        """Returns the route along with the start times at each node."""
        index = routing.Start(0)  # Start at the depot.
        route_with_times = []
        time_dimension = routing.GetDimensionOrDie('total_time')  # Make sure this matches the dimension name used
        while not routing.IsEnd(index):
            node_index = manager.IndexToNode(index)
            original_node_id = original_node_ids[node_index]  # Map back to original node ID
            time_var = time_dimension.CumulVar(index)
            start_time = solution.Min(time_var)
            route_with_times.append((original_node_id, start_time))  # Use original node ID here
            index = solution.Value(routing.NextVar(index))
        # Add the final node
        route_with_times.append((original_node_ids[manager.IndexToNode(index)], solution.Min(time_dimension.CumulVar(index))))
        return route_with_times

def solve_vrp(key, sub_nodes_df):
    max_travel_time = 10000 if len(sub_nodes_df['visit_day'].iloc[0]) == 1 else 20000 # adjust
    nodes = sub_nodes_df['node_id'].tolist()
    sub_time_matrix = time_matrix.loc[nodes, nodes].values.tolist()
    sub_time_matrix = [[int(x) for x in row] for row in sub_time_matrix]
    data = create_data_model(sub_nodes_df, sub_time_matrix)
    manager = pywrapcp.RoutingIndexManager(len(data["time_matrix"]), data["num_vehicles"], data["depot"])
    routing = pywrapcp.RoutingModel(manager)
    def time_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data["time_matrix"][from_node][to_node] + data['on_site_time'][from_node]
    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    dimension_name = "total_time"
    routing.AddDimension(
        transit_callback_index,
        slack,  # upper bound for slack / waiting time
        max_travel_time,  # upper bound for vehicle maximum travel time
        False,  # start cumul to zero
        dimension_name
    )
    time_dimension = routing.GetDimensionOrDie(dimension_name)
    time_dimension.SetGlobalSpanCostCoefficient(span_cost_coefficient)

    for location_index, priority in enumerate(data['priorities']):
        index = manager.NodeToIndex(location_index)
        if index == 0:
            continue
        else:
            routing.AddDisjunction([index], int(round((priority*100)**2*penalty_factor, 0)))

    for location_index, windows in enumerate(data['windows']):
        index = manager.NodeToIndex(location_index)
        days = len(windows)
        start_time = 0
        end_time = days * 1440
        if days > 1:
            if index < manager.GetNumberOfNodes():
                latest_start = max(start_time, windows[0][0])
                earliest_end = min(end_time, windows[-1][1])
                print(f'setting the time window from {latest_start} to {earliest_end} for node {location_index}')
                time_dimension.CumulVar(index).SetRange(latest_start, earliest_end)
            for day in range(1, days):
                print(f'and removing time between days from {working_hours[1] + 1440 * (day-1)} to {working_hours[0] + 1440 * day} for node {location_index}')
                time_dimension.CumulVar(index).RemoveInterval(windows[day-1][1], windows[day][0])
                time_dimension.CumulVar(index).RemoveInterval(working_hours[1] + 1440 * (day-1), working_hours[0] + 1440 * day)
        else:
            if windows[0][0] > 1440:
                print('work on day 2')
                work_start = working_hours[0] + 1440
                work_end = working_hours[1] + 1440
                print(f'work starts at {work_start} and ends at {work_end}')
                print(f'window starts at {windows[0][0]} and ends at {windows[0][1]}')
                day_start = max(windows[0][0], work_start)
                day_end = min(windows[0][1], work_end)
            else:
                day_start = windows[0][0]
                day_end = windows[0][1]
            print(f'setting the time window from {day_start} to {day_end} for node {location_index}')
            time_dimension.CumulVar(index).SetRange(day_start, day_end)            

    # Instantiate route start and end times to produce feasible times
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.Start(0)))
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.End(0)))

    # Setting first solution heuristic
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # search_parameters.local_search_metaheuristic = (
    #     routing_enums_pb2.LocalSearchMetaheuristic.SIMULATED_ANNEALING)
    search_parameters.time_limit.seconds = 300
    search_parameters.log_search = False

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

    if solution:
        dropped = []
        for node in range(routing.Size()):
            if routing.IsStart(node) or routing.IsEnd(node):
                continue
            if solution.Value(routing.NextVar(node)) == node:
                dropped.append(manager.IndexToNode(node))
        if len(dropped) > 0:
            print(f"dropped: {dropped}")
        return key, return_route_and_times(solution, manager, routing, nodes)
        
    else:
        print(f"No solution for key {key}")
        return key, None
    
with concurrent.futures.ThreadPoolExecutor() as executor:
    future_to_key = {executor.submit(solve_vrp, key, sub_nodes_df): key for key, sub_nodes_df in result_dfs.items()}
    for future in concurrent.futures.as_completed(future_to_key):
        key = future_to_key[future]
        print(f"Finding solution for key: {key}")
        try:
            key, result = future.result()
            if result:
                route_lists[key] = result
        except Exception as e:
            print(f"Error for key {key}: {e}")

setting the time window from 540 to 2400 for node 0
and removing time between days from 1080 to 1920 for node 0
setting the time window from 480 to 2640 for node 1
and removing time between days from 1080 to 1920 for node 1
setting the time window from 540 to 2520 for node 2
and removing time between days from 1080 to 1920 for node 2
setting the time window from 480 to 2640 for node 3
and removing time between days from 1080 to 1920 for node 3
setting the time window from 540 to 2520 for node 4
and removing time between days from 1080 to 1920 for node 4
setting the time window from 540 to 2400 for node 5
and removing time between days from 1080 to 1920 for node 5
setting the time window from 600 to 2520 for node 6
and removing time between days from 1080 to 1920 for node 6
work on day 2
work starts at 1920 and ends at 2520
window starts at 2069 and ends at 2109
setting the time window from 2069 to 2109 for node 7
setting the time window from 480 to 2460 for node 8
and removing time bet

In [341]:
import sys
route_and_times = route_lists[list(route_lists.keys())[0]]
route_df = pd.DataFrame(route_and_times, columns=['node_id', 'arrival_time'])
merged_df = pd.merge(nodes_df, route_df, on='node_id', how='left')[['adjusted_opening_hours', 'arrival_time', 'node_id']]
merged_df = merged_df[merged_df['node_id'] != 0]
merged_df['closed_range'] = merged_df['adjusted_opening_hours'].apply(
    lambda tw: [tw[0][1], tw[1][0]] if len(tw) > 1 else None
)
merged_df['time_check'] = merged_df.apply(
    lambda row: row['arrival_time'] >= row['closed_range'][0] and row['arrival_time'] <= row['closed_range'][1] 
                if row['closed_range'] is not None else False, axis=1
)
if not merged_df['time_check'].any():
    print('VIOLATION OF TIME CONSTRAINTS')
merged_df[merged_df['closed_range'].isna()][['node_id', 'adjusted_opening_hours', 'closed_range', 'arrival_time', 'time_check']]

VIOLATION OF TIME CONSTRAINTS


Unnamed: 0,node_id,adjusted_opening_hours,closed_range,arrival_time,time_check
8,7,"[[2069, 2109]]",,2085,False


- print more info (opening hours, fixed appointments, priority, decisions [removing a node, replacing a node to another day, priority, etc])
- use data to fine tune hyperparameters

evtl:
- Store state and if it was possible to find routes for all solutions iteratively add nodes based on node priority and distance to root node (those further away should be included more likely)
- Test Discrete Priority (must be visited this week; must be visited next week; ...)
- Compare routes with and without overnight stays / large clusters

In [342]:
plot_refined_clusters(refined_clusters, nodes_df, home_node_id=0)

In [343]:
plot_all_cluster_routes(route_lists, nodes_df)