#  create dummy data and calculate clusters / routes

In [969]:
num_nodes = 20
margin = 5 # time margin for arrival before appointment and stay beyond
working_hours = [8 * 60, 18 * 60] # 8am to 6pm
percentage_of_appointments = 0.05
span_cost_coefficient = 20000 # adjust
slack = 20000 # adjust
penalty_factor = 300000
num_large_clusters = 1
num_small_clusters = 0
lunch_start = 11 * 60  # 12 PM in minutes
lunch_end = 13 * 60  # 2 PM in minutes
lunch_duration = 30

# week constraints
max_days_off = 1
days_off = {6}
no_overnight_stays = {1}
max_overnight_stays = 4

In [970]:
from src.routing import create_nodes_dataframe, custom_clustering, plot_refined_clusters, assign_weekdays_to_clusters, plot_ind_route, plot_all_cluster_routes, create_data_model, plot_all_nodes_with_angles
import pandas as pd
import numpy as np
import concurrent.futures
from datetime import datetime, timedelta
import random

from scipy.spatial import distance

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

In [971]:
nodes_df, time_matrix = create_nodes_dataframe(num_nodes=num_nodes, min_work_days=5, home_node_id=0, visiting_interval_min=10, visiting_interval_max=30, max_last_visit=20, frac_fixed_app=percentage_of_appointments)
nodes_df['weekdays_fixed_appointments'] = nodes_df['fixed_appointment'].apply(lambda x: x[0] if isinstance(x, tuple) else None)

In [972]:
all_days = set(range(1, 8))
# Calculate open days as sets from the dictionary keys
nodes_df['open_days'] = nodes_df['opening_hours'].apply(lambda x: set(x.keys()))

# Calculate closed days
nodes_df['closed_days'] = nodes_df['open_days'].apply(lambda x: list(all_days - x))

# Filter for VIP nodes based on priority
VIP_nodes = nodes_df[nodes_df['priority'] > 0.8]

# For each VIP node, identify specific closed days that match the days off
for index, row in VIP_nodes.iterrows():
    closed_days_off = set(row['closed_days']) & days_off
    if closed_days_off:
        closed_days_off_str = ', '.join(map(str, sorted(closed_days_off)))
        print(f"Warning: VIP node {row['node_id']} is closed on day(s) {closed_days_off_str}.")



In [973]:
weekdays_fixed_appointments = nodes_df['weekdays_fixed_appointments'].dropna().unique()
# if any(days_off in weekdays_fixed_appointments for days_off in days_off):
#     raise ValueError("Fixed appointments are scheduled on days off")

In [997]:
# Generate valid lists as provided
def find_lists(current_list, current_sum, max_length, target_sum):
    if current_sum > target_sum or len(current_list) > max_length:
        return []
    if current_sum == target_sum and len(current_list) <= max_length:
        return [current_list]
    results = []
    for i in range(0, 8):
        adjusted_sum = current_sum + (i if i > 0 else 1)  # Adjust sum for 0s treated as 1
        results.extend(find_lists(current_list + [i], adjusted_sum, max_length, target_sum))
    return results

# Generate valid lists
valid_lists = find_lists([], 0, 7, 7)

# Function to repeat values in the list according to their integer value
def repeat_values(lst, overnight_trips=0):
    repeated_list = []
    for num in lst:
        if overnight_trips == 1:
            repeated_list.extend([num] * (num - 1) + [0] if num > 0 else [0])
        else:
            repeated_list.extend([num] * num if num > 0 else [0])
    return repeated_list

# Calculate days off for each list
def calculate_days_off(lst):
    repeated_lst = repeat_values(lst)
    days_off = []
    for i in range(min(len(repeated_lst), 7)):
        if repeated_lst[i] == 0:
            days_off.append(i + 1)  # Use 1-based indexing for days
    return days_off

# Calculate trip days for each list
def calculate_overnight_trips(lst):
    repeated_lst = repeat_values(lst, overnight_trips=1)  
    trip_days = []
    for i in range(min(len(repeated_lst), 7)):
        if repeated_lst[i] > 1:
            trip_days.append(i + 1)  # Use 1-based indexing for days
    return trip_days

def calculate_overnight_stays(lst):
    overnight_stays = 0
    for i in range(min(len(lst), 7)):
        if lst[i] > 1:
            overnight_stays += lst[i] - 1
    return overnight_stays

# Create a DataFrame
data = {
    'List': valid_lists,
    # 'Sum': [sum(lst) for lst in valid_lists],  # Calculating sum normally, 0s count as 0
    # 'Length': [len(lst) for lst in valid_lists],
    'n_Overnight_trips': [calculate_overnight_stays(lst) for lst in valid_lists],
    'Overnight_days': [calculate_overnight_trips(lst) for lst in valid_lists],
    'Off_days': [calculate_days_off(lst) for lst in valid_lists],
    'n_Days_off': [lst.count(0) for lst in valid_lists], 
}

df = pd.DataFrame(data)

# Filter the DataFrame based on the constraints
df = df[df['Off_days'].apply(lambda x: all(day in x for day in days_off))]
df = df[df['Overnight_days'].apply(lambda x: not any(day in x for day in no_overnight_stays))]
df = df[df['n_Overnight_trips'].apply(lambda x: x <= max_overnight_stays)]
df = df[df['n_Days_off'].apply(lambda x: x <= max_days_off)]
# add a column to df containing a dictionary counting the number of times an integer >1 appears in the list
from collections import Counter
df['Count'] = df['List'].apply(lambda x: Counter([item for item in x if item > 0]))
combinations = df['Count'].drop_duplicates().tolist()
combinations

[Counter({1: 6}),
 Counter({1: 4, 2: 1}),
 Counter({1: 3, 3: 1}),
 Counter({1: 2, 2: 2}),
 Counter({1: 2, 4: 1})]

- do simple solution finding for the above combinations, find best solution give cost of overnight stay, improve best solution

In [975]:
# plot_all_nodes_with_angles(nodes_df)

- in rare cases the below will make trouble because there are two large gaps and a cluster is entirely contained within the second largest leading to size = nan

In [976]:
def calculate_metric(distance_matrix, nodes_df, global_max_dist, node_ids, print_ind_metrics=True):
    filtered_nodes_df = nodes_df[nodes_df['node_id'].isin(node_ids)]
    
    # Identify indices corresponding to the filtered node IDs
    # index_map = {node_id: index for index, node_id in enumerate(nodes_df['node_id'])}
    # filtered_indices = [index_map[node_id] for node_id in node_ids]
    # Extract relevant submatrix from the distance matrix
    # filtered_distance_matrix = distance_matrix[np.ix_(filtered_indices, filtered_indices)]
    # Calculate mean and variance of distances between nodes using the distance matrix
    # distances = filtered_distance_matrix[np.triu_indices_from(filtered_distance_matrix, k=1)]
    # mean_dist_between_nodes = np.mean(distances)
    # var_dist_between_nodes = np.var(distances, ddof=1)

    num_nodes_metric = len(filtered_nodes_df) / len(nodes_df)
    
    priority_metric = filtered_nodes_df['priority'].nlargest(int(0.3 * len(filtered_nodes_df))).mean()
    
    max_dist_to_root = filtered_nodes_df['dist_to_home'].max()
    dist_metric = max_dist_to_root / global_max_dist

    # prevent any metric from being nan
    # if np.isnan(num_nodes_metric):
    #     num_nodes_metric = 0.5
    if np.isnan(priority_metric):
        priority_metric = 0.5
    # if np.isnan(dist_metric):
    #     dist_metric = 0.5

    metric = (num_nodes_metric + dist_metric) / 2

    if print_ind_metrics:
        print(f"Number of nodes metric: {num_nodes_metric}")
        print(f"Priority metric: {priority_metric}")
        print(f"Distance metric: {dist_metric}")
        print(f"Overall metric: {metric}")
    
    return metric

def adjust_angles(clusters, nodes_df, angle_sizes, degree_adj, global_max_dist, num_small_clusters, num_large_clusters, overnight_factor, distance_matrix, verbose):    
    metrics = {}
    for cluster_id, node_ids in clusters.items():
        metric = calculate_metric(distance_matrix, nodes_df, global_max_dist, node_ids, print_ind_metrics=False)
        metrics[cluster_id] = metric

    metric_sum = sum(metrics.values())
    small_soll = metric_sum / (num_large_clusters * overnight_factor + num_small_clusters)
    large_soll = small_soll * overnight_factor

    deviations = {cluster_id: metrics[cluster_id] - (small_soll if 'small' in cluster_id else large_soll) for cluster_id in clusters}
    
    # assigne the degree_adj to the clusters based on the deviation
    for cluster_id, deviation in deviations.items():
        angle_sizes[cluster_id] -= deviation * degree_adj

    if verbose:
        print("Deviations, metrics and new angle sizes:")
        for cluster_id in clusters:
            print(f"Cluster {cluster_id} \
                with deviation {round(deviations[cluster_id], 2)} \
                and metric {round(metrics[cluster_id], 2)} \
                has new angle size {round(angle_sizes[cluster_id], 2)} \
                spanning from {round(angle_sizes[cluster_id] - deviations[cluster_id] * degree_adj, 2)}° to {round(angle_sizes[cluster_id], 2)}°.")
        
    return angle_sizes

def custom_clustering(distance_matrix, nodes_df, num_small_clusters, num_large_clusters, overnight_factor, precision, home_node_id=0, verbose=False):
    # remove the home node from the nodes_df
    if nodes_df.index[0] == 0:
        nodes_df_copy = nodes_df.drop(0).copy()
    
    # define the largest gap to limit the span of all clusters
    clusters = {f'small_{i}': [home_node_id] for i in range(num_small_clusters)}
    clusters.update({f'large_{i}': [home_node_id] for i in range(num_large_clusters)})
    
    angles = sorted(nodes_df_copy['angle_to_home'])
    diffs = [angles[i + 1] - angles[i] for i in range(len(angles) - 1)]
    diffs.append(360 - angles[-1] + angles[0])
    
    max_gap = max(diffs)
    gap_start = angles[diffs.index(max_gap)]
    gap_end = angles[(diffs.index(max_gap) + 1) % len(angles)]

    if verbose == True:
        print(f"Largest gap spans from {gap_start}° to {gap_end}°, covering {max_gap}°.")

    cluster_start = gap_end
    num_clusters = num_small_clusters + num_large_clusters

    # Initialize the angle sizes for the clusters
    total_span = (360 - max_gap)
    degree_adj = total_span / 10
    small_step = total_span / (num_small_clusters + num_large_clusters * overnight_factor)
    large_step = small_step * overnight_factor

    if verbose == True:
        print(f"Small step size is {small_step}°, large step size is {large_step}°.")

    angle_sizes = {}
    for i in range(num_clusters):
        if i < num_small_clusters:
            angle_sizes[f'small_{i}'] = small_step
        else:
            angle_sizes[f'large_{i-num_small_clusters}'] = large_step

    for key, size in angle_sizes.items():
        if verbose == True:
            print(f"Cluster {key} spans {size}°")
    
    global_max_dist = nodes_df_copy['dist_to_home'].max()

    # Initial assignment of nodes to clusters
    for i in range(precision):
        current_angle = cluster_start  # Reset the start angle for each precision iteration
            
        # add the home node to each cluster
        for key in clusters.keys():
            clusters[key] = [home_node_id]

        # Assign nodes to clusters based on their angle to the home node
        for cluster_id, size in angle_sizes.items():
            start_angle = current_angle
            # round up to the nearest integer
            start_angle = int(start_angle)
            end_angle = (current_angle + size) % 360
            end_angle = int(np.ceil(end_angle))
            # Ensuring all nodes are assigned, handling the wrap-around scenario more cleanly
            if end_angle < start_angle:  # This handles the case where the segment wraps past 360 degrees
                nodes_in_cluster = [index for index, row in nodes_df_copy.iterrows() if 
                                    (row['angle_to_home'] >= start_angle or row['angle_to_home'] < end_angle)]
            else:  # No wrap-around, normal case
                nodes_in_cluster = [index for index, row in nodes_df_copy.iterrows() if 
                                    (start_angle <= row['angle_to_home'] < end_angle)]

            clusters[cluster_id] = [home_node_id] + nodes_in_cluster
            current_angle = end_angle

        if (i == 0) & (verbose == True):
            print("Initial clusters:")
            for key, value in clusters.items():
                print(f"Cluster {key} with nodes {value}")

        angle_sizes = adjust_angles(clusters, nodes_df_copy, angle_sizes, degree_adj, global_max_dist, num_small_clusters, num_large_clusters, overnight_factor, distance_matrix, verbose)
        degree_adj *= 0.9

        #plot_refined_clusters(clusters, nodes_df)
        
    return clusters

In [977]:
clusters = custom_clustering(time_matrix.values, nodes_df, num_small_clusters=num_small_clusters, num_large_clusters=num_large_clusters, overnight_factor=1.3, precision=30, verbose=False)

In [978]:
node_to_cluster = {node: cluster for cluster, nodes in clusters.items() for node in nodes}
nodes_df['cluster'] = nodes_df['node_id'].map(node_to_cluster)

- if there are no fixed appointments the below will fail
- requires adjustements to cases where consecutive days > 2

In [979]:
def assign_weekdays_to_clusters(nodes_df):
    # Handel the case of no fixed appointments and consective random assignment here
    if nodes_df['weekdays_fixed_appointments'].isnull().all():
        pass
    
    grouped_data = nodes_df.groupby(['weekdays_fixed_appointments', 'cluster']).size().unstack(fill_value=0)
    weekdays = range(1, 8)

    cluster_assignments = {}

    # Identify missing weekdays
    missing_weekdays = set(weekdays) - set(grouped_data.index)

    # Add missing weekdays as new rows filled with zeros
    for weekday in missing_weekdays:
        grouped_data.loc[weekday] = [0] * len(grouped_data.columns)  # Create a row of zeros

    # Sort the DataFrame by index to ensure weekdays are in order
    grouped_data.sort_index(inplace=True)

    # Generate priority list focusing on appropriate assignment of large clusters
    priority_list = []
    for cluster in grouped_data.columns:
        for i in range(len(weekdays)):
            if 'large' in cluster and i < len(weekdays) - 1:  # ensure there's a next day for large clusters
                avg_appointments = (grouped_data.loc[weekdays[i], cluster] + grouped_data.loc[weekdays[i+1], cluster]) / 2
                priority_list.append((weekdays[i], cluster, avg_appointments, f"Days {weekdays[i]}-{weekdays[i+1]}"))
            elif 'small' in cluster:
                priority_list.append((weekdays[i], cluster, grouped_data.loc[weekdays[i], cluster], f"Day {weekdays[i]}"))

    # Sort priority list
    priority_df = pd.DataFrame(priority_list, columns=['day', 'cluster', 'appointments', 'description'])
    priority_df = priority_df.sort_values(by=['cluster', 'appointments'], ascending=[True, False])
    all_clusters = nodes_df['cluster'].unique()
    large_clusters = [cluster for cluster in all_clusters if 'large' in cluster]
    large_clusters_not_in_priority = [cluster for cluster in large_clusters if cluster not in set(priority_df['cluster'])]

    # check if there is any entry in priority_df['description'] containing 'Days'
    for index, large_cluster in enumerate(large_clusters_not_in_priority):
        # create a list of consecutive days based on weekdays
        consecutive_day_pairs = []
        for i in range(len(weekdays)):
            if i < len(weekdays) - 1:
                consecutive_day_pairs.append((weekdays[i], weekdays[i+1]))

        # get the number of appointments for each pair of consecutive_days
        appointments_per_pair = {}
        try:
            for consecutive_days in consecutive_day_pairs:
                day1 = sum(priority_df[priority_df['day'] == consecutive_days[0]]['appointments'])
                day2 = sum(priority_df[priority_df['day'] == consecutive_days[1]]['appointments'])
                appointments_per_pair[consecutive_days] = day1 + day2

            # get the pair of consecutive days with the least number of appointments
            min_appointments = min(appointments_per_pair.values())

            # get the pair of consecutive days with the least number of appointments
            min_appointments_days = [k for k, v in appointments_per_pair.items() if v == min_appointments]

        except:
            min_appointments_days = [(1.0, 2.0), (3.0, 4.0)]

        cluster_assignments[large_cluster] = min_appointments_days[index]

    # Assign clusters to days
    used_days = set()
    for _, row in priority_df.iterrows():
        cluster = row['cluster']
        description = row['description']

        if 'Days' in description and cluster not in cluster_assignments:
            day1, day2 = map(float, description.split(' ')[1].split('-'))
            if day1 not in used_days and day2 not in used_days:
                cluster_assignments[cluster] = {day1, day2}
                used_days.update([day1, day2])
        elif 'Day' in description and cluster not in cluster_assignments and float(description.split(' ')[1]) not in used_days:
            day = float(description.split(' ')[1])
            cluster_assignments[cluster] = {day,}
            used_days.add(day)

    # find the clusters that are not assigned to any day
    unassigned_clusters = set(all_clusters) - set(cluster_assignments.keys())
    unassigned_days = set(weekdays) - used_days

    # randomly assign unassigned clusters to unassigned days
    for cluster in unassigned_clusters:
        day = unassigned_days.pop()
        cluster_assignments[cluster] = {day,}
    
    # Add a new column to nodes_df mapping each node's cluster to the weekday
    nodes_df['visit_day'] = nodes_df['cluster'].map(cluster_assignments)

    def update_visit_days(row):
        fixed_day = row['weekdays_fixed_appointments']
        visit_days = row['visit_day']
        
        # If there's a fixed appointment day and it's not in visit days, update the visit days
        if not pd.isna(fixed_day) and fixed_day not in visit_days:
            unique_visit_days = nodes_df['visit_day'].apply(lambda x: tuple(sorted(x))).unique()
            for unique_days in unique_visit_days:
                if fixed_day in unique_days:
                    return set(unique_days)
        return visit_days

    # Apply the function to the dataframe
    nodes_df['visit_day'] = nodes_df.apply(update_visit_days, axis=1)
    
    return nodes_df

- sometimes the below assigns clusters to weekdays on which they are not opened up

In [980]:
nodes_df = assign_weekdays_to_clusters(nodes_df)

In [981]:
nodes_df['visit_day'][0]

{2.0, 3.0}

- the below will Fail if there are lunch breaks
- deduce on_site_time from closing times

In [982]:
# Convert visit_day to a frozenset
nodes_df['visit_day'] = nodes_df['visit_day'].apply(frozenset)

# reduce opening hours to those relevant to visit days
def adjust_opening_hours(row):
    visit_days = row['visit_day']
    opening_hours = row['opening_hours']
    adjusted_hours = {}
    for day in visit_days:
        if day in opening_hours:
            open_time, close_time = opening_hours[day]
            adjusted_open = open_time.hour * 60 + open_time.minute
            adjusted_close = close_time.hour * 60 + close_time.minute
            if len(visit_days) == 2 and max(visit_days) == day: # fail here if lunch break
                adjusted_open += 1440
                adjusted_close += 1440
            adjusted_hours[int(day)] = (adjusted_open, adjusted_close)
    return adjusted_hours

nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_opening_hours, axis=1)

In [983]:
nodes_df['adjusted_opening_hours']

0                      {2: (480, 1020)}
1                     {3: (1920, 2460)}
2                      {2: (540, 1020)}
3                       {2: (480, 960)}
4                     {3: (1980, 2460)}
5     {2: (480, 1020), 3: (1920, 2400)}
6     {2: (540, 1020), 3: (1920, 2460)}
7     {2: (540, 1080), 3: (1920, 2400)}
8                     {3: (1920, 2460)}
9     {2: (540, 1020), 3: (1980, 2460)}
10                     {2: (540, 1020)}
11                    {3: (1920, 2400)}
12                                   {}
13                    {3: (1920, 2460)}
14                    {3: (1920, 2460)}
15                     {2: (540, 1080)}
16    {2: (480, 1020), 3: (1980, 2520)}
17                     {2: (480, 1020)}
18                     {2: (540, 1020)}
19                    {3: (1920, 2400)}
Name: adjusted_opening_hours, dtype: object

In [984]:
nodes_df.iloc[1,1]

{1: (datetime.time(9, 0), datetime.time(18, 0)),
 3: (datetime.time(8, 0), datetime.time(17, 0)),
 4: (datetime.time(8, 0), datetime.time(16, 0)),
 5: (datetime.time(9, 0), datetime.time(17, 0)),
 6: (datetime.time(8, 0), datetime.time(17, 0))}

In [985]:
# Replace opening hours with fixed appointments if any exist
def time_to_minutes(t):
    return t.hour * 60 + t.minute + t.second / 60

def adjust_hours(row):
    opening_hours = row['adjusted_opening_hours']
    appointment = row['fixed_appointment']
    if appointment:
        day, start_time, end_time = appointment
        start_minutes = int(time_to_minutes(start_time) - margin)
        end_minutes = int(time_to_minutes(end_time) + margin)

        # Get the highest day key in the adjusted_opening_hours
        max_day_key = max(opening_hours.keys()) if opening_hours else None
        
        # Check if the appointment is on the last (highest) day
        if (day == max_day_key) & (day != 0):
            start_minutes += 1440  # Add 24 hours in minutes
            end_minutes += 1440

        # Adjust opening hours for the appointment day
        if day in opening_hours:
            opening_hours = {day: (start_minutes, end_minutes)}
        else:
            # Add new day if it does not exist
            opening_hours[day] = (start_minutes, end_minutes)
    # make opening_hours a list of tuples
    opening_hours = [[v[0], v[1]] for _, v in opening_hours.items()]
    return opening_hours

# Applying the function
nodes_df['adjusted_opening_hours'] = nodes_df.apply(adjust_hours, axis=1)

In [986]:
nodes_df['adjusted_opening_hours']

0                   [[480, 1020]]
1                  [[1920, 2460]]
2                   [[540, 1020]]
3                    [[480, 960]]
4                  [[1980, 2460]]
5     [[480, 1020], [1920, 2400]]
6     [[540, 1020], [1920, 2460]]
7     [[540, 1080], [1920, 2400]]
8                  [[1920, 2460]]
9     [[540, 1020], [1980, 2460]]
10                  [[540, 1020]]
11                 [[1920, 2400]]
12                             []
13                 [[1920, 2460]]
14                 [[1920, 2460]]
15                  [[540, 1080]]
16                 [[2030, 2070]]
17                  [[480, 1020]]
18                  [[540, 1020]]
19                 [[1920, 2400]]
Name: adjusted_opening_hours, dtype: object

In [987]:
nodes_df['cluster_size'] = nodes_df['cluster'].str.split('_').str[0]

In [988]:
def define_clusters(dataframe):
    dataframe['visit_day'] = dataframe['visit_day'].apply(lambda x: tuple(x) if isinstance(x, list) else (x,))
    dataframe['new_clusters'] = dataframe['visit_day'].astype(str).factorize()[0]
    return dataframe

# Apply the function to the DataFrame
clustered_df = define_clusters(nodes_df)

# Convert the DataFrame to the required dictionary format for plotting
refined_clusters = clustered_df.groupby('new_clusters')['node_id'].apply(list).to_dict()

In [989]:
depot_node_data = nodes_df[nodes_df['node_id'] == 0].iloc[0]  # Assuming there is always a row for node 0 in the original DataFrame

result_dfs = {}
for index, group in nodes_df.groupby('visit_day'):
    # Check if depot node is in the current group
    if 0 not in group['node_id'].values:
        # Append depot node data to the group
        group = pd.concat([pd.DataFrame([depot_node_data]), group], ignore_index=True)
    # Now group is guaranteed to include the depot node
    result_dfs[index] = group[['node_id', 'priority', 'adjusted_opening_hours', 'cluster_size', 'visit_day', 'on_site_time']]

- nodes with fixed appointments should have prio 1
- add margin to not arrive before closing - amount of time staying
- would require changes for a 3 day trip
- correct index for printing dropped nodes
- optimize route beyond priority choice

In [990]:
sub_nodes_df = result_dfs[list(result_dfs.keys())[0]]
solutions = {}
route_lists = {}
route_lists_with_travel = {}

def minutes_to_hhmm(minutes_am):
    minutes_am = minutes_am % 1440  # Ensure minutes are within a day
    minutes = minutes_am % 60
    hours = (minutes_am - minutes) // 60  # Use integer division for hours
    return f'{hours:02}:{minutes:02}'

def create_data_model(sub_nodes_df, sub_time_matrix):
    """Stores the data for the problem."""
    data = {}
    data['time_matrix'] = sub_time_matrix
    data['windows'] = sub_nodes_df['adjusted_opening_hours'].tolist()
    data['priorities'] = sub_nodes_df['priority'].tolist()
    data['num_vehicles'] = 1
    data['on_site_time'] = sub_nodes_df['on_site_time'].tolist()
    data['depot'] = 0
    return data

def return_route_and_times(solution, manager, routing, original_node_ids, data):
    """Returns the route along with the start times at each node."""
    index = routing.Start(0)  # Start at the depot.
    route_with_travel = []
    route_without_travel = []
    time_dimension = routing.GetDimensionOrDie('total_time')  # Make sure this matches the dimension name used

    while not routing.IsEnd(index):
        node_index = manager.IndexToNode(index)
        original_node_id = original_node_ids[node_index]  # Map back to original node ID
        time_var = time_dimension.CumulVar(index)
        start_time = solution.Min(time_var)
        end_time = start_time + data['on_site_time'][node_index]  # Include on-site time
        route_with_travel.append((original_node_id, start_time, end_time))  # Include end time for better clarity
        route_without_travel.append((original_node_id, start_time))  # Include end time for better clarity
        next_index = solution.Value(routing.NextVar(index))
        
        travel_time = routing.GetArcCostForVehicle(index, next_index, 0) - data['on_site_time'][index]  # Get travel time
        route_with_travel.append(("road", travel_time))
        
        index = next_index

    # Add the final node
    final_node_index = manager.IndexToNode(index)
    final_node_id = original_node_ids[final_node_index]
    final_time_var = time_dimension.CumulVar(index)
    final_start_time = solution.Min(final_time_var)
    final_end_time = final_start_time + data['on_site_time'][final_node_index]
    route_with_travel.append((final_node_id, final_start_time, final_end_time))
    route_without_travel.append((final_node_id, final_start_time))

    return route_with_travel, route_without_travel

def print_route(route_with_times):
    """Prints the route in the desired format."""
    route_str = ""
    for segment in route_with_times:
        if segment[0] == "road":
            route_str += f" - road ({segment[1]}) - "
        else:
            node_id, start_time, end_time = segment
            route_str += f"{node_id} ({minutes_to_hhmm(start_time)}-{minutes_to_hhmm(end_time)})"
    print(route_str)

# def solve_vrp(key, sub_nodes_df):
if True:
    max_travel_time = 10000 if len(sub_nodes_df['visit_day'].iloc[0]) == 1 else 20000 # adjust
    nodes = sub_nodes_df['node_id'].tolist()
    sub_time_matrix = time_matrix.loc[nodes, nodes].values.tolist()
    sub_time_matrix = [[int(x) for x in row] for row in sub_time_matrix]
    data = create_data_model(sub_nodes_df, sub_time_matrix)
    manager = pywrapcp.RoutingIndexManager(len(data["time_matrix"]), data["num_vehicles"], data["depot"])
    routing = pywrapcp.RoutingModel(manager)
    def time_callback(from_index, to_index):
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data["time_matrix"][from_node][to_node] + data['on_site_time'][from_node]
    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
    routing.AddDimension(
        transit_callback_index,
        slack,  # upper bound for slack / waiting time
        max_travel_time,  # upper bound for vehicle maximum travel time
        False,  # start cumul to zero
        "total_time"
    )
    time_dimension = routing.GetDimensionOrDie("total_time")
    time_dimension.SetGlobalSpanCostCoefficient(span_cost_coefficient)

    # PENALTY
    for location_index, priority in enumerate(data['priorities']):
        index = manager.NodeToIndex(location_index)
        if index == 0:
            continue
        else:
            routing.AddDisjunction([index], int(round((priority*100)**2*penalty_factor, 0)))

    # OPENING HOURS, LUNCH AND OVERNIGHT BREAKS
    for location_index, windows in enumerate(data['windows']):
        index = manager.NodeToIndex(location_index)
        days = len(windows)
        if days > 1:
            if index < manager.GetNumberOfNodes():
                work_start = working_hours[0]
                work_end = working_hours[1] + 1440
                latest_start = max(work_start, windows[0][0])
                earliest_end = min(work_end, windows[-1][1])
                # print(f'setting the time window from {latest_start} to {earliest_end} for node {location_index}')
                time_dimension.CumulVar(index).SetRange(latest_start, earliest_end)
            for day in range(1, days):
                # print(f'and removing time between days from {working_hours[1] + 1440 * (day-1)} to {working_hours[0] + 1440 * day} for node {location_index}')
                time_dimension.CumulVar(index).RemoveInterval(windows[day-1][1], windows[day][0])
                time_dimension.CumulVar(index).RemoveInterval(working_hours[1] + 1440 * (day-1), working_hours[0] + 1440 * day)
        else:
            if windows[0][0] > 1440:
                # print('work on day 2')
                work_start = working_hours[0] + 1440
                work_end = working_hours[1] + 1440
                # print(f'work starts at {work_start} and ends at {work_end}')
                # print(f'window starts at {windows[0][0]} and ends at {windows[0][1]}')
                day_start = max(windows[0][0], work_start)
                day_end = min(windows[0][1], work_end)
            else:
                day_start = windows[0][0]
                day_end = windows[0][1]
            # print(f'setting the time window from {day_start} to {day_end} for node {location_index}')
            time_dimension.CumulVar(index).SetRange(day_start, day_end)  

    node_visit_transit = {}
    for index in range(routing.Size()):
        node = manager.IndexToNode(index)
        node_visit_transit[index] = data['on_site_time'][node]

    lunch_break_interval = routing.solver().FixedDurationIntervalVar(
        lunch_start, lunch_end, lunch_duration, False, 'lunch_break'
    )

    # Assign the break interval to the single vehicle
    time_dimension.SetBreakIntervalsOfVehicle([lunch_break_interval], 0, node_visit_transit)

    # Instantiate route start and end times to produce feasible times
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.Start(0)))
    routing.AddVariableMinimizedByFinalizer(time_dimension.CumulVar(routing.End(0)))

    # Setting first solution heuristic
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    # search_parameters.local_search_metaheuristic = (
    #     routing_enums_pb2.LocalSearchMetaheuristic.SIMULATED_ANNEALING)
    search_parameters.time_limit.seconds = 300
    search_parameters.log_search = False

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

#     if solution:
#         dropped = []
#         for node in range(routing.Size()):
#             if routing.IsStart(node) or routing.IsEnd(node):
#                 continue
#             if solution.Value(routing.NextVar(node)) == node:
#                 dropped.append(manager.IndexToNode(node))
#         if len(dropped) > 0:
#             print(f"dropped from {key}: {dropped}")
#         return key, return_route_and_times(solution, manager, routing, nodes, data)
        
#     else:
#         print(f"No solution for key {key}")
#         return key, None
    
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     future_to_key = {executor.submit(solve_vrp, key, sub_nodes_df): key for key, sub_nodes_df in result_dfs.items()}
#     for future in concurrent.futures.as_completed(future_to_key):
#         key = future_to_key[future]
#         # print(f"Finding solution for key: {key}")
#         try:
#             key, result = future.result()
#             if result:
#                 route_lists_with_travel[key] = result[0]
#                 route_lists[key] = result[1]
#         except Exception as e:
#             print(f"Error for key {key}: {e}")

# for key, route in route_lists_with_travel.items():
#     print(f"Route for key {key}:")
#     print_route(route)

IndexError: list index out of range

# check and visualize results

In [None]:
route_and_times = route_lists[list(route_lists.keys())[0]]
route_df = pd.DataFrame(route_and_times, columns=['node_id', 'arrival_time'])
merged_df = pd.merge(nodes_df, route_df, on='node_id', how='left')[['adjusted_opening_hours', 'arrival_time', 'node_id']]
merged_df = merged_df[merged_df['node_id'] != 0]

def check_within_ranges(arrival_time, ranges):
    if len(ranges) == 1:
        return ranges[0][0] <= arrival_time <= ranges[0][1]
    elif len(ranges) == 2:
        return (ranges[0][0] <= arrival_time <= ranges[0][1]) or (ranges[1][0] <= arrival_time <= ranges[1][1])
    return False

merged_df['time_check'] = merged_df.apply(
    lambda row: check_within_ranges(row['arrival_time'], row['adjusted_opening_hours']), axis=1
)

if merged_df['time_check'].all():
    print('VIOLATION OF TIME CONSTRAINTS')
    print(merged_df[merged_df['time_check'] == False])

# merged_df[['node_id', 'adjusted_opening_hours', 'arrival_time', 'time_check']]

IndexError: list index out of range

- print more info (opening hours, fixed appointments, priority, decisions [removing a node, replacing a node to another day, priority, etc])
- use data to fine tune hyperparameters

evtl:
- Store state and if it was possible to find routes for all solutions iteratively add nodes based on node priority and distance to root node (those further away should be included more likely)
- Test Discrete Priority (must be visited this week; must be visited next week; ...)
- Compare routes with and without overnight stays / large clusters

In [None]:
plot_refined_clusters(refined_clusters, nodes_df, home_node_id=0)

In [None]:
plot_all_cluster_routes(route_lists, nodes_df)