In [3]:
# RPB
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
# from scipy.spatial.distance import euclidean

import plotly.graph_objects as go
import plotly.io as pio
# Render Plotly figures inline in a standard Jupyter Notebook or JupyterLab.
pio.renderers.default = "notebook"

# Load the LaDe dataset (replace the path with the location of your download).
# Dataset available at: https://github.com/wenhaomin/LaDe
try:
    df = pd.read_csv('../datasets/delivery_sh.csv')
    print(f"Loaded {len(df)} delivery records from Shanghai")
except FileNotFoundError:
    print("Dataset not found! Using synthetic data instead")
    # No real records exist in this branch: mark `df` as unavailable and keep the
    # sample table empty so that nothing below touches an undefined DataFrame.
    df = None
    sample_deliveries = pd.DataFrame(columns=["courier_id", "ds", "count"])
    # Generate synthetic delivery points if the dataset is unavailable.
    np.random.seed(42)
    delivery_points = np.random.uniform(low=[31.0, 121.0], high=[31.3, 121.5], size=(50,2))
    depot = np.array([31.15, 121.3])
else:
    # Preprocess the actual data: every record is a delivery point, the depot is their centroid.
    delivery_points = df[['lat', 'lng']].values
    depot = np.mean(delivery_points, axis=0)

    # Number of orders per (courier, ds) pair, sorted ascending by that count.
    delivery_counts = (
        df.groupby(["courier_id", "ds"])
        .agg(count=("order_id", "count"))
        .reset_index()
        .sort_values(by="count")
    )
    # Keep pairs with 15-18 orders and draw a few of them as candidate problem instances.
    candidate_days = delivery_counts[(delivery_counts["count"] > 14) & (delivery_counts["count"] < 19)]
    # Cap the sample size so a small candidate set does not raise, and seed it for reproducibility.
    sample_deliveries = candidate_days.sample(min(5, len(candidate_days)), random_state=42)

Loaded 1483864 delivery records from Shanghai


In [4]:
# List the sampled courier ids and their matching `ds` values, then render the table itself.
print(sample_deliveries["courier_id"].tolist())
print(sample_deliveries["ds"].tolist())
sample_deliveries

[2019, 2259, 2683, 4118, 856]
[909, 626, 614, 711, 709]


Unnamed: 0,courier_id,ds,count
28712,2019,909,18
32808,2259,626,18
39438,2683,614,17
59864,4118,711,15
12168,856,709,15


In [None]:
# Courier / `ds` pair whose deliveries define the routing problem solved below.
COURIER_ID = 1061
DELIVERY_DS = 621

# Only narrow the problem down when real records were loaded; in the synthetic
# fallback there is no DataFrame and the generated points/depot stay in effect.
if globals().get("df") is not None:
    courier_day = df[(df.courier_id == COURIER_ID) & (df.ds == DELIVERY_DS)]
    if courier_day.empty:
        # Fail loudly: averaging zero rows would silently produce a NaN depot.
        raise ValueError(f"No deliveries found for courier_id={COURIER_ID}, ds={DELIVERY_DS}")
    delivery_points = courier_day[['lat', 'lng']].values
    # The depot is placed at the centroid of the selected delivery points.
    depot = np.mean(delivery_points, axis=0)

In [None]:
# ---------------------------------------------------------------------------
# Genetic-algorithm hyper-parameters
# ---------------------------------------------------------------------------

# NUM_POINTS - upper bound on how many delivery points enter the problem.
#   Each point is one gene of a chromosome (route). The number of possible
#   orderings grows factorially with this value, so it dominates problem difficulty.
NUM_POINTS = 122

# POPULATION_SIZE - how many candidate routes (chromosomes) live in each generation.
#   Larger pools explore more of the search space at a higher per-generation cost.
POPULATION_SIZE = 100

# GENERATIONS - how many evolutionary cycles are run.
#   More cycles give the population more time to converge but lengthen the total runtime.
GENERATIONS = 500

# MUTATION_RATE - probability that a freshly created child has two of its stops swapped.
#   A cheap source of diversity that helps the search escape local optima.
MUTATION_RATE = 0.02

# TOURNAMENT_SIZE - number of routes competing in each parent-selection tournament.
#   Bigger tournaments raise selection pressure: faster convergence, less diversity.
TOURNAMENT_SIZE = 5

# ELITISM_COUNT - number of best routes copied unchanged into the next generation.
#   Guarantees that the best solution found so far is never lost.
ELITISM_COUNT = 2

In [None]:
# Build the coordinate table used by the solver: row 0 is the depot, rows 1..k are the
# first `NUM_POINTS` delivery points (fewer if the selection is smaller, since slicing
# never over-runs). Keeping the depot at index 0 is what the route helpers rely on.
points = np.concatenate((np.atleast_2d(depot), delivery_points[:NUM_POINTS]), axis=0)

In [None]:
def calculate_distance_matrix(points):
    """
    Computes the symmetric matrix of pairwise Euclidean distances between points.

    Distances are plain Euclidean norms of coordinate differences, so they are expressed
    in the same units as the input coordinates (no geodesic conversion is applied).

    Args:
        points (array-like): Sequence of n coordinate vectors; a NumPy array or nested lists.

    Returns:
        numpy.ndarray: An (n, n) float array whose entry [i, j] is the distance between
        points[i] and points[j]. The diagonal is zero.
    """
    coords = np.asarray(points, dtype=float)
    n_points = len(coords)
    if coords.size == 0:
        # Nothing to measure: no points at all, or points without coordinates.
        return np.zeros((n_points, n_points))

    # Flatten every point to a 1-D vector so scalar and nested inputs are treated alike.
    coords = coords.reshape(n_points, -1)
    # Broadcasting produces all pairwise differences at once instead of a Python double loop.
    deltas = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
    return np.linalg.norm(deltas, axis=-1)
    
def create_route(points):
    """
    Builds one random chromosome: a shuffled ordering of every delivery-point index.

    Args:
        points (array-like): All locations, depot first (index 0), deliveries after it.

    Returns:
        list: Indices 1..len(points)-1 in random order. The depot (index 0) never
        appears in the route.
    """
    stop_count = len(points) - 1
    delivery_indices = range(1, stop_count + 1)
    # Sampling all indices without replacement is the same as drawing a random permutation.
    return random.sample(delivery_indices, stop_count)

def route_distance(route, distance_matrix):
    """
    Calculates the total travel distance for a delivery route that starts and ends at the depot.

    Args:
        route (list): A sequence of point indices representing the order of deliveries,
            excluding the depot.
        distance_matrix (array-like): Square table indexed as distance_matrix[i][j] giving
            the distance from point i to point j, with the depot at index 0.

    Returns:
        float: The total distance of the route, including travel from the depot to the first
        delivery, between all deliveries, and back to the depot. An empty route has length 0.0.
    """
    if len(route) == 0:
        # No deliveries means the vehicle never leaves the depot.
        return 0.0

    # Bracket the deliveries with the depot so every leg is just a consecutive pair.
    stops = [0, *route, 0]
    return sum(distance_matrix[a][b] for a, b in zip(stops, stops[1:]))

def tournament_selection(population, fitness, tournament_size=None):
    """
    Selects a parent route from the population using tournament selection.

    A handful of routes is drawn at random without replacement and the one with the
    lowest fitness value (shortest distance) wins.

    Args:
        population (list): List of candidate routes (chromosomes) in the current generation.
        fitness (list): Fitness value of each route, aligned with `population`; lower is better.
        tournament_size (int, optional): Number of contestants. Defaults to the global
            TOURNAMENT_SIZE. Values larger than the population are capped at its size.

    Returns:
        list: The selected parent route (the same object that is stored in `population`).

    Raises:
        ValueError: If there is nothing to select from or the tournament size is below 1.
    """
    if tournament_size is None:
        tournament_size = TOURNAMENT_SIZE

    # Only individuals that have a fitness value can compete.
    pool_size = min(len(population), len(fitness))
    contestants = min(tournament_size, pool_size)
    if contestants < 1:
        raise ValueError("tournament selection needs a non-empty population and a positive tournament size")

    # Sample positions rather than (route, fitness) pairs: this avoids rebuilding a zipped
    # copy of the whole population on every call.
    entrants = random.sample(range(pool_size), contestants)
    winner = min(entrants, key=lambda idx: fitness[idx])
    return population[winner]

def ordered_crossover(parent1, parent2):
    """
    Performs Ordered Crossover (OX) between two parent routes to produce a single offspring.

    A random contiguous slice of `parent1` is copied into the child at the same positions.
    The remaining slots are filled from left to right with the genes of `parent2`, in the
    order they appear there, skipping any gene the child already contains. When both parents
    are permutations of the same stops, the child is a valid permutation too.

    Args:
        parent1 (list): The first parent route (chromosome); donates the copied slice.
        parent2 (list): The second parent route (chromosome); donates the ordering of the rest.
            Genes must be hashable and different from -1, which marks an empty slot.

    Returns:
        list: A new child route. Neither parent is modified. Parents with fewer than two
        genes cannot be cut, so a copy of `parent1` is returned for them.
    """
    size = len(parent1)
    if size < 2:
        # Two distinct cut points do not exist; the only possible child equals the parent.
        return list(parent1)

    start, end = sorted(random.sample(range(size), 2))
    child = [-1] * size
    child[start:end + 1] = parent1[start:end + 1]

    # A set makes the "already in the child?" check O(1) instead of scanning the list.
    used = set(child[start:end + 1])
    slot = 0
    for gene in parent2:
        if gene in used:
            continue
        # Advance to the next slot that the copied slice (or an earlier gene) has not filled.
        while slot < size and child[slot] != -1:
            slot += 1
        if slot >= size:
            break  # child is complete
        child[slot] = gene
        used.add(gene)
    return child

def swap_mutation(route, mutation_rate=None):
    """
    Applies swap mutation to a given route with a given probability.

    With probability `mutation_rate`, two distinct positions of the route are chosen at
    random and their genes (delivery points) are exchanged. This injects diversity into the
    population and helps the search avoid premature convergence to a local optimum.

    Note:
        The swap is performed IN PLACE: the list passed in is modified and that same list
        object is returned. Pass a copy if the original ordering must be preserved.

    Args:
        route (list): The individual route (chromosome) to be subjected to mutation.
        mutation_rate (float, optional): Probability of performing the swap. Defaults to the
            global MUTATION_RATE.

    Returns:
        list: The same `route` object, with two genes swapped if the mutation fired.
        Routes with fewer than two genes are always returned unchanged.
    """
    if mutation_rate is None:
        mutation_rate = MUTATION_RATE

    # Draw first so that the random stream advances the same way whatever the route length.
    mutate = random.random() < mutation_rate
    if mutate and len(route) >= 2:
        i, j = random.sample(range(len(route)), 2)
        route[i], route[j] = route[j], route[i]
    return route

# def plot_route(points, best_path):
#     """
#     Plots the final, optimized route on a 2D scatter plot.
#     """
#     # Reorder points according to the best path found
#     ordered_points = points[best_path + [best_path[0]]] # Add start point to the end to close the loop
    
#     plt.figure(figsize=(10, 8))
#     # Plot the route as a line
#     plt.plot(ordered_points[:, 1], ordered_points[:, 0], 'b-', label='Optimized Route')
#     # Plot the depot (start/end point)
#     plt.plot(points[0, 1], points[0, 0], 'go', markersize=12, label='Depot (Start/End)')
#     # Plot the delivery locations
#     plt.plot(points[1:, 1], points[1:, 0], 'ro', markersize=8, label='Delivery Points')
    
#     plt.title('Optimized Delivery Route')
#     plt.xlabel('Longitude')
#     plt.ylabel('Latitude')
#     plt.legend()
#     plt.grid(True)
#     plt.show()

def plot_route_plotly(points, best_path):
    """
    Plots the final, optimized route on an interactive 2D scatter plot using Plotly.
    Hover data shows the route order like Depot -> 1 -> 2 -> ... -> N.

    Args:
        points (numpy.array): An array of all coordinates as (latitude, longitude) rows,
            with the depot at index 0.
        best_path (list): Indices into `points` describing the visiting order. The depot
            (index 0) may be given at any position of the tour or omitted entirely; the
            drawn tour always starts and ends at the depot. The argument is not modified.

    Returns:
        plotly.graph_objects.Figure: The figure, ready for `.show()` or `.write_html()`.
    """
    # Work on a plain-list copy: list concatenation is used below, and the caller's
    # sequence (which may be a NumPy array) must stay untouched.
    path = list(best_path)

    if 0 not in path:
        # Routes that list only the deliveries get the depot prepended.
        path.insert(0, 0)
    elif path[0] != 0:
        # A closed tour may be stored starting anywhere; rotate it to begin at the depot.
        depot_position = path.index(0)
        path = path[depot_position:] + path[:depot_position]

    # Close the loop by returning to the depot, then gather coordinates in visiting order.
    path_order = path + [0]
    ordered_points = points[path_order]

    # One hover label per plotted vertex: the position in the tour doubles as the
    # 1-based stop number because position 0 is the depot.
    final_position = len(path_order) - 1
    hover_texts = []
    for position in range(len(path_order)):
        if position == 0:
            hover_texts.append("Depot (Start)")
        elif position == final_position:
            hover_texts.append("Return to Depot")
        else:
            hover_texts.append(f"Delivery Stop {position}")

    # Initialize the Plotly figure.
    fig = go.Figure()

    # Add the main route trace (lines and markers).
    fig.add_trace(go.Scatter(
        x=ordered_points[:, 1],  # Longitude
        y=ordered_points[:, 0],  # Latitude
        hovertext=hover_texts,
        hoverinfo="text",  # Show only the hover text on hover
        mode='lines+markers',
        line=dict(color='blue', width=2),
        marker=dict(
            size=8,
            color='lightblue',
            symbol='circle',
            line=dict(width=1, color='DarkSlateGrey')
        ),
        name='Optimized Route'
    ))

    # Add a separate, more prominent marker for the depot.
    fig.add_trace(go.Scatter(
        x=[points[0, 1]],
        y=[points[0, 0]],
        hovertext=["Depot (Start/End)"],
        hoverinfo="text",
        mode='markers',
        marker=dict(
            size=14,
            color='green',
            symbol='star',
            line=dict(width=1, color='black')
        ),
        name='Depot'
    ))

    # Update the layout for a clean, professional look.
    fig.update_layout(
        title="Optimized Delivery Route",
        xaxis_title="Longitude",
        yaxis_title="Latitude",
        showlegend=False,
        hovermode='closest',
        height=700
    )

    return fig


In [None]:
# Seed Python's RNG so the whole run (initial population, selection, crossover and
# mutation all draw from `random`) is reproducible. Change the seed to explore other runs.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Pairwise distances are computed once and reused by every fitness evaluation.
distance_matrix = calculate_distance_matrix(points)

# Initial population: POPULATION_SIZE independent random routes.
population = [create_route(points) for _ in range(POPULATION_SIZE)]
best_fitness = float('inf')  # Shortest distance seen so far (lower is better)
best_route = None            # Route that achieved `best_fitness`
history = []                 # Best distance of each generation, for the convergence plot

for gen in range(GENERATIONS):
    # Evaluate fitness (total route distance) of each route in the population.
    fitness = [route_distance(route, distance_matrix) for route in population]

    # Track the best solution of this generation and the best overall.
    current_best = min(fitness)
    if current_best < best_fitness:
        best_fitness = current_best
        # Store a snapshot so later in-place edits of population members cannot alter it.
        best_route = list(population[fitness.index(current_best)])
    history.append(current_best)

    # The last generation has been scored; offspring bred from it would never be
    # evaluated, so skip the (wasted) breeding step.
    if gen == GENERATIONS - 1:
        break

    # Elitism: carry the ELITISM_COUNT best routes over unchanged.
    elite_indices = np.argsort(fitness)[:ELITISM_COUNT]
    new_population = [population[i] for i in elite_indices]

    # Fill the rest of the next generation through selection, crossover and mutation.
    while len(new_population) < POPULATION_SIZE:
        parent1 = tournament_selection(population, fitness)
        parent2 = tournament_selection(population, fitness)
        child = ordered_crossover(parent1, parent2)
        child = swap_mutation(child)  # the child is a fresh list, so in-place mutation is safe
        new_population.append(child)

    # Replace the old population with the new generation.
    population = new_population

In [None]:
# Visualization
# Convergence curve: best route length found in each generation.
progress_fig, progress_ax = plt.subplots(figsize=(10, 8))
progress_ax.plot(history)
progress_ax.set_title('Optimization Progress')
progress_ax.set_xlabel('Generation')
# Route lengths are Euclidean norms of raw (lat, lng) differences, i.e. they are measured
# in coordinate degrees. No conversion to meters is performed anywhere in this notebook.
progress_ax.set_ylabel('Best Distance (degrees)')
plt.show()

# Interactive map of the best tour; the GA routes exclude the depot, so prepend index 0.
fig = plot_route_plotly(points, [0] + best_route)
fig.show()
print(f"Best route distance: {best_fitness:.4f} degrees")

In [None]:
# Save the Plotly route figure to an HTML file. The path is relative, so the file is
# written to the kernel's current working directory.
fig.write_html("optimized_route_ga.html")

In [None]:
# Shell escape: print the kernel's current working directory.
# NOTE(review): presumably left in to locate the exported HTML file; confirm whether it is still needed.
!pwd