In [None]:
import os
import csv
import time
import folium
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from geopy.distance import geodesic
from typing import List, Dict, Tuple, Generator

In [None]:
####### Global performance def #######

"""

The purpose of this code is to define a decorator function called measure_time that 
can be used to measure the execution time of other functions. By applying this decorator 
to a function, you can easily track how long it takes to execute.

When the measure_time decorator is applied to a function, it wraps the original 
function with a wrapper function. The wrapper function checks if the measurement is enabled. 
If it is, the wrapper function records the start time, calls the original function, 
records the end time, calculates the execution time, and prints the results. 
Finally, it returns the result of the original function.


"""

def measure_time(func):
    """Decorator that reports how long each call to ``func`` takes.

    Timing is controlled by the ``measure_time.enabled`` attribute. The flag
    is looked up on every call, so it can be flipped at any time, including
    for functions that were decorated while it was off or not yet set.

    Note: when ``func`` is a generator function, only the creation of the
    generator object is timed, not the work done while iterating it.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that keeps the name and docstring of ``func`` and returns
        whatever ``func`` returns.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # A missing flag is treated as "disabled" instead of raising.
        if not getattr(measure_time, "enabled", False):
            return func(*args, **kwargs)

        # perf_counter is monotonic and meant for measuring short durations.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time

        print(f" the Function: {func.__name__} took: {execution_time} seconds")
        return result

    # Always hand back the wrapper: deciding here would freeze the setting
    # that happened to be active at decoration time.
    return wrapper


####### ####### ####### ####### #######

In [None]:
####### Global plotting system #######

"""

The purpose of this code is to define a function called plot_cities that creates a map 
with markers and lines to visualize city data. The function takes 
two parameters: city_data, which contains the information about the cities to be 
plotted, and bound (optional), which specifies the order of cities to be plotted.

The plot_cities function contains several helper functions:

* arrange_cities_nearest: This function arranges the cities in city_data in the order 
specified by path. It sorts the cities based on their index in the path list, ensuring 
that the cities are plotted in the desired order.

* create_map_center: This function extracts the coordinates of the first city in 
city_data and returns them as the center of the map.

* create_custom_icon_style: This function defines a custom icon style for the markers 
on the map. It sets the background color, text color, border radius, padding, font weight, 
and font size.

* add_marker_with_icon: This function adds a marker with a custom icon to the map. 
It takes the coordinates, name, icon style, and index as parameters and creates a marker 
with a numbered icon at the specified coordinates.

* draw_line_between_cities: This function draws a line between two cities on the map. 
It takes the coordinates of the current city, the coordinates of the next city, the names 
of both cities, and the index as parameters. It creates a polyline connecting the two cities with 
a tooltip indicating the route number and the city names.

"""

def plot_cities(city_data, bound = None):
    """Build a folium map of the given cities, optionally drawn as a route.

    Args:
        city_data: Sequence of city rows. Index 2 and index 3 of a row are
            converted to float and handed to folium as the marker location
            (in that order); index 4 is used as the city name.
        bound: Optional sequence of city names describing the visiting
            order. When given and non-empty, the cities are numbered in
            that order and consecutive cities are joined by a line, closing
            the loop back to the first one. Otherwise every city just gets
            a plain marker.

    Returns:
        The populated ``folium.Map``, centred on the first row of
        ``city_data`` as it was passed in.

    Raises:
        ValueError: If a city of ``city_data`` does not appear in ``bound``.
    """

    def arrange_cities_nearest(city_data, path):
        # Work on the rows that were passed in rather than on a notebook
        # global, and read the name from column 4 like the rest of this
        # function does.
        rows = np.asarray(city_data)
        route = list(path)
        # list.index returns the first match, so a start city repeated at
        # the end of a closed tour does not disturb the ordering.
        sorted_indices = np.argsort([route.index(city) for city in rows[:, 4]])
        sorted_rows = rows[sorted_indices]
        # Repeat the first city at the end so the drawn route is closed.
        return np.vstack((sorted_rows, sorted_rows[0]))

    def create_map_center():
        return [float(city_data[0][2]), float(city_data[0][3])]

    def create_custom_icon_style():
        return """
            background-color: #ff5959;
            color: #ffffff;
            border-radius: 100%;
            padding: 20%;
            text-align: center;
            font-weight: bold;
            font-size: auto;
        """

    def add_marker_with_icon(coords, name, icon_style, i):
        folium.Marker(
            coords,
            popup=name,
            icon=folium.DivIcon(
                icon_size=(24, 24),
                icon_anchor=(12, 12),
                html='<div style="{}">{}</div>'.format(icon_style, i + 1)
            )
        ).add_to(map_obj)

    def draw_line_between_cities(coords, next_coords, name, next_city, i):
        folium.PolyLine(
            [coords, next_coords],
            color='blue',
            weight=2.5,
            opacity=1.0,
            tooltip='Route {}: {} -> {}'.format(i + 1, name, next_city[4])
        ).add_to(map_obj)

    # Create a map centered on the first city's coordinates
    map_obj = folium.Map(location=create_map_center(), zoom_start=6)

    # Explicit test instead of truthiness: an array-like ``bound`` has no
    # unambiguous truth value.
    has_route = bound is not None and len(bound) > 0

    if has_route:
        city_data = arrange_cities_nearest(city_data, bound)
        # The style string is identical for every marker, so build it once.
        icon_style = create_custom_icon_style()

        # Walk over consecutive (city, next city) pairs of the closed route.
        for i, (city, next_city) in enumerate(zip(city_data[:-1], city_data[1:])):
            coords = [float(city[2]), float(city[3])]
            next_coords = [float(next_city[2]), float(next_city[3])]
            name = city[4]

            add_marker_with_icon(coords, name, icon_style, i)
            draw_line_between_cities(coords, next_coords, name, next_city, i)
    else:
        # No route requested: one plain marker per city.
        for city in city_data:
            folium.Marker(
                location=[float(city[2]), float(city[3])],
                popup=city[4]
            ).add_to(map_obj)

    return map_obj

####### ####### ####### ####### #######

In [None]:
####### Global notebook configs #######

# Master switch for the timing decorator; the measure_time wrapper reads
# this attribute each time a decorated function is called.
measure_time.enabled = True 
if measure_time.enabled:
    print("* measure_time is enabled ")

# Path of the dataset file, relative to the current working directory.
dataset_file_path = os.path.join('../datasets', 'cities.csv')
print(f"* the selected dataset is located at: {dataset_file_path}")

####### ####### ####### ####### #######

# City Generator 

This part contains the following logic: we first retrieve the data from a dataset and then build a sample from it that contains each city's name, ZIP code, population count and longitude/latitude.

<u>non-optimized</u>

In [None]:
@measure_time
def read_csv_to_tuple(filename: str) -> tuple:
    """Load every data row of a ';'-delimited CSV file into a tuple.

    Args:
        filename: Path of the CSV file, decoded as ISO-8859-1.

    Returns:
        A tuple with one list of string fields per row. The first row of
        the file is treated as the header and left out.
    """
    # newline='' leaves line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are kept exactly as written.
    with open(filename, "r", encoding='ISO-8859-1', newline='') as fh:
        reader = csv.reader(fh, delimiter=';')
        # Skip the header row; the default keeps this a no-op on an empty file.
        next(reader, None)
        return tuple(reader)


@measure_time
def sample_N_from_tuple(cities: tuple, size: int = None):
    """Draw ``size`` distinct rows from ``cities`` at random.

    Args:
        cities: The rows to sample from.
        size: Number of rows to draw.

    Returns:
        The list produced by ``random.sample``, or an empty tuple when
        ``size`` is missing or larger than the number of rows.
    """
    available = len(cities)
    if size is not None and size <= available:
        return random.sample(cities, size)
    # Nothing sensible to sample: no size given, or more requested than exist.
    return ()

In [None]:
# Load every row of the dataset, then draw 10 of them at random.
citiesTuple = read_csv_to_tuple(dataset_file_path)
citiesSample = sample_N_from_tuple(citiesTuple, 10)

<u>optimized</u>

- Reads the CSV file using the pandas library's read_csv function.
    - Efficient CSV reading: The optimized version uses pandas' read_csv function, which is highly optimized for reading CSV files. It takes advantage of optimized file parsing algorithms and efficient memory management, resulting in faster file reading compared to the line-by-line reading in the non-optimized version.

- Converts the DataFrame to a list of row lists and then to a tuple.
    - In the non-optimized version, the rows are produced one at a time by the `csv` reader before being collected into a tuple. In the optimized version, pandas converts the entire DataFrame to a list of row lists in one operation, which is more efficient and faster.
    
- Uses pandas and NumPy functions for sampling instead of the random.sample function.
    - The non-optimized version uses the random.sample function to sample elements from the tuple. In the optimized version, NumPy's random.choice function is used, which is implemented in optimized C code and performs faster random sampling.
    - The optimized version uses pandas' iloc function to extract the sampled data based on the selected indices. This indexing operation is optimized in pandas and provides faster access to the desired rows.

In [None]:
@measure_time
def read_csv_to_tuple(filename: str):
    """Read a ';'-separated, ISO-8859-1 encoded CSV file with pandas.

    Args:
        filename: Path of the CSV file.

    Returns:
        A tuple holding one list per data row, with the values as parsed
        by ``pandas.read_csv``.
    """
    frame = pd.read_csv(filename, sep=';', encoding='ISO-8859-1')
    # DataFrame -> list of row lists -> tuple
    row_lists = frame.values.tolist()
    return tuple(row_lists)


@measure_time
def sample_N_from_tuple(cities: tuple, size: int = None):
    """Pick ``size`` distinct rows of ``cities`` using pandas and NumPy.

    Args:
        cities: The rows to sample from.
        size: Number of rows to pick.

    Returns:
        A NumPy array built from the selected rows, or an empty tuple when
        ``size`` is missing or exceeds the number of rows.
    """
    frame = pd.DataFrame(list(cities))
    row_count = len(frame)

    can_sample = size is not None and size <= row_count
    if not can_sample:
        return ()

    # Row positions drawn without replacement, so no row is picked twice.
    picked = np.random.choice(row_count, size, replace=False)
    return np.array(frame.iloc[picked].values.tolist())

In [None]:
# Same two steps with the pandas/NumPy implementations, sampling 100 rows.
citiesTuple = read_csv_to_tuple(dataset_file_path)
citiesSample = sample_N_from_tuple(citiesTuple, 100)

<u>the actual output of the city generator section</u>

In [None]:
# disable performance profiling for this section 
measure_time.enabled = False

# Rebuild the 100-city sample that the later cells read as citiesSample.
citiesTuple = read_csv_to_tuple(dataset_file_path)
citiesSample = sample_N_from_tuple(citiesTuple, 100)

# display the map with the selected cities (no route given, so each city
# gets a plain marker)
plot_cities(citiesSample)

# location generator 
The purpose of this stage is to generate a series of city names along with their respective longitude and latitude coordinates. It achieves this by extracting the relevant information from a given list of city data.

In [None]:
# enable performance profiling for this section
# (the measure_time wrapper checks this flag on every call)
measure_time.enabled = True

<u>non-optimized</u>

In [None]:
@measure_time
def create_location_generator(citiesSample: List[List[str]]) -> Dict[str, Tuple[float, float]]:
    """Map every city name to its ``(longitude, latitude)`` pair.

    Args:
        citiesSample: City rows; index 4 is read as the name, index 3 as
            the longitude and index 2 as the latitude.

    Returns:
        A dict from city name to ``(longitude, latitude)`` as floats. When
        a name occurs more than once, the last row wins.
    """
    return {
        row[4]: (float(row[3]), float(row[2]))
        for row in citiesSample
    }

In [None]:
# Build the name -> (longitude, latitude) lookup for the current sample.
location = create_location_generator(citiesSample)

<u>optimized</u>

- Uses NumPy array indexing to extract city names, longitudes, and latitudes from the citiesSample list in one operation.
    - The optimized version leverages NumPy's array indexing and vectorized operations to extract the necessary data from the citiesSample list. This allows for faster and more efficient data extraction compared to the iterative approach in the non-optimized version.
- Converts the longitudes and latitudes to float using NumPy's astype function.
    - In the non-optimized version, the conversion to float is performed individually for each longitude and latitude. In the optimized version, NumPy's astype function is applied to the entire arrays of longitudes and latitudes in one operation. This bulk conversion is more efficient and faster.
- Utilizes the zip function and generator syntax (yield from) to create a generator that yields tuples of city names and corresponding longitude-latitude pairs.
    - The optimized version uses a generator and the yield from syntax to produce the desired output. Generators provide a memory-efficient way to produce values on-the-fly, as opposed to constructing and returning a complete dictionary in the non-optimized version. This can improve performance, especially when dealing with large datasets.


In [None]:
@measure_time
def create_location_generator(citiesSample: List[List[str]]) -> Generator[Tuple[str, Tuple[float, float]], None, None]:
    """Yield ``(city_name, (longitude, latitude))`` for every city row.

    Args:
        citiesSample: City rows; column 4 is read as the name, column 3 as
            the longitude and column 2 as the latitude.

    Yields:
        One ``(name, (longitude, latitude))`` tuple per row, in row order,
        with the coordinates converted to float. An empty sample yields
        nothing.
    """
    # Convert once and slice three times instead of converting three times.
    table = np.asarray(citiesSample)
    if table.size == 0:
        # An empty sample has no columns to slice; just produce no items.
        return

    city_names = table[:, 4]
    longitudes = table[:, 3].astype(float)
    latitudes = table[:, 2].astype(float)

    yield from zip(city_names, zip(longitudes, latitudes))

In [None]:
# NOTE: create_location_generator is a generator function here, so this call
# only creates the generator object; the rows are read when it is iterated.
location = create_location_generator(citiesSample)

<u>the actual output of the location generator section</u>

In [None]:
# disable performance profiling for this section 
measure_time.enabled = False

# Consume a fresh generator and print one "name: (longitude, latitude)"
# line per city.
for city_name, coordinates in create_location_generator(citiesSample):
    print(f'{city_name}: {coordinates}')

# distance matrix generator

This part calculates a distance matrix for a set of cities based on their geographic coordinates.

In [None]:
# enable performance profiling for this section
# (the measure_time wrapper checks this flag on every call)
measure_time.enabled = True

<u>non-optimized</u>

In [None]:
@measure_time
def calculate_distance_matrix(generator) -> Dict[str, Dict[str, float]]:
    """Compute the geodesic distance in kilometres between every city pair.

    Args:
        generator: Iterable of ``(city_name, (longitude, latitude))`` pairs,
            as produced by ``create_location_generator``. A dict mapping
            city names to such coordinate pairs is accepted as well.

    Returns:
        A nested dict where ``result[a][b]`` is the distance from city
        ``a`` to city ``b`` in kilometres; the distance from a city to
        itself is ``0.0``.
    """
    pairs = generator.items() if isinstance(generator, dict) else generator

    names = []
    points = []
    for city_name, (longitude, latitude) in pairs:
        names.append(city_name)
        # geodesic() reads a bare pair as (latitude, longitude), so the
        # incoming (longitude, latitude) order has to be swapped.
        points.append((latitude, longitude))

    count = len(names)
    rows = [[0.0] * count for _ in range(count)]

    # The distance is symmetric: measure each unordered pair once and
    # mirror it. The diagonal keeps its initial 0.0.
    for i, origin in enumerate(points):
        for j in range(i + 1, count):
            kilometers = geodesic(origin, points[j]).kilometers
            rows[i][j] = kilometers
            rows[j][i] = kilometers

    return {name: dict(zip(names, row)) for name, row in zip(names, rows)}

In [None]:
# Feed a fresh stream of (name, coordinates) pairs into the matrix builder.
distance_matrix = calculate_distance_matrix(
    create_location_generator(citiesSample)
)

<u>optimized</u>

- The optimized version directly stores the city coordinates in a dictionary (city_coords), eliminating the need for additional data structures like the city_coords list in the non-optimized version. This reduces memory usage and unnecessary operations, resulting in improved performance.
- The optimized version utilizes NumPy's vectorized operations to calculate distances between pairs of coordinates. By converting the coordinates to a NumPy array and using broadcasting, the calculations can be performed efficiently in parallel, leading to significant speed improvements.
- Instead of constructing an empty dictionary for each city, the optimized version creates a square matrix (distances) with zeros to store the distance values. This allows for efficient indexing and updating of the distances using NumPy operations.
- The optimized version converts the distances matrix to a pandas DataFrame, which provides efficient indexing capabilities and convenient conversion to a dictionary. This avoids nested loops and dictionary updates in the non-optimized version, resulting in improved performance.

<u>the actual output of the distance matrix generator section</u>

In [None]:
# disable performance profiling for this section 
measure_time.enabled = False

# Bare expression at the end of the cell; presumably the notebook displays
# the returned nested dict as the cell output.
calculate_distance_matrix(
    create_location_generator(citiesSample)
)

# ant colony optimization algorithm

In [None]:
"""

The ant_colony_optimizer function takes a distance_matrix as input along with optional parameters 
for the number of ants, iterations, and other factors. It initializes variables for cities, city 
indices, distance matrix, pheromone matrix, and the best path and distance.

It converts the distance matrix into a NumPy array for efficient computations.
The main iteration loop runs for the specified number of iterations. Inside each iteration, 
the following steps are performed:
    a. Paths are constructed for all ants by calling the construct_ant_paths function, 
        which generates a path for each ant.
    b. The pheromone matrix is updated based on the paths constructed by the ants. 
        This is done by calling the update_pheromone_matrix function, which calculates 
        the delta pheromone values and updates the pheromone matrix accordingly.
    c. The best path and distance for the current iteration are determined by calling 
        the get_best_solution function, which compares the distances of all paths and 
        selects the best one.
    d. If the current iteration's best path is better than the overall best path, the 
        best path and distance are updated.

Finally, the function returns the best path and distance found during the iterations.

* The construct_ant_paths function generates paths for each ant. It iterates over the number 
of ants and calls the construct_path function to construct a path for an individual ant. 
The paths are stored in a list and returned.

* The construct_path function constructs a path for a single ant. It randomly selects a 
starting city, creates a set of unvisited cities, and iteratively chooses the next city 
based on pheromone levels and heuristic information until all cities have been visited. 
The path is returned as a list.

* The choose_next_city function selects the next city for an ant to visit. It calculates 
the attractiveness values for the unvisited cities based on pheromone levels and distances. 
The probabilities of choosing each city are calculated, and a city is randomly selected based 
on these probabilities. The index of the chosen city is returned.

* The update_pheromone_matrix function updates the pheromone matrix based on the paths 
constructed by the ants. It iterates over the paths, calculates the path distance, 
and updates the delta pheromone values for the edges along the path. The existing pheromone 
levels are evaporated globally, and the new delta pheromone values are deposited. 
The pheromone matrix is updated in-place.

* The get_path_distance function calculates the total distance of a given path by summing 
the distances between consecutive cities.

* The get_best_solution function finds the best path and distance among all the paths. 
It iterates over the paths, calculates the distance for each path, and compares it with 
the current best distance. If a better path is found, the best path and distance are updated.


"""
def ant_colony_optimizer(distance_matrix, num_ants=10, num_iterations=100, alpha=1.0, beta=5.0, evaporation_rate=0.5, pheromone_deposit=100.0):
    """Search for a short closed tour through all cities with ant colony optimization.

    Args:
        distance_matrix: Nested dict where ``distance_matrix[a][b]`` is the
            distance from city ``a`` to city ``b``.
        num_ants: Number of tours built per iteration.
        num_iterations: Number of iterations to run.
        alpha: Exponent applied to the pheromone level when choosing a city.
        beta: Exponent applied to the inverse distance when choosing a city.
        evaporation_rate: Passed through to ``update_pheromone_matrix``.
        pheromone_deposit: Passed through to ``update_pheromone_matrix``.

    Returns:
        ``(best_path, best_distance)``: the shortest tour seen (a list of
        city names) and its length. ``(None, np.inf)`` if no tour was built.
    """
    cities = list(distance_matrix)
    num_cities = len(cities)

    # Position of each city in ``cities``; doubles as its matrix row/column.
    city_indices = {city: index for index, city in enumerate(cities)}

    # Dense copy of the nested dict, laid out in ``cities`` order.
    distance_matrix_np = np.array(
        [[distance_matrix[city1][city2] for city2 in cities] for city1 in cities],
        dtype=float,
    ).reshape(num_cities, num_cities)

    # Every edge starts with the same pheromone level.
    pheromone_matrix = np.ones((num_cities, num_cities))

    best_path = None
    best_distance = np.inf

    # The context manager closes the progress bar even when an iteration raises.
    with tqdm(
        total=num_iterations,
        desc="Ant Colony Optimization",
        unit="iter",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"
    ) as progress_bar:
        for _ in range(num_iterations):
            # Generate paths for all ants
            paths = construct_ant_paths(num_ants, cities, city_indices, pheromone_matrix, distance_matrix_np, alpha, beta)

            # Update the pheromone matrix (in place) based on the ant paths
            update_pheromone_matrix(paths, pheromone_matrix, evaporation_rate, pheromone_deposit, distance_matrix_np, city_indices)

            # Keep the best tour of this iteration if it beats the overall best
            iteration_best_path, iteration_best_distance = get_best_solution(paths, distance_matrix_np, city_indices)
            if iteration_best_distance < best_distance:
                best_path = iteration_best_path
                best_distance = iteration_best_distance

            progress_bar.update(1)

    return best_path, best_distance


def construct_ant_paths(num_ants, cities, city_indices, pheromone_matrix, distance_matrix, alpha, beta):
    """Build one tour per ant.

    Returns:
        A list with ``num_ants`` paths, each produced by ``construct_path``
        from the same arguments.
    """
    return [
        construct_path(cities, city_indices, pheromone_matrix, distance_matrix, alpha, beta)
        for _ant in range(num_ants)
    ]


def construct_path(cities, city_indices, pheromone_matrix, distance_matrix, alpha, beta):
    """Build a closed tour for one ant, starting from a random city.

    Args:
        cities: List of city names; a city's position is its matrix index.
        city_indices: Mapping from city name to that position. Kept for
            interface compatibility; the positions are tracked directly.
        pheromone_matrix: Pheromone level per edge.
        distance_matrix: Distance per edge.
        alpha: Pheromone exponent, forwarded to ``choose_next_city``.
        beta: Inverse-distance exponent, forwarded to ``choose_next_city``.

    Returns:
        A list of city names that visits every city once and then repeats
        the starting city at the end.
    """
    # Draw a position instead of a name: np.random.choice(cities) would first
    # coerce the names into a NumPy array and hand back a NumPy scalar.
    current_index = np.random.randint(len(cities))

    unvisited_cities = set(range(len(cities)))
    unvisited_cities.remove(current_index)
    path = [cities[current_index]]

    while unvisited_cities:
        # Move to the next city, chosen from the ones not visited yet
        current_index = choose_next_city(current_index, unvisited_cities, pheromone_matrix, distance_matrix, alpha, beta)
        path.append(cities[current_index])
        unvisited_cities.remove(current_index)

    # Return to the starting city to close the tour
    path.append(path[0])

    return path


def choose_next_city(current_city_index, unvisited_cities, pheromone_matrix, distance_matrix, alpha, beta):
    """Randomly pick the next city, favouring strong pheromone and short distance.

    Each candidate is weighted by ``pheromone ** alpha * (1 / distance) ** beta``.

    Args:
        current_city_index: Matrix index of the city the ant is in.
        unvisited_cities: Non-empty collection of candidate matrix indices.
        pheromone_matrix: Pheromone level per edge.
        distance_matrix: Distance per edge.
        alpha: Exponent applied to the pheromone level.
        beta: Exponent applied to the inverse distance.

    Returns:
        The matrix index of the chosen city.
    """
    candidates = np.fromiter(unvisited_cities, dtype=int, count=len(unvisited_cities))

    pheromone_values = pheromone_matrix[current_city_index, candidates]
    # Cities at distance 0 (e.g. identical coordinates) would make the
    # inverse infinite and the probabilities NaN; a tiny floor keeps them
    # finite while still making such a city by far the most attractive.
    min_distance = 1e-12
    distances = np.maximum(distance_matrix[current_city_index, candidates], min_distance)

    attractiveness = np.power(pheromone_values, alpha) * np.power(1.0 / distances, beta)
    total = np.sum(attractiveness)

    if not np.isfinite(total) or total <= 0.0:
        # The weights overflowed or vanished: fall back to a uniform choice.
        return np.random.choice(candidates)

    return np.random.choice(candidates, p=attractiveness / total)


def update_pheromone_matrix(paths, pheromone_matrix, evaporation_rate, pheromone_deposit, distance_matrix, city_indices):
    """Evaporate the existing pheromone and deposit new pheromone along the tours.

    Every tour adds ``pheromone_deposit / tour_length`` to each edge it
    travels (in the direction travelled). Tours with a length that is not
    positive deposit nothing.

    Args:
        paths: Tours as lists of city names.
        pheromone_matrix: Float matrix of pheromone levels, updated in place.
        evaporation_rate: Fraction of the existing pheromone that is lost in
            this step; ``1 - evaporation_rate`` of it is kept.
        pheromone_deposit: Amount of pheromone each tour spreads over its length.
        distance_matrix: Distance per edge.
        city_indices: Mapping from city name to matrix index.

    Returns:
        None. ``pheromone_matrix`` is modified in place.
    """
    delta_pheromone = np.zeros_like(pheromone_matrix)

    for path in paths:
        stops = [city_indices[city] for city in path]
        edges = list(zip(stops, stops[1:]))

        path_distance = sum(distance_matrix[a, b] for a, b in edges)
        if not path_distance > 0:
            # A zero-length (or empty) tour would divide by zero below.
            continue

        deposit = pheromone_deposit / path_distance
        for from_city_index, to_city_index in edges:
            delta_pheromone[from_city_index, to_city_index] += deposit

    # Keep the share that did not evaporate, then add the fresh deposits.
    pheromone_matrix *= 1.0 - evaporation_rate
    pheromone_matrix += delta_pheromone


def get_path_distance(path, distance_matrix, city_indices):
    """Return the total length of ``path``.

    The length is the sum of ``distance_matrix`` entries for every pair of
    consecutive cities; a path with fewer than two cities has length 0.
    """
    return sum(
        distance_matrix[city_indices[origin], city_indices[target]]
        for origin, target in zip(path, path[1:])
    )


def get_best_solution(paths, distance_matrix, city_indices):
    """Return the shortest of ``paths`` together with its length.

    Returns:
        ``(path, distance)`` of the first path with the smallest length, or
        ``(None, np.inf)`` when no path is shorter than infinity.
    """
    winner, shortest = None, np.inf

    for candidate in paths:
        length = get_path_distance(candidate, distance_matrix, city_indices)
        # Strict comparison: on a tie the earlier path is kept.
        if length < shortest:
            winner, shortest = candidate, length

    return winner, shortest


In [None]:
# Distances between every pair of sampled cities, keyed by city name.
distance_matrix = calculate_distance_matrix(
    create_location_generator(citiesSample)
)

# Search for a short closed tour with 10 ants over 100 iterations.
best_path, best_distance = ant_colony_optimizer(
    distance_matrix, 
    num_ants=10, 
    num_iterations=100
)

# best_path is a list of city names that ends on its starting city.
print(f"Best Distance: {best_distance} km")
print("Best Route:", ' -> '.join(best_path))

In [None]:
# Draw the sample again, this time numbered and connected in tour order.
plot_cities(citiesSample, best_path)