In [None]:
# Base directory (the parent of the working directory) that every path in this
# notebook is built from: the requirements file, the dataset and result
# folders, and the entry appended to sys.path for importing `src`.
CURR_DIR = ".."

## Imports

In [None]:
# Location of the base requirements file, resolved against CURR_DIR.
REQUIREMENTS_PATH = f"{CURR_DIR}/requirements/base.txt"

# Shell escape: run pip on the requirements file above.
!pip install -r {REQUIREMENTS_PATH}

In [None]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# executing code), so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import itertools
import json
import os
import sys
from typing import Callable, List, Optional, Tuple

sys.path.append(CURR_DIR)  # must run before the `src` imports below

from src.file_utils import load_tsplib, plot_fitness
from src.genetic_algorithm import GeneticAlgorithm
from src.crossover import order_crossover, partially_mapped_crossover
from src.mutation import inversion_mutation, relocation_mutation

## Experiments

In [None]:
def run_ga(
    dataset: str,
    population_sizes: Optional[List[int]] = None,
    crossover_rates: Optional[List[float]] = None,
    crossover_funcs: Optional[
        List[Callable[[List[int], List[int]], Tuple[List[int], List[int]]]]
    ] = None,
    mutation_rates: Optional[List[float]] = None,
    mutation_funcs: Optional[List[Callable[[List[int]], None]]] = None,
    elitism_rate: float = 0.05,
    tournament_size: int = 3,
    generations: int = 4000,
    init_population_random_rate: float = 0.95,
    early_stop_threshold: int = 100
) -> None:
    """
    Runs a genetic algorithm on a dataset for every combination of population size, crossover
    rate, mutation rate, crossover function and mutation function, and saves the results of each
    run to its own JSON file under `data/results/<dataset>/`.

    Args:
        dataset: The name of the dataset (should correspond to a `.tsp` file in `data/datasets`).
        population_sizes: A list of population sizes to test (None means [200, 300, 400]).
        crossover_rates: A list of crossover rates to test (None means [0.7, 0.8, 0.9]).
        crossover_funcs: A list of crossover functions to test (None means
            [order_crossover, partially_mapped_crossover]).
        mutation_rates: A list of mutation rates to test (None means [0.05, 0.1, 0.2]).
        mutation_funcs: A list of mutation functions to test (None means
            [inversion_mutation, relocation_mutation]).
        elitism_rate: The proportion of individuals to retain through elitism (default is 0.05).
        tournament_size: The size of the tournament for selection (default is 3).
        generations: The number of generations to run the algorithm for (default is 4000).
        init_population_random_rate: The probability of initializing an individual randomly (default is 0.95).
        early_stop_threshold: The number of generations without improvement before stopping (default is 100).
    """
    # Defaults are resolved here rather than in the signature so that no mutable
    # list object is shared between calls.
    if population_sizes is None:
        population_sizes = [200, 300, 400]
    if crossover_rates is None:
        crossover_rates = [0.7, 0.8, 0.9]
    if crossover_funcs is None:
        crossover_funcs = [order_crossover, partially_mapped_crossover]
    if mutation_rates is None:
        mutation_rates = [0.05, 0.1, 0.2]
    if mutation_funcs is None:
        mutation_funcs = [inversion_mutation, relocation_mutation]

    coords = load_tsplib(os.path.join(CURR_DIR, f"data/datasets/{dataset}.tsp"))

    # Create the output folder before any run starts, so a long experiment
    # cannot fail at save time because the directory is missing.
    os.makedirs(os.path.join(CURR_DIR, f"data/results/{dataset}"), exist_ok=True)

    # product() iterates with the rightmost argument varying fastest, which is
    # the same order as nesting the loops in the order listed here.
    grid = itertools.product(
        population_sizes,
        crossover_rates,
        mutation_rates,
        crossover_funcs,
        mutation_funcs,
    )
    for population_size, crossover_rate, mutation_rate, crossover_func, mutation_func in grid:
        # NOTE(review): arguments are positional, and `generations` is passed
        # before `elitism_rate`/`tournament_size` (unlike this function's own
        # signature) -- confirm this matches GeneticAlgorithm's constructor.
        ga = GeneticAlgorithm(
            coords,
            population_size,
            crossover_rate,
            crossover_func,
            mutation_rate,
            mutation_func,
            generations,
            elitism_rate,
            tournament_size,
            init_population_random_rate,
            early_stop_threshold
        )
        ga.run()

        # One file per configuration; the name encodes the whole parameter combination.
        results_path = os.path.join(
            CURR_DIR,
            f"data/results/{dataset}/pop{population_size}_{crossover_rate}"
            f"{crossover_func.__name__}_{mutation_rate}{mutation_func.__name__}.json"
        )
        ga.save_results(results_path)


def analyse_results(dataset: str, skip: int = 0) -> None:
    """
    Analyzes the results of the genetic algorithm for a given dataset. This function loads every
    `.json` result file in `data/results/<dataset>`, picks the one with the smallest
    `best_distance`, prints its contents and plots its fitness over generations.

    If the directory contains no result files, a message is printed and nothing is plotted.

    Args:
        dataset: The name of the dataset.
        skip: The number of initial generations to skip in the average fitness plot (default is 0).

    Raises:
        FileNotFoundError: If the results directory for `dataset` does not exist.
        KeyError: If a result file has no `best_distance` entry.
    """
    results_dir = os.path.join(CURR_DIR, f"data/results/{dataset}")

    # os.listdir() returns entries in arbitrary order; sorting makes the winner
    # reproducible when several configurations tie on best_distance.
    results_paths = [
        os.path.join(results_dir, name)
        for name in sorted(os.listdir(results_dir))
        if name.endswith(".json")
    ]

    if not results_paths:
        print(f"No result files found in {results_dir}")
        return

    all_results = []
    for path in results_paths:
        with open(path, "r", encoding="utf-8") as file:
            all_results.append(json.load(file))

    # min() keeps the first minimal element, so ties go to the alphabetically
    # first file name.
    best_path, best_config = min(
        zip(results_paths, all_results),
        key=lambda pair: pair[1]["best_distance"],
    )

    print(f"Best configuration: {best_config}")
    plot_fitness(best_path, dataset, skip)

### `berlin52`

In [None]:
# Full default grid on berlin52: 3 population sizes x 3 crossover rates x
# 3 mutation rates x 2 crossover functions x 2 mutation functions = 108 GA runs.
run_ga("berlin52")

In [None]:
# Print the configuration with the smallest best_distance for berlin52 and plot its fitness.
analyse_results("berlin52")

### `kroA100`

In [None]:
# Same default grid (108 GA runs) on kroA100.
run_ga("kroA100")

In [None]:
# Print the configuration with the smallest best_distance for kroA100 and plot its fitness.
analyse_results("kroA100")

### `pr1002`

In [None]:
# Default grid on pr1002, but with a higher elitism rate (0.12 instead of 0.05)
# and larger selection tournaments (15 instead of 3).
run_ga("pr1002", elitism_rate=0.12, tournament_size=15)

In [None]:
# Print the best pr1002 configuration; the first 5 generations are left out of
# the average fitness plot.
analyse_results("pr1002", skip=5)