# LAB9

Write a local-search algorithm (eg. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned  on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [71]:
from random import choices, randint, random, choice, shuffle
from copy import deepcopy
from tqdm import trange
import numpy as np
import math
from itertools import combinations

import lab9_lib

In [3]:
NUM_GENOMES = 1000
TOURNAMENT_SIZE = 2

In [5]:
class Individual:

    '''
    Class of an individual:
    Attributes:
        genomes: list of 0,1
        fitness: the fitness value of this individual

    Methods :
        mutate: random mutation of a gene
    '''

    genomes: list
    fitness: float

    def __init__(self, genomes=None):
        if genomes == None:
            self.genomes = choices([0, 1], k=1000)
        else:
            self.genomes = genomes

    def mutate(self):
        mutated_genomes = deepcopy(self.genomes)
        index = choice(range(len(self.genomes)))
        if self.genomes[index] == 1:
            mutated_genomes[index] = 0
        else:
            mutated_genomes[index] = 1
        return Individual(genomes=mutated_genomes)


def tournament_selection(population: list[Individual]) -> Individual:
    '''
    Implementation of tournament selection, from a population get a champion.
        population
    Args:
        population: list of individuals

    Return:
        The champion (Individual) of the tournament.
    '''
    pool = choices(population, k=TOURNAMENT_SIZE)
    champion = max(pool, key=lambda i: i.fitness)
    return champion


def uniform_xover(ind1: Individual, ind2: Individual) -> Individual:
    '''
    Implementation of uniform crossover.
        population
    Args:
        ind1: first individual
        ind2: second individual

    Return:
        The combination of ind1 and ind2 (Individual) after uniform crossover.
    '''
    offspring_genotype = [ind1.genomes[i] if random() < 0.5 else ind2.genomes[i] for i in range(NUM_GENOMES)]
    return Individual(genomes=offspring_genotype)


def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    '''
    Implementation of one cut crossover.
        population
    Args:
        ind1: first individual
        ind2: second individual

    Return:
        The combination of ind1 and ind2 (Individual) after one cut crossover.
    '''
    cut_point = randint(0, NUM_GENOMES - 1)
    offspring = Individual(genomes=ind1.genomes[:cut_point] + ind2.genomes[cut_point:])
    return offspring

In [12]:
NUM_GENERATION = 30_000

# not used here
PERCENTAGE_EXTINCTION = None

POPULATION_SIZE = 50
OFFSPRING_SIZE = 25
ONLY_MUTATION_PROBABILITY = 0.5
STD_THRESHOLD = 0.0005

In [111]:
def ga_algorithm(problem_type: int, parent_selection: callable, xover: callable, extinction=False) -> Individual:
    '''
    Implementation of a Genetic Algorithm.

    Args:
        problem_type: Value for the problem instace (1,2,5,10)
        parent_selection: function to performe a parent selection
        xover: function to performe a crossover
        extinction: flag, True to promove diversity with extixtion, False otherwise

    Return:
        The best individual after the GA.
    '''

    fitness = lab9_lib.make_problem(problem_type)
    population = [Individual() for _ in range(POPULATION_SIZE)]

    for i in population:
        i.fitness = fitness(i.genomes)

    population.sort(key=lambda i: i.fitness, reverse=True)

    best_individual = population[0]

    pbar = trange(0, NUM_GENERATION)
    for _ in pbar:
        pbar.set_description(f"Best-individual fitness: {best_individual.fitness}")

        if math.isclose(1, population[0].fitness):
            break

        offspring = list()

        # extinction
        population_fitness = [i.fitness for i in population]
        if extinction and np.std(population_fitness) < STD_THRESHOLD:
            num_indivual_to_extinction = int(POPULATION_SIZE * PERCENTAGE_EXTINCTION)
            population = choices(population, k=POPULATION_SIZE - num_indivual_to_extinction)
            offspring = [Individual() for _ in range(num_indivual_to_extinction)]

        else:
            for _ in range(OFFSPRING_SIZE):
                if random() < ONLY_MUTATION_PROBABILITY:
                    # mutation
                    p = parent_selection(population)
                    o = p.mutate()
                else:
                    # xover and mutation
                    p1 = parent_selection(population)
                    p2 = parent_selection(population)
                    o = xover(p1, p2).mutate()

                offspring.append(o)

        for i in offspring:
            i.fitness = fitness(i.genomes)

        population.extend(offspring)
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:POPULATION_SIZE]

        # save the new best individual
        if population[0].fitness > best_individual.fitness:
            best_individual = population[0]

    print(f'Problem {problem_type}\nFitness: {best_individual.fitness}\nSolved with {fitness.calls:,} fitness calls')
    return best_individual

### Problem istance 1



Solution with GA, with Tournament Selection and uniform cross over

In [21]:
best_individual_1 = ga_algorithm(1, tournament_selection, uniform_xover)

Best-individual fitness: 1.0:   3%|▎         | 908/30000 [00:15<08:15, 58.69it/s]  

Problem 1
Fitness: 1.0
Solved with 22,750 fitness calls





Solution with GA, with Tournament Selection and one cut cross over

In [20]:
best_individual_1 = ga_algorithm(1, tournament_selection, one_cut_xover)

Best-individual fitness: 1.0:   5%|▍         | 1375/30000 [00:22<07:51, 60.73it/s]  

Problem 1
Fitness: 1.0
Solved with 34,425 fitness calls





### Problem istance 2
#### Promoting Diversity with **Extinction**
The idea is to change the actual population with new borns based on PERCENTAGE_EXTINCTION, only in the case of a convergence of the fitness in the population.

In [16]:
NUM_GENERATION = 30_000
PERCENTAGE_EXTINCTION = 0.85

POPULATION_SIZE = 50
OFFSPRING_SIZE = 25
ONLY_MUTATION_PROBABILITY = 0.6
STD_THRESHOLD = 0.0005

In [18]:
best_individual_2 = ga_algorithm(2, tournament_selection, uniform_xover, extinction=True)

Best-individual fitness: 1.0:  32%|███▏      | 9580/30000 [02:49<06:02, 56.38it/s]  

Problem 2
Fitness: 1.0
Solved with 240,553 fitness calls





### Problem 5
For the problem istance 5 I inceased the number of the population to 100, and offsprings to 50.

In [94]:
NUM_GENERATION = 50_000
PERCENTAGE_EXTINCTION = 0.95

POPULATION_SIZE = 100
OFFSPRING_SIZE = 50
TOURNAMENT_SIZE = 5
ONLY_MUTATION_PROBABILITY = 0.65
STD_THRESHOLD = 0.0005

In [95]:
best_individual_5 = ga_algorithm(5, tournament_selection, uniform_xover, extinction=True)

Best-individual fitness: 0.875: 100%|██████████| 50000/50000 [18:46<00:00, 44.38it/s]

Problem 5
Fitness: 0.875
Solved with 2,505,500 fitness calls





### Problem 10
For the problem istance 10 I tried two new implementation:
* **Islands**
* **Two level diversity**

In [114]:
NUM_ISLANDS = 4
NUM_MIGRANTS = 5
MIGRATION_STEP = 1_000

NUM_GENERATION = 50_000
PERCENTAGE_EXTINCTION = 0.95

TOURNAMENT_SIZE = 5
POPULATION_SIZE = 100
OFFSPRING_SIZE = 50
ONLY_MUTATION_PROBABILITY = 0.65
STD_THRESHOLD = 0.0005

#### Islands implementation

In [115]:
def ga_algorithm_island(problem_type: int, parent_selection: callable, xover: callable, extinction=False) -> Individual:
    '''
    Implementation of a Genetic Algorithm with Islands and migration between them.

    Args:
        problem_type: Value for the problem instace (1,2,5,10)
        parent_selection: function to performe a parent selection
        xover: function to performe a crossover
        extinction: flag, True to promove diversity with extixtion, False otherwise

    Return:
        The best individual after the GA with Islands.
    '''
    fitness = lab9_lib.make_problem(problem_type)

    populations = [[Individual() for _ in range(POPULATION_SIZE)] for _ in range(NUM_ISLANDS)]

    for population in populations:
        for i in population:
            i.fitness = fitness(i.genomes)

        population.sort(key=lambda i: i.fitness, reverse=True)

    best_individuals = [population[0] for population in populations]
    best_global_individual = max(best_individuals, key=lambda x: x.fitness)

    pbar = trange(0, NUM_GENERATION)
    for generation in pbar:
        # migration
        if (generation + 1) % MIGRATION_STEP == 0:
            # random islands
            shuffle(populations)
            for idx in range(0, NUM_ISLANDS - 1, 2):
                # swap
                tmp = populations[idx][:NUM_MIGRANTS]
                populations[idx + 1][:NUM_MIGRANTS] = populations[idx][:NUM_MIGRANTS]
                populations[idx + 1][:NUM_MIGRANTS] = tmp

        for i in range(NUM_ISLANDS):
            pbar.set_description(f"Best-individual across island fitness: {best_global_individual.fitness}")

            if math.isclose(1, populations[i][0].fitness):
                break

            offspring = list()

            # extinction
            population_fitness = [ind.fitness for ind in populations[i]]
            if extinction and np.std(population_fitness) < STD_THRESHOLD:
                num_indivual_to_extinction = int(POPULATION_SIZE * PERCENTAGE_EXTINCTION)
                populations[i] = choices(populations[i], k=POPULATION_SIZE - num_indivual_to_extinction)
                offspring = [Individual() for _ in range(num_indivual_to_extinction)]

            else:
                for _ in range(OFFSPRING_SIZE):
                    if random() < ONLY_MUTATION_PROBABILITY:
                        # mutation
                        p = parent_selection(populations[i])
                        o = p.mutate()
                    else:
                        # xover and mutation
                        p1 = parent_selection(populations[i])
                        p2 = parent_selection(populations[i])
                        o = xover(p1, p2).mutate()

                    offspring.append(o)

            for ind in offspring:
                ind.fitness = fitness(ind.genomes)

            populations[i].extend(offspring)
            populations[i].sort(key=lambda ind: ind.fitness, reverse=True)
            populations[i] = populations[i][:POPULATION_SIZE]

            # save the new best individual
            if populations[i][0].fitness > best_individuals[i].fitness:
                best_individuals[i] = populations[i][0]

            best_global_individual = max(best_individuals, key=lambda ind: ind.fitness)

    print(
        f'Problem {problem_type}\nFitness: {best_global_individual.fitness}\nSolved with {fitness.calls:,} fitness calls'
    )
    return best_global_individual

In [98]:
best_individual_10 = ga_algorithm_island(10, tournament_selection, uniform_xover, extinction=True)

Best-individual across island fitness: 0.4678: 100%|██████████| 20000/20000 [30:45<00:00, 10.84it/s]             

Problem 10
Fitness: 0.4678
Solved with 4,284,440 fitness calls





#### Two level diversity selection implementation

For this implementation I added a distance metric based on the genotype, to compute the edit distance. So, to compute the number of mutation needed to trasform one individual to the second one I computed the xor between the two individuals and counted the ones.

In [106]:
def edit_distance(ind1: Individual, ind2: Individual) -> int:
    '''
    Compute the edit distance of two Individual as number of mutation that I need to trasform the first individual to the second one and otherwise.

    Args:
        ind1: first individual
        ind2: second individual
    Return:
        The edit distance value
    '''
    dist = sum([e1 ^ e2 for e1, e2 in zip(ind1.genomes, ind2.genomes)])
    return dist


def delete_k_most_similar_individual(population: list[Individual], k: int) -> list[Individual]:
    '''
    Delete from a population the k most similar individuals based on edit distance.

    Args:
        population: the population of individuals
        k: number of individual to delete

    Return:
        New population after the deletion
    '''
    tot_distances = [sum([edit_distance(ind, population[i]) for i in range(POPULATION_SIZE)]) for ind in population]
    indexes = np.argsort(tot_distances)[:k]
    new_population = [population[i] for i in range(POPULATION_SIZE) if i not in indexes]
    return new_population

In [107]:
def ga_algorithm_two_level_diversity(
    problem_type: int, parent_selection: callable, xover: callable, extinction=False
) -> Individual:
    '''
    Implementation of a Genetic Algorithm.

    Args:
        problem_type: Value for the problem instace (1,2,5,10)
        parent_selection: function to performe a parent selection
        xover: function to performe a crossover
        extinction: flag, True to promove diversity with extixtion, False otherwise

    Return:
        The best individual after the GA.
    '''

    fitness = lab9_lib.make_problem(problem_type)
    population = [Individual() for _ in range(POPULATION_SIZE)]

    for i in population:
        i.fitness = fitness(i.genomes)

    population.sort(key=lambda i: i.fitness, reverse=True)

    best_individual = population[0]

    pbar = trange(0, NUM_GENERATION)
    for _ in pbar:
        pbar.set_description(f"Best-individual fitness: {best_individual.fitness}")

        if math.isclose(1, fitness(best_individual.genomes)):
            break

        offspring = list()

        # extinction
        population_fitness = [ind.fitness for ind in population]
        if extinction and np.std(population_fitness) < STD_THRESHOLD:
            num_indivual_to_extinction = int(POPULATION_SIZE * PERCENTAGE_EXTINCTION)
            population = choices(population, k=POPULATION_SIZE - num_indivual_to_extinction)
            offspring = [Individual() for _ in range(num_indivual_to_extinction)]

        else:
            for _ in range(OFFSPRING_SIZE):
                if random() < ONLY_MUTATION_PROBABILITY:
                    # mutation
                    p = parent_selection(population)
                    o = p.mutate()
                else:
                    # xover and mutation
                    p1 = parent_selection(population)
                    p2 = parent_selection(population)
                    p3 = parent_selection(population)

                    # get the 2 parents more distance
                    p1, p2, _ = max(
                        [(i1, i2, edit_distance(i1, i2)) for i1, i2 in combinations([p1, p2, p3], 2)],
                        key=lambda i: i[2],
                    )

                    o = xover(p1, p2).mutate()

                offspring.append(o)

        for i in offspring:
            i.fitness = fitness(i.genomes)

        population.extend(offspring)
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:POPULATION_SIZE]

        # save the new best individual
        if population[0].fitness > best_individual.fitness:
            best_individual = population[0]

    print(f'Problem {problem_type}\nFitness: {best_individual.fitness}\nSolved with {fitness.calls:,} fitness calls')
    return best_individual

In [108]:
NUM_GENERATION = 50_000
PERCENTAGE_EXTINCTION = 0.95

TOURNAMENT_SIZE = 5
POPULATION_SIZE = 100
OFFSPRING_SIZE = 50
ONLY_MUTATION_PROBABILITY = 0.65
STD_THRESHOLD = 0.0005

In [103]:
best_individual_10 = ga_algorithm_two_level_diversity(10, tournament_selection, uniform_xover, extinction=True)

Best-individual fitness: 0.495: 100%|██████████| 20000/20000 [06:41<00:00, 49.79it/s]              

Problem 10
Fitness: 0.495
Solved with 1,116,985 fitness calls



