Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., better to commit an empty work than not to commit)

In [141]:
from random import choices, random, randint
import numpy as np
from copy import deepcopy

import lab9_lib

# GOAL: maximize fitness, minimize calls

In [142]:
# Smoke test of problem instance 5: evaluate 20 random 100-bit genomes,
# print each one next to its fitness, then print the problem's call counter.
fitness = lab9_lib.make_problem(5)
for n in range(20):
    ind = choices([0, 1], k=100)
    bits = "".join(map(str, ind))
    print(f"{bits}: {fitness(ind):.2%}")

print(fitness.calls)

0111001100001011100110110101000101001001110100000110011100001001000110000001101100000111001011001101: 9.00%
0110001100001011000000101110110100010100010111101010101010101110010110101000001010001011110010111100: 10.89%
1111000011100000011011001001111000010010110101101111000101001110100000101010101010110011101001101000: 11.80%
1101010111010011010111010001101110100010111110100101010111110001110000010101011010001000101101000100: 11.79%
1111001101001110111101101101011101111010110110010101111110110100111100110110101011100100001001100100: 12.58%
1010001011000100111111110100110100111001101111110011010010011000001010100100110100110011110001100001: 10.78%
0000010011001111100111100111010100100110001110100001110011000110111010010111100101000010101010001011: 12.90%
0100100110101111101110101110100010001110001010101101100000000111101010010111111011001111010000011000: 11.89%
1001101111100110011001110000110000101101110111110111111001011110101000111001110001001101110111110110: 12.57%
10100111110010000110

Our code below

In [143]:
# --- EA hyper-parameters -------------------------------------------------
POPULATION_SIZE = 500  # individuals kept after each survivor selection
OFFSPRING_SIZE = 200  # children generated per generation
LOCI = 1000  # genome length (number of bits)
BIT_FLIP_PROBABILITY = 0.15  # per-locus flip chance used by mutate()
# SWAP_PROBABILITY = 150 * 1/LOCI
SWAP_PROBABILITY = 0.5  # only referenced by the commented-out legacy crossover
NUM_GENERATION = 20  # generation threshold used in ea()'s loop condition


class Individual:
    """A candidate solution: a random bit-string genotype and its fitness."""

    def __init__(self):
        # Not evaluated yet: -inf ranks below any finite fitness value.
        self.fitness = float("-inf")
        # LOCI bits, each drawn uniformly from {0, 1}.
        self.genotype = choices((0, 1), k=LOCI)

In [144]:
# Mutation / recombination or both
def parent_selection(
    population: list[Individual], tournament_size: int
) -> Individual:
    # we also want to take the last best one.
    parents_idx = np.random.choice(
        range(len(population)), size=tournament_size, replace=False
    )
    parents = [population[idx] for idx in parents_idx]
    return max(parents, key=lambda i: i.fitness)


def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Single-point crossover.

    A cut position is drawn uniformly in [0, LOCI - 1]; the child takes the
    genes of `ind1` before the cut and the genes of `ind2` from the cut on.
    The child's fitness is left at the value set by `Individual()`.
    """
    split = randint(0, LOCI - 1)
    child = Individual()
    head = ind1.genotype[:split]
    tail = ind2.genotype[split:]
    child.genotype = head + tail
    assert len(child.genotype) == LOCI
    return child


def two_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Two-point crossover.

    The child is a copy of `ind1` whose segment between the two cut
    positions is replaced by the matching segment of `ind2`. The first cut is
    drawn in [0, LOCI - 2], the second in [first cut, LOCI - 1]; when both
    coincide the replaced segment is empty.
    """
    left = randint(0, LOCI - 2)
    right = randint(left, LOCI - 1)
    child = Individual()
    prefix = ind1.genotype[:left]
    middle = ind2.genotype[left:right]
    suffix = ind1.genotype[right:]
    child.genotype = prefix + middle + suffix
    assert len(child.genotype) == LOCI
    return child


def uniform_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Fitness-biased uniform crossover.

    Each locus of the child is copied from `ind1` with probability
    ``p1 = ind1.fitness / (ind1.fitness + ind2.fitness)`` and from `ind2`
    otherwise, so the fitter parent contributes more genes.

    When that ratio is not a valid probability (both fitnesses are zero, a
    fitness is negative, or a parent is still unevaluated and carries
    ``-inf``), an unbiased 50/50 mask is used instead of raising
    ZeroDivisionError or handing invalid probabilities to NumPy.

    Returns a new Individual whose genotype is a plain Python list; its
    fitness is left at the value set by `Individual()`.
    """
    total = ind1.fitness + ind2.fitness
    # NaN fails every comparison, so it also lands in the fallback branch.
    if ind1.fitness >= 0 and ind2.fitness >= 0 and 0 < total < float("inf"):
        p1 = ind1.fitness / total
    else:
        p1 = 0.5
    # True -> take the gene from ind1, False -> take it from ind2.
    mask = np.random.choice([True, False], size=LOCI, p=[p1, 1 - p1])
    gene = np.where(mask, ind1.genotype, ind2.genotype)
    new_ind = Individual()
    new_ind.genotype = gene.tolist()
    return new_ind

'''NOT USEFUL'''
# def uniform_cut_xover_old(ind1: Individual, ind2: Individual) -> list[Individual]:
#     ind1.genotype = np.array(ind1.genotype)
#     ind2.genotype = np.array(ind2.genotype)
#     swap_mask = np.random.rand(len(ind1.genotype)) < SWAP_PROBABILITY
#     temp = np.copy(ind1.genotype[swap_mask])
#     ind1.genotype[swap_mask] = ind2.genotype[swap_mask]
#     ind2.genotype[swap_mask] = temp
#     return [ind1, ind2]


def mutate(parent: Individual) -> Individual:
    """Return a mutated deep copy of `parent`; `parent` itself is untouched.

    Every one of the LOCI positions is flipped independently with
    probability BIT_FLIP_PROBABILITY. The copy keeps the parent's fitness
    value, which the caller is expected to refresh.
    """
    mutant = deepcopy(parent)
    genes = mutant.genotype
    for locus in range(LOCI):
        if random() >= BIT_FLIP_PROBABILITY:
            continue
        # 0 -> 1, 1 -> 0
        genes[locus] = int(not genes[locus])
    return mutant


def offspring_generation(
    parent1: Individual, parent2: Individual, mutation_probability: float
) -> Individual:
    """Create and evaluate one child from two parents.

    The child is produced by `uniform_cut_xover`, then mutated with
    probability `mutation_probability`, and finally scored with the
    module-level `fitness` callable (one fitness call per child).

    Args:
        parent1: first parent.
        parent2: second parent.
        mutation_probability: chance in [0, 1] that the child is mutated.

    Returns:
        The evaluated child.
    """
    offspring = uniform_cut_xover(parent1, parent2)
    # A single draw: the former `random() < random() < p` was a chained
    # comparison that only fired with probability ~p**2 / 2.
    if random() < mutation_probability:
        offspring = mutate(offspring)
    offspring.fitness = fitness(offspring.genotype)
    return offspring

# TODO adapt the population size
# TODO adapt the mutation rate and the tournament size.
# TODO


def ea() -> Individual:
    """Run the evolutionary algorithm and return the best individual found.

    A random population of POPULATION_SIZE individuals is evaluated with the
    module-level `fitness` callable. Each generation creates OFFSPRING_SIZE
    children (tournament selection + `offspring_generation`), merges them
    into the population and keeps the POPULATION_SIZE fittest. The loop ends
    once at least NUM_GENERATION generations have run AND the best fitness
    has not changed for 20 consecutive generations.

    One progress line is printed per generation.
    """
    # Starting population of POPULATION_SIZE individuals.
    population = [Individual() for _ in range(POPULATION_SIZE)]
    for p in population:
        p.fitness = fitness(p.genotype)
    # The population is not sorted yet, so take the real maximum rather than
    # whatever individual happens to sit at index 0.
    best_fitness = max(p.fitness for p in population)
    gen = 0
    fitness_stall = 0
    mutation_probability = 0.15
    tournament_size = 3
    while fitness_stall < 20 or gen < NUM_GENERATION:
        for _ in range(OFFSPRING_SIZE):
            parent1 = parent_selection(population, tournament_size)
            parent2 = parent_selection(population, tournament_size)
            offspring = offspring_generation(
                parent1, parent2, mutation_probability
            )
            # Children join the pool immediately, so they can already be
            # selected as parents later in the same generation.
            population.append(offspring)

        population.sort(key=lambda i: i.fitness, reverse=True)
        # Always keep the first POPULATION_SIZE best individuals.
        population = population[:POPULATION_SIZE]

        # Self-adapting the values.
        # TODO favour mutation instead of recombination if the fitness is
        # not high (<0.65).
        if fitness_stall > 5:
            # The best fitness has been stuck for more than 5 generations:
            # explore more by raising the mutation probability. It is a
            # probability, so it is capped at 1.0.
            mutation_probability = min(1.0, mutation_probability * 1.3)

        if best_fitness == population[0].fitness:
            fitness_stall += 1
        else:
            best_fitness = population[0].fitness
            fitness_stall = 0

        gen += 1

        print(
            f"gen #{gen}, fitness: {best_fitness}, score: {best_fitness/gen * 1000:.4f}"
        )

    return population[0]

In [145]:
# Problem instances to solve. The full assignment set is [1, 2, 5, 10];
# only instance 1 is enabled here.
# instance = [1, 2, 5, 10]
instance = [1]
for k in instance:
    # ea() and offspring_generation() read the module-level name `fitness`,
    # so it has to be rebound before every run.
    fitness = lab9_lib.make_problem(k)
    champion = ea()
    best_fitness = champion.fitness
    print(f"\nBest individual fitness: {best_fitness}, Fitness calls: {fitness.calls} -> Score2: {best_fitness/fitness.calls*10000000:.4f}")

gen #1, fitness: 0.552, score: 552.0000
gen #2, fitness: 0.565, score: 282.5000
gen #3, fitness: 0.565, score: 188.3333
gen #4, fitness: 0.584, score: 146.0000
gen #5, fitness: 0.584, score: 116.8000
gen #6, fitness: 0.597, score: 99.5000
gen #7, fitness: 0.605, score: 86.4286
gen #8, fitness: 0.619, score: 77.3750
gen #9, fitness: 0.625, score: 69.4444
gen #10, fitness: 0.632, score: 63.2000
gen #11, fitness: 0.639, score: 58.0909
gen #12, fitness: 0.651, score: 54.2500
gen #13, fitness: 0.659, score: 50.6923
gen #14, fitness: 0.659, score: 47.0714
gen #15, fitness: 0.679, score: 45.2667
gen #16, fitness: 0.683, score: 42.6875
gen #17, fitness: 0.692, score: 40.7059
gen #18, fitness: 0.707, score: 39.2778
gen #19, fitness: 0.707, score: 37.2105
gen #20, fitness: 0.712, score: 35.6000
gen #21, fitness: 0.721, score: 34.3333
gen #22, fitness: 0.731, score: 33.2273
gen #23, fitness: 0.737, score: 32.0435
gen #24, fitness: 0.742, score: 30.9167
gen #25, fitness: 0.75, score: 30.0000
gen #