Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [259]:
from random import choices, random, randint
import numpy as np
from copy import deepcopy

import lab9_lib

# GOAL: maximize fitness, minimize calls

In [260]:
# Sanity check of the fitness function: score 20 random 100-bit genomes on
# problem instance 5, then show how many fitness evaluations were spent.
fitness = lab9_lib.make_problem(5)
for sample_idx in range(20):
    ind = choices([0, 1], k=100)
    bits = "".join(map(str, ind))
    print(f"{bits}: {fitness(ind):.2%}")

print(fitness.calls)

0111000010011111100110111100101100100011001110110011001101100100100000100000110011010101111111000101: 9.89%
1011111011001111011011001000110101010101100000010010100101001110110010101100101101101001000010111000: 22.90%
0010000010011100100010110111000010000010011100101001010100011001110111101100010001011110110100001111: 11.89%
1110111111001100101010011111111001011000001110011001111010000010110000000111001100010001011110011110: 11.79%
0010111011101011110001110010111001000010001001000111011110110110100100001011001001100000001110000010: 9.00%
1010110011010011111111011111100010010100001001010111001010001001101100011101000011011011011110000001: 10.78%
0111101110111101100110111100010011010000100110100001011000100010111111011001111111100010100110111010: 11.68%
0011001100101010111110001000001110010101011011111010101010010111110101000101000000001111111011010100: 12.78%
1101011111111101100000110000110100010001110011100010100010111110010000101010000101001100110000100111: 10.79%
100100011000001100111

Our code below

In [261]:
# Number of individuals created initially and kept after each generation's
# truncation in ea().
POPULATION_SIZE = 2000
# Number of offspring generated (and evaluated) per generation in ea().
OFFSPRING_SIZE = 250
# Genome length: number of bits in every genotype.
LOCI = 1000
# Per-locus probability that mutate() flips a bit.
BIT_FLIP_PROBABILITY = 0.15
# ea() always runs at least this many generations before its
# improvement-speed criterion is allowed to stop the loop.
NUM_GENERATION = 100


class Individual:
    """A candidate solution: a random bit-string genome plus its fitness.

    Attributes:
        genotype: list of ``LOCI`` bits (0/1), drawn uniformly at random.
        fitness: score of the genotype; starts at ``-inf`` and is expected to
            be filled in by the caller after evaluation.
    """

    def __init__(self) -> None:
        # -inf marks "not evaluated yet" and sorts below every real score.
        self.fitness: float = float("-inf")
        self.genotype: list[int] = choices([0, 1], k=LOCI)

In [262]:
# Mutation / recombination or both
def parent_selection(
    population: list[Individual], tournament_size: int
) -> Individual:
    """Pick one parent by tournament selection.

    ``tournament_size`` distinct individuals are drawn uniformly at random
    from ``population`` and the one with the highest fitness is returned.

    Args:
        population: candidate parents; must not be empty.
        tournament_size: number of contestants; values larger than the
            population are clamped to the population size.

    Returns:
        The fittest individual among the sampled contestants.

    Raises:
        ValueError: if ``population`` is empty or ``tournament_size`` < 1.
    """
    if not population:
        raise ValueError("cannot select a parent from an empty population")
    if tournament_size < 1:
        raise ValueError("tournament_size must be at least 1")
    # Sampling without replacement cannot draw more contestants than exist.
    contestants = min(tournament_size, len(population))
    # TODO: the original note asked to "also take the last best one"
    # (presumably: always include the current best individual) -- confirm.
    # Passing an int samples from np.arange(len(population)) directly,
    # without first materialising a range object into an array.
    parents_idx = np.random.choice(
        len(population), size=contestants, replace=False
    )
    parents = [population[idx] for idx in parents_idx]
    return max(parents, key=lambda i: i.fitness)


def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Single-point crossover.

    A cut position is drawn uniformly in ``[0, LOCI - 1]``; the child takes
    the genes before the cut from ``ind1`` and the genes from the cut onwards
    from ``ind2``. The child's fitness is left unevaluated.
    """
    split = randint(0, LOCI - 1)
    head = ind1.genotype[:split]
    tail = ind2.genotype[split:]
    child = Individual()
    child.genotype = head + tail
    assert len(child.genotype) == LOCI
    return child


def two_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Two-point crossover.

    The child is a copy of ``ind1``'s genome in which the slice between two
    random cut positions is replaced by the corresponding slice of ``ind2``.
    The first cut is drawn in ``[0, LOCI - 2]`` and the second in
    ``[first, LOCI - 1]``. The child's fitness is left unevaluated.
    """
    lo = randint(0, LOCI - 2)
    hi = randint(lo, (LOCI - 1))
    prefix = ind1.genotype[:lo]
    middle = ind2.genotype[lo:hi]
    suffix = ind1.genotype[hi:]
    child = Individual()
    child.genotype = prefix + middle + suffix
    assert len(child.genotype) == LOCI
    return child


# This xover function returns a child whose genome is built gene by gene,
# picking from each parent with probability proportional to its fitness.
# def uniform_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
#     p1 = ind1.fitness / (ind1.fitness + ind2.fitness)
#     gene = [
#         np.random.choice([ind1.genotype[i], ind2.genotype[i]], p=[p1, 1 - p1])
#         for i in range(LOCI)
#     ]
#     new_ind = Individual()
#     new_ind.genotype = gene
#     return new_ind


def uniform_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Fitness-weighted uniform crossover.

    Each locus of the child is copied from ``ind1`` with probability
    ``p1 = ind1.fitness / (ind1.fitness + ind2.fitness)`` and from ``ind2``
    otherwise. When that ratio is undefined (fitness sum is zero, negative,
    infinite or NaN, e.g. for unevaluated parents) both parents are weighted
    equally instead of raising.

    Args:
        ind1: first parent.
        ind2: second parent.

    Returns:
        A new individual with the mixed genotype; its fitness is unevaluated.
    """
    total = ind1.fitness + ind2.fitness
    if total > 0 and np.isfinite(total):
        # Clamp so a negative fitness cannot produce an invalid probability.
        p1 = min(max(ind1.fitness / total, 0.0), 1.0)
    else:
        # Ratio undefined: fall back to an unbiased coin per locus.
        p1 = 0.5
    mask = np.random.choice([True, False], size=LOCI, p=[p1, 1 - p1])
    gene = np.where(mask, ind1.genotype, ind2.genotype)
    new_ind = Individual()
    # Convert back to a plain list so the genotype matches the other operators.
    new_ind.genotype = gene.tolist()
    return new_ind


def mutate(parent: Individual) -> Individual:
    """Return a mutated deep copy of ``parent``; the parent is not modified.

    Every one of the ``LOCI`` positions is flipped independently with
    probability ``BIT_FLIP_PROBABILITY``. The copy keeps the parent's fitness
    value, so the caller has to re-evaluate it.
    """
    clone = deepcopy(parent)
    genes = clone.genotype
    for locus in range(LOCI):
        if random() >= BIT_FLIP_PROBABILITY:
            continue
        genes[locus] = int(not genes[locus])
    return clone


def offspring_generation(
    parent1: Individual, parent2: Individual, mutation_probability: float
) -> Individual:
    """Create one offspring by either mutation or recombination.

    Args:
        parent1: parent that is mutated, or first parent of the crossover.
        parent2: second parent; only used when crossover is chosen.
        mutation_probability: probability in [0, 1] of applying mutation
            instead of crossover.

    Returns:
        A new individual: ``mutate(parent1)`` with probability
        ``mutation_probability``, otherwise
        ``two_cut_xover(parent1, parent2)``.
    """
    if random() < mutation_probability:
        return mutate(parent1)
    return two_cut_xover(parent1, parent2)


# TODO adapt the population size
# TODO adapt the mutation rate and the tournament size.
# TODO


def ea() -> Individual:
    """Run the evolutionary algorithm and return the best individual found.

    The module-level ``fitness`` callable is used to evaluate genotypes.
    Each generation creates ``OFFSPRING_SIZE`` offspring (tournament
    selection followed by mutation or two-cut crossover), adds them to the
    population, and keeps the ``POPULATION_SIZE`` fittest individuals.

    The loop runs for at least ``NUM_GENERATION`` generations and then keeps
    going while the mean per-generation improvement of the best fitness,
    times 100, is at least 0.1. Progress is printed after every generation.

    Returns:
        The individual with the highest fitness in the final population.
    """
    # History of the best fitness after each generation.
    # NOTE(review): the history is seeded with 0, so the first "speed" equals
    # the first best fitness and the mean speed is effectively
    # best_fitness / gen -- confirm this is the intended stopping signal.
    fitness_list = [0]
    speed_list = []
    # starting population of POPULATION_SIZE individuals
    population = [Individual() for _ in range(POPULATION_SIZE)]
    for p in population:
        p.fitness = fitness(p.genotype)
    gen = 0
    mutation_probability = 0.20
    tournament_size = 50
    # The speed criterion is only consulted once at least one speed sample
    # exists; np.mean([]) would emit a RuntimeWarning and return NaN.
    while gen < NUM_GENERATION or (
        len(speed_list) > 0 and np.mean(speed_list) * 100 >= 0.1
    ):
        num_of_better_offspring = 0
        for _ in range(OFFSPRING_SIZE):
            parent1 = parent_selection(population, tournament_size)
            parent2 = parent_selection(population, tournament_size)
            offspring = offspring_generation(
                parent1, parent2, mutation_probability
            )
            offspring.fitness = fitness(offspring.genotype)
            # Offspring join the pool immediately, so they can already be
            # selected as parents later in this same generation.
            population.append(offspring)
            # Count offspring that beat their first parent; currently only
            # needed by the disabled self-adaptation below.
            if offspring.fitness > parent1.fitness:
                num_of_better_offspring += 1

        population.sort(key=lambda i: i.fitness, reverse=True)
        # always keep the first POPULATION_SIZE best individuals
        population = population[:POPULATION_SIZE]

        # Self-adapting the values (disabled).
        # TODO favour mutation instead of recombination if the fitness is not high (<0.65).
        # if num_of_better_offspring >= 0.05 * OFFSPRING_SIZE:
        #     # If we are able to generate 5% of offspring better than the parent,
        #     # we should explore more.
        #     tournament_size += 1
        #     mutation_probability *= 1.1
        # else:
        #     # We are probably around a good point, we should favour exploitation.
        #     # tournament_size = tournament_size - 1 if tournament_size > 5 else 5
        #     # mutation_probability /= 1.1

        best_fitness = population[0].fitness

        # Improvement of the best fitness with respect to the previous entry.
        speed_list.append(best_fitness - fitness_list[-1])
        fitness_list.append(best_fitness)
        gen += 1

        print(
            f"gen #{gen}, fitness: {best_fitness}, speed: {np.mean(speed_list)*100:.4f}, score: {best_fitness/gen * 1000:.4f}"
        )

    # After at least one generation the population is sorted and this is
    # population[0]; max() also gives the right answer if the loop never ran.
    return max(population, key=lambda i: i.fitness)

In [263]:
# Problem instances to solve; the full assignment set is [1, 2, 5, 10].
instance = [2]
for k in instance:
    # ea() reads the module-level `fitness`, so rebind it before each run.
    fitness = lab9_lib.make_problem(k)
    champion = ea()
    print(f"Best individual fitness: {champion.fitness}")
    print(f"Fitness calls: {fitness.calls}")

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


gen #1, fitness: 0.528, speed: 52.8000, score: 528.0
gen #2, fitness: 0.534, speed: 26.7000, score: 267.0
gen #3, fitness: 0.534, speed: 17.8000, score: 178.00000000000003
gen #4, fitness: 0.542, speed: 13.5500, score: 135.5
gen #5, fitness: 0.548, speed: 10.9600, score: 109.60000000000001
gen #6, fitness: 0.55, speed: 9.1667, score: 91.66666666666667
gen #7, fitness: 0.552, speed: 7.8857, score: 78.85714285714286
gen #8, fitness: 0.554, speed: 6.9250, score: 69.25
gen #9, fitness: 0.556, speed: 6.1778, score: 61.777777777777786
gen #10, fitness: 0.556, speed: 5.5600, score: 55.6
gen #11, fitness: 0.556, speed: 5.0545, score: 50.545454545454554
gen #12, fitness: 0.556, speed: 4.6333, score: 46.333333333333336
gen #13, fitness: 0.556, speed: 4.2769, score: 42.769230769230774
gen #14, fitness: 0.556, speed: 3.9714, score: 39.714285714285715
gen #15, fitness: 0.556, speed: 3.7067, score: 37.06666666666667
gen #16, fitness: 0.558, speed: 3.4875, score: 34.875
gen #17, fitness: 0.558, speed