Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit)

In [161]:
from random import choices, random, randint
import numpy as np
from copy import deepcopy

import lab9_lib

# GOAL: maximize fitness, minimize calls

In [162]:
# Sanity check of the black-box problem: evaluate 20 random 100-bit genomes
# on instance 5 and show how many fitness evaluations were consumed.
fitness = lab9_lib.make_problem(5)
for n in range(20):
    ind = choices([0, 1], k=100)
    genome = "".join(map(str, ind))
    score = fitness(ind)
    print(f"{genome}: {score:.2%}")

# `calls` is the evaluation counter exposed by the problem object.
print(fitness.calls)

0010101110000110100001111100101110010101001010110100001011011101111011100100100100101100101001001010: 10.89%
1000110011110110010100011011010000111110101010011001100001110110101010101101001110100100000100111001: 11.89%
0100111001010010011101111011101000100001011011110000101010010110001100110011100011001100101011000101: 24.91%
0110010100110101011111100010001101111100001000010010001100100000100100001010100001100010101000011010: 29.35%
1100000111001100001011001100101011010100110101101101101111111100000000100110111111100101000011101011: 22.89%
1000111110011101101010111000101000101010011100100101111011011110101000000111000110110000010010111001: 10.79%
0111001001110010110001001100010001001000001010100111010110111101100001111001101010101101001010000110: 10.89%
0111011010111110001101010011101110101110000101011011000110110111000011001110111001101100010011011111: 14.57%
1101010000010101101100010100101100101111100010010001011001101000100101101111111110110100100101110110: 12.69%
1011101111111101001

Our code below

In [163]:
POPULATION_SIZE = 100  # individuals kept alive between generations
OFFSPRING_SIZE = 100  # offspring created (and evaluated) per generation
LOCI = 1000  # genome length, in bits
BIT_FLIP_PROBABILITY = 0.15  # per-locus flip probability used by mutate()
NUM_GENERATION = 100  # minimum number of generations run by ea()


class Individual:
    """A candidate solution: a bit-list genotype plus its cached fitness.

    Args:
        genotype: optional iterable of genes to start from. It is copied
            into a new list, so the caller's sequence is never aliased.
            When omitted, a uniformly random genome of ``LOCI`` bits is
            generated (the original behaviour).

    Attributes:
        genotype: list of genes (0/1).
        fitness: last evaluated fitness; ``-inf`` until it is evaluated.
    """

    def __init__(self, genotype=None):
        if genotype is None:
            # Default: random bit string of the configured length.
            self.genotype = choices([0, 1], k=LOCI)
        else:
            # Lets operators build a child without paying for a random
            # genome that would be thrown away immediately.
            self.genotype = list(genotype)
        self.fitness = float("-inf")

In [164]:
# Genetic operators: parent selection, recombination (crossover) and mutation
def parent_selection(
    population: list[Individual], tournament_size: int
) -> list[Individual]:
    """Select two parents with a single tournament.

    ``tournament_size`` distinct individuals are drawn uniformly at random
    from ``population``; the two with the highest fitness are returned,
    best first.

    The requested size is clamped to ``[2, len(population)]``: sampling
    without replacement raises ``ValueError`` when asked for more items
    than exist, and fewer than two contenders cannot yield two parents.

    Args:
        population: individuals whose ``fitness`` is already evaluated.
        tournament_size: requested number of contenders.

    Returns:
        Up to two individuals sorted by decreasing fitness (fewer only if
        ``population`` itself holds fewer than two).
    """
    # NOTE(review): the original comment here read "we also want to take
    # the last best one" - the current best is NOT forced into the
    # tournament; confirm whether that was meant as a TODO.
    size = min(max(tournament_size, 2), len(population))
    contender_idx = np.random.choice(len(population), size=size, replace=False)
    contenders = [population[idx] for idx in contender_idx]
    return sorted(contenders, key=lambda i: i.fitness, reverse=True)[:2]


def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """One-point crossover.

    A cut position is drawn uniformly in ``[0, LOCI - 1]``; the child takes
    the genes before the cut from ``ind1`` and the rest from ``ind2``.
    The child's fitness is left unevaluated.
    """
    split = randint(0, LOCI - 1)
    child = Individual()
    head = ind1.genotype[:split]
    tail = ind2.genotype[split:]
    child.genotype = head + tail
    assert len(child.genotype) == LOCI
    return child


def two_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Two-point crossover.

    The first cut is drawn in the first half of the genome and the second
    cut in the second half; the child is ``ind1`` with the segment between
    the two cuts replaced by the corresponding segment of ``ind2``.
    The child's fitness is left unevaluated.
    """
    # Integer division: randint() requires ints. Passing ``LOCI / 2``
    # (a float) is deprecated since Python 3.10 and raises TypeError
    # from Python 3.12.
    half = LOCI // 2
    first_cut_point = randint(0, half)
    second_cut_point = randint(half, LOCI - 1)
    new_ind = Individual()
    new_ind.genotype = (
        ind1.genotype[:first_cut_point]
        + ind2.genotype[first_cut_point:second_cut_point]
        + ind1.genotype[second_cut_point:]
    )
    assert len(new_ind.genotype) == LOCI
    return new_ind


def uniform_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """Uniform crossover: every locus is inherited independently.

    Each gene is copied from ``ind1`` with probability ``p1`` and from
    ``ind2`` otherwise. ``p1`` is currently fixed at 0.5, i.e. the parents
    contribute equally; a fitness-proportional bias
    (``p1 = f1 / (f1 + f2)``) was tried by the authors and is disabled.
    The child's fitness is left unevaluated.
    """
    p1 = 0.5
    new_ind = Individual()
    # The stdlib generator is used instead of one np.random.choice() call
    # per locus: it is far cheaper and keeps the genes plain Python ints
    # rather than NumPy scalars.
    new_ind.genotype = [
        g1 if random() < p1 else g2
        for g1, g2 in zip(ind1.genotype, ind2.genotype)
    ]
    return new_ind


def mutate(parent: Individual) -> Individual:
    """Return a mutated deep copy of ``parent``; the parent is untouched.

    Every locus is flipped independently with probability
    ``BIT_FLIP_PROBABILITY``. The copy also carries the parent's fitness
    value, which is stale until the caller re-evaluates it.
    """
    child = deepcopy(parent)
    genome = child.genotype
    for locus in range(LOCI):
        if random() >= BIT_FLIP_PROBABILITY:
            continue
        # Flip the bit: any truthy gene becomes 0, any falsy gene becomes 1.
        genome[locus] = 0 if genome[locus] else 1
    return child


def offspring_generation(
    parent1: Individual, parent2: Individual, mutation_probability: float
) -> Individual:
    """Create one offspring by either mutation or crossover.

    Args:
        parent1: first parent; the one that is mutated when mutation is
            chosen.
        parent2: second parent; only used for crossover.
        mutation_probability: probability of choosing mutation over
            crossover. Values >= 1 always mutate, values <= 0 always
            recombine.

    Returns:
        A new, not yet evaluated individual.
    """
    if random() < mutation_probability:
        # Mutation: perturb a copy of the first parent only.
        return mutate(parent1)
    # Recombination: mix the genes of both parents.
    return uniform_cut_xover(parent1, parent2)


def ea() -> Individual:
    """Run the evolutionary algorithm and return the best individual found.

    The objective is the module-level ``fitness`` callable, which must be
    bound before calling. Each generation:

    1. two parents are picked with one tournament;
    2. ``OFFSPRING_SIZE`` children are produced from those two parents
       (mutation or uniform crossover) and evaluated;
    3. parents and children are merged and the best ``POPULATION_SIZE``
       survive;
    4. tournament size and mutation probability are self-adapted from the
       share of children that beat the best parent.

    The loop runs for at least ``NUM_GENERATION`` generations and then
    continues for as long as the best fitness keeps strictly improving.
    One progress line is printed per generation.
    """
    # Starting population of POPULATION_SIZE evaluated individuals.
    population = [Individual() for _ in range(POPULATION_SIZE)]
    for p in population:
        p.fitness = fitness(p.genotype)

    old_best_fitness = 0
    # The initial population is not sorted, so population[0] is just a
    # random individual; take the real maximum as the starting best.
    best_fitness = max(p.fitness for p in population)
    gen = 0
    mutation_probability = 0.5
    tournament_size = 10
    while best_fitness > old_best_fitness or gen < NUM_GENERATION:
        old_best_fitness = best_fitness
        num_of_better_offspring = 0
        # NOTE(review): parents are selected once per generation, so every
        # child of this generation derives from the same two individuals.
        best_parents = parent_selection(population, tournament_size)
        for _ in range(OFFSPRING_SIZE):
            offspring = offspring_generation(
                best_parents[0], best_parents[1], mutation_probability
            )
            offspring.fitness = fitness(offspring.genotype)
            population.append(offspring)
            # Track how many children beat the better of the two parents.
            if offspring.fitness > best_parents[0].fitness:
                num_of_better_offspring += 1

        # Survivor selection: keep the POPULATION_SIZE fittest overall.
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:POPULATION_SIZE]

        # Self-adaptation of the strategy parameters.
        # TODO favour mutation instead of recombination if the fitness is
        # not high (<0.65).
        if num_of_better_offspring >= 0.2 * OFFSPRING_SIZE:
            # At least 20% of the children beat the best parent:
            # explore more. The tournament cannot sample more contenders
            # than the population holds, and a probability cannot exceed 1.
            tournament_size = min(tournament_size + 1, POPULATION_SIZE)
            mutation_probability = min(mutation_probability * 1.2, 1.0)
        else:
            # Probably close to a good point: favour exploitation, but
            # never let the tournament shrink below 5 contenders.
            tournament_size = max(tournament_size - 1, 5)
            mutation_probability /= 1.2

        best_fitness = population[0].fitness
        gen += 1

        print(f"gen #{gen}, fitness: {best_fitness}")

    return population[0]

In [165]:
# The assignment asks for instances [1, 2, 5, 10]; only instance 2 is run here.
instance = [2]
for k in instance:
    # ea() reads the module-level `fitness`, so rebind it for every instance.
    fitness = lab9_lib.make_problem(k)
    best = ea()
    print(f"Best individual fitness: {best.fitness}")
    print(f"Fitness calls: {fitness.calls}")

gen #1, fitness: 0.53
gen #2, fitness: 0.53
gen #3, fitness: 0.53
gen #4, fitness: 0.53
gen #5, fitness: 0.534
gen #6, fitness: 0.534
gen #7, fitness: 0.538
gen #8, fitness: 0.538
gen #9, fitness: 0.538
gen #10, fitness: 0.538
gen #11, fitness: 0.538
gen #12, fitness: 0.538
gen #13, fitness: 0.54
gen #14, fitness: 0.54
gen #15, fitness: 0.54
gen #16, fitness: 0.544
gen #17, fitness: 0.544
gen #18, fitness: 0.544
gen #19, fitness: 0.544
gen #20, fitness: 0.556
gen #21, fitness: 0.556
gen #22, fitness: 0.556
gen #23, fitness: 0.556
gen #24, fitness: 0.556
gen #25, fitness: 0.556
gen #26, fitness: 0.556
gen #27, fitness: 0.556
gen #28, fitness: 0.556
gen #29, fitness: 0.556
gen #30, fitness: 0.556
gen #31, fitness: 0.556
gen #32, fitness: 0.556
gen #33, fitness: 0.556
gen #34, fitness: 0.556
gen #35, fitness: 0.556
gen #36, fitness: 0.556
gen #37, fitness: 0.558
gen #38, fitness: 0.558
gen #39, fitness: 0.558
gen #40, fitness: 0.558
gen #41, fitness: 0.562
gen #42, fitness: 0.562
gen #43,