Copyright **`(c)`** 2023 Ivan Magistro Contenta `<s314356@polito.it>`  
[`https://github.com/ivanmag22/computational-intelligence`](https://github.com/ivanmag22/computational-intelligence)

# LAB9

Write a local-search algorithm (e.g. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [1]:
import logging
from random import random, choices, randint
from functools import reduce
from collections import namedtuple
from dataclasses import dataclass
from copy import copy
import functools

from pprint import pprint

import numpy as np

import lab9_lib

In [2]:
PROBLEM_SIZE = 1_000  # genome length: number of 0/1 loci drawn for each individual
POPULATION_SIZE = 110  # individuals created for the initial population (50: presumably an earlier setting)
OFFSPRING_SIZE = 70  # offspring bred per generation for each population/niche (60: presumably an earlier setting)
N_INSTANCES = 1  # argument passed to lab9_lib.make_problem; author's note: "as stride to calculate the one-max"

NUM_GENERATIONS = 100_000  # upper bound on the number of generations of every evolution loop

In [3]:
# Build the fitness function and probe it on POPULATION_SIZE random genomes.
# Every probe counts as a fitness call, so the running total is printed last.
fitness = lab9_lib.make_problem(N_INSTANCES)
for n in range(POPULATION_SIZE):
    ind = choices([0, 1], k=PROBLEM_SIZE)
    if PROBLEM_SIZE <= 10:
        # Small genomes are short enough to be shown bit by bit.
        label = "".join(map(str, ind))
    else:
        label = f"ind {n}"
    print(f"{label}: {fitness(ind):.2%}")

print(fitness.calls)

ind 0: 49.80%
ind 1: 50.20%
ind 2: 50.80%
ind 3: 50.70%
ind 4: 50.10%
ind 5: 50.40%
ind 6: 48.40%
ind 7: 53.40%
ind 8: 49.60%
ind 9: 51.70%
ind 10: 50.80%
ind 11: 49.60%
ind 12: 47.90%
ind 13: 49.70%
ind 14: 51.20%
ind 15: 49.80%
ind 16: 50.70%
ind 17: 51.60%
ind 18: 50.10%
ind 19: 52.20%
ind 20: 49.70%
ind 21: 48.20%
ind 22: 51.30%
ind 23: 49.40%
ind 24: 51.40%
ind 25: 48.90%
ind 26: 51.00%
ind 27: 49.50%
ind 28: 51.30%
ind 29: 49.20%
ind 30: 50.90%
ind 31: 50.20%
ind 32: 48.20%
ind 33: 54.70%
ind 34: 49.90%
ind 35: 48.30%
ind 36: 50.10%
ind 37: 50.10%
ind 38: 47.50%
ind 39: 50.30%
ind 40: 51.80%
ind 41: 48.70%
ind 42: 49.30%
ind 43: 50.00%
ind 44: 51.60%
ind 45: 51.80%
ind 46: 48.00%
ind 47: 50.60%
ind 48: 54.50%
ind 49: 48.80%
ind 50: 50.30%
ind 51: 49.90%
ind 52: 49.50%
ind 53: 52.90%
ind 54: 54.10%
ind 55: 52.00%
ind 56: 48.60%
ind 57: 49.30%
ind 58: 51.60%
ind 59: 49.60%
ind 60: 47.80%
ind 61: 49.00%
ind 62: 49.60%
ind 63: 48.70%
ind 64: 50.90%
ind 65: 52.90%
ind 66: 50.30%
ind 6

In [4]:
# Record pairing a genome with its fitness value, so the value is stored next to the genome it was computed for.
Individual = namedtuple("Individual", ["genome", "fitness"])

def tournament(population, tournament_size=2):
    """Pick a parent by tournament selection.

    Draws `tournament_size` individuals from `population` uniformly at random
    (with replacement) and returns the one with the highest `fitness`.
    """
    contenders = choices(population, k=tournament_size)
    winner = max(contenders, key=lambda contender: contender.fitness)
    return winner

def one_cut(g1, g2):
    """One-point crossover: head of `g1` followed by the tail of `g2`.

    The cut point is derived from the length of `g1` (the two genomes are
    expected to have the same length) instead of a module-level constant.

    Whenever the genome has at least two loci the cut is strictly interior,
    so the child always takes at least one locus from each parent. A cut at
    0 or at the very end would return an exact copy of one parent and waste
    a fitness evaluation on an already-known genome.

    Returns a new sequence of the same type as the parents.
    """
    size = len(g1)
    if size >= 2:
        cut = randint(1, size - 1)
    else:
        # No interior cut exists: fall back to copying one of the parents.
        cut = randint(0, size)
    return g1[:cut] + g2[cut:]

def n_cut(g1, g2, n):
    """N-point crossover alternating fixed-length segments of two parents.

    The genome is split into `n + 1` consecutive segments of length
    `len(g1) // (n + 1)`; the last segment also absorbs the remainder.
    Segments are taken alternately from `g1` (even positions) and `g2`
    (odd positions).

    Args:
        g1: first parent genome (a sequence; both parents are expected to
            have the same length).
        g2: second parent genome.
        n: number of cut points, must be >= 0 (`n == 0` copies `g1`).

    Returns:
        The child genome as a tuple.

    Raises:
        ValueError: if `n` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    size = len(g1)
    # With more segments than loci the integer division would give 0 and the
    # child would be a plain copy of one parent; use single-locus segments.
    cut = max(1, size // (n + 1))

    parents = (g1, g2)
    child = []
    for i in range(n + 1):
        start = i * cut
        end = size if i == n else start + cut
        # Parent is chosen by segment parity (no genome comparison needed).
        child.extend(parents[i % 2][start:end])

    return tuple(child)

def mutation(g):
    """Return a copy of the binary genome `g` with one random locus flipped.

    The locus is drawn uniformly over the actual length of `g`, so the
    function works for genomes of any length. An empty genome has nothing
    to flip and is returned unchanged.

    `g` must be a tuple of 0/1 values; the result is a new tuple.
    """
    if not g:
        return g
    point = randint(0, len(g) - 1)
    return g[:point] + (1 - g[point],) + g[point + 1 :]

## Initial Population

In [5]:
# Draw all random genomes first, then evaluate each one exactly once and
# store the result next to the genome.
initial_genomes = [tuple(choices([0, 1], k=PROBLEM_SIZE)) for _ in range(POPULATION_SIZE)]
population = [Individual(genome, fitness(genome)) for genome in initial_genomes]

fittest = max(population, key=lambda ind: ind.fitness)
logging.info(f"init: pop_size={len(population)}; max={fittest.fitness}")

In [6]:
NUM_NICHES = 3

# Rank the population by ascending fitness and cut it into NUM_NICHES
# contiguous slices: the first niche holds the weakest individuals, the
# last one the strongest.
# niches -> list of NUM_NICHES lists containing Individual objects
ranked = sorted(population, key=lambda ind: ind.fitness)
bounds = [k * POPULATION_SIZE // NUM_NICHES for k in range(NUM_NICHES + 1)]
niches = [ranked[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

# Show the size of every niche followed by the fitness of its members.
for niche in niches:
    print(len(niche))
    for member in niche:
        print("\t", member.fitness)

fittest = max(population, key=lambda ind: ind.fitness)
logging.info(f"init: pop_size={len(population)}; max={fittest.fitness}")

36
	 0.466
	 0.467
	 0.468
	 0.468
	 0.468
	 0.469
	 0.47
	 0.471
	 0.474
	 0.474
	 0.475
	 0.477
	 0.479
	 0.479
	 0.482
	 0.482
	 0.482
	 0.485
	 0.485
	 0.485
	 0.486
	 0.486
	 0.486
	 0.487
	 0.487
	 0.487
	 0.488
	 0.49
	 0.49
	 0.49
	 0.491
	 0.492
	 0.492
	 0.493
	 0.493
	 0.493
37
	 0.494
	 0.494
	 0.494
	 0.494
	 0.494
	 0.494
	 0.495
	 0.495
	 0.495
	 0.495
	 0.495
	 0.495
	 0.496
	 0.497
	 0.497
	 0.497
	 0.497
	 0.499
	 0.499
	 0.499
	 0.5
	 0.5
	 0.5
	 0.501
	 0.502
	 0.502
	 0.502
	 0.503
	 0.504
	 0.504
	 0.505
	 0.505
	 0.506
	 0.506
	 0.506
	 0.506
	 0.507
37
	 0.508
	 0.508
	 0.508
	 0.509
	 0.511
	 0.511
	 0.512
	 0.512
	 0.512
	 0.512
	 0.512
	 0.512
	 0.513
	 0.513
	 0.514
	 0.514
	 0.514
	 0.514
	 0.514
	 0.514
	 0.515
	 0.515
	 0.515
	 0.516
	 0.516
	 0.516
	 0.518
	 0.518
	 0.518
	 0.518
	 0.518
	 0.52
	 0.521
	 0.522
	 0.522
	 0.524
	 0.537


## Evolution
### Segregation

È possibile implementare una sorta di self-adaptation in termini di miglioramenti rispetto alla generazione precedente per passare dall'exploration all'exploitation e viceversa.

Bisogna pensare a cosa fare con la popolazione:
- **steady-state GA**: da x genitori passo a y figli avendo una popolazione di x+y individui e poi torno a x individui per la generazione successiva
In a steady-state GA, only a subset of the population is replaced in each generation, typically a small percentage of the total population.
- **generational GA**: come un cambio generazionale, sostituisco i genitori con i figli in modo da avere sempre x individui
In a generational GA, the entire population is replaced in each generation. *Problem*: we could lose the best solution.
- **elitism**: Retains the best individuals from the current generation directly into the next generation without modification.

In [7]:
def steady_state(population, offspring, size):
    """
    steady-state GA: parents and children compete together and only the
    `size` fittest of the x+y individuals survive.

    Side effect: `population` is extended with `offspring` through augmented
    assignment, so a list passed by the caller is modified in place. The
    returned list is a new, fitness-sorted (best first) list.
    """
    population += offspring
    ranked = sorted(population, key=lambda member: member.fitness, reverse=True)
    survivors = ranked[:size]
    return survivors

def generational(population, offspring, size):
    """
    generational GA: the parents are discarded and the next generation is
    made of the `size` fittest children only (best first).

    `population` is accepted for interface compatibility with the other
    survivor-selection strategies and is not read.
    """
    ranked_children = sorted(offspring, key=lambda child: child.fitness, reverse=True)
    return ranked_children[:size]

def elitism(population, offspring, size):
    """
    elitism: it retains the best individuals from the current generation
    directly into the next generation without modification.

    The best `size // 4` parents survive unchanged and the rest of the next
    generation is filled with the fittest offspring. The offspring quota is
    `size - size // 4`, so the two parts always add up to `size` even when
    `size` is not a multiple of four. If there are not enough offspring to
    fill the quota, the gap is closed with the next-best parents so the
    population does not shrink (as long as enough individuals exist).

    Args:
        population: current generation (not modified).
        offspring: newly generated individuals (not modified).
        size: number of individuals of the next generation.

    Returns:
        A new list: the elite parents first, then the selected offspring,
        then any parents used to fill a shortfall.
    """
    ranked_parents = sorted(population, key=lambda i: i.fitness, reverse=True)
    ranked_children = sorted(offspring, key=lambda i: i.fitness, reverse=True)

    elite_count = size // 4
    survivors = ranked_parents[:elite_count] + ranked_children[: size - elite_count]

    shortfall = size - len(survivors)
    if shortfall > 0:
        # Too few offspring: top up with the best parents not yet selected.
        survivors += ranked_parents[elite_count : elite_count + shortfall]

    return survivors

Qual è il giusto compromesso tra minor numero di chiamate a fitness e miglior individuo (con miglior fitness)?
- minor numero di chiamate a fitness (la fitness viene chiamata quando si vuole applicare o quando si vuole ordinare la popolazione o il niche in base alla fitness)
    - bisogna avere pochi individui da valutare -> pochi individui e più generazioni
- migliore fitness:
    - bisogna avere individui diversi e mutarli/ricombinarli. Bisogna sapere quando applicare la exploration e quando la exploitation -> dipende dalla generazione.
    Si può fare il tutto mediante un confronto tra generazione precedente e generazione corrente in termini di miglioramento:
        - rapporto tra media delle fitness della generazione precedente e media delle fitness della generazione corrente
        - rapporto tra differenza tra fitness massima e fitness minima della generazione precedente e differenza tra fitness massima e fitness minima della generazione corrente

**Condizioni di terminazione**:
- Migliore soluzione trovata
- Numero totale di valutazioni: la migliore per valutare un algoritmo
- Numero totale di step: utile in ambienti paralleli
- Wall-clock time: richiesto in pratica, contesti industriali
- Steady-state (a.k.a., give up if chance of improvement is low): leggermente utile, tipicamente usato insieme ad altre condizioni

In [8]:
# Single-population run: every generation breeds OFFSPRING_SIZE children
# (30% single-bit mutation, otherwise one-cut crossover) and hands parents and
# children to the survivor-selection strategy. The run stops on a perfect
# fitness or after 100 consecutive generations without a new best offspring.
selection = steady_state
best_fitness = 0  # best offspring fitness seen so far
counter = 0  # consecutive generations without improvement

for g in range(NUM_GENERATIONS):
    offspring = []
    tmp_fitness = 0  # best offspring fitness of the current generation

    for _ in range(OFFSPRING_SIZE):
        if random() < 0.3:
            parent = tournament(population)
            child = mutation(parent.genome)
        else:
            first = tournament(population)
            second = tournament(population)
            child = one_cut(first.genome, second.genome)
        score = fitness(child)
        tmp_fitness = max(tmp_fitness, score)
        offspring.append(Individual(child, score))

    population = selection(population, offspring, POPULATION_SIZE)

    if tmp_fitness > best_fitness:
        counter = 0
        best_fitness = tmp_fitness
        if best_fitness == 1:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
    else:
        counter += 1
        if counter >= 100:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break

    print("Generation", g, "counter:", counter, "best_fitness:", best_fitness, "difference:", best_fitness - tmp_fitness)

ranked = sorted(population, key=lambda ind: ind.fitness, reverse=True)
population = ranked[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)

Generation 0 counter: 0 best_fitness: 0.536 difference: 0.0
Generation 1 counter: 0 best_fitness: 0.539 difference: 0.0
Generation 2 counter: 0 best_fitness: 0.546 difference: 0.0
Generation 3 counter: 0 best_fitness: 0.553 difference: 0.0
Generation 4 counter: 0 best_fitness: 0.555 difference: 0.0
Generation 5 counter: 1 best_fitness: 0.555 difference: 0.0
Generation 6 counter: 0 best_fitness: 0.569 difference: 0.0
Generation 7 counter: 0 best_fitness: 0.578 difference: 0.0
Generation 8 counter: 1 best_fitness: 0.578 difference: 0.0050000000000000044
Generation 9 counter: 0 best_fitness: 0.579 difference: 0.0
Generation 10 counter: 0 best_fitness: 0.589 difference: 0.0
Generation 11 counter: 1 best_fitness: 0.589 difference: 0.0040000000000000036
Generation 12 counter: 2 best_fitness: 0.589 difference: 0.0020000000000000018
Generation 13 counter: 0 best_fitness: 0.592 difference: 0.0
Generation 14 counter: 1 best_fitness: 0.592 difference: 0.0
Generation 15 counter: 0 best_fitness: 0.

Generation 28 counter: 1 best_fitness: 0.615 difference: 0.0
Generation 29 counter: 0 best_fitness: 0.616 difference: 0.0
Generation 30 counter: 0 best_fitness: 0.617 difference: 0.0
Generation 31 counter: 0 best_fitness: 0.619 difference: 0.0
Generation 32 counter: 1 best_fitness: 0.619 difference: 0.0
Generation 33 counter: 2 best_fitness: 0.619 difference: 0.0
Generation 34 counter: 0 best_fitness: 0.622 difference: 0.0
Generation 35 counter: 1 best_fitness: 0.622 difference: 0.0
Generation 36 counter: 2 best_fitness: 0.622 difference: 0.0010000000000000009
Generation 37 counter: 3 best_fitness: 0.622 difference: 0.0
Generation 38 counter: 0 best_fitness: 0.623 difference: 0.0
Generation 39 counter: 0 best_fitness: 0.624 difference: 0.0
Generation 40 counter: 1 best_fitness: 0.624 difference: 0.0
Generation 41 counter: 0 best_fitness: 0.625 difference: 0.0
Generation 42 counter: 1 best_fitness: 0.625 difference: 0.0
Generation 43 counter: 0 best_fitness: 0.626 difference: 0.0
Genera

In [9]:
# Niched run: every niche breeds only among its own members (30% single-bit
# mutation, otherwise one-cut crossover). Every 300 generations the two niches
# with the lowest average fitness are merged into one. The run stops on a
# perfect fitness or after 100 consecutive generations without a new best
# offspring.
num = NUM_NICHES
selection = steady_state
best_fitness = 0  # best offspring fitness seen so far, across all niches
counter = 0  # consecutive generations without improvement

for g in range(NUM_GENERATIONS):
    tmp_fitness = 0  # best offspring fitness of this generation, across all niches

    # The guard looks at the real number of niches, so the merge below can
    # always index two of them.
    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and len(niches) > 1:
        averages = [sum(x.fitness for x in members) / len(members) for members in niches]
        # (niche index, average fitness) pairs, weakest niche first
        ranking = sorted(enumerate(averages), key=lambda pair: pair[1])
        weakest, second_weakest = ranking[0][0], ranking[1][0]
        combined_niche = sorted(niches[weakest] + niches[second_weakest], key=lambda i: i.fitness, reverse=True)
        # Keep every niche that was not merged, not only the first one.
        remaining_niches = [niches[idx] for idx, _ in ranking[2:]]
        niches = remaining_niches + [combined_niche]
        num = len(niches)

    for idx, niche in enumerate(niches):
        n_size = len(niche)
        offspring = list()
        for _ in range(OFFSPRING_SIZE):
            if random() < 0.3:
                p = tournament(niche)
                o = mutation(p.genome)
            else:
                p1 = tournament(niche)
                p2 = tournament(niche)
                o = one_cut(p1.genome, p2.genome)
            f = fitness(o)

            if f > tmp_fitness:
                tmp_fitness = f
            offspring.append(Individual(o, f))
        # Store the survivors back into the list of niches: rebinding the loop
        # variable alone would discard the selection result and leave the
        # niche growing by OFFSPRING_SIZE individuals every generation.
        niches[idx] = selection(niche, offspring, n_size)

    if tmp_fitness <= best_fitness:
        counter += 1

        if counter >= 100:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break

    else:
        counter = 0

        best_fitness = tmp_fitness
        if best_fitness == 1:
            break
    print("Generation", g, "counter:", counter, "best_fitness:", best_fitness, "difference:", best_fitness - tmp_fitness)

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)

Generation 0 counter: 0 best_fitness: 0.537 difference: 0.0
Generation 1 counter: 1 best_fitness: 0.537 difference: 0.0010000000000000009
Generation 2 counter: 0 best_fitness: 0.545 difference: 0.0
Generation 3 counter: 1 best_fitness: 0.545 difference: 0.0030000000000000027
Generation 4 counter: 2 best_fitness: 0.545 difference: 0.0030000000000000027
Generation 5 counter: 0 best_fitness: 0.552 difference: 0.0
Generation 6 counter: 0 best_fitness: 0.559 difference: 0.0
Generation 7 counter: 1 best_fitness: 0.559 difference: 0.0040000000000000036
Generation 8 counter: 2 best_fitness: 0.559 difference: 0.007000000000000006
Generation 9 counter: 3 best_fitness: 0.559 difference: 0.0020000000000000018
Generation 10 counter: 4 best_fitness: 0.559 difference: 0.006000000000000005
Generation 11 counter: 5 best_fitness: 0.559 difference: 0.0030000000000000027
Generation 12 counter: 0 best_fitness: 0.576 difference: 0.0
Generation 13 counter: 1 best_fitness: 0.576 difference: 0.0199999999999999

In [10]:
# Niched run with mixed crossover: every niche breeds only among its own
# members (30% single-bit mutation; otherwise one-cut crossover with
# probability 0.65 and n-cut crossover with a random number of cuts
# otherwise). Every 300 generations the two niches with the lowest average
# fitness are merged. The run stops on a perfect fitness or after 300
# consecutive generations without a new best offspring.
num = NUM_NICHES
selection = steady_state
best_fitness = 0  # best offspring fitness seen so far, across all niches
counter = 0  # consecutive generations without improvement

for g in range(NUM_GENERATIONS):
    tmp_fitness = 0  # best offspring fitness of this generation, across all niches

    # `niches` may already hold fewer than NUM_NICHES lists when this cell
    # starts, so the guard checks the real number of niches: the merge below
    # can then always index two of them.
    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and len(niches) > 1:
        averages = [sum(x.fitness for x in members) / len(members) for members in niches]
        # (niche index, average fitness) pairs, weakest niche first
        l = sorted(enumerate(averages), key=lambda pair: pair[1])
        print(l)
        weakest, second_weakest = l[0][0], l[1][0]
        combined_niche = sorted(niches[weakest] + niches[second_weakest], key=lambda i: i.fitness, reverse=True)
        # Keep every niche that was not merged, not only the first one.
        remaining_niches = [niches[idx] for idx, _ in l[2:]]
        niches = remaining_niches + [combined_niche]
        num = len(niches)

    for idx, niche in enumerate(niches):
        n_size = len(niche)
        offspring = list()
        for _ in range(OFFSPRING_SIZE):
            if random() < 0.3:
                p = tournament(niche)
                o = mutation(p.genome)
            else:
                p1 = tournament(niche)
                p2 = tournament(niche)
                if random() < 0.65:
                    o = one_cut(p1.genome, p2.genome)
                else:
                    o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))
            f = fitness(o)

            if f > tmp_fitness:
                tmp_fitness = f

            offspring.append(Individual(o, f))
        # Store the survivors back into the list of niches: rebinding the loop
        # variable alone would discard the selection result and leave the
        # niche growing by OFFSPRING_SIZE individuals every generation.
        niches[idx] = selection(niche, offspring, n_size)

    if tmp_fitness <= best_fitness:
        counter += 1

        if counter >= 300:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break

    else:
        counter = 0

        best_fitness = tmp_fitness
        # Same early exit as the other evolution loops of this file: once a
        # perfect fitness is reached, further generations only cost calls.
        if best_fitness == 1:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
    print("Generation", g, "counter:", counter, "best_fitness:", best_fitness, "difference:", best_fitness - tmp_fitness)

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)

Generation 0 counter: 0 best_fitness: 0.584 difference: 0.0
Generation 1 counter: 1 best_fitness: 0.584 difference: 0.006000000000000005
Generation 2 counter: 2 best_fitness: 0.584 difference: 0.006000000000000005
Generation 3 counter: 0 best_fitness: 0.588 difference: 0.0
Generation 4 counter: 1 best_fitness: 0.588 difference: 0.010000000000000009
Generation 5 counter: 2 best_fitness: 0.588 difference: 0.007000000000000006
Generation 6 counter: 3 best_fitness: 0.588 difference: 0.0050000000000000044
Generation 7 counter: 4 best_fitness: 0.588 difference: 0.010000000000000009
Generation 8 counter: 5 best_fitness: 0.588 difference: 0.01100000000000001
Generation 9 counter: 6 best_fitness: 0.588 difference: 0.014000000000000012
Generation 10 counter: 7 best_fitness: 0.588 difference: 0.0040000000000000036
Generation 11 counter: 8 best_fitness: 0.588 difference: 0.007000000000000006
Generation 12 counter: 9 best_fitness: 0.588 difference: 0.0050000000000000044
Generation 13 counter: 10 be

In [None]:
# Disabled draft: the whole cell is a bare string literal, so none of the code
# inside it is executed. The text sketches a worker that evolves one niche `n`
# for `num_gen` generations with the same operators as the niched loop and
# returns the evolved niche together with a fitness value.
# NOTE(review): `tmp_fitness` is reset at the start of every generation, so the
# returned value is the best offspring fitness of the LAST generation only.
# NOTE(review): the draft reads `selection` and `fitness` as globals and names
# a parameter `id`, which shadows the builtin.
"""def niche(id: int, n: list[Individual], num_gen: int):
    for g in range(num_gen):
        tmp_fitness = 0
        
        n_size = len(n)
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            if random() < 0.3:
                p = tournament(n)
                o = mutation(p.genome)
            else:
                p1 = tournament(n)
                p2 = tournament(n)
                if random() < 0.65:
                    o = one_cut(p1.genome, p2.genome)
                else:
                    o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))
            f = fitness(o)

            if f > tmp_fitness:
                tmp_fitness = f

            offspring.append(Individual(o, f))
        n = selection(n, offspring, n_size)

        print("ID: ",id,"Number generation:",g,"best_fitness:",tmp_fitness)
    return (n, tmp_fitness)"""

'def niche(id: int, n: list[Individual], num_gen: int):\n    for g in range(num_gen):\n        tmp_fitness = 0\n        \n        n_size = len(n)\n        offspring = list()\n        for i in range(OFFSPRING_SIZE):\n            if random() < 0.3:\n                p = tournament(n)\n                o = mutation(p.genome)\n            else:\n                p1 = tournament(n)\n                p2 = tournament(n)\n                if random() < 0.65:\n                    o = one_cut(p1.genome, p2.genome)\n                else:\n                    o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))\n            f = fitness(o)\n\n            if f > tmp_fitness:\n                tmp_fitness = f\n\n            offspring.append(Individual(o, f))\n        n = selection(n, offspring, n_size)\n\n        print("ID: ",id,"Number generation:",g,"best_fitness:",tmp_fitness)\n    return (n, tmp_fitness)'

In [None]:
# Disabled draft: the whole cell is a bare string literal, so none of the code
# inside it is executed. The text sketches a parallel version of the niched
# evolution that maps a `niche` worker over the niches with a process pool.
# NOTE(review): two pools are created; the first one is rebound without being
# closed.
# NOTE(review): `pool.map(niche, inputs)` passes each (id, niche, generations)
# tuple as ONE argument; a three-parameter worker would presumably need
# `pool.starmap` instead (TODO confirm before enabling).
# NOTE(review): the outer loop runs `range(NUM_NICHES)` iterations, so the
# merge condition `g % 300 == 0 and g != 0` cannot become true unless
# NUM_NICHES exceeds 300; the inner `break` only leaves the loop over outputs.
"""import multiprocessing

num = NUM_NICHES
selection = steady_state
best_fitness = 0

# multiprocessing pool object 
pool = multiprocessing.Pool() 
  
# pool object with number of element 
pool = multiprocessing.Pool(processes=4)

for g in range(NUM_NICHES):
    inputs = [(i, niches[i], 300) for i in range(0, num)]
    outputs = pool.map(niche, inputs)
    for i, o in enumerate(outputs):
        niches[i] = o[0]
        if o[1] == best_fitness:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
        elif o[1] > best_fitness:
            best_fitness = o[1]

    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and num > 1:
        fa = [] # list of fitness average
        for n in niches:    # for each niche in the list of niches
            fl = [x.fitness for x in n]
            fa.append(functools.reduce(lambda a, b: a+b, fl) / len(fl))
        l = list(enumerate(fa))
        l.sort(key = lambda x : x[:][1])    # x is a tuple: first index -> element in the list, second index -> element in the tuple
        combined_niche = niches[l[0][0]] + niches[l[1][0]]   # it takes only the first two elements (the ones with the lowest fitness average); list of Individual objects
        remaining_niches = [niches[i[0]] for i in l[2:]] if len(l)>=2 else []    # list of x lists containing 
        niches = [remaining_niches[0], combined_niche] if remaining_niches else [combined_niche]
        num -= 1

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)"""

'import multiprocessing\n\nnum = NUM_NICHES\nselection = steady_state\nbest_fitness = 0\n\n# multiprocessing pool object \npool = multiprocessing.Pool() \n  \n# pool object with number of element \npool = multiprocessing.Pool(processes=4)\n\nfor g in range(NUM_NICHES):\n    inputs = [(i, niches[i], 300) for i in range(0, num)]\n    outputs = pool.map(niche, inputs)\n    for i, o in enumerate(outputs):\n        niches[i] = o[0]\n        if o[1] == best_fitness:\n            print("break at generation n.", g, "fitness: ", best_fitness)\n            break\n        elif o[1] > best_fitness:\n            best_fitness = o[1]\n\n    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and num > 1:\n        fa = [] # list of fitness average\n        for n in niches:    # for each niche in the list of niches\n            fl = [x.fitness for x in n]\n            fa.append(functools.reduce(lambda a, b: a+b, fl) / len(fl))\n        l = list(enumerate(fa))\n        l.sort(key = lambda x : x[:]