Copyright **`(c)`** 2023 Ivan Magistro Contenta `<s314356@polito.it>`  
[`https://github.com/ivanmag22/computational-intelligence`](https://github.com/ivanmag22/computational-intelligence)

# LAB9

Write a local-search algorithm (eg. an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on a 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned  on Monday, December 4
* You need to commit in order to be selected as a reviewer (ie. better to commit an empty work than not to commit)

In [213]:
import logging
from random import random, choices, randint, shuffle
from functools import reduce
from collections import namedtuple
from dataclasses import dataclass
from copy import copy
import functools

from pprint import pprint

import numpy as np

import lab9_lib

In [214]:
PROBLEM_SIZE = 1_000
POPULATION_SIZE = 110 # 50
OFFSPRING_SIZE = 70 # 60
N_INSTANCES = 2 # as stride to calculate the one-max
TOURNAMENT_SIZE = 14

NUM_GENERATIONS = 100_000

In [215]:
fitness = lab9_lib.make_problem(N_INSTANCES)
for n in range(POPULATION_SIZE):
    ind = choices([0, 1], k=PROBLEM_SIZE)
    print(f"{''.join(str(g) for g in ind)}: {fitness(ind):.2%}" if PROBLEM_SIZE<=10 else f"ind {n}: {fitness(ind):.2%}")

print(fitness.calls)

ind 0: 24.42%
ind 1: 24.62%
ind 2: 22.45%
ind 3: 24.45%
ind 4: 22.17%
ind 5: 22.85%
ind 6: 24.56%
ind 7: 23.85%
ind 8: 24.10%
ind 9: 22.04%
ind 10: 23.84%
ind 11: 21.97%
ind 12: 22.36%
ind 13: 22.55%
ind 14: 24.83%
ind 15: 23.06%
ind 16: 23.33%
ind 17: 21.73%
ind 18: 21.58%
ind 19: 22.46%
ind 20: 21.28%
ind 21: 21.16%
ind 22: 23.51%
ind 23: 23.97%
ind 24: 22.43%
ind 25: 22.80%
ind 26: 21.67%
ind 27: 23.18%
ind 28: 22.03%
ind 29: 24.72%
ind 30: 23.50%
ind 31: 23.86%
ind 32: 23.93%
ind 33: 22.08%
ind 34: 23.33%
ind 35: 23.80%
ind 36: 22.66%
ind 37: 22.91%
ind 38: 23.28%
ind 39: 24.27%
ind 40: 23.83%
ind 41: 25.10%
ind 42: 23.36%
ind 43: 22.89%
ind 44: 23.68%
ind 45: 24.91%
ind 46: 24.71%
ind 47: 23.26%
ind 48: 22.55%
ind 49: 23.84%
ind 50: 22.19%
ind 51: 23.74%
ind 52: 23.25%
ind 53: 22.49%
ind 54: 22.92%
ind 55: 22.55%
ind 56: 21.68%
ind 57: 21.90%
ind 58: 23.86%
ind 59: 24.08%
ind 60: 24.06%
ind 61: 23.41%
ind 62: 23.58%
ind 63: 22.94%
ind 64: 24.01%
ind 65: 21.81%
ind 66: 23.27%
ind 6

In [216]:
Individual = namedtuple("Individual", ["genome", "fitness"])

def tournament(population, tournament_size=2):
    return max(choices(population, k=tournament_size), key=lambda i: i.fitness)

def one_cut(g1, g2):
    cut = randint(0, PROBLEM_SIZE)
    return g1[:cut] + g2[cut:]

def n_cut(g1, g2, n):
    cut = PROBLEM_SIZE//(n+1)

    o = ()
    g = g1
    for i in range(n+1):
        start = i*cut
        if i == n:
            end = PROBLEM_SIZE
        else:
            end = i*cut + cut
        o += g[start : end]
        if g == g1:
            g = g2
        else:
            g = g1

    return o

def mutation(g):
    point = randint(0, PROBLEM_SIZE - 1)
    return g[:point] + (1 - g[point],) + g[point + 1 :]

## Initial Population

In [217]:
population = list()

for genome in [tuple(choices([0, 1], k=PROBLEM_SIZE)) for _ in range(POPULATION_SIZE)]:
    population.append(Individual(genome, fitness(genome)))

logging.info(f"init: pop_size={len(population)}; max={max(population, key=lambda i: i.fitness)[1]}")

In [218]:
NUM_NICHES = 5  # niches, islands are the same thing...
NUM_MIGRANTS = 10

niches = list()
p = copy(population)
shuffle(p) # p.sort(key= lambda x: x.fitness)

for i in range(NUM_NICHES):
    niches.append(p[i*POPULATION_SIZE // NUM_NICHES : (i+1)*POPULATION_SIZE // NUM_NICHES])

for n in niches:
    print(len(n))
# niches -> list of NUM_NICHES lists containing Individual objects

logging.info(f"init: pop_size={len(population)}; max={max(population, key=lambda i: i.fitness)[1]}")

22
22
22
22
22


## Evolution
### Segregation

È possibile implementare una sorta di self-adaptation in termini di miglioramenti rispetto alla generazione precedente per passare dall'exploration all'exploitation e viceversa.

Bisogna pensare a cosa fare con la popolazione:
- **steady-state GA**: da x genitori passo a y figli avendo una popolazione di x+y individui e poi torno a x individui per la generazione successiva
In a steady-state GA, only a subset of the population is replaced in each generation, typically a small percentage of the total population.
- **generational GA**: come un cambio generazionale, sostituisco i genitori con i figli in modo da avere sempre x individui
In a generational GA, the entire population is replaced in each generation. *Problem*: we could lose the best solution.
- **elitism**: Retains the best individuals from the current generation directly into the next generation without modification.

In [219]:
def steady_state(population, offspring, size):
    """
    steady-state GA: from x parents we obtain y children and from x+y individuals we take only x ones
    """
    
    population += offspring
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:size]

    return population

def generational(population, offspring, size):
    """
    generational GA: the entire population is replaced in each generation
    """
    
    population = offspring
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:size]

    return population

def elitism(population, offspring, size):
    """
    elitism: it retains the best individuals from the current generation directly into the next generation without modification
    """
    
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:size//4]
    offspring = sorted(offspring, key=lambda i: i.fitness, reverse=True)[:size*3//4]

    return population + offspring

Qual è il giusto compromesso tra minor numero di chiamate a fitness e miglior individio (con miglior fitness)?
- minor numero di chiamate a fitness (la fitness viene chiamata quando si vuole applicare o quando si vuole ordinare la popolazione o il niche in base alla fitness)
    - bisogna avere pochi individui da valutare -> pochi individui e più generazioni
- migliore fitness:
    - bisogna avere individui diversi e mutarli/ricombinarli. Bisogna sapere quando applicare la exploration e quando la exploitation -> dipende dalla generazione.
    Si può fare il tutto mediante un confronto tra generazione precedente e generazione corrente in termini di miglioramento:
        - rapporto tra media delle fitness della generazione precedente e media delle fitness della generazione corrente
        - rapporto tra differenza tra fitness massima e fitness minima della generazione precedente e differenza tra fitness massima e fitness minima della generazione corrente

**Condizioni di terminazione**:
- Migliore soluzione trovata
- Numero totale di valutazione: la migliore per valutare un algoritmo
- Numero totale di step: utile in ambienti paralleli
- Wall-clock time: richiesto in pratica, contesti industriali
- Steady-state (a.k.a., give up if chance of improvement is low): leggermente utile, tipicamente usato ad altre condizioni

## Model that works on the whole population

In [220]:
"""selection = steady_state
best_fitness = 0
counter = 0

for g in range(NUM_GENERATIONS):
    #print("Generation n.", g)
    tmp_fitness = 0
    
    offspring = list()
    for i in range(OFFSPRING_SIZE):
        if random() < 0.3:
            p = tournament(population)
            o = mutation(p.genome)
        else:
            p1 = tournament(population)
            p2 = tournament(population)
            o = one_cut(p1.genome, p2.genome)
        f = fitness(o)

        if f > tmp_fitness: # offsprings' fitness
            tmp_fitness = f
        offspring.append(Individual(o, f))
    population = selection(population, offspring, POPULATION_SIZE)
    
    if tmp_fitness <= best_fitness:
        counter += 1
        
        if counter >= 100:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
        
    elif tmp_fitness > best_fitness:
        counter = 0

        best_fitness = tmp_fitness
        if best_fitness == 1:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
    
    print("Generation",g ,"counter:", counter, "best_fitness:", best_fitness, "difference:",best_fitness-tmp_fitness)

population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)"""

'selection = steady_state\nbest_fitness = 0\ncounter = 0\n\nfor g in range(NUM_GENERATIONS):\n    #print("Generation n.", g)\n    tmp_fitness = 0\n    \n    offspring = list()\n    for i in range(OFFSPRING_SIZE):\n        if random() < 0.3:\n            p = tournament(population)\n            o = mutation(p.genome)\n        else:\n            p1 = tournament(population)\n            p2 = tournament(population)\n            o = one_cut(p1.genome, p2.genome)\n        f = fitness(o)\n\n        if f > tmp_fitness: # offsprings\' fitness\n            tmp_fitness = f\n        offspring.append(Individual(o, f))\n    population = selection(population, offspring, POPULATION_SIZE)\n    \n    if tmp_fitness <= best_fitness:\n        counter += 1\n        \n        if counter >= 100:\n            print("break at generation n.", g, "fitness: ", best_fitness)\n            break\n        \n    elif tmp_fitness > best_fitness:\n        counter = 0\n\n        best_fitness = tmp_fitness\n        if best

## Parallel methods

### Island Model

In [221]:
selection = elitism #steady_state
best_fitness = 0
counter = 0
tmp_fitness = [0 for _ in range(NUM_NICHES)]

for i,n in enumerate(niches):
    nich = copy(n)
    f = sorted(nich, key=lambda i: i.fitness, reverse=True)[0].fitness
    tmp_fitness[i] = f

for g in range(NUM_GENERATIONS):

    if g % 100 == 0 and g != 0:
        for n in niches:
            migrants = []
            for _ in range(NUM_MIGRANTS):
                migrants.append(n.pop())
            c = 0
            for m in niches:
                if n != m:
                    if counter != (NUM_NICHES - 1) - 1:
                        m += migrants[c*NUM_MIGRANTS//NUM_NICHES:(c+1)*NUM_MIGRANTS//NUM_NICHES]
                    else:
                        m += migrants[c*NUM_MIGRANTS//NUM_NICHES:]
        
        tmp_fitness = [0 for _ in range(NUM_NICHES)]

        for i,n in enumerate(niches):
            nich = copy(n)
            f = sorted(nich, key=lambda i: i.fitness, reverse=True)[0].fitness
            tmp_fitness[i] = f
    
    for j,n in enumerate(niches):
        n_size = len(n)
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            p1 = tournament(n, TOURNAMENT_SIZE)
            p2 = tournament(n, TOURNAMENT_SIZE)
            if random() > 0.65:
                o = one_cut(p1.genome, p2.genome)
            else:
                o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))
            o = mutation(o)
            f = fitness(o)

            if f > tmp_fitness[j]: # offsprings' fitness
                tmp_fitness[j] = f

            offspring.append(Individual(o, f))
        n = selection(n, offspring, n_size)
    
    m = max(tmp_fitness)
    if m <= best_fitness:
        counter += 1
        
        if counter >= 300:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
        
    elif m > best_fitness:
        counter = 0

        best_fitness = m

    print("Generation",g ,"counter:", counter, "best_fitness:", best_fitness, "fitness:", tmp_fitness)

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)

Generation 0 counter: 0 best_fitness: 0.52 fitness: [0.262, 0.2665, 0.52, 0.512, 0.48]
Generation 1 counter: 0 best_fitness: 0.536 fitness: [0.266, 0.2681, 0.536, 0.512, 0.484]
Generation 2 counter: 1 best_fitness: 0.536 fitness: [0.26639999999999997, 0.53, 0.536, 0.512, 0.504]
Generation 3 counter: 2 best_fitness: 0.536 fitness: [0.522, 0.53, 0.536, 0.512, 0.504]
Generation 4 counter: 0 best_fitness: 0.542 fitness: [0.522, 0.53, 0.542, 0.512, 0.534]
Generation 5 counter: 1 best_fitness: 0.542 fitness: [0.522, 0.53, 0.542, 0.512, 0.534]
Generation 6 counter: 0 best_fitness: 0.544 fitness: [0.522, 0.544, 0.542, 0.512, 0.534]
Generation 7 counter: 1 best_fitness: 0.544 fitness: [0.522, 0.544, 0.542, 0.512, 0.534]
Generation 8 counter: 2 best_fitness: 0.544 fitness: [0.522, 0.544, 0.542, 0.512, 0.534]
Generation 9 counter: 3 best_fitness: 0.544 fitness: [0.522, 0.544, 0.542, 0.512, 0.534]
Generation 10 counter: 4 best_fitness: 0.544 fitness: [0.522, 0.544, 0.542, 0.512, 0.534]
Generation 

KeyboardInterrupt: 

### Segregation

In [None]:
"""
num = NUM_NICHES
selection = elitism #steady_state
best_fitness = 0
counter = 0
tmp_fitness = [0 for _ in range(num)]

for i,n in enumerate(niches):
    nich = copy(n)
    f = sorted(nich, key=lambda i: i.fitness, reverse=True)[0].fitness
    tmp_fitness[i] = f

for g in range(NUM_GENERATIONS):

    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and num > 1:
        fa = [] # list of fitness average
        for n in niches:    # for each niche in the list of niches
            fl = [x.fitness for x in n]
            fa.append(functools.reduce(lambda a, b: a+b, fl) / len(fl)) # average fitness for each niche
        l = list(enumerate(fa))
        l.sort(key = lambda x : x[:][1])    # x is a tuple: first index -> element in the list, second index -> element in the tuple
        combined_niche = niches[l[0][0]] + niches[l[1][0]]   # it takes only the first two elements (the ones with the lowest fitness average); list of Individual objects
        combined_niche = sorted(combined_niche, key=lambda i: i.fitness, reverse=True)[:len(niches[l[0][0]]) + len(niches[l[1][0]])]
        remaining_niches = [niches[i[0]] for i in l[2:]] if num>2 else []    # list of x lists containing 
        niches = [remaining_niches[0], combined_niche] if remaining_niches else [combined_niche]
        num -= 1
        
        tmp_fitness = [0 for _ in range(num)]

        for i,n in enumerate(niches):
            nich = copy(n)
            f = sorted(nich, key=lambda i: i.fitness, reverse=True)[0].fitness
            tmp_fitness[i] = f
    
    for j,n in enumerate(niches):
        n_size = len(n)
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            p1 = tournament(n, TOURNAMENT_SIZE)
            p2 = tournament(n, TOURNAMENT_SIZE)
            if random() > 0.65:
                o = one_cut(p1.genome, p2.genome)
            else:
                o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))
            o = mutation(o)
            f = fitness(o)

            if f > tmp_fitness[j]: # offsprings' fitness
                tmp_fitness[j] = f

            offspring.append(Individual(o, f))
        n = selection(n, offspring, n_size)
    
    m = max(tmp_fitness)
    if m <= best_fitness:
        counter += 1
        
        if counter >= 300:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
        
    elif m > best_fitness:
        counter = 0

        best_fitness = m

    print("Generation",g ,"counter:", counter, "best_fitness:", best_fitness, "fitness:", tmp_fitness)

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)
"""

Generation 0 counter: 0 best_fitness: 0.524 fitness: [0.522, 0.502, 0.524, 0.2585, 0.524, 0.494]
Generation 1 counter: 0 best_fitness: 0.54 fitness: [0.524, 0.502, 0.524, 0.54, 0.524, 0.496]
Generation 2 counter: 1 best_fitness: 0.54 fitness: [0.524, 0.502, 0.524, 0.54, 0.524, 0.51]
Generation 3 counter: 0 best_fitness: 0.546 fitness: [0.524, 0.516, 0.526, 0.546, 0.524, 0.51]
Generation 4 counter: 1 best_fitness: 0.546 fitness: [0.534, 0.516, 0.542, 0.546, 0.524, 0.51]
Generation 5 counter: 2 best_fitness: 0.546 fitness: [0.534, 0.516, 0.542, 0.546, 0.524, 0.51]
Generation 6 counter: 3 best_fitness: 0.546 fitness: [0.534, 0.532, 0.542, 0.546, 0.524, 0.52]
Generation 7 counter: 4 best_fitness: 0.546 fitness: [0.534, 0.532, 0.542, 0.546, 0.524, 0.52]
Generation 8 counter: 5 best_fitness: 0.546 fitness: [0.534, 0.532, 0.542, 0.546, 0.524, 0.52]
Generation 9 counter: 0 best_fitness: 0.548 fitness: [0.534, 0.534, 0.542, 0.548, 0.524, 0.52]
Generation 10 counter: 1 best_fitness: 0.548 fitnes

**Multi processing (NOT WORKING)** 

In [None]:
"""def niche(id: int, n: list[Individual], num_gen: int):
    for g in range(num_gen):
        tmp_fitness = 0
        
        n_size = len(n)
        offspring = list()
        for i in range(OFFSPRING_SIZE):
            if random() < 0.3:
                p = tournament(n)
                o = mutation(p.genome)
            else:
                p1 = tournament(n)
                p2 = tournament(n)
                if random() < 0.65:
                    o = one_cut(p1.genome, p2.genome)
                else:
                    o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))
            f = fitness(o)

            if f > tmp_fitness:
                tmp_fitness = f

            offspring.append(Individual(o, f))
        n = selection(n, offspring, n_size)

        print("ID: ",id,"Number generation:",g,"best_fitness:",tmp_fitness)
    return (n, tmp_fitness)"""

'def niche(id: int, n: list[Individual], num_gen: int):\n    for g in range(num_gen):\n        tmp_fitness = 0\n        \n        n_size = len(n)\n        offspring = list()\n        for i in range(OFFSPRING_SIZE):\n            if random() < 0.3:\n                p = tournament(n)\n                o = mutation(p.genome)\n            else:\n                p1 = tournament(n)\n                p2 = tournament(n)\n                if random() < 0.65:\n                    o = one_cut(p1.genome, p2.genome)\n                else:\n                    o = n_cut(p1.genome, p2.genome, randint(2, PROBLEM_SIZE*3//4))\n            f = fitness(o)\n\n            if f > tmp_fitness:\n                tmp_fitness = f\n\n            offspring.append(Individual(o, f))\n        n = selection(n, offspring, n_size)\n\n        print("ID: ",id,"Number generation:",g,"best_fitness:",tmp_fitness)\n    return (n, tmp_fitness)'

In [None]:
"""import multiprocessing

num = NUM_NICHES
selection = steady_state
best_fitness = 0

# multiprocessing pool object 
pool = multiprocessing.Pool() 
  
# pool object with number of element 
pool = multiprocessing.Pool(processes=4)

for g in range(NUM_NICHES):
    inputs = [(i, niches[i], 300) for i in range(0, num)]
    outputs = pool.map(niche, inputs)
    for i, o in enumerate(outputs):
        niches[i] = o[0]
        if o[1] == best_fitness:
            print("break at generation n.", g, "fitness: ", best_fitness)
            break
        elif o[1] > best_fitness:
            best_fitness = o[1]

    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and num > 1:
        fa = [] # list of fitness average
        for n in niches:    # for each niche in the list of niches
            fl = [x.fitness for x in n]
            fa.append(functools.reduce(lambda a, b: a+b, fl) / len(fl))
        l = list(enumerate(fa))
        l.sort(key = lambda x : x[:][1])    # x is a tuple: first index -> element in the list, second index -> element in the tuple
        combined_niche = niches[l[0][0]] + niches[l[1][0]]   # it takes only the first two elements (the ones with the lowest fitness average); list of Individual objects
        remaining_niches = [niches[i[0]] for i in l[2:]] if len(l)>=2 else []    # list of x lists containing 
        niches = [remaining_niches[0], combined_niche] if remaining_niches else [combined_niche]
        num -= 1

population = [item for sublist in niches for item in sublist]
population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
print(population[0])
print(fitness.calls)"""

'import multiprocessing\n\nnum = NUM_NICHES\nselection = steady_state\nbest_fitness = 0\n\n# multiprocessing pool object \npool = multiprocessing.Pool() \n  \n# pool object with number of element \npool = multiprocessing.Pool(processes=4)\n\nfor g in range(NUM_NICHES):\n    inputs = [(i, niches[i], 300) for i in range(0, num)]\n    outputs = pool.map(niche, inputs)\n    for i, o in enumerate(outputs):\n        niches[i] = o[0]\n        if o[1] == best_fitness:\n            print("break at generation n.", g, "fitness: ", best_fitness)\n            break\n        elif o[1] > best_fitness:\n            best_fitness = o[1]\n\n    if g % 300 == 0 and g != 0 and NUM_GENERATIONS - g >= 300 and num > 1:\n        fa = [] # list of fitness average\n        for n in niches:    # for each niche in the list of niches\n            fl = [x.fitness for x in n]\n            fa.append(functools.reduce(lambda a, b: a+b, fl) / len(fl))\n        l = list(enumerate(fa))\n        l.sort(key = lambda x : x[:]