In [81]:
import logging
from collections import namedtuple
import random
import sys
import numpy

In [82]:
# --- Problem / algorithm configuration -------------------------------------
# Size of the universe to cover: GOAL is built from range(N) further down.
N = 100
# Placeholder genome length; it is reassigned to len(list_of_lists) once the
# problem instance has been generated and de-duplicated.
PROBLEM_SIZE = N * N
# Number of individuals kept after each generation.
POPULATION_SIZE = 2 * N
# Number of new individuals produced per generation.
OFFSPRING_SIZE = int(1.5 * N)

# Number of iterations of the evolution loop.
NUM_GENERATIONS = 1000

# Default number of contestants drawn by tournament().
TOURNAMENT_SIZE = int(N / 2)
# Smallest positive normalized float; not referenced in the cells shown here.
MIN_NUMBER = sys.float_info.min
# Probability that an offspring is produced by mutation instead of crossover.
GENETIC_OPERATOR_RANDOMNESS = 0.3
# Make logging.info() messages visible in the notebook output.
logging.getLogger().setLevel(logging.INFO)

In [83]:
def problem(N, seed=42):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [84]:
# Every integer in 0 .. N-1 must be covered by a valid solution.
GOAL = set(range(N))
# Build the instance and drop duplicate subsets. Subsets become tuples so they
# can be stored in a set; list_of_lists therefore ends up holding tuples.
tmp = set(map(tuple, problem(N)))
list_of_lists = list(tmp)
# The genome length is the number of distinct subsets (replaces the placeholder).
PROBLEM_SIZE = len(list_of_lists)

In [85]:
# A candidate solution: `genome` is the selection mask over list_of_lists,
# `fitness` is the value compute_fitness() returned for that genome.
Individual = namedtuple("Individual", "genome fitness")

def gen2List(genome):
    """Flatten the subsets selected by ``genome`` into a single list.

    ``genome[i]`` being truthy means ``list_of_lists[i]`` is part of the
    candidate solution. Elements are concatenated in genome order and
    duplicates are kept, so the length of the result is the total number of
    elements across the selected subsets.
    """
    return [
        element
        for index, selected in enumerate(genome)
        if selected
        for element in list_of_lists[index]
    ]

def tournament(population, tournament_size=TOURNAMENT_SIZE):
    """Pick a parent by tournament selection.

    Draws ``tournament_size`` individuals from ``population`` with replacement
    (``random.choices``) and returns the one with the highest ``fitness``;
    ties go to the contestant drawn first.
    """
    contestants = random.choices(population, k=tournament_size)
    return max(contestants, key=lambda contestant: contestant.fitness)

def uniform_cross_over(g1, g2):
    """Combine two parent genomes into one child genome.

    Despite the name, the mix is deterministic: genes at odd positions come
    from ``g1`` and genes at even positions come from ``g2``.

    The child length is taken from the parents themselves rather than from the
    module-level ``PROBLEM_SIZE``, which is reassigned between notebook cells
    and could therefore be out of step with the genomes actually passed in.

    Args:
        g1: first parent genome (any indexable sequence).
        g2: second parent genome, same length as ``g1``.

    Returns:
        The child genome as a tuple.

    Raises:
        ValueError: if the two parents have different lengths.
    """
    if len(g1) != len(g2):
        raise ValueError(
            f"parent genomes must have the same length, got {len(g1)} and {len(g2)}"
        )
    return tuple(
        gene1 if position % 2 else gene2
        for position, (gene1, gene2) in enumerate(zip(g1, g2))
    )

def mutation(g):
    """Return a copy of genome ``g`` with one randomly chosen gene flipped.

    The flip position is drawn uniformly over the positions of ``g`` itself,
    not over the module-level ``PROBLEM_SIZE``, which is reassigned between
    notebook cells and could be out of step with the genome passed in. For
    genomes of length ``PROBLEM_SIZE`` the random stream is consumed exactly
    as before.

    Args:
        g: genome as a tuple of 0/1 genes (a tuple is required because the
            result is assembled by tuple concatenation).

    Returns:
        A new tuple equal to ``g`` except that one gene ``x`` became ``1 - x``.

    Raises:
        ValueError: if ``g`` is empty, since there is no gene to flip.
    """
    if not g:
        raise ValueError("cannot mutate an empty genome")
    point = random.randint(0, len(g) - 1)
    return g[:point] + (1 - g[point],) + g[point + 1:]

def print_population(population):
    """Print one line per individual in ``population``.

    Each line shows the 1-based position of the individual, the subsets its
    genome selects, the set of elements those subsets cover, and the stored
    fitness (``ind[1]``).
    """
    for j, ind in enumerate(population):
        # The local name `list` shadows the builtin inside this function; it
        # is kept because the output f-string below interpolates it.
        list = [list_of_lists[i] for i, g in enumerate(ind[0]) if g]
        s_temp = set()
        for chosen in list:
            s_temp.update(chosen)
        print(f"{j+1} : {list} -> {s_temp} - {ind[1]}")


def compute_fitness(genome):
    """Score a genome: reward coverage, penalise total size.

    The score is ``N`` minus the number of ``GOAL`` elements left uncovered,
    minus the square root of the total number of elements in the selected
    subsets (duplicates included). Higher is better. The result is a numpy
    float because of ``numpy.sqrt``.
    """
    elements = gen2List(genome)
    uncovered = GOAL - set(elements)
    coverage = N - len(uncovered)
    size_penalty = numpy.sqrt(len(elements))
    return coverage - size_penalty
    
def check_goal(genoma):
    """Return True when the subsets selected by ``genoma`` cover exactly GOAL."""
    covered = set(gen2List(genoma))
    return covered == GOAL

def compute_w(genome):
    """Return the total number of elements in the subsets selected by ``genome``.

    Duplicates across subsets are counted each time they appear.
    """
    return sum(
        len(list_of_lists[index])
        for index, selected in enumerate(genome)
        if selected
    )
    

In [86]:
population = []

# Each initial genome is an all-zero tuple passed through mutation(), which
# flips one gene: every starting individual selects exactly one subset.
for genome in (mutation((0,) * PROBLEM_SIZE) for _ in range(POPULATION_SIZE)):
    population.append(Individual(genome, compute_fitness(genome)))

logging.info(f"init: pop_size={len(population)}; max={max(population, key=lambda i: i.fitness)[1]}")

INFO:root:init: pop_size=200; max=35.51925930159214


In [87]:
# Fitness of every individual evaluated so far, tagged with its generation:
# 0 for the initial population, g + 1 for offspring created in iteration g.
fitness_log = [(0, i.fitness) for i in population]

# Best solution that fully covers GOAL. `gen` and `w` are bound up front so the
# summary cell does not raise NameError when no covering individual is found;
# they stay None in that case.
best_fit = 0
gen = None
w = None

for g in range(NUM_GENERATIONS):
    offspring = []
    for i in range(OFFSPRING_SIZE):
        if random.random() < GENETIC_OPERATOR_RANDOMNESS:
            # Mutation branch: one tournament winner, one gene flipped.
            p = tournament(population)
            o = mutation(p.genome)
        else:
            # Crossover branch: two independently selected tournament winners.
            p1 = tournament(population)
            p2 = tournament(population)
            o = uniform_cross_over(p1.genome, p2.genome)
        f = compute_fitness(o)
        fitness_log.append((g + 1, f))
        offspring.append(Individual(o, f))

    # Parents and offspring compete together; keep the POPULATION_SIZE fittest.
    population += offspring
    population = sorted(population, key=lambda i: i[1], reverse=True)[:POPULATION_SIZE]

    # Record the champion only when it improves on the best recorded fitness
    # AND covers GOAL. `gen` stores the zero-based loop index, whereas
    # fitness_log tags the same generation as g + 1.
    if best_fit < population[0][1] and check_goal(population[0][0]):
        best_fit = population[0][1]
        w = len(gen2List(population[0][0]))
        gen = g

In [88]:
# Report the best covering solution recorded by the evolution loop: its
# fitness, the zero-based loop index at which it was recorded, and w, the
# total number of elements in its selected subsets.
print(f"conv_fit {best_fit} found at {gen} with w : {w}")

conv_fit 86.0 found at 16 with w : 196
