In [2]:
import logging
from collections import namedtuple
import random
import numpy as np
import math

In [18]:
def problem(N, seed=None):
    """Generate a random set-covering instance.

    Seeds the global ``random`` generator with ``seed`` and returns a list of
    between ``N`` and ``5 * N`` subsets. Every subset is a duplicate-free list
    of integers in ``[0, N - 1]`` obtained from between ``N // 5`` and
    ``N // 2`` random draws (duplicates among the draws are collapsed).
    """
    random.seed(seed)
    # The number of subsets is drawn first, then each subset's draw count,
    # then its elements -- this order fixes the instance for a given seed.
    subset_count = random.randint(N, N * 5)
    subsets = []
    for _ in range(subset_count):
        draw_count = random.randint(N // 5, N // 2)
        subsets.append(list(set(random.randint(0, N - 1) for _ in range(draw_count))))
    return subsets

In [70]:
# Experiment configuration.
PROBLEM_SIZE = 500  # size N of the set-covering instance
POPULATION_SIZE = PROBLEM_SIZE * 2  # number of candidate solutions kept in the population
OFFSPRING_SIZE = math.ceil(PROBLEM_SIZE * 1.7)  # number of offspring generated per generation
NUM_GENERATIONS = 200  # number of generations to run

In [71]:
# A candidate solution: its genome together with the fitness computed for it.
Individual = namedtuple("Individual", "genome fitness")
# The set of all integers 0 .. PROBLEM_SIZE - 1.
GOAL = set(range(PROBLEM_SIZE))
# Problem instance generated with a fixed seed; space[i] is the i-th subset.
space = problem(PROBLEM_SIZE, seed=42)


In [72]:
def flatten(data_list: list):
    """Concatenate the items of every sub-iterable of ``data_list``.

    Returns a new flat list holding the items in their original order; only
    one level of nesting is removed.
    """
    merged = []
    for chunk in data_list:
        merged.extend(chunk)
    return merged

In [73]:
def fitness(genome: list):
    """Score a genome as the negated total size of the subsets it selects.

    Gene ``i`` being truthy selects ``space[i]``. The result is minus the sum
    of the lengths of the selected subsets, so a higher (closer to zero)
    value means a lighter selection.
    """
    return -sum(len(space[index]) for index, gene in enumerate(genome) if gene)


def tournament(population, tournament_size=2):
    """Select one individual by tournament.

    Draws ``tournament_size`` contenders from ``population`` with replacement
    and returns the contender with the highest ``fitness``.
    """
    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=lambda contender: contender.fitness)


def cross_over(g1, g2):
    """One-point crossover of two genomes.

    A cut index is drawn uniformly from ``0 .. len(g1) - 1``; the child takes
    the genes before the cut from ``g1`` and the genes from the cut onwards
    from ``g2``. Neither parent is modified.
    """
    cut_point = random.randint(0, len(g1) - 1)
    head, tail = g1[:cut_point], g2[cut_point:]
    return head + tail


def mutation(g):
    """Return a copy of genome ``g`` with one randomly chosen gene flipped.

    The gene at a uniformly drawn index is replaced by ``1 - gene``; the
    input genome is left untouched.
    """
    index = random.randint(0, len(g) - 1)
    flipped_gene = 1 - g[index]
    return g[:index] + [flipped_gene] + g[index + 1:]

def decode_solution(genome: list):
    """Return the subsets of ``space`` selected by ``genome``.

    Gene ``i`` being truthy selects ``space[i]``. Selection uses plain
    truthiness -- the same test ``fitness`` and ``cover_all_set`` apply -- so
    the decoded solution always matches what those functions scored.

    Args:
        genome: sequence of genes, one per subset in ``space``.

    Returns:
        A list with the selected subsets, in the order they appear in ``space``.
    """
    return [space[index] for index, gene in enumerate(genome) if gene]

def goal_reach(genome: list):
    """Tell whether the subsets selected by ``genome`` reach the goal.

    The goal is reached when the union of the decoded subsets contains
    exactly ``PROBLEM_SIZE`` distinct elements.

    Args:
        genome: sequence of genes, decoded with ``decode_solution``.

    Returns:
        True if the number of distinct covered elements equals
        ``PROBLEM_SIZE``, False otherwise.
    """
    # A set union counts distinct elements directly; there is no need to
    # flatten into a list and sort it through NumPy.
    covered = set()
    for subset in decode_solution(genome):
        covered.update(subset)
    return len(covered) == PROBLEM_SIZE

def cover_all_set(genome: list):
    """Tell whether the subsets selected by ``genome`` cover every element.

    Gene ``i`` being truthy selects ``space[i]``. The genome is considered a
    full cover when the union of the selected subsets contains exactly
    ``PROBLEM_SIZE`` distinct elements.

    Args:
        genome: sequence of genes, one per subset in ``space``.

    Returns:
        True if the selected subsets contain ``PROBLEM_SIZE`` distinct
        elements, False otherwise.
    """
    # Accumulate the union in a set instead of flattening every selected
    # subset into one list and de-duplicating it with NumPy.
    covered = set()
    for index, gene in enumerate(genome):
        if gene:
            covered.update(space[index])
    return len(covered) == PROBLEM_SIZE



In [74]:

# Initial population: POPULATION_SIZE genomes with one uniformly random 0/1
# gene per subset in `space`.
# NOTE: these genomes are not passed through cover_all_set, so nothing here
# guarantees that an initial individual covers every element.
initial_genomes = [
    [random.choice([0, 1]) for _ in range(len(space))]
    for _ in range(POPULATION_SIZE)
]
population = [
    Individual(genome=genome, fitness=fitness(genome))
    for genome in initial_genomes
]


In [75]:
# Probability that an offspring is produced by mutation rather than crossover.
MUTATION_PROBABILITY = 0.3

# (generation, fitness) records; generation 0 is the initial population.
fitness_log = [(0, individual.fitness) for individual in population]

for generation in range(NUM_GENERATIONS):
    offspring = []
    for attempt in range(OFFSPRING_SIZE):
        if random.random() < MUTATION_PROBABILITY:
            selected_parent = tournament(population)
            genome = mutation(selected_parent.genome)
        else:
            p1 = tournament(population)
            p2 = tournament(population)
            genome = cross_over(p1.genome, p2.genome)
        # Children rejected by cover_all_set are discarded, so a generation
        # may contribute fewer than OFFSPRING_SIZE new individuals.
        if not cover_all_set(genome):
            continue
        fitness_new = fitness(genome)
        fitness_log.append((generation + 1, fitness_new))
        offspring.append(Individual(genome, fitness_new))
    # Survivor selection: keep the POPULATION_SIZE fittest among the current
    # population and the accepted offspring.
    population = sorted(
        population + offspring,
        key=lambda individual: individual.fitness,
        reverse=True,
    )[:POPULATION_SIZE]



In [76]:
logging.getLogger().setLevel(logging.INFO)

# Report the fittest individual that passes cover_all_set. The previous check
# (`population[0] != []`) compared a namedtuple with a list, which is always
# True, so the "not found" branch could never run and an empty population
# raised IndexError.
ranked = sorted(population, key=lambda individual: individual.fitness, reverse=True)
best = next(
    (individual for individual in ranked if cover_all_set(individual.genome)),
    None,
)

if best is not None:
    solution = decode_solution(best.genome)
    total_weight = sum(len(subset) for subset in solution)
    print(
        f"-N: {PROBLEM_SIZE}: "
        f"Total weight: {total_weight}; "
        f"(bloat={(total_weight-PROBLEM_SIZE)/PROBLEM_SIZE*100:.0f}%)"
    )
else:
    print(
        "Solution not found!"
    )

-N: 500: Total weight: 53722; (bloat=10644%)
