Set covering problem description

In [2129]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue

import numpy as np

In [2130]:
PROBLEM_SIZE = 100  # number of elements in the universe that must be covered
NUM_SETS = 2000  # number of candidate sets to choose from
COVER_PROB = 0.3  # each element is in a given set with 30% probability (70% absent)
SEED = 42  # fixed seed so a fresh kernel regenerates the same problem instance

# Problem space: NUM_SETS boolean masks of length PROBLEM_SIZE, where
# SETS[i][j] is True when set i contains element j.
_rng = np.random.default_rng(SEED)
SETS = tuple(_rng.random(PROBLEM_SIZE) < COVER_PROB for _ in range(NUM_SETS))

# A search state: indices of the sets taken so far and of those still available.
State = namedtuple("State", ["taken", "not_taken"])

# Preview only the first masks; displaying all NUM_SETS arrays floods the output.
SETS[:2]

(array([ True, False, False,  True, False, False, False, False, False,
        False, False, False, False,  True, False, False, False,  True,
        False, False, False, False,  True,  True, False,  True, False,
        False,  True, False, False, False, False, False,  True, False,
        False,  True, False, False,  True, False,  True,  True,  True,
        False,  True, False,  True, False, False, False, False, False,
        False,  True, False, False, False, False, False, False, False,
         True, False,  True, False, False,  True,  True, False,  True,
        False, False, False, False,  True, False, False, False, False,
         True, False, False,  True, False,  True,  True, False,  True,
        False,  True, False,  True, False, False,  True, False, False,
        False]),
 array([False, False, False,  True, False, False, False, False, False,
        False,  True, False, False, False, False,  True, False, False,
        False,  True, False, False, False, False, False, Fal

In [2131]:
def goal_check(state):
    """Return whether the taken sets jointly cover every element.

    Elements covered by more than one set are fine; only full coverage matters.

    Args:
        state: State whose `taken` holds indices into SETS.

    Returns:
        A truthy value when the OR of all taken masks is True everywhere.
    """
    # Start from "nothing covered" so an empty selection is handled as well.
    covered = np.zeros(PROBLEM_SIZE, dtype=bool)
    for index in state.taken:
        covered = np.logical_or(covered, SETS[index])
    return np.all(covered)


def distance_prof(state):
    """Return how many elements are not covered by any taken set.

    Args:
        state: State whose `taken` holds indices into SETS.

    Returns:
        PROBLEM_SIZE minus the number of elements covered by at least one
        taken set (0 means the state is a full cover).
    """
    # Accumulate coverage in place, starting from "nothing covered".
    covered = np.zeros(PROBLEM_SIZE, dtype=bool)
    for index in state.taken:
        covered |= SETS[index]
    # np.sum counts the True entries in C; the builtin sum would iterate the
    # array element by element in Python on every heuristic evaluation.
    return PROBLEM_SIZE - np.sum(covered)


def overlap(taken):
    """Count the elements that are covered by more than one of the taken sets.

    Args:
        taken: Iterable of indices into SETS.

    Returns:
        The number of positions where at least two taken masks are True.
    """
    chosen_masks = [SETS[index] for index in taken]
    # Summing the boolean masks column-wise gives, per element, how many sets contain it.
    hits_per_element = np.sum(chosen_masks, axis=0)
    covered_more_than_once = hits_per_element > 1
    return np.sum(covered_more_than_once)


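# Heuristic: how far the state is from solving the problem.
# It blends the number of elements that are still uncovered with the number of
# elements covered more than once, weighted by `weigth` and `1 - weigth`.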
def my_h(state, weigth=0.75):
    """Heuristic that blends missing coverage with redundant coverage.

    Args:
        state: State whose `taken` holds indices into SETS.
        weigth: Weight applied to the uncovered-element count; the overlap
            count is weighted by `1 - weigth`. The misspelled name is kept so
            existing keyword callers keep working.

    Returns:
        uncovered_elements * weigth + overlapping_elements * (1 - weigth).
    """
    # Accumulate coverage in place, starting from "nothing covered".
    covered = np.zeros(PROBLEM_SIZE, dtype=bool)
    for index in state.taken:
        covered |= SETS[index]
    # np.sum counts in C instead of iterating the array in Python with the builtin sum.
    num_not_covered = PROBLEM_SIZE - np.sum(covered)
    num_of_overlap = overlap(state.taken)

    return num_not_covered * weigth + num_of_overlap * (1 - weigth)

In [2132]:
# Sanity check: if taking every set does not cover everything, no selection can.
assert goal_check(State(taken=set(range(NUM_SETS)), not_taken=set())), "Problem not solvable"

In [2133]:
def search(distance):
    """Best-first search for a set cover, ordered solely by `distance`.

    Starting from the empty selection, the frontier state with the smallest
    `distance` value is expanded by adding one not-yet-taken set at a time,
    until `goal_check` accepts a state.

    Args:
        distance: Callable mapping a State to a comparable priority; states
            with a lower value are expanded first.

    Returns:
        The set of indices of the taken sets in the first goal state found.

    Raises:
        ValueError: If the frontier is exhausted without reaching a goal
            state (a blocking `get()` on the empty queue would hang forever).
    """
    frontier = PriorityQueue()
    seen = set()  # selections already queued, stored as frozensets of taken indices
    pushed = 0  # insertion counter used as a tie-breaker between equal priorities

    def push(state):
        nonlocal pushed
        key = frozenset(state.taken)
        # The same selection is reachable through every ordering of its
        # actions; queue it only once.
        if key in seen:
            return
        seen.add(key)
        # The counter makes entries totally ordered (FIFO among ties), so the
        # State tuples of sets are never compared with each other.
        frontier.put((distance(state), pushed, state))
        pushed += 1

    # Starting point: nothing taken, every set still available.
    push(State(set(), set(range(NUM_SETS))))

    counter = 0
    _, _, current_state = frontier.get()
    while not goal_check(current_state):
        counter += 1
        for action in current_state.not_taken:  # every set we can still add
            # Move `action` from not_taken to taken (set union / set difference).
            push(
                State(
                    current_state.taken | {action},
                    current_state.not_taken - {action},
                )
            )
        if frontier.empty():
            raise ValueError("Problem not solvable")
        _, _, current_state = frontier.get()

    print(
        f"Solved in {counter:,} steps ({len(current_state.taken)} tiles), with state: {current_state.taken}"
    )
    return current_state.taken

In [2134]:
# Solve with the weighted heuristic and report how many elements end up
# covered more than once.
sol = search(my_h)
print(overlap(sol))

# Solve with the plain uncovered-count distance. The result is kept in its own
# variable so the overlap printed below belongs to this solution rather than
# to the previous one.
sol_prof = search(distance_prof)
print(overlap(sol_prof))

Solved in 7 steps (5 tiles), with state: {1156, 1381, 105, 20, 727}
49
Solved in 5 steps (5 tiles), with state: {2, 1156, 1222, 1261, 564}
49
