In [3]:
import numpy as np
from random import random
from functools import reduce
from queue import PriorityQueue, LifoQueue
from collections import namedtuple

In [15]:
# Problem definition: NUM_SETS random subsets of a universe of PROBLEM_SIZE
# elements; the search below looks for a selection of subsets covering them all.
PROBLEM_SIZE = 1_000
NUM_SETS = 10_000
SEED = 42  # fixed seed so "Restart Kernel -> Run All" rebuilds the same instance
SET_DENSITY = 0.3  # probability that a given element belongs to a given set

rng = np.random.default_rng(SEED)
# Each set is a boolean membership mask of length PROBLEM_SIZE, drawn in one
# vectorized call per set instead of PROBLEM_SIZE Python-level random() calls.
SETS = tuple(
    rng.random(PROBLEM_SIZE) < SET_DENSITY
    for _ in range(NUM_SETS)
)
# Search state: indices (into SETS) already selected / still available.
State = namedtuple('State', ['taken', 'not_taken'])

In [5]:
def goal_check(state):
    """Tell whether the sets selected in ``state.taken`` cover every element.

    Starts from an all-False mask, ORs in the membership mask of each selected
    set, and returns whether every position of the result is True.
    """
    coverage = np.zeros(PROBLEM_SIZE, dtype=bool)
    for set_index in state.taken:
        coverage = np.logical_or(coverage, SETS[set_index])
    return np.all(coverage)

def h(state):
    """Heuristic: number of elements not yet covered by ``state.taken``.

    Returns ``PROBLEM_SIZE`` minus the count of elements covered by the union
    of the selected sets, so it is 0 exactly when the state is a full cover.

    Note: a single additional set may cover many elements at once, so this
    value can exceed the number of sets still required. The heuristic is
    therefore not admissible, and a search ordered by ``g + h`` is not
    guaranteed to return a minimum-size cover.
    """
    covered = reduce(
        np.logical_or,
        [SETS[i] for i in state.taken],
        np.zeros(PROBLEM_SIZE, dtype=bool),
    )
    # count_nonzero counts the True entries inside NumPy; the builtin sum()
    # would iterate the array element by element in Python.
    return PROBLEM_SIZE - np.count_nonzero(covered)

def g(state):
    """Cost paid so far: how many sets ``state`` has already selected."""
    selected = state.taken
    return len(selected)

def f(state):
    """Priority of ``state`` in the frontier: cost so far plus heuristic."""
    cost_so_far = g(state)
    estimated_remaining = h(state)
    return cost_so_far + estimated_remaining

In [16]:
# Sanity check: selecting every available set must satisfy the goal test,
# otherwise no selection can.
assert goal_check(
    State(taken=set(range(NUM_SETS)), not_taken=set())
), "Problem is not solvable"

In [17]:
# Best-first search over selections of sets: always expand the frontier entry
# with the lowest f = g + h until a state passes goal_check.
frontier = PriorityQueue()
state = State(set(), set(range(NUM_SETS)))
# Entries are (priority, insertion order, state). The insertion order breaks
# priority ties first-in-first-out, so the queue never falls back to comparing
# State tuples (set "<" is the proper-subset test, which is only a partial
# order and costs a set comparison on every tie).
pushed = 0
frontier.put((f(state), pushed, state))

counter = 0  # number of states expanded so far
# block=False: an exhausted frontier raises queue.Empty instead of blocking
# the kernel forever waiting for a producer that does not exist.
_, _, current_state = frontier.get(block=False)
while not goal_check(current_state):
    counter += 1
    # Successors: move one still-available set from not_taken to taken.
    for action in current_state.not_taken:
        new_state = State(
            current_state.taken | {action},
            current_state.not_taken - {action},
        )
        pushed += 1
        frontier.put((f(new_state), pushed, new_state))
    _, _, current_state = frontier.get(block=False)

print(
    f"Solved in {counter:,} steps ({len(current_state.taken)} sets)"
)

print(
    f"The current state is: {current_state}"
)
