In [1]:
from queue import PriorityQueue, SimpleQueue, LifoQueue
from random import random
from functools import reduce
from collections import namedtuple
from pprint import pprint
import numpy as np

Sets the initial constant for the problem

In [3]:
PROBLEM_SIZE = 5
NUM_SETS = 10

Create randomly the NUM_SETS array of lenght PROBLEM_SIZE, with a probability of 20% to have a true value in the singles line.
And I choose how **state** this rappresentation **({sets_taken},{sets_not_taked})** with **sets_taken** and **sets_not_taked** the index of the sets

In [25]:
SETS = tuple(
    np.array([random() < 0.3 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS)
)
State = namedtuple("State", ["taken", "not_taken"])

To evaluate the goal:
1. take all the sets selected in the current state
2. apply a or between all this sets with the reduce function
3. return true just if we have all true in the resulted array

Note: we can have overlapping, it is not a limitation in the problem, so the or is a good choice

In [5]:
def goal_check(state):
    return np.all(reduce(np.logical_or, [SETS[i] for i in state.taken], False))

Now I check if the problem is solvable, because if I cannot solve it with all the sets the problem is not solvable

In [26]:
assert goal_check(State(set(range(NUM_SETS)), set())), "Problem not solvable"

Define a function search that with a frontier apply the path search and return the goal state

In [20]:
def search(frontier):
    current_state = frontier.get()

    # to count the number of sets put in the frontier
    counter = 0
    while not goal_check(current_state):
        counter += 1
        for action in current_state.not_taken:
            new_state = State(
                current_state.taken ^ {action}, current_state.not_taken ^ {action}
            )
            frontier.put(new_state)
        current_state = frontier.get()

    print(f"Solved in {counter} steps")
    return current_state

With a PriorityQueue I don't know the priority that the frontier apply, so we don't know the real type of algorithm. But with a **SimpleQueue** we are solving the problem with a **Breadth First**, in this way we are solving the problem with the optimal solution in the sense of **minimum number of lines to covering all the line**.

In [21]:
# define a SimpleQueue, and LifoQueue how frontiers
fifo = SimpleQueue()
lifo = LifoQueue()
# put the initial state (taken is empty and not taked is all the sets) in the frontiers
fifo.put(State(set(), set(range(NUM_SETS))))
lifo.put(State(set(), set(range(NUM_SETS))))

breadth_solution_state = search(fifo)
depth_solution_state = search(lifo)

Solved in 265 steps
Solved in 8 steps


In [22]:
print(
    f"Solution with a Breadth Search:\n{breadth_solution_state}\nSolution with a Depth Search:\n{depth_solution_state}"
)

Solution with a Breadth Search:
State(taken={2, 3, 6}, not_taken={0, 1, 4, 5, 7, 8, 9})
Solution with a Depth Search:
State(taken={2, 3, 4, 5, 6, 7, 8, 9}, not_taken={0, 1})


In this case we don't have any type of cost (we are assuming the number of sets), if we define a different cost, for example a solution with the smallest number of element. In the sense that the **cost is the total number of true in taken sets**. To solve the problem , in this case we have to add the cost how key and use a PriorityQueue to order the queue to solve a **Dijkstra**

In [21]:
def cost_function(taken):
    return sum([np.sum(SETS[i]) for i in taken])

In [24]:
def search_dijkstra(frontier):
    current_state = frontier.get()

    # to count the number of sets put in the frontier
    counter = 0
    while not goal_check(current_state):
        counter += 1
        for action in current_state.not_taken:
            new_state = State(
                current_state.taken ^ {action}, current_state.not_taken ^ {action}
            )
            frontier.put((cost_function(new_state.taken), new_state))
        current_state = frontier.get()[1]

    print(f"Solved in {counter} steps")
    return current_state

In [25]:
# define a PriorityQueue how frontiers
frontier = PriorityQueue()

# put the initial state (taken is empty and not taked is all the sets) in the frontiers
frontier.put(State(set(), set(range(NUM_SETS))))

dijkstra_solution_state = search_dijkstra(frontier)

Solved in 498391 steps


In [26]:
dijkstra_solution_state

State(taken={0, 1, 2, 3, 4, 6, 9}, not_taken={8, 5, 7})

In [29]:
pprint([SETS[i] for i in dijkstra_solution_state.taken])

[array([False, False, False, False, False]),
 array([False, False, False, False, False]),
 array([False,  True, False,  True,  True]),
 array([ True, False, False, False, False]),
 array([False, False, False, False, False]),
 array([False, False,  True,  True, False]),
 array([False, False, False, False, False])]


Now I want to implemente a version of Informed search, so with a Euristich that estimate the distances from a state to the goal, and use it to modify the search.
I define h(n) heuristic of node n with the number of false that I have considering the OR of all the taken sets, i.e. number of true that are missing for the goal.

In this way the evalutation function f(n) = g(n) + h(n), with g(n) the cost from start to node n

In [2]:
def heuristic(taken):
    return PROBLEM_SIZE - sum(reduce(np.logical_or, [SETS[i] for i in taken]))

In [18]:
def informed_search(frontier):
    current_state = frontier.get()

    # to count the number of sets put in the frontier
    counter = 0
    while not goal_check(current_state):
        counter += 1
        for action in current_state.not_taken:
            new_state = State(
                current_state.taken ^ {action}, current_state.not_taken ^ {action}
            )
            frontier.put(
                (cost_function(new_state.taken) + heuristic(new_state.taken), new_state)
            )
        current_state = frontier.get()[1]

    print(f"Solved in {counter} steps")
    return current_state

In [27]:
# define a PriorityQueue how frontiers
frontier = PriorityQueue()

# put the initial state (taken is empty and not taked is all the sets) in the frontiers
frontier.put(State(set(), set(range(NUM_SETS))))

informed_solution_search = informed_search(frontier)

Solved in 17 steps


In [28]:
informed_solution_search

State(taken={8, 0, 9, 6}, not_taken={1, 2, 3, 4, 5, 7})

In [29]:
pprint([SETS[i] for i in informed_solution_search.taken])

[array([ True, False, False, False, False]),
 array([False, False, False,  True, False]),
 array([False,  True,  True, False, False]),
 array([False, False, False, False,  True])]
