First lab Computational intelligence: Set covering

In [1]:
import numpy as np
from random import random
from functools import reduce
from queue import PriorityQueue

In [4]:
# Constants that control how hard the generated problem instance is.
PROBLEM_SIZE = 5  # number of elements each tile spans
NUM_SETS = 10  # number of sets (SETS == TILES) we can choose from
# The pool of all sets/tiles we can choose from: one boolean array per tile,
# each entry being True when the corresponding random() draw is below 0.2.
SETS = tuple(
    np.array([random() < .2 for _element in range(PROBLEM_SIZE)])
    for _tile in range(NUM_SETS)
)

In [70]:
# Goal test: tells us whether the search is finished.
def goal_check(state):
    """Tell whether the tiles selected in `state` cover every element.

    `state[0]` is iterated for the indices (into SETS) of the selected tiles.
    Returns the result of `np.all` over the element-wise OR of those tiles.
    """
    # Start from an all-False row so that an empty selection is handled too.
    covered = np.array([False] * PROBLEM_SIZE)
    for index in state[0]:
        covered = np.logical_or(covered, SETS[index])
    return np.all(covered)


# Sanity check: with every set selected the goal must be reached;
# if even that fails, the problem instance cannot be solved.
assert goal_check(({*range(NUM_SETS)}, set())), "Problem not solvable"


In [71]:
# The quality of a solution is measured by how many tiles it uses,
# so the cost of a state is the number of tiles selected so far.
def Cost(state):
    """Return the number of entries in `state[0]` (the selected tiles)."""
    selected = state[0]
    return len(selected)


# Distance: the number of elements that are still False (uncovered) once the
# selected tiles are OR-ed together, e.g.
# [True, True, False] -> 1, [False, False, False] -> 3, [True, True, True] -> 0
def Distance(state):
    """Return how many of the PROBLEM_SIZE elements are not covered yet.

    `state[0]` holds the indices (into SETS) of the selected tiles; the
    result is 0 when their union covers every element.
    """
    # Explicit boolean start value, so an empty selection means "nothing covered".
    covered = reduce(
        np.logical_or,
        [SETS[i] for i in state[0]],
        np.zeros(PROBLEM_SIZE, dtype=bool),
    )
    # Count inside NumPy instead of walking the array with the builtin sum().
    return PROBLEM_SIZE - int(np.count_nonzero(covered))
    

In [76]:
# Path search implementation (best-first search on cost + heuristic distance).
# A state is a pair (selected, available) of sets of tile indices.
frontier = PriorityQueue()
initial_state = (set(), set(range(NUM_SETS))) #initial state => no selected sets, all available index

# Frontier entries are (priority, tie_breaker, state). The tie_breaker only
# ever increases, so two entries never tie on the first two fields and the
# states themselves (tuples of sets, for which `<` means "proper subset") are
# never compared; equal-priority states are served in insertion order.
pushed = 0
frontier.put((0, pushed, initial_state))

# Selections that were already queued. The available part of a state is always
# the complement of its selected part, so the selection identifies the state;
# reaching it again through a different order of choices adds nothing new and
# would only make the frontier (and the number of steps) grow.
seen = {frozenset(initial_state[0])}

(_, _, state) = frontier.get()

counter = 0
while not goal_check(state):
    counter += 1
    print(f"{counter}: selected state: {state} with cost {Cost(state)} and distance {Distance(state)}")

    for action in state[1]: #compute all the successors and add them to the queue
        new_state = (state[0] | {action}, state[1] - {action})
        key = frozenset(new_state[0])
        if key in seen:
            continue  # same selection already queued via another path
        seen.add(key)
        pushed += 1
        #for the priority we consider both the cost and the heuristic
        frontier.put((Cost(new_state) + Distance(new_state), pushed, new_state))

    # PriorityQueue.get() blocks while the queue is empty: fail loudly instead
    # of hanging when every state was explored without reaching the goal.
    if frontier.empty():
        raise RuntimeError("Problem not solvable")
    (_, _, state) = frontier.get() #new state

print(f"Solve in {counter+1} step by taking sets {state[0]} with cost {Cost(state)}")

1: selected state: (set(), {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}) with cost 0 and distance 41280000
2: selected state: ({6}, {0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}) with cost 1 and distance 24051712
3: selected state: ({37, 6}, {0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}) with cost 2 and distance 14168064
4: selected state: ({11, 37, 6}, {0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}) with cost 3 and distance 8146944
5: s

In [75]:
# Alternative heuristic: we can change the above definition of Distance so
# that, for a given state, it takes into account both
# 1) the number of elements that are still uncovered, and
# 2) the size of the biggest set/tile that can still be chosen.
# To do so the distance is computed as NUM_FALSE * NUM_TRUE_SET, where
#  - NUM_FALSE is the number of False entries we still need to cover (as before);
#  - NUM_TRUE_SET is the largest number of True entries in an available tile.
# Since we want to minimise the number of tiles used, the intent is to favour
# picking the biggest tile.
def Distance(state):
    """Return (uncovered elements) * (size of the largest available tile).

    `state[0]` holds the indices (into SETS) of the selected tiles and
    `state[1]` the indices of the tiles that are still available.
    When `state[1]` is empty the largest tile size is taken to be 0, so the
    function returns 0 instead of raising ValueError from `max()`.

    NOTE(review): the product can be larger than the number of tiles still
    needed (one uncovered element and a 5-element tile give 5), so it is not
    a lower bound on the remaining Cost.
    """
    covered = reduce(
        np.logical_or,
        [SETS[i] for i in state[0]],
        np.zeros(PROBLEM_SIZE, dtype=bool),
    )
    NUM_FALSE = PROBLEM_SIZE - int(np.count_nonzero(covered))
    # default=0 covers the state in which every tile has already been selected.
    NUM_TRUE_SET = max((int(np.count_nonzero(SETS[i])) for i in state[1]), default=0)

    return NUM_FALSE * NUM_TRUE_SET

In [69]:
# With a bigger problem size and a higher probability (so that the tiles are
# larger) we can compare the two heuristics.
PROBLEM_SIZE = 10000
NUM_SETS = 50
SETS = tuple(np.array([random() < .4 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS))

# The tiles were just regenerated, so check that together they still cover
# every element; otherwise the search could never reach a goal state.
assert np.any(SETS, axis=0).all(), "Problem not solvable"

# To compare: run one of the two cells containing Distance (it replaces the
# previous definition), then run this cell to create new tiles,
# then run the A* search cell and compare the results.
# NOTE: every run of this cell draws different tiles; for a like-for-like
# comparison evaluate both heuristics without re-running this cell in between.

Here are some results obtained by changing the probability that an element of a set is true:
 - With probability 0.4
    - old H: solved in 18 steps with 14 tiles.
    - new H: solved in 15 steps with 14 tiles.
 - With probability 0.3
    - old H: solved in 27 steps with 19 tiles.
    - new H: solved in 25 steps with 19 tiles.
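 - With probability 0.25
    - old H: solved in 20 steps with 24 tiles.
    - new H: solved in 20 steps with 24 tiles.
    - (Note: these figures look inconsistent. Every step adds at most one tile, so a 24-tile solution needs at least 25 steps; this run should be re-checked.)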
 - With probability 0.2
    - old H: solved in 30 steps with 29 tiles.
    - new H: solved in 31 steps with 30 tiles.

Summing up: with higher probabilities the new heuristic works as expected and speeds up the search.
With lower probabilities the results are similar to, or worse than, those obtained with the previous heuristic.