## Set covering
##### Gabriele Iurlaro - Salvatore Adalberto Esposito

In [1]:
import random

from typing import Callable
from copy import deepcopy
from gx_utils import * 

### Problem definition

In [2]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

### State class

In [3]:
class State:
    def __init__(self, data: list):
        self._data = data.copy()

    def __hash__(self):
        return hash(bytes([element % 256 for sublist in self._data for element in sublist]))

    def __eq__(self, other):
        return bytes([element % 256 for sublist in self._data for element in sublist]) == bytes([element % 256 for sublist in other._data for element in sublist])

    def __lt__(self, other):
        return bytes([element % 256 for sublist in self._data for element in sublist]) < bytes([element % 256 for sublist in other._data for element in sublist])

    def __str__(self):
        return str(self._data)

    def __repr__(self):
        return repr(self._data)

    def add_action(self, action):
        self._data.append(action)
    
    def cost(self):
        return sum([len(_) for _ in self._data])
        
    @property
    def data(self):
        return self._data

    def copy_data(self):
        return self._data.copy()

In [4]:
def result(state, action):
    # Create a new state by taking the action
    new_list = deepcopy(state._data)
    new_list.append(action)
    return State(new_list)

In [5]:
def goal_test(solution, N):
    return len(set([element for sublist in solution._data for element in sublist])) == N


In [6]:
def h(s, N):
    return N - len(set([element for sublist in s._data for element in sublist]))

def possible_actions(state, all_states, max = None):
    res = deepcopy(all_states)
    for list in state._data:
        # print(f"Trying to remove {list} from {res}")
        res.remove(list)
    # print(f"Returned {len(res)} actions")
    if max is None:
        return res
    else: 
        max = min(max, len(res))
        # print(f"Max: {max}")
        return [random.choice(res) for _ in range(max)]

def solution(N):
    all_states = sorted([list(x) for x in list(set([tuple(x) for x in problem(N, seed = 42)]))], key = lambda l: -len(l))
    # print(f"First element: {all_states[0]}")
    # print(f"Starting space has {len(all_states)} lists")
    # print(all_states)

    if len(set([element for sublist in all_states for element in sublist])) != N:
        print(f"No solution with N = {N}")
        return
    frontier = PriorityQueue()
    #print(f"Before: {all_states}")

    #print(f"{solution}")
    state = State([all_states[0]])
    state_cost = dict()
    state_cost[state] = state.cost()

    while state is not None and not goal_test(state, N):
        for action in possible_actions(state, all_states):
            new_state = result(state, action)
            cost = len(action)
            if new_state not in state_cost and new_state not in frontier:
                state_cost[new_state] = state_cost[state] + cost
                frontier.push(new_state, p = state_cost[new_state] + h(new_state, N))
            elif new_state in frontier and state_cost[new_state] > state_cost[state] + cost:
                state[new_state] = state_cost[state] + cost
        if frontier:
            state = frontier.pop()
        else: 
            state = None
    if state is None:
        print("Not able to find the solution")
    else:
        print(f"#N = {N}, Greedy best-first solution: weight = {state.cost()}, bloat = {(state.cost() - N)/N * 100:.1f}%, discovered nodes {len(state_cost)} ")        

In [7]:
for n in [5, 10, 20, 100, 500, 1000]:
    solution(n)
    

#N = 5, Greedy best-first solution: weight = 6, bloat = 20.0%, discovered nodes 18 
#N = 10, Greedy best-first solution: weight = 13, bloat = 30.0%, discovered nodes 82 
#N = 20, Greedy best-first solution: weight = 29, bloat = 45.0%, discovered nodes 97 
#N = 100, Greedy best-first solution: weight = 192, bloat = 92.0%, discovered nodes 1699 
#N = 500, Greedy best-first solution: weight = 1295, bloat = 159.0%, discovered nodes 10834 
