Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [38]:
from random import random, seed
from itertools import product
import numpy as np

from icecream import ic

## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [39]:
# Instance parameters
UNIVERSE_SIZE = 100_000  # number of elements that must be covered
NUM_SETS = 10_000  # number of candidate sets
DENSITY = 0.3  # probability that a given element belongs to a given set

# Generator seeded from the instance parameters, so the same parameters always yield the same stream.
# NOTE(review): the instance-generation cell calls `np.random` directly; swap in `rng` there to get reproducible instances.
rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(10_000 * DENSITY)]))

In [40]:
# DON'T EDIT THESE LINES!

# Random instance: SETS[i, e] is True when set i contains element e.
# Each entry is drawn independently and is True with probability DENSITY.
# NOTE(review): this uses the global `np.random` state, not the seeded `rng`.
SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
# Guarantee feasibility: an element contained in no set is added to one randomly chosen set.
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
# Cost of each set is its size raised to 1.1, i.e. slightly super-linear in the number of elements.
# NOTE(review): `np.pow` appears to be the NumPy 2.x alias of `np.power` — confirm the minimum NumPy version.
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [41]:
def valid(solution):
    """Tell whether the selected sets jointly cover every element of the universe."""
    chosen_sets = SETS[solution]
    # An element is covered as soon as at least one chosen set contains it
    covered_elements = np.logical_or.reduce(chosen_sets)
    return np.all(covered_elements)


def cost(solution):
    """Total cost of the selected sets; this is the quantity to minimize."""
    selected_costs = COSTS[solution]
    return selected_costs.sum()

## Have Fun!

Intuition:
greedy algorithm: always add to the solution the set with the best
ratio new_elements/cost, until all elements are covered.

In [42]:
def greedy_set_cover(SETS, COSTS, UNIVERSE_SIZE):
    """Build a set cover greedily, one set at a time.

    At every step the set with the highest ratio
    ``newly covered elements / cost`` is added to the solution; the loop
    stops as soon as every element of the universe is covered. Ties are
    resolved in favour of the lowest set index.

    Args:
        SETS: 2-D boolean array; ``SETS[i, e]`` is True when set ``i``
            contains element ``e``.
        COSTS: 1-D array holding the cost of each set.
        UNIVERSE_SIZE: number of elements to cover (number of columns of SETS).

    Returns:
        List with the indices of the selected sets, in order of selection.

    Raises:
        ValueError: if some element is contained in no set, so that no
            cover exists.
    """
    covered = np.zeros(UNIVERSE_SIZE, dtype=bool)
    solution = []

    while not np.all(covered):
        # Invariant during the scan below, so compute it once per greedy step
        uncovered = np.logical_not(covered)
        best_set = None
        best_ratio = -1

        # Scan row by row: masking the whole matrix at once would allocate a
        # temporary as large as SETS itself.
        for i, current_set in enumerate(SETS):
            num_uncovered = np.count_nonzero(np.logical_and(current_set, uncovered))

            # Sets already in the solution contribute nothing new, so this
            # test also skips them without searching the solution list.
            if num_uncovered > 0:
                ratio = num_uncovered / COSTS[i]
                if ratio > best_ratio:
                    best_ratio = ratio
                    best_set = i

        if best_set is None:
            # No set adds a new element: the remaining ones cannot be covered
            missing = int(np.count_nonzero(uncovered))
            raise ValueError(f"no cover exists: {missing} element(s) are not contained in any set")

        solution.append(best_set)
        covered = np.logical_or(covered, SETS[best_set])
    return solution

In [43]:
# Run the greedy heuristic on the generated instance, then show feasibility and total cost
solution = greedy_set_cover(SETS, COSTS, UNIVERSE_SIZE)
valid(solution), cost(solution)

(np.True_, np.float64(1761543.6361832996))

| UNIVERSE_SIZE | NUM_SETS | DENSITY  |  COST  |
|---------------|----------|----------|--------|
| 100 | 10 |0.2 | 293 |
| 1000 | 100 | 0.2 |  5795 | 
| 10_000 | 1000 | 0.2 | 101306 | 
| 100_000 | 10_000 | 0.1 | 1_528_829 | 
| 100_000 | 10_000 | 0.2 | 1_720_509 |
| 100_000 | 10_000 | 0.3 | 1_761_543 | 