In [30]:
from itertools import product
from random import random, randint, shuffle, seed
import numpy as np
from functools import reduce
from copy import copy
from scipy import sparse

In [31]:
def make_set_covering_problem(num_points, num_sets, density):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points*2654435761+num_sets+density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets-1), p] = True
    return sets

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [32]:
NUM_POINTS = 5000
DENSITY = 0.7

In [33]:
sets = make_set_covering_problem(NUM_POINTS, NUM_POINTS, DENSITY).toarray()

Fitness function stolen from Professor's repository 👀

In [34]:
def fitness(solution):
    cost = len(solution)
    valid = np.sum(reduce(
        np.logical_or, 
        [sets[i] for i in solution],
        np.zeros(NUM_POINTS))
    )
    return valid, -cost

In [35]:
def goal_check(solution):
    return  np.sum(reduce(
        np.logical_or, 
        [sets[i] for i in solution],
        np.zeros(NUM_POINTS))
        ) == NUM_POINTS

The idea of this tweak function is to delete a set randomically and then check if it was really important to cover some points or was unuseful

In [36]:
def tweak(solution):
    new_solution = copy(solution)
    index = randint(0, len(new_solution) - 1)
    del new_solution[index]
    return new_solution

Starting from a solution with few number of randomic sets, given by correlation between the number of points, the density and an heuristic value of 0.05

In [37]:
solution = []
for _ in range(int(NUM_POINTS*(0.05/DENSITY))):
    solution.append(randint(0, NUM_POINTS - 1))

fitness_score = fitness(solution)
#fitness_score
assert(fitness_score[0] == NUM_POINTS)
print(f'Best solution: {solution} with fitness score: {fitness_score}')

Best solution: [4071, 4651, 1978, 2464, 3469, 3918, 2456, 4991, 1216, 1527, 1413, 1043, 3266, 2669, 1317, 2869, 1141, 387, 2607, 2701, 1323, 1951, 1751, 2693, 1653, 3048, 2078, 2532, 1348, 3342, 4244, 1234, 1442, 2132, 1815, 4716, 2570, 1436, 1798, 4270, 4162, 3104, 3633, 864, 1961, 1673, 2086, 738, 2855, 426, 3475, 554, 49, 216, 268, 2010, 891, 894, 4975, 2665, 93, 89, 3216, 4439, 569, 3420, 3966, 3394, 3421, 10, 3528, 2559, 4955, 2595, 3854, 891, 1022, 2039, 4091, 1196, 419, 1007, 2513, 3774, 4425, 729, 4962, 2269, 3157, 617, 2882, 236, 4071, 869, 2547, 3257, 4560, 4040, 3780, 3211, 1384, 3758, 1373, 1792, 4187, 3021, 3092, 1364, 647, 2948, 4090, 662, 1091, 4867, 4153, 2732, 2159, 2596, 1472, 3767, 4450, 4768, 3008, 4627, 591, 3493, 219, 3978, 1608, 785, 3431, 3658, 4740, 3661, 4859, 859, 2362, 3904, 3828, 4009, 3559, 1454, 3803, 1607, 321, 2895, 3158, 2756, 561, 1040, 2149, 2513, 2373, 3271, 3823, 3918, 3940, 1239, 186, 3452, 2242, 3934, 4347, 4107, 728, 1047, 50, 4847, 3000, 2899, 

As explained during lessons, if we search a better solution more than 5 times, it means that maybe we already have, so we should stop

In [38]:
same_fitness = 0
fitness_count = 0
while True:
    temp = copy(solution)
    for _ in range(len(solution)-1):
        new_solution = tweak(temp)
        if(goal_check(new_solution)):
            temp = new_solution
    score = fitness(temp)
    fitness_count += 1
    # if score > fitness_score: found better solution, redo the tweak 
    if score == fitness_score:
        same_fitness +=1 
    solution = temp
    fitness_score = score
    if same_fitness == 5:
        break
print(f'Best solution: {solution} with fitness_score: {fitness_score}')

Best solution: [1007, 3021, 3493, 2985, 1419, 688] with fitness_score: (5000, -6)


| NSETS & NPOINTS | DENSITY | FITNESS |
| -- | -- | -- |
| 100 | 0.3 | (100, -7) |
| 100 | 0.7 | (100, -3) |
| 1000 | 0.3 | (1000, -15) |
| 1000 | 0.7 | (1000, -6) |
| 5000 | 0.3 | (5000, -20) |
| 5000 | 0.7 | (5000, -6) | 