Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [1]:
from itertools import product
from random import random, randint, shuffle, seed
import numpy as np
from scipy import sparse
from copy import copy

In [2]:
def make_set_covering_problem(num_points, num_sets, density):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points*2654435761+num_sets+density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets-1), p] = True
    return sets

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [3]:
x = make_set_covering_problem(1000, 1000, .3)
print("Element at row=42 and column=42:", x[42, 42])

Element at row=42 and column=42: False


In [4]:
#generate the problems so that they are stored in memory
points = [100, 1_000, 5_000]
densities = [.3, .7]

problems = []
for num_points in points:
    for density in densities:
        num_sets = num_points
        sets = make_set_covering_problem(num_points, num_sets, density)
        print('problem with', sets.nnz, 'non zero values:', num_points, density)
        problems.append(sets)

problem with 3066 non zero values: 100 0.3
problem with 7019 non zero values: 100 0.7
problem with 300776 non zero values: 1000 0.3
problem with 701016 non zero values: 1000 0.7
problem with 7500703 non zero values: 5000 0.3
problem with 17508823 non zero values: 5000 0.7


In [28]:
solution = [1,2,3,4]  #store indices of taken sets to form solution, initialized randomly
a = problems[0].getrow(0)

problems[0][solution, :].sum(axis = 0, dtype = bool).sum()


78

solve one problem at a time, try to solve them
we use slicing cos its fast (from scipy docs)

In [22]:
def fitness(sets, solution):
    return ((sets[solution, :].sum(axis = 0, dtype = bool).sum() - sets.shape[0]), sets.shape[0] - len(solution)) 

P = 0.5
def tweak1(sets, solution):                                                      
    mutation = copy(solution)
    if len(mutation) == 0 or random() <= P:
        mutation.append(randint(0, sets.shape[0]-1))
    else:
        mutation[randint(0, len(mutation) - 1)] = randint(0, sets.shape[0]-1)    
    return mutation

def tweak2(sets, solution):                                                      #improve: needs to be a set
    mutation = copy(solution)
    if len(mutation) == 0 or random() <= P:
        mutation.append(randint(0, sets.shape[0]-1))
    else:
        mutation.pop(randint(0, len(mutation) - 1))    
    return mutation

tweak = tweak2


In [32]:
MESA_THRESHOLD = 150

for sets in problems:
    solution = []
    fitness_prev = fitness(sets, solution)
    since_last_mutation = 0
    it = 0
    print(fitness_prev, '->', end=' ')

    while(since_last_mutation < MESA_THRESHOLD):
        it += 1
        mutation = tweak(sets, solution)
        fitness_new = fitness(sets, mutation)
        if fitness_new >= fitness_prev:
            fitness_prev = fitness_new
            solution = mutation
            since_last_mutation = 0
        else:
            since_last_mutation += 1    
    print(fitness_prev, '\n', solution)
    print('called fitness function', it, 'times', end = '\n\n')

(-100, 100) -> (0, 92) 
 [39, 34, 12, 94, 65, 87, 40, 77]
called fitness function 222 times

(-100, 100) -> (0, 96) 
 [42, 39, 5, 61]
called fitness function 174 times

(-1000, 1000) -> (0, 986) 
 [100, 265, 213, 660, 742, 845, 755, 643, 544, 520, 770, 378, 613, 137]
called fitness function 240 times

(-1000, 1000) -> (0, 995) 
 [808, 621, 196, 4, 88]
called fitness function 186 times

(-5000, 5000) -> (0, 4982) 
 [1510, 1234, 3491, 4841, 2903, 796, 4695, 3592, 2320, 497, 1152, 3760, 4569, 2572, 4207, 280, 666, 2969]
called fitness function 210 times

(-5000, 5000) -> (0, 4993) 
 [1336, 1120, 1368, 3219, 2183, 4002, 234]
called fitness function 166 times

