Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [1]:
from itertools import product
from random import random, randint, shuffle, seed
import numpy as np
from scipy import sparse
from copy import copy

In [2]:
def make_set_covering_problem(num_points, num_sets, density):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points*2654435761+num_sets+density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets-1), p] = True
    return sets

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [3]:
x = make_set_covering_problem(1000, 1000, .3)
print("Element at row=42 and column=42:", x[42, 42])

Element at row=42 and column=42: False


In [4]:
#generate the problems so that they are stored in memory
points = [100, 1_000, 5_000]
densities = [.3, .7]

problems = []
for num_points in points:
    for density in densities:
        num_sets = num_points
        sets = make_set_covering_problem(num_points, num_sets, density)
        print('problem with', sets.nnz, 'non zero values:', num_points, density)
        problems.append(sets)

problem with 3066 non zero values: 100 0.3
problem with 7019 non zero values: 100 0.7
problem with 300776 non zero values: 1000 0.3
problem with 701016 non zero values: 1000 0.7
problem with 7500703 non zero values: 5000 0.3
problem with 17508823 non zero values: 5000 0.7


In [5]:
solution = [1,2,3,4]  #store indices of taken sets to form solution, initialized randomly
a = problems[0].getrow(0)

problems[0][solution, :].sum(axis = 0, dtype = bool).sum()


78

# Solve one problem at a time, try to solve them
we use slicing cos its fast (from scipy docs)

In [8]:
def fitness(sets, solution):
    return ((sets[solution, :].sum(axis = 0, dtype = bool).sum() - sets.shape[0]), sets.shape[0] - len(solution)) 

P = 0.5
def tweak1(sets, solution):                                                      
    mutation = copy(solution)
    if len(mutation) == 0 or random() <= P:
        mutation.append(randint(0, sets.shape[0]-1))
    else:
        mutation[randint(0, len(mutation) - 1)] = randint(0, sets.shape[0]-1)    
    return mutation

def tweak2(sets, solution):                                                      #improve: needs to be a set
    mutation = copy(solution)
    if len(mutation) == 0 or random() <= P:
        mutation.append(randint(0, sets.shape[0]-1))
    else:
        mutation.pop(randint(0, len(mutation) - 1))    
    return mutation

tweak = tweak2


In [9]:
MESA_THRESHOLD = 150

for sets in problems:
    solution = []
    fitness_prev = fitness(sets, solution)
    since_last_mutation = 0
    it = 0
    print(fitness_prev, '->', end=' ')

    while(since_last_mutation < MESA_THRESHOLD):
        it += 1
        mutation = tweak(sets, solution)
        fitness_new = fitness(sets, mutation)
        if fitness_new >= fitness_prev:
            fitness_prev = fitness_new
            solution = mutation
            since_last_mutation = 0
        else:
            since_last_mutation += 1    
    print(fitness_prev, '\n', solution)
    print('called fitness function', it, 'times', end = '\n\n')

(-100, 100) -> (0, 92) 
 [12, 94, 20, 81, 36, 95, 2, 89]
called fitness function 213 times

(-100, 100) -> (0, 96) 
 [29, 76, 60, 22]
called fitness function 159 times

(-1000, 1000) -> (0, 985) 
 [852, 568, 506, 913, 334, 295, 618, 511, 42, 230, 760, 977, 777, 421, 20]
called fitness function 273 times

(-1000, 1000) -> (0, 995) 
 [110, 578, 758, 67, 627]
called fitness function 160 times

(-5000, 5000) -> (0, 4981) 
 [1926, 3267, 3534, 3416, 3552, 330, 2358, 697, 3420, 618, 2880, 2907, 4295, 1442, 1959, 393, 1683, 1843, 4991]
called fitness function 270 times

(-5000, 5000) -> (0, 4994) 
 [2551, 1791, 3067, 4506, 3086, 3750]
called fitness function 173 times



# Try a more 'greedy' approach

In [10]:
for sets in problems:
    solution = []
    fitness_prev = fitness(sets, solution)
    print(fitness_prev, '->', end=' ')

    while(fitness_prev[0] != 0):
        it += 1
        mutation = tweak(sets, solution)
        fitness_new = fitness(sets, mutation)
        if fitness_new >= fitness_prev:
            fitness_prev = fitness_new
            solution = mutation
               
    print(fitness_prev, '\n', solution)
    print('called fitness function', it, 'times', end = '\n\n')

(-100, 100) -> (0, 92) 
 [83, 24, 29, 89, 59, 87, 68, 72]
called fitness function 199 times

(-100, 100) -> (0, 94) 
 [73, 61, 48, 86, 63, 40]
called fitness function 209 times

(-1000, 1000) -> (0, 982) 
 [872, 787, 263, 542, 195, 87, 678, 881, 225, 33, 734, 393, 279, 587, 772, 634, 715, 351]
called fitness function 242 times

(-1000, 1000) -> (0, 993) 
 [226, 963, 99, 746, 910, 381, 562]
called fitness function 251 times

(-5000, 5000) -> (0, 4976) 
 [3482, 325, 4936, 3447, 2338, 509, 2473, 3910, 4022, 3803, 1420, 2225, 3036, 3299, 302, 4355, 3460, 608, 105, 795, 3397, 4169, 1156, 1154]
called fitness function 298 times

(-5000, 5000) -> (0, 4992) 
 [2140, 1439, 2904, 3644, 4241, 4151, 2285, 518]
called fitness function 317 times



# 1 + lambda ES
takes 3 different paths, one additive, one subtractive, one that swaps 

In [15]:
# 1 + lambda with different strategies
# already returns best solution and its fitness
def tweak3(sets, solution): 
    mutation1 = copy(solution)
    mutation1.append(randint(0, sets.shape[0]-1))
    f1 = fitness(sets, mutation1)
    if len(mutation1) == 1:
        return mutation1, f1
    
    mutation2 = copy(solution)    
    mutation2[randint(0, len(mutation2) - 1)] = randint(0, sets.shape[0]-1) 
    f2 = fitness(sets, mutation2)

    mutation3 = copy(solution)
    mutation3.pop(randint(0, len(mutation3) - 1)) 
    f3 = fitness(sets, mutation3)

    if(f1 >= f2 and f1 >= f3):
        return mutation1, f1
    if(f2 >= f1 and f2 >= f3):
        return mutation2, f2  
    return mutation3, f3

tweak = tweak3

for sets in problems:
    solution = []
    fitness_prev = fitness(sets, solution)
    it = 0
    print(fitness_prev, '->', end=' ')

    while(fitness_prev[0] != 0):
        it += 1
        mutation, fitness_new = tweak(sets, solution)
        if fitness_new >= fitness_prev:
            fitness_prev = fitness_new
            solution = mutation  
    print(fitness_prev, '\n', solution)
    print('called fitness function', it*3, 'times', end = '\n\n')

(-100, 100) -> (0, 90) 
 [95, 32, 81, 12, 84, 30, 2, 74, 83, 5]
called fitness function 36 times

(-100, 100) -> (0, 96) 
 [32, 2, 87, 83]
called fitness function 21 times

(-1000, 1000) -> (0, 984) 
 [327, 533, 379, 999, 647, 675, 494, 361, 346, 612, 886, 390, 113, 407, 4, 679]
called fitness function 57 times

(-1000, 1000) -> (0, 995) 
 [305, 744, 387, 124, 224]
called fitness function 18 times

(-5000, 5000) -> (0, 4977) 
 [805, 26, 2100, 405, 1171, 3579, 3259, 3716, 2744, 1265, 3634, 2306, 849, 1659, 2146, 1681, 844, 418, 2334, 2745, 75, 543, 783]
called fitness function 78 times

(-5000, 5000) -> (0, 4992) 
 [4513, 2047, 4982, 4028, 3717, 4569, 4212, 302]
called fitness function 24 times

