# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

## Have Fun!

In [160]:
import logging
from itertools import combinations
import pandas as pd
import numpy as np
from geopy.distance import geodesic
import networkx as nx
import functools
from icecream import ic
from tqdm import tqdm

logging.basicConfig(level=logging.INFO)

In [161]:
CVS_FILE = "cities/russia.csv"

In [162]:
CITIES = pd.read_csv(CVS_FILE, header=None, names=['name', 'lat', 'lon'])
DIST_MATRIX = np.zeros((len(CITIES), len(CITIES)))
for c1, c2 in combinations(CITIES.itertuples(), 2):
    DIST_MATRIX[c1.Index, c2.Index] = DIST_MATRIX[c2.Index, c1.Index] = geodesic(
        (c1.lat, c1.lon), (c2.lat, c2.lon)
    ).km
CITIES.head()

Unnamed: 0,name,lat,lon
0,Abakan,53.72,91.43
1,Achinsk,56.28,90.5
2,Almetyevsk,54.9,52.31
3,Angarsk,52.57,103.91
4,Arkhangelsk,64.57,40.53


In [163]:
def counter(fn):
    """Simple decorator for counting number of calls"""

    @functools.wraps(fn)
    def helper(*args, **kargs):
        helper.calls += 1
        return fn(*args, **kargs)

    helper.calls = 0
    return helper

@counter
def tsp_cost(tsp):
    assert tsp[0] == tsp[-1]
    assert set(tsp) == set(range(len(CITIES)))

    tot_cost = 0
    for c1, c2 in zip(tsp, tsp[1:]):
        tot_cost += DIST_MATRIX[c1, c2]
    return tot_cost

## Firt Greedy

In [164]:
visited = np.full(len(CITIES), False)
dist = DIST_MATRIX.copy()
city = 0
visited[city] = True
tsp = list()
tsp.append(int(city))
while not np.all(visited):
    dist[:, city] = np.inf
    closest = np.argmin(dist[city])
    logging.debug(
        f"step: {CITIES.at[city,'name']} -> {CITIES.at[closest,'name']} ({DIST_MATRIX[city,closest]:.2f}km)"
    )
    visited[closest] = True
    city = closest
    tsp.append(int(city))
logging.debug(
    f"step: {CITIES.at[tsp[-1],'name']} -> {CITIES.at[tsp[0],'name']} ({DIST_MATRIX[tsp[-1],tsp[0]]:.2f}km)"
)
tsp.append(tsp[0])


logging.info(f"result: Found a path of {len(tsp)-1} steps, total length {tsp_cost(tsp):.2f}km")

INFO:root:result: Found a path of 167 steps, total length 42334.16km


## Second Greedy

In [165]:
def cyclic(edges):
    G = nx.Graph()
    G.add_edges_from(edges)
    try:
        nx.find_cycle(G)
        return True
    except:
        return False

In [166]:
segments = [
    ({c1, c2}, float(DIST_MATRIX[c1, c2])) for c1, c2 in combinations(range(len(CITIES)), 2)
]
visited = set()
edges = set()

In [167]:
shortest = next(_ for _ in sorted(segments, key=lambda e: e[1]))
visited |= shortest[0]
visited
edges |= {tuple(shortest[0])}
segments = [s for s in segments if not cyclic(edges | {tuple(s[0])})]

In [168]:
segments

[({0, 1}, 291.12871812096597),
 ({0, 2}, 2516.33442321499),
 ({0, 3}, 843.679166213213),
 ({0, 4}, 3050.3327570180104),
 ({0, 5}, 3697.257751542974),
 ({0, 6}, 3169.5429738640505),
 ({0, 7}, 3025.8255661682847),
 ({0, 8}, 3160.2480682832283),
 ({0, 9}, 2898.5097152436815),
 ({0, 10}, 3364.293337838562),
 ({0, 11}, 510.48040419758183),
 ({0, 12}, 3659.116686030305),
 ({0, 13}, 3674.232708952557),
 ({0, 14}, 2194.6122160833356),
 ({0, 15}, 439.31947457577616),
 ({0, 16}, 2481.4335921593824),
 ({0, 17}, 716.7680282426004),
 ({0, 18}, 3680.756755100788),
 ({0, 19}, 2796.2681940600255),
 ({0, 20}, 1938.0889218280688),
 ({0, 21}, 3264.6854322830545),
 ({0, 22}, 3678.9247499298162),
 ({0, 23}, 1488.4462769418906),
 ({0, 24}, 3412.8183943767885),
 ({0, 25}, 2707.300979312825),
 ({0, 26}, 3021.42923733918),
 ({0, 27}, 3335.5365148607657),
 ({0, 28}, 3414.741242958905),
 ({0, 29}, 3026.843158723314),
 ({0, 30}, 3501.175002654965),
 ({0, 31}, 872.0827902020271),
 ({0, 32}, 3145.496905689024),
 ({

In [169]:
cyclic(edges)

False

## Lab 2 -  Proposed Solution

In [170]:
POPULATION_SIZE = 100
GENERATIONS = 1000
MUTATION_RATE = 0.1
TOURNAMENT_SIZE = 5
OFFSPRING_SIZE = 5

In [171]:
from dataclasses import dataclass

@dataclass
class Individual:
    genome: np.ndarray
    fitness: float = None

In [172]:
def valid_tsp(tsp):
    """
    It verifies the validity of a TSP route.
    """
    tsp = np.array(tsp)
    
    if tsp[0] != tsp[-1]:
        return False
    
    cities_visited = tsp[:-1]
    expected_cities = np.arange(len(CITIES))
    
    return (len(np.unique(cities_visited)) == len(CITIES) and 
            np.all(np.isin(expected_cities, cities_visited)))

In [173]:
def inversion_mutation(tsp):
    tsp = np.array(tsp)
    value1, value2 = sorted(np.random.choice(range(1, len(tsp) - 1), 2, replace=False))
    tsp[value1:value2+1] = np.flip(tsp[value1:value2+1])
    return tsp

In [174]:
# seleziono un genitore in base a quello che ha il costo minore 
def tournament_selection(population):
    tournament = population[np.random.choice(len(population), TOURNAMENT_SIZE, replace=False)]
    costs = np.array([tsp_cost(individual) for individual in tournament]) #seleziono i costi per ogni genitore
    return tournament[np.argmin(costs)]

In [175]:
# crossover tra due percorsi
def order_crossover(parent1, parent2):
    start, end = np.sort(np.random.choice(range(1, len(parent1) - 1), 2, replace=False))
    child = np.full(len(parent1), -1)
    child[start:end] = parent1[start:end]
    pos = end
    for city in parent2:
        if city not in child:
            child[pos] = city
            pos = (pos + 1) % (len(parent1) - 1) # necessario per non fare out of index
    child[-1] = child[0]
    return child

In [176]:
def inver_over(parent1, parent2):
    n = len(parent1)

    start, end = sorted(np.random.choice(range(n), 2, replace=False))
    child = parent1.copy()
    child[start:end+1] = parent2[start:end+1][::-1]
    for i in range(n):
        while child[i] in child[start:end+1]:
            for value in parent1:
                if value not in child:
                    child[i] = value
                    break

    return child

In [177]:
def create_initial_population():
    population = np.zeros((POPULATION_SIZE, len(CITIES) + 1), dtype=int)
    for i in range(POPULATION_SIZE):
        tsp = np.arange(len(CITIES))
        np.random.shuffle(tsp[1:])
        tsp = np.append(tsp, tsp[0])
        population[i] = tsp
    return population

## Algortimo lento ma preciso - evolutionary algorithm 

In [178]:
def ea():
    population = create_initial_population()
    for _generation in range(GENERATIONS):
        costs = np.array([tsp_cost(individual) for individual in population])
        best_index = np.argmin(costs)
        best_tsp = population[best_index]
        # logging.info(f'Generation {generation} - Best cost: {tsp_cost(best_tsp):.2f}')

        new_population = np.zeros_like(population)
        new_population[0] = best_tsp

        offspring = []

        for _ in range(1, POPULATION_SIZE):
            if np.random.rand() < MUTATION_RATE:
                parent = tournament_selection(population)
                child = inversion_mutation(parent)
            else:
                parent1 = tournament_selection(population)
                parent2 = tournament_selection(population)
                child = order_crossover(parent1, parent2)
            
            offspring.append(child)  
            
        new_population[1:] = offspring[:POPULATION_SIZE - 1]
        population = new_population

    return best_tsp

best_path = ea()
ic(tsp_cost(best_path), tsp_cost.calls, valid_tsp(best_path), best_path)

ic| tsp_cost(best_path): np.float64(57789.41982681002)
    tsp_cost.calls: 1041212
    valid_tsp(best_path): np.True_
    best_path: array([ 11, 113,  90,  40,  54,  57,  17,  23, 144,   3,   0,  15,  58,
                       31,  16,  71, 146, 151, 162,  41,  48, 106,   6,  61, 157,  82,
                       14, 104,  44,  19, 133,   4, 123,  67, 107,  47,  21,  94, 147,
                      127, 148, 116, 110,  34,  18,  59, 129,  56, 156,  60,  87, 136,
                      134,  29, 120,   9, 138, 145, 103,  37, 155, 153, 149,  72,   8,
                        5, 111, 128,  64,  53,  88,  84,  12, 125,  89, 112, 152, 135,
                      130,  45,  28,  63,  38,  42,  73,  30,  24,  22,  76,  13, 160,
                       68,  52,  26,  98, 140,  27,  66,  43, 114,  46,  81, 165, 164,
                      115, 154, 158, 126,  50,  35, 102, 150, 141, 163, 122, 108, 121,
                       10,  95,  69,  51,  32,  80, 161,  83,   7, 119,  39,  77,   2,
            

(np.float64(57789.41982681002),
 1041212,
 np.True_,
 array([ 11, 113,  90,  40,  54,  57,  17,  23, 144,   3,   0,  15,  58,
         31,  16,  71, 146, 151, 162,  41,  48, 106,   6,  61, 157,  82,
         14, 104,  44,  19, 133,   4, 123,  67, 107,  47,  21,  94, 147,
        127, 148, 116, 110,  34,  18,  59, 129,  56, 156,  60,  87, 136,
        134,  29, 120,   9, 138, 145, 103,  37, 155, 153, 149,  72,   8,
          5, 111, 128,  64,  53,  88,  84,  12, 125,  89, 112, 152, 135,
        130,  45,  28,  63,  38,  42,  73,  30,  24,  22,  76,  13, 160,
         68,  52,  26,  98, 140,  27,  66,  43, 114,  46,  81, 165, 164,
        115, 154, 158, 126,  50,  35, 102, 150, 141, 163, 122, 108, 121,
         10,  95,  69,  51,  32,  80, 161,  83,   7, 119,  39,  77,   2,
         70,  33,  96, 117,  62, 131,  91,  85, 118,  25,  99, 100, 101,
        143, 137,  79, 105, 159, 142,  36,  20,  49,  74,  65, 166,  55,
         97,  78, 132,  75,  93,  92, 124, 139,   1, 109,  86,  11]))

## Algortimo veloce ma impreciso

In [179]:
# env variables
NUM_CITIES = len(DIST_MATRIX)
MAX_STEPS = 10000
INITIAL_TEMPERATURE = 1000
COOLING_RATE = 0.985

In [180]:
# init function
def create_initial_solution():
    random = np.random.permutation(NUM_CITIES)
    solution = np.append(random, random[0])
    return solution

In [181]:
def single_mutation(solution: np.ndarray) -> np.ndarray:
    new_sol = solution.copy()
    i, j = np.random.randint(1, NUM_CITIES), np.random.randint(1, NUM_CITIES)
    while i == j:
        j = np.random.randint(1, NUM_CITIES)
        new_sol[i], new_sol[j] = new_sol[j], new_sol[i]
    return new_sol

In [182]:
# SA Alg.
def simulated_annealing():
    current_solution = create_initial_solution()
    best_solution = current_solution.copy()

    fitness_solution = tsp_cost(best_solution)
    history = [fitness_solution]       
    temperature = INITIAL_TEMPERATURE
    logging.info(f'Initial cost: {fitness_solution:.2f}')

    for _step in tqdm(range(MAX_STEPS)):
        new_solution = single_mutation(current_solution)
        new_cost = tsp_cost(new_solution)
        history.append(new_cost) 
        cost_diff = new_cost - tsp_cost(current_solution)

        if cost_diff < 0 or np.random.random() < np.exp(-cost_diff / temperature):
            current_solution = new_solution.copy()
        if new_cost < fitness_solution:
            best_solution = new_solution.copy()
            fitness_solution = new_cost
        if temperature > 1e-17:
            temperature *= COOLING_RATE

    return best_solution, history

In [183]:
# run SA alg.
best_path, cost_history = simulated_annealing()
ic(tsp_cost(best_path), tsp_cost.calls, valid_tsp(best_path), best_path)

INFO:root:Initial cost: 354401.45
100%|██████████| 10000/10000 [00:00<00:00, 12609.56it/s]
ic| tsp_cost(best_path): np.float64(323858.6243802655)
    tsp_cost.calls: 1061214
    valid_tsp(best_path): np.True_
    best_path: array([131,  88,  61,  20,  75, 151,   9,  29, 142, 112, 139,  23, 129,
                      138,  50,  73, 128,  55, 100,  80, 149,  11,  19,  44, 118, 119,
                      117,  16, 137, 102,  48,  67, 104,  87,  83,  60,  64,  31,  56,
                      124,  63,   7,  27, 141,  22,  84,  18,  26,  81,  24,   1, 133,
                      166, 106,  68,   2,  77,  17, 153, 114, 123,  46, 154,  13, 125,
                       96, 108,  12,   8,   5,  32,  36,  39, 101,  66,  59, 156,   4,
                       95,  40,  98,  85,  53, 152, 130, 136,  74,  99, 127, 148,  82,
                      160, 109, 159,  49,  42,  79,  65,  14, 110, 165,  15,  35, 164,
                        0,  76,  51, 146, 113, 111,  54,  45,  97, 103, 134,  72,  89,
        

(np.float64(323858.6243802655),
 1061214,
 np.True_,
 array([131,  88,  61,  20,  75, 151,   9,  29, 142, 112, 139,  23, 129,
        138,  50,  73, 128,  55, 100,  80, 149,  11,  19,  44, 118, 119,
        117,  16, 137, 102,  48,  67, 104,  87,  83,  60,  64,  31,  56,
        124,  63,   7,  27, 141,  22,  84,  18,  26,  81,  24,   1, 133,
        166, 106,  68,   2,  77,  17, 153, 114, 123,  46, 154,  13, 125,
         96, 108,  12,   8,   5,  32,  36,  39, 101,  66,  59, 156,   4,
         95,  40,  98,  85,  53, 152, 130, 136,  74,  99, 127, 148,  82,
        160, 109, 159,  49,  42,  79,  65,  14, 110, 165,  15,  35, 164,
          0,  76,  51, 146, 113, 111,  54,  45,  97, 103, 134,  72,  89,
        147,  10,  93,   6, 105, 120, 135,  86,  57,  62, 161, 107,  21,
        157,  43,  91,  78,  41, 122, 132,   3,  71, 126,  25,  90,  30,
         94,  69, 140, 144, 162,  47,  34,  38, 163, 143,  92,  58, 116,
         70, 145,  37, 121,  28,  33, 155, 150,  52, 158, 115, 131]))