In [None]:
# Add the project checkout to the import path and change the working directory
# into it, presumably so that the `src.` imports in the next cell resolve.
# NOTE(review): both paths are relative (no leading "/"), so they resolve against
# the kernel's current working directory; re-running this cell after the `%cd`
# has already succeeded will presumably fail — confirm, and consider a
# configurable absolute path.
import sys
sys.path.append("home/christopher_orlowicz1_vodafone_c/gershgorin/")
%cd home/christopher_orlowicz1_vodafone_c/gershgorin/

In [None]:
import mpire
import multiprocessing
from functools import partial
import numpy as np
import time
import matplotlib.pyplot as plt

from src.gershgorin.greedy_sampling import greedy_set_cover_parallel_, greedy_set_cover_
from src.gershgorin.disc_alignment import estimate_coverage_subsets, expand_radius
from src.gershgorin.bs_gda import BS_GDA
from src.graph.graph_builder import GraphBuilder
from src.graph.graph import Graph

In [None]:
def task(shared_objects, set_idx, sets, uncovered):
    """Scan one batch of candidate sets and return the batch's best candidate.

    Parameters
    ----------
    shared_objects : tuple
        ``(covered, selected)`` flag containers indexed by set id. Ids whose
        flag is truthy in either container are skipped.
    set_idx : iterable
        Ids of the sets in this batch, aligned with ``sets``.
    sets : iterable of set
        The candidate sets of this batch.
    uncovered : set
        Elements that still need to be covered.

    Returns
    -------
    tuple
        ``(gain, idx)``: the largest intersection with ``uncovered`` found in
        the batch and the id of the set producing it (the first one wins on
        ties). ``(set(), None)`` if no set in the batch adds any coverage.

    Side effects
    ------------
    Every inspected set whose intersection with ``uncovered`` is empty gets
    ``covered[idx] = 1`` so it is skipped on later calls.
    """
    covered, selected = shared_objects
    best_gain, best_node = set(), None

    for node, candidate in zip(set_idx, sets):
        # Already exhausted or already part of the solution: nothing to do.
        if covered[node] or selected[node]:
            continue

        gain = candidate & uncovered
        if len(gain) == 0:
            # This set can no longer contribute anything; flag it for skipping.
            covered[node] = 1
            continue

        # Strict comparison keeps the earliest set among equally good ones.
        if len(gain) > len(best_gain):
            best_gain, best_node = gain, node

    return best_gain, best_node


def set_cover(k, sets, uncovered, covered, selected, n_jobs):
    """Greedily pick up to ``k`` sets covering ``uncovered``, scoring candidates in parallel.

    The sets are split into ``n_jobs`` contiguous batches. In every round each
    batch is scanned by ``task`` in an mpire worker, and the per-batch winners
    are reduced to the single set with the largest remaining coverage.

    Parameters
    ----------
    k : int
        Maximum number of sets to select.
    sets : sequence of set
        Candidate sets; a set is identified by its position in this sequence.
    uncovered : set
        Elements still to be covered. Mutated in place: elements covered by a
        selected set are removed.
    covered, selected : indexable flag containers of length ``len(sets)``
        Passed to the workers as mpire shared objects. ``selected`` is updated
        here, ``covered`` inside ``task``.
        NOTE(review): the flags presumably need to live in shared memory (e.g.
        ``multiprocessing.Array``) for updates to be visible across processes
        — confirm.
    n_jobs : int
        Number of worker processes and number of batches.

    Returns
    -------
    sampling_set : list
        Ids of the selected sets in selection order.
    vf : bool
        True if ``uncovered`` is empty on return, False otherwise.
    """
    sampling_set = []
    batches = np.array_split(sets, n_jobs)
    set_idx = list(range(len(sets)))
    batched_set_idx = np.array_split(set_idx, n_jobs)

    with mpire.WorkerPool(n_jobs, shared_objects=(covered, selected), keep_alive=True) as pool:
        while len(uncovered) > 0 and len(sampling_set) < k:
            # `uncovered` changes every round, so the partial is rebuilt each time.
            candidates = list(
                pool.map_unordered(
                    partial(task, uncovered=uncovered), zip(batched_set_idx, batches),
                    iterable_len=len(batches), n_splits=n_jobs
                ))
            max_coverage_set, max_idx = max(candidates, key=lambda x: len(x[0]))
            if max_idx is None:
                # No remaining set covers any uncovered element. Stop here
                # instead of recording None as a selection and failing on
                # `selected[None]`.
                break
            uncovered -= max_coverage_set
            sampling_set.append(max_idx)
            selected[max_idx] = 1

    return sampling_set, len(uncovered) == 0

In [None]:
def setup(n, k):
    uncovered = set(range(n))
    sets = [set([i]) for i in range(n)]
    #covered = np.zeros(n, dtype=bool)
    #selected = np.zeros(n, dtype=bool)
    covered = multiprocessing.Array('b', n, lock=False)
    selected = multiprocessing.Array('b', n, lock=False)
    for i in range(n):
        covered[i] = 0
        selected[i] = 0
    return sets, uncovered, covered, selected

# Single timed run: n singleton sets, a budget of 10% of n, and 10 worker processes.
n = 2000
k = int(0.1*n)
sets, uncovered, covered, selected = setup(n, k)
# Only the set_cover call is timed; building the instance is excluded.
t = time.perf_counter()
sampling_set, vf = set_cover(k, sets, uncovered, covered, selected, n_jobs=10)
print(f"That took {time.perf_counter()-t} s")


In [None]:
# Synthetic benchmark grid: instance sizes (number of singleton sets) and worker counts.
ns = [500, 1000, 1500, 2000, 3000, 4000, 5000]
jobs = [2, 4, 8, 16, 32, 64, 96]

# times[i, j] holds the wall-clock seconds of set_cover with jobs[i] workers
# on an instance of size ns[j].
times = np.zeros((len(jobs), len(ns)))
for i, n_jobs in enumerate(jobs):
    # Progress output: the worker count on its own line, then each size as it starts.
    print(n_jobs)
    for j, n in enumerate(ns):
        print(n, end=" ")
        k = int(0.1*n)
        # A fresh instance per run: set_cover empties `uncovered` in place and
        # updates the flag arrays.
        sets, uncovered, covered, selected = setup(n, k)
        # Only the set_cover call is timed (this includes creating the worker pool inside it).
        t = time.perf_counter()
        set_cover(k, sets, uncovered, covered, selected, n_jobs)
        times[i, j] = time.perf_counter()-t
    print()

In [None]:
# Runtime of the synthetic set-cover benchmark: one curve per worker count.
plt.figure()
for i, n_jobs in enumerate(jobs):
    plt.plot(ns, times[i], label=n_jobs)

# Title and axis labels so the figure can be read on its own, matching the
# labelling used for the graph-benchmark figures in this notebook.
plt.title("Runtime comparison for different number of processors")
plt.xlabel("Number of sets")
plt.ylabel("Time (in s)")
plt.yscale('log')
plt.legend();

## Graphs

In [None]:
# Parameters for the graph experiments.
# `runs` is not referenced by any cell visible in this part of the notebook.
runs = 1
# thres, mu and p_hops are forwarded positionally to estimate_coverage_subsets.
# NOTE(review): their exact meaning is defined in src.gershgorin.disc_alignment;
# p_hops is presumably a hop limit — confirm.
thres = 0.5
mu = 0.01
p_hops = 12

In [None]:
# Graph sizes (number of nodes) and worker counts for the graph benchmarks.
# Note that `jobs` is rebound here with a different list than in the synthetic benchmark.
sizes = [500, 1000, 3000, 5000, 7000, 10000]
jobs = [2, 4, 8, 16, 24, 32]
# Build one "sensor" graph per size, then wrap each graph's `W` attribute in a Graph.
# NOTE(review): W is presumably the weight/adjacency matrix — confirm against GraphBuilder.
graphs = [GraphBuilder("sensor").sensor(n) for n in sizes]
graphs = [Graph(graph.W) for graph in graphs]

In [None]:
# times[i, j]: wall-clock seconds of greedy_set_cover_parallel_ with jobs[i]
# workers on graphs[j]. This rebinds `times`, replacing the synthetic results.
times = np.zeros((len(jobs), len(sizes)))
for i, n_jobs in enumerate(jobs):
    # Progress output: the worker count on its own line, then each size as it starts.
    print(n_jobs)
    for j, n in enumerate(sizes):
        print(n, end=" ")
        k = int(0.1*n)
        nodes = list(range(n))
        # The coverage subsets are built outside the timed region.
        # NOTE(review): they are recomputed for every worker count although none
        # of the inputs depends on n_jobs; this could be hoisted if
        # greedy_set_cover_parallel_ does not modify `sets`/`nodes` — confirm.
        sets = estimate_coverage_subsets(nodes, graphs[j], thres, mu, p_hops, parallel=False)
        t = time.perf_counter()
        greedy_set_cover_parallel_(sets, nodes, k, n_jobs)
        times[i, j] = time.perf_counter()-t
    print()


In [None]:
# Runtime of the parallel greedy set cover on the sensor graphs:
# one curve per worker count, graph size on x, log-scaled time on y.
fig, ax = plt.subplots()
for i, n_jobs in enumerate(jobs):
    ax.plot(sizes, times[i], alpha=.7, label=n_jobs)

ax.set_title("Runtime comparison for different number of processors")
ax.set_xlabel("Nodes")
ax.set_ylabel("Time (in s)")
ax.set_yscale('log')
ax.legend()
fig.savefig("set_cover_graph_size_vs_num_processors.pdf")

In [None]:
# times_seq[j]: wall-clock seconds of the sequential greedy_set_cover_ on graphs[j].
times_seq = np.zeros(len(sizes))
for j, n in enumerate(sizes):
    # Progress output: each size is printed as its run starts.
    print(n, end=" ")
    k = int(0.1*n)
    nodes = list(range(n))
    # The coverage subsets are built outside the timed region.
    sets = estimate_coverage_subsets(nodes, graphs[j], thres, mu, p_hops, parallel=False)
    t = time.perf_counter()
    greedy_set_cover_(sets, nodes, k)
    times_seq[j] = time.perf_counter()-t
print()

In [None]:
# Runtime of the sequential greedy set cover against graph size, log-scaled time axis.
fig, ax = plt.subplots()
ax.plot(sizes, times_seq, alpha=.7)
ax.set_title("Runtime of sequential execution")
ax.set_xlabel("Nodes")
ax.set_ylabel("Time (in s)")
ax.set_yscale('log')
fig.savefig("set_cover_sequential_graph_size_vs_num_processors.pdf")