# Estimate performance benefit of parallelization

In [None]:
import sys
# Extend the module search path with the project directory (presumably so
# that the `src` package can be imported).
# NOTE(review): the path has no leading slash, so it is resolved relative to
# the current working directory -- confirm this is intended.
sys.path.append("home/christopher_orlowicz1_vodafone_c/gershgorin/")

In [None]:
%pip install gurobipy graphilp mpire

In [None]:
#%cd home/christopher_orlowicz1_vodafone_c/gershgorin/

In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import mpire
from functools import partial

from src.gershgorin.disc_alignment import estimate_coverage_subsets, expand_radius
from src.gershgorin.bs_gda import BS_GDA
from src.graph.graph_builder import GraphBuilder
from src.graph.graph import Graph

## Single execution of estimate_coverage_subsets

In [None]:
# Benchmark parameters passed to estimate_coverage_subsets.
runs = 5  # number of times the whole benchmark is repeated
thres = 0.5  # threshold for lower bound
mu = 0.01  # parameter for graph Laplacian based gsp reconstruction
p_hops = 12  # number of hops

In [None]:
# Node counts of the graphs used in the benchmark.
sizes = [200, 500, 800, 1000, 1200, 1500, 2000, 5000, 7000, 10000]
# One sensor graph per size, re-wrapped as a project Graph built from its W attribute.
graphs = [Graph(GraphBuilder("sensor").sensor(n).W) for n in sizes]

In [None]:
# Wall-clock time per graph size, accumulated over all runs and averaged below.
m = len(sizes)
not_parallel = np.zeros(m)
parallel = np.zeros(m)

for i in range(runs):
    print("Run", i+1)
    for j, (n, g) in enumerate(zip(sizes, graphs)):
        print(n, end=" ")
        # every node of the graph is a candidate; built outside the timed region
        nodes = list(range(n))
        start = time.perf_counter()
        estimate_coverage_subsets(nodes, g, thres, mu, p_hops, parallel=False)
        not_parallel[j] += time.perf_counter() - start
        start = time.perf_counter()
        estimate_coverage_subsets(nodes, g, thres, mu, p_hops, parallel=True)
        parallel[j] += time.perf_counter() - start
    print()
# Each entry holds the sum over `runs` measurements, so the mean is obtained by
# dividing by the number of runs (not by the number of graph sizes `m`).
not_parallel /= runs
parallel /= runs

In [None]:
# Plot the measured timings of both variants against the graph size and
# store the figure as a PDF.
fig, ax = plt.subplots()
for timings, curve_label in ((not_parallel, "Sequential"), (parallel, "Parallel")):
    ax.plot(sizes, timings, alpha=.8, label=curve_label)
ax.set_title("Sequential vs. parallel execution")
ax.set_xlabel("Nodes")
ax.set_ylabel("Time (in s)")
ax.legend()
fig.savefig("seq_vs_parallel_single.pdf")

## Batching

In [None]:
from queue import PriorityQueue

def estimate_coverage_subsets_no_batch(nodes, graph: Graph, thres: float, mu: float, p_hops: int, parallel=False):
    """Estimate the coverage subset of every candidate node, one call per node.

    :param nodes: candidate sampling nodes
    :param graph: graph
    :param thres: threshold for lower bound
    :param mu: parameter for graph Laplacian based gsp reconstruction
    :param p_hops: number of hops
    :param parallel: if True, map the nodes over an mpire worker pool with one
        worker per available core; otherwise process them in a plain loop
    :return: list with one coverage subset per entry of ``nodes``
    """
    if parallel:
        # start as many processes as cores are available; the graph is given
        # to the workers as shared object and is therefore passed to the
        # worker function as its first argument
        with mpire.WorkerPool(mpire.cpu_count(), shared_objects=graph) as pool:
            return list(
                pool.map(
                    partial(estimate_coverage_subset_, thres=thres, mu=mu, p_hops=p_hops),
                    nodes
                )
            )
    # The graph has to come first to match the signature of
    # estimate_coverage_subset_(graph, i, ...).
    return [estimate_coverage_subset_(graph, i, thres, mu, p_hops) for i in nodes]

# the shared object (graph) must be the first parameter of the function
def estimate_coverage_subset_(graph: Graph, i: int, thres: float, mu: float, p_hops: int) -> set:
    """Estimate the coverage subset of one candidate node (Algorithm 1).

    :param graph: graph
    :param i: candidate sampling node
    :param thres: threshold for lower bound
    :param mu: parameter for graph Laplacian based gsp reconstruction
    :param p_hops: number of hops
    :return: coverage subset
    """
    num_nodes = graph.num_nodes
    # disc radii, all starting at one
    radii = np.ones(num_nodes)
    # candidate sampling vector: only node i is marked
    sampled = np.zeros(num_nodes, dtype=bool)
    sampled[i] = True
    # hop number assigned to a node when it is first discovered
    hops = np.zeros(num_nodes, dtype=np.int8)
    # nodes that have already been put into the queue
    seen = np.zeros(num_nodes, dtype=bool)
    seen[i] = True
    covered = set()
    # PriorityQueue hands out the smallest pending node index first
    frontier = PriorityQueue()
    frontier.put(i)
    while not frontier.empty():
        node = frontier.get()
        radii[node] = expand_radius(graph, radii, node, sampled, mu, thres)
        # only nodes whose radius stays >= 1 and that lie within p_hops hops
        # are covered and have their neighbours explored
        if radii[node] >= 1 and hops[node] <= p_hops:
            covered.add(node)
            for neighbor in graph.neighbors[node]:
                if seen[neighbor]:
                    continue
                frontier.put(neighbor)
                seen[neighbor] = True
                hops[neighbor] = hops[node] + 1
    return covered

In [None]:
# Benchmark parameters passed to both the batched and the non-batched variant.
runs = 5  # number of times the whole benchmark is repeated
thres = 0.5  # threshold for lower bound
mu = 0.01  # parameter for graph Laplacian based gsp reconstruction
p_hops = 12  # number of hops

In [None]:
# Node counts of the graphs used in the batching benchmark.
sizes = [200, 500, 800, 1000, 1200, 1500, 2000, 3000, 5000, 10000]
# One sensor graph per size, re-wrapped as a project Graph built from its W attribute.
graphs = [Graph(GraphBuilder("sensor").sensor(n).W) for n in sizes]

In [None]:
# Wall-clock time per graph size, accumulated over all runs and averaged below.
m = len(sizes)
not_batched = np.zeros(m)
batched = np.zeros(m)

for i in range(runs):
    print("Run", i+1)
    for j, (n, g) in enumerate(zip(sizes, graphs)):
        print(n, end=" ")
        # every node of the graph is a candidate; built outside the timed region
        nodes = list(range(n))
        start = time.perf_counter()
        estimate_coverage_subsets(nodes, g, thres, mu, p_hops, parallel=True)
        batched[j] += time.perf_counter() - start
        start = time.perf_counter()
        estimate_coverage_subsets_no_batch(nodes, g, thres, mu, p_hops, parallel=True)
        not_batched[j] += time.perf_counter() - start
    print()
# Each entry holds the sum over `runs` measurements, so the mean is obtained by
# dividing by the number of runs (not by the number of graph sizes `m`).
not_batched /= runs
batched /= runs

In [None]:
# Plot the measured timings of both variants against the graph size and
# store the figure as a PDF.
fig, ax = plt.subplots()
for timings, curve_label in ((not_batched, "Not batched"), (batched, "Batched")):
    ax.plot(sizes, timings, alpha=.8, label=curve_label)
ax.set_title("Batched vs. not-batched execution")
ax.set_xlabel("Nodes")
ax.set_ylabel("Time (in s)")
ax.legend()
fig.savefig("batched_vs_not_batched_single.pdf")

## Complete sampling method

In [None]:
# Node counts of the graphs used to benchmark the complete sampling method.
sizes = [2000, 3000, 4000, 5000, 7000, 10000]
# One sensor graph per size, re-wrapped as a project Graph built from its W attribute.
graphs = [Graph(GraphBuilder("sensor").sensor(n).W) for n in sizes]

In [None]:
# Benchmark parameters for the complete sampling method (BS_GDA.bs_gda).
runs = 2  # number of times the whole benchmark is repeated
k = 0.1  # fraction of the node count; int(k*n) is passed to bs_gda (presumably the sampling budget -- TODO confirm)
mu = 0.01  # forwarded to bs_gda as mu
eps = 1e-5  # forwarded to bs_gda as eps (presumably a numerical tolerance -- TODO confirm)
p_hops = 12  # forwarded to bs_gda as p_hops

In [None]:
# Wall-clock time per graph size, accumulated over all runs and averaged below.
m = len(sizes)
not_parallel = np.zeros(m)
parallel = np.zeros(m)

for i in range(runs):
    print("Run", i+1)
    for j, (n, g) in enumerate(zip(sizes, graphs)):
        print(n, end=" ")
        start = time.perf_counter()
        BS_GDA().bs_gda(g, int(k*n), mu, eps, p_hops, parallel=False)
        not_parallel[j] += time.perf_counter() - start
        start = time.perf_counter()
        BS_GDA().bs_gda(g, int(k*n), mu, eps, p_hops, parallel=True)
        parallel[j] += time.perf_counter() - start
    # end the line of printed sizes so that the next "Run" starts on its own line
    print()
# Each entry holds the sum over `runs` measurements, so the mean is obtained by
# dividing by the number of runs (not by the number of graph sizes `m`).
not_parallel /= runs
parallel /= runs

In [None]:
# Plot the measured timings of both variants against the graph size and
# store the figure as a PDF.
fig, ax = plt.subplots()
for timings, curve_label in ((not_parallel, "Sequential"), (parallel, "Parallel")):
    ax.plot(sizes, timings, alpha=.8, label=curve_label)
ax.set_title("Sequential vs. parallel execution")
ax.set_xlabel("Nodes")
ax.set_ylabel("Time (in s)")
ax.legend()
fig.savefig("seq_vs_parallel_complete.pdf")