# Are the sampled nodes also "central" nodes?

In [None]:
import sys
sys.path.append("../")

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import itertools as it
import time

import numpy as np
import scipy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

from src.gershgorin.bs_gda import BS_GDA
from src.graph.graph import Graph

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

## Build Graph

In [None]:
# Random partition graph: four groups of 10 nodes each, with edge probability
# 0.8 inside a group and 0.3 between groups.
# NOTE(review): no seed is passed to the generator, so the graph differs
# between runs even though the layout below uses a fixed seed.
G = nx.random_partition_graph([10, 10, 10, 10], p_in=0.8, p_out=0.3)
# Alternative topologies kept for experimentation:
#G = nx.grid_2d_graph(10, 10)
#G = nx.connected_caveman_graph(5, 10)
# Relabel the nodes as consecutive integers.
G = nx.convert_node_labels_to_integers(G)
# Spring layout with a fixed seed; `pos` is passed to every nx.draw call in this notebook.
pos = nx.layout.spring_layout(G, seed=0)
plt.figure(figsize=(3,3))
nx.draw(G, pos=pos, node_size=50, width=.4)

## Centrality measures

- Betweenness centrality
- Closeness centrality
- Degree centrality
- Eigenvector centrality

In [None]:
# Centrality functions to evaluate on G; each result is stored under the
# function's __name__.
centrality_measures = [
    nx.betweenness_centrality,
    nx.closeness_centrality,
    nx.degree_centrality,
    nx.eigenvector_centrality,
]
centrality_scores = {measure.__name__: measure(G) for measure in centrality_measures}

In [None]:
# One line per centrality measure: x is the node position (0..len(G)-1),
# y is that node's score.
plt.figure(figsize=(6,3))
for method, c in centrality_scores.items():
    # plot() expects array-like inputs. A dict view is not one and may be
    # wrapped as a single object instead of a sequence, so materialise the
    # values as a list (same as the graph-drawing cell does for node_color).
    plt.plot(np.arange(len(G)), list(c.values()), marker='o', label=method)

plt.title("Centrality scores", y=1.05)
plt.xlabel("node")
plt.ylabel("centrality")
# Trailing semicolon suppresses the Legend repr in the cell output.
plt.legend(bbox_to_anchor=(1, 0.5));

In [None]:
# 2x2 grid of axes, one per centrality measure; nodes are coloured by their
# score under that measure.
fig, axs = plt.subplots(2,2)
for ax, (method, c) in zip(axs.flat, centrality_scores.items()):
    ax.set_title(method)
    nx.draw(G, pos=pos, node_size=20, width=0.4, node_color=list(c.values()), ax=ax)

In [None]:
# Budget handed to the sampler.
sampling_budget = 10

# Wrap G's adjacency matrix in the project's Graph type and run BS-GDA on it.
# Only the first return value is kept.
# NOTE(review): presumably sampling_set contains node indices, at most
# `sampling_budget` of them -- confirm against BS_GDA.bs_gda.
graph = Graph(nx.adjacency_matrix(G))
sampling_set, _ = BS_GDA().bs_gda(graph, sampling_budget)

In [None]:
# Indicator vector over the nodes: 1 at sampled positions, 0 elsewhere.
c = np.zeros(graph.num_nodes)
c[list(sampling_set)] = 1
# Sampled nodes get marker size 60, all others 20.
sizes = 40*c + 20
plt.figure(figsize=(3,3))
plt.title("Sampled nodes", size=10)
# The indicator doubles as the node colour.
nx.draw(G, pos=pos, node_size=sizes, width=0.4, node_color=c)

### Convert discrete sampling centrality to continuous vector

In [None]:
def propagate_centrality(G, p: float, start=None):
    """
    Spreads a unit signal from a single start node along the BFS tree of G.
    The start node is set to 1; every node discovered by the breadth-first
    search receives p times the value of the node it was discovered from.
    Nodes the search does not reach keep the value 0.
    :param G: graph (nodes are used directly as array indices, so they are
              assumed to be the integers 0..len(G)-1 -- TODO confirm for other inputs)
    :param p: factor applied to the signal at every hop
    :param start: node the signal starts from; drawn with
                  np.random.choice(len(G)) when None
    :return: array of length len(G) holding the propagated signal
    """
    signal = np.zeros(len(G))
    if start is None:
        start = np.random.choice(len(G))
    signal[start] = 1.
    # bfs_edges yields (parent, child) pairs in discovery order, so a parent's
    # value is already set when its children are reached.
    for parent, child in nx.bfs_edges(G, start):
        signal[child] += p * signal[parent]
    return signal

In [None]:
# One propagated signal (p = 0.7) per sampled node.
propagated_centrality = [propagate_centrality(G, 0.7, node) for node in sampling_set]
# Stack the signals row-wise and take the node-wise median across them.
sampling_centrality = np.median(np.vstack(propagated_centrality), axis=0)
# Standardised (z-score) version of the sampling centrality.
# NOTE(review): z_vals is not used in the visible cells, and this division
# produces nan/inf when the standard deviation is 0.
z_vals = (sampling_centrality - np.mean(sampling_centrality)) / np.std(sampling_centrality)

In [None]:
# Node marker size grows linearly with the sampling centrality (base size 20).
sizes = 40*sampling_centrality + 20
plt.figure(figsize=(3,3))
plt.title("Sampling centrality")
# The same values are used for the node colour.
nx.draw(G, pos=pos, node_size=sizes, width=0.4, node_color=sampling_centrality)

In [None]:
def diffusion_process(G, t_steps, u0=None, step_size=1.0):
    """
    Simulates a diffusion process du/dt = -L u on graph G, where L is the
    graph Laplacian.
    Column 0 of the result is u0 and column i is exp(-i * step_size * L) @ u0,
    i.e. the signal after i time steps of length step_size.
    :param G: graph
    :param t_steps: number of time steps (= number of columns of the result)
    :param u0: starting distribution / signal with len(G.nodes) entries;
               drawn uniformly at random from [0, 1) when None
    :param step_size: amount of diffusion time that passes per step
    :return: matrix U of shape (n, t_steps) where column i corresponds to the
             signal after i time steps.
    :raises ValueError: if t_steps is negative
    """
    # Imported locally so the function does not rely on `import scipy`
    # exposing the sparse submodules as attributes.
    from scipy.sparse import csc_matrix
    from scipy.sparse.linalg import expm

    if t_steps < 0:
        raise ValueError("t_steps must be non-negative")

    n = len(G.nodes)
    U = np.zeros((n, t_steps))
    if t_steps == 0:
        # No columns requested: nothing to simulate.
        return U
    if u0 is None:
        u0 = np.random.rand(n, 1)

    L = csc_matrix(nx.laplacian_matrix(G).astype('float'))
    # Propagator for ONE step. Scaling the exponent by t_steps here would
    # advance every iteration by t_steps units of time instead of one step.
    step_operator = expm(-step_size * L)

    U[:, 0] = np.asarray(u0, dtype=float).reshape(n)
    for i in range(1, t_steps):
        U[:, i] = (step_operator @ U[:, i-1]).reshape(n)
    return U

In [None]:
# Diffuse the sampled-node indicator `c` over G with t_steps=3; the returned
# matrix is shown as the cell output.
diffusion_process(G, 3, c)

## Correlation coefficients

In [None]:
# Turn each {node: score} dict into a plain array of scores (in dict order).
centralities = {
    name: np.array(list(scores.values()))
    for name, scores in centrality_scores.items()
}
# Add the BS-GDA based sampling centrality alongside the classical measures.
centralities['sampling_centrality'] = sampling_centrality

In [None]:
# One row per node, one column per centrality measure.
data = np.column_stack(list(centralities.values()))
df = pd.DataFrame(data, columns=centralities.keys())

In [None]:
# Preview the first rows of the centrality table.
df.head()

In [None]:
# Heatmap of the pairwise correlations between the centrality columns of df
# (DataFrame.corr() defaults to Pearson), annotated with two decimals.
plt.figure(figsize=(4,4))
plt.title("Pearson Correlation of Centrality Values", y=1.05, size=10)
# Trailing semicolon suppresses the Axes repr in the cell output.
sns.heatmap(df.corr(), annot=True, linewidth=0.1, fmt=".2f", cmap=plt.cm.copper);

## Distance to closest central node

In [None]:
def dist_to_closest_central_node(graph, sampling_set, centralities):
    """
    Returns the smallest shortest-path length between any sampled node and any
    "central" node.

    A node counts as central when its centrality is strictly greater than the
    median of `centralities`. A sampled node that is itself central gives
    distance 0.
    :param graph: networkx graph; node labels are matched against positions in
                  `centralities`, so they are assumed to be the integers
                  0..n-1 -- TODO confirm for other inputs
    :param sampling_set: iterable of sampled nodes
    :param centralities: array-like with one centrality value per node
    :return: the minimum distance, or np.inf when the sampling set is empty,
             no node is above the median, or no central node is reachable
    """
    scores = np.asarray(centralities)
    # A set gives O(1) membership tests in the loop below.
    central_nodes = set(np.flatnonzero(scores > np.median(scores)).tolist())

    closest_dist = np.inf
    for sampled in sampling_set:
        # Shortest path lengths from the sampled node to every reachable node.
        sp_lengths = nx.shortest_path_length(graph, sampled)
        # default=np.inf covers the case where no central node is reachable.
        dist = min(
            (d for target, d in sp_lengths.items() if target in central_nodes),
            default=np.inf,
        )
        if dist < closest_dist:
            closest_dist = dist
        if closest_dist == 0:
            # Distances cannot go below zero, so stop searching.
            break
    return closest_dist

In [None]:
# Smallest distance from a sampled node to a node whose betweenness centrality
# is above the median; the value is shown as the cell output.
dist_to_closest_central_node(G, sampling_set, centralities['betweenness_centrality'])