In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import random

Graph $(V, E)$ with nodes $v\subset\mathbb{N}$ as subsets of integers
Strategy for merging:
1. Draw a set of edges $F\subseteq E$ somehow
2. Identify connected components $\mathcal{C}=\{C_1,\dotsc, C_k\}$ of $(V, F)$. They form the new set of nodes
3. Compute new adjacency matrix $A^\prime$ by summing entries of $A$ blockwise 

#### Rule of thumb for choosing temperature $\tau$:
If there are $m$ edges in an unweighted graph, each edge is chosen independently with probability $m^{-\tau}$, so the number of chosen edges is distributed as Bin($m$, $m^{-\tau}$). Therefore, the expected number of chosen edges is $m^{1-\tau}$, and a choice of 
$$
\tau=\frac{\log(2)}{\log(m)}
$$
leads to $m/2$ in expectation.

In [2]:
class FindCuts(object):
    """Repeatedly coarsen a graph by contracting sampled edges and cut each coarse graph.

    Parameters
    ----------
    A : array-like of shape (n, n)
        Adjacency matrix of the original graph.
    sample_fn : callable
        Called as ``sample_fn(A)``; must return an adjacency matrix (same shape
        as its argument) whose non-zero entries mark the edges to contract.
    partition_fn : callable
        Called as ``partition_fn(V, A, n=...)`` with the coarse nodes, the coarse
        adjacency matrix and the number of original nodes; its result is passed
        to ``quality_fn`` and to ``wcut``.
    quality_fn : callable
        Called as ``quality_fn(P)`` on the result of ``partition_fn``.
    """

    def __init__(self, A, sample_fn, partition_fn, quality_fn):
        self.A = A
        # One singleton set per node. The node count is taken from A itself
        # rather than from a notebook-global graph, so the class works on a
        # fresh kernel and for any adjacency matrix.
        self.V = [{i} for i in range(np.shape(A)[0])]
        self.sample_fn = sample_fn
        self.partition_fn = partition_fn
        self.quality_fn = quality_fn

    def _merge_nodes(self, V, component):
        """Return the union of the sets of V that are indexed in component."""
        return set().union(*(V[idx] for idx in component))

    @staticmethod
    def _coarsen_adjacency(A, components):
        """Sum the entries of A blockwise over pairs of distinct components.

        Entry (i, j) with i < j is the sum of ``A[v, w]`` for v in
        ``components[i]`` and w in ``components[j]``; the result is mirrored
        into the lower triangle and the diagonal is zero.
        """
        A = np.asarray(A)
        n_coarse = len(components)
        # membership[v, i] == 1 iff node v belongs to component i.
        membership = np.zeros((A.shape[0], n_coarse))
        for i, component in enumerate(components):
            membership[list(component), i] = 1
        block_sums = membership.T @ A @ membership
        # Keep only the i < j blocks and mirror them, so within-component
        # weight is dropped and the result is symmetric.
        upper = np.triu(block_sums, k=1)
        return upper + upper.T

    def _merge_edges(self, V, A, A_sampled):
        """Contract the edges in A_sampled to obtain a coarser graph.

        Returns the merged node sets (one per connected component of the
        sampled graph) and the coarse adjacency matrix obtained by summing A
        blockwise.
        """
        G_sampled = nx.from_numpy_array(A_sampled)
        connected_components = list(nx.connected_components(G_sampled))
        V_coarse = [self._merge_nodes(V, component) for component in connected_components]
        A_coarse = self._coarsen_adjacency(A, connected_components)
        return V_coarse, A_coarse

    def get_cuts(self, T):
        """Run up to T coarsening rounds, printing the partition quality after each.

        Each round samples edges of the current graph, contracts them, and
        partitions the coarse graph. Stops early once the graph has collapsed
        to a single node. Returns None; results are only printed.

        Note: relies on a module-level function ``wcut`` being defined.
        """
        V = self.V
        A = self.A
        for t in range(T):
            A_sampled = self.sample_fn(A)
            V, A = self._merge_edges(V, A, A_sampled)
            if len(V)==1:
                # Nothing left to partition.
                return
            P = self.partition_fn(V, A, n=self.A.shape[0])
            # Quality is always evaluated on the ORIGINAL graph with unit node weights.
            print(self.quality_fn(P), wcut(self.A, np.ones(self.A.shape[0]), P))
        return
    

# Weighted ratio cut with weighted kernelized k-means

__Input:__ Adjacency matrix $A$ with weighted edges, $w$ vector of node weights. <br>
__Output:__ Cluster indicator vector x

Weighted ratio cut (Wcut) is given by
$$
Wcut(A,A^c) = \left(\frac{1}{w(A)}+\frac{1}{w(A^c)}\right)cut(A,A^c)\,,
$$
where
$$
w(A) = \sum_{i\in A}w(i)\quad\text{and}\quad cut(A,A^c)=\sum_{i\in A, j\in A^c}w(i,j)
$$

In [3]:
def sample_edges(A, temperature=.7):
    """Sample each edge independently with a temperature-scaled probability.

    The probability of an edge is ``(weight / total_weight) ** temperature``,
    where the weights are read from the strict upper triangle of A.

    Parameters
    ----------
    A : np.ndarray of shape (n, n)
        Adjacency matrix; only the entries above the diagonal are used.
    temperature : float
        Exponent applied to the normalised edge weights.

    Returns
    -------
    np.ndarray
        Symmetric 0/1 matrix with the shape and dtype of A marking the sampled
        edges. All zeros if A has no positive total edge weight.
    """
    edges_idx = np.triu_indices_from(A, k=1)
    weights = A[edges_idx]
    total = weights.sum()
    A_sampled = np.zeros_like(A)
    if total <= 0:
        # Nothing to normalise by: 0/0 would give NaN probabilities and make
        # the binomial draw raise. An edgeless graph has no edges to sample.
        return A_sampled

    # Non-edges get probability 0 explicitly; otherwise 0 ** 0 == 1 would turn
    # every non-edge into a sampled edge when temperature == 0.
    edge_probs = np.zeros(weights.shape)
    has_edge = weights > 0
    edge_probs[has_edge] = (weights[has_edge] / total) ** temperature

    is_sampled = np.random.binomial(n=1, p=edge_probs) == 1
    A_sampled[edges_idx[0][is_sampled], edges_idx[1][is_sampled]] = 1
    # Mirror the upper triangle to make the result symmetric.
    A_sampled += A_sampled.T
    return A_sampled

def wcut(A, w, x):
    """Weighted ratio cut value of the bipartition encoded by the flat 0/1 vector x.

    A is the adjacency matrix and w the vector of node weights; passing
    ``w=None`` uses unit weights.
    """
    weights = np.ones(A.shape[0]) if w is None else w
    complement = 1 - x
    # 1 / w(S) + 1 / w(S^c)
    balance = 1 / (x @ weights) + 1 / (complement @ weights)
    # Scale first, then contract with the complement (same order as x @ A @ (1 - x)).
    return (balance * (x @ A)) @ complement

def compute_wcut(A, w, sigma=0, t_max=100):
    """Compute a weighted ratio cut with weighted kernel 2-means.

    Parameters
    ----------
    A : np.ndarray of shape (n, n)
        Adjacency matrix.
    w : np.ndarray of shape (n,) or None
        Node weights; ``None`` uses unit weights.
    sigma : float
        Diagonal shift added to the kernel as ``sigma * diag(1 / w)``.
    t_max : int
        Maximum number of reassignment iterations.

    Returns
    -------
    np.ndarray of shape (n,)
        Cluster label (0 or 1) of every node. The initialisation is random
        (global NumPy RNG), so results vary between calls unless seeded.
    """
    # Compute appropriate kernel matrix
    if w is None:
        w = np.ones(A.shape[0])

    L = np.diag(np.sum(A, axis=1)) - A
    W_inv = np.diag(1 / w)
    K = W_inv * sigma + (W_inv @ (np.diag(w) - L)) @ W_inv

    # Perform weighted kernelized k-means with random initialization
    n = A.shape[0]
    x_old = np.random.binomial(n=1, p=.5, size=n)
    if n > 0:
        # Hacky way to avoid having a vector of all 0 or all 1
        x_old[0] = 1 - x_old[-1]
    # With t_max == 0 the loop never runs; return the initial labels instead
    # of hitting an unbound local.
    x_new = x_old
    D = np.zeros((n, 2))
    for t in range(t_max):
        for b in [0, 1]:
            w_b = w * (x_old==b)
            w_b_sum = w_b.sum()
            if w_b_sum == 0:
                # Empty cluster: its centroid is undefined (0/0). Make it
                # infinitely far away so it attracts no points, rather than
                # letting NaN distances drag every point into it.
                D[:, b] = np.inf
                continue
            # Distance to the centroid of cluster b up to the term K[i, i],
            # which does not depend on b and so does not affect the argmin.
            D[:, b] = -2 * (K @ w_b) / w_b_sum + np.sum(w_b.reshape(-1, 1) * K * w_b) / w_b_sum ** 2
        x_new = np.argmin(D, axis=1)
        if (x_old==x_new).all():
            break
        x_old = x_new
    return x_new

def compute_best_wcut(A, w=None, sigmas=np.linspace(-2, 2, 20)):
    """Run compute_wcut once per sigma and keep the partition with the smallest wcut.

    Returns the best indicator vector found, or an empty list if no candidate
    had a cut value below infinity (NaN values never compare as smaller).
    """
    x_best, best_val = [], np.inf
    for sigma in sigmas:
        candidate = compute_wcut(A, w, sigma=sigma)
        candidate_val = wcut(A, w, candidate)
        if candidate_val < best_val:
            x_best, best_val = candidate, candidate_val
    return x_best

def partition(V, A, n):
    """Cut the coarse graph and lift the result to the n original nodes.

    V is the list of coarse nodes (sets of original node indices) and A the
    coarse adjacency matrix. Each coarse node is weighted by its size, and
    every original node inherits the label of the coarse node containing it.
    """
    node_weights = np.array([len(members) for members in V])
    coarse_labels = compute_best_wcut(A=A, w=node_weights)
    x = np.zeros(n)
    for idx in range(len(V)):
        x[list(V[idx])] = coarse_labels[idx]
    return x

def evaluate_partition_quality(x, sizes):
    """Return, for each consecutive block of the given sizes, the mean of x over that block.

    For a 0/1 indicator vector this is the fraction of each block that is
    assigned label 1.
    """
    fractions = []
    start = 0
    for block_size in sizes:
        stop = start + block_size
        fractions.append(x[start:stop].mean())
        start = stop
    return fractions

### Example: Stochastic block model

In [4]:
# Two blocks of 50 nodes each; p gives the edge probabilities within (0.8)
# and between (0.1) the blocks. No seed is set, so the graph differs per run.
sizes = np.array([50, 50])
G = nx.stochastic_block_model(sizes=sizes, p=[[.8, .1],
                                                 [.1, .8]])
# Dense adjacency matrix of the sampled graph.
A = nx.to_numpy_array(G)
# One singleton set per node.
V = [{i} for i in range(A.shape[0])]

In [5]:
# Baseline: wcut (unit node weights) of 10 random bipartitions, where each
# node is assigned label 1 independently with probability 0.5.
rnd_cuts = []
for _ in range(10):
    P = np.random.binomial(n=1, p=.5, size=A.shape[0])
    # Fraction of each block that received label 1.
    print(evaluate_partition_quality(P, sizes=sizes))
    rnd_cuts.append(wcut(A, None, P))
# Average cut value over the random partitions.
mean = np.array(rnd_cuts).mean()
print(mean)

[0.54, 0.42]
[0.6, 0.38]
[0.42, 0.6]
[0.44, 0.52]
[0.58, 0.42]
[0.6, 0.64]
[0.5, 0.5]
[0.42, 0.5]
[0.4, 0.5]
[0.46, 0.54]
45.26410419712602


In [6]:
# Reference: wcut of the partition that separates the first block (the first
# sizes[0] nodes, labelled 1) from the remaining nodes, with unit node weights.
x_blocks = np.zeros(A.shape[0])
x_blocks[:sizes[0]] = 1
wcut(A, np.ones(A.shape[0]), x_blocks)

10.56

In [7]:
# Run up to 30 coarsening rounds. Each printed line shows the per-block
# fraction of nodes labelled 1, followed by the wcut on the original graph.
findCuts = FindCuts(A=A, sample_fn=sample_edges, partition_fn=partition, 
                    quality_fn=lambda x: evaluate_partition_quality(x, sizes=sizes))
findCuts.get_cuts(T=30)

[0.36, 0.56] 44.32367149758454
[0.62, 0.42] 43.7099358974359
[0.58, 0.2] 41.488020176544765
[0.68, 0.52] 43.87500000000001
[0.34, 0.56] 43.43434343434343
[0.24, 0.66] 39.03030303030303
[0.42, 0.66] 43.51851851851852
[0.92, 0.64] 41.31701631701632
[0.86, 0.58] 42.65873015873016
[0.14, 0.4] 42.719431760527655
[0.98, 0.7] 41.07142857142857
[0.98, 0.68] 40.25513819985825
[0.92, 0.98] 44.21052631578947
[0.96, 0.98] 42.611683848797256
[0.02, 0.02] 42.3469387755102
[0.02, 0.06] 43.489583333333336
[0.98, 0.98] 43.87755102040816
[0.02, 0.04] 43.6426116838488
[0.02, 0.02] 43.87755102040816
[0.98, 0.98] 43.87755102040816


In [8]:
# For comparison: best kernel 2-means cut computed directly on the original
# graph (no coarsening), over 100 values of sigma in [-10, 10].
wcut(A=A, w=None, x=compute_best_wcut(A, sigmas=np.linspace(-10,10,100)))

42.938311688311686

# Yu and Shi Postprocessing: spectral approach to Wcut