In [65]:
import numpy as np
import networkx as nx
import algorithms
import evaluate

In [66]:
def list_minus(x, y):
    """Return the distinct elements of `x` that do not occur in `y`.

    The result keeps the order in which elements first appear in `x`, so the
    output is deterministic. Callers in this notebook index matrices with the
    result (e.g. ``A[np.ix_(keep, keep)]``) and therefore need a stable order;
    a plain ``list(set(x) - set(y))`` leaves the order to the set implementation.

    Parameters
    ----------
    x : iterable of hashable items
        Candidate elements.
    y : iterable of hashable items
        Elements to exclude.

    Returns
    -------
    list
        Elements of `x` not contained in `y`, without duplicates.
    """
    excluded = set(y)
    # dict.fromkeys removes duplicates while preserving insertion order
    return [item for item in dict.fromkeys(x) if item not in excluded]

# TODO: parallelize assignment sampling
# TODO: ensure that coarse graph is not disconnected (i.e. every node is within some r-ball)

def landmark_partition(A, num_landmark, radius, affinity_fn, K=2):
    """Partition a graph by coarsening it into balls around random landmarks.

    Steps:
      1. sample `num_landmark` landmark nodes uniformly without replacement,
      2. grow a ball of up to `radius` hops around every landmark and store
         ``affinity_fn(r)`` for the nodes first reached at hop ``r``,
      3. randomly assign every node to one landmark with probability
         proportional to its affinities (landmarks are assigned to themselves),
      4. build the coarse graph whose nodes are the resulting cells,
      5. partition the coarse graph with ``algorithms.compute_spectral_wcut``,
      6. project the coarse partition back onto the input nodes.

    Parameters
    ----------
    A : 2-D array
        Adjacency matrix; entries > 0 are treated as edges. The layer-by-layer
        ball growth presumably relies on `A` being symmetric (undirected
        graph) -- TODO confirm.
    num_landmark : int
        Number of landmarks (= number of coarse nodes).
    radius : int
        Maximum number of hops of each ball.
    affinity_fn : callable
        Maps the hop distance ``r`` (1..radius) to an affinity weight.
    K : int, optional
        Forwarded to ``algorithms.compute_spectral_wcut``.

    Returns
    -------
    numpy.ndarray
        Vector of length ``n`` holding 1 for nodes whose cell is flagged with 1
        in the coarse indicator and 0 otherwise.

    Raises
    ------
    AssertionError
        If some node lies in no ball (all its affinities are zero).
    """
    n = A.shape[0]
    # 1. Choose landmark points
    L = np.random.choice(n, size=num_landmark, replace=False)
    # 2. Compute affinities within radius r
    D = np.zeros((n, num_landmark))
    for i in range(num_landmark):
        previous = np.array([], dtype=int)  # nodes first reached at hop r-2
        frontier = np.array([L[i]])         # nodes first reached at hop r-1
        for r in range(1, radius + 1):
            # Candidates are all nodes outside the two most recent layers;
            # seen nodes cannot be new neighbors.
            candidate_mask = np.ones(n, dtype=bool)
            candidate_mask[frontier] = False
            candidate_mask[previous] = False
            candidates = np.flatnonzero(candidate_mask)
            A_sub = A[np.ix_(frontier, candidates)]  # Edges from the frontier to the candidates
            reached = candidates[np.max(A_sub, axis=0) > 0]
            previous, frontier = frontier, reached
            if len(frontier) == 0:
                break
            D[frontier, i] = affinity_fn(r)

    # Overwrite rows for landmark points, such that they get assigned deterministically
    # (equivalent to letting their own weight go to infinity).
    # (Alternative: don't sample for landmark points at all)
    D[L] = np.eye(num_landmark)
    # TODO: Ensure that every point is within some ball
    assert np.min(np.max(D, axis=1)) > 0, 'Too few/ small balls: there are points in no ball'

    # 3. Assign vertices to landmark points
    P = D / D.sum(axis=1, keepdims=True)
    # Keep the assignment as an array so the element-wise comparisons below are explicit
    cell_ixs = np.empty(n, dtype=L.dtype)
    # TODO: parallelize
    for i in range(n):
        cell_ixs[i] = np.random.choice(a=L, p=P[i])

    # 4. Define coarsened graph
    # Boolean membership mask of every cell, computed once and reused below
    members = [cell_ixs == L[i] for i in range(num_landmark)]
    node_weights = np.array([mask.sum() for mask in members], dtype=float)
    A_coarse = np.zeros((num_landmark, num_landmark))
    for i in range(num_landmark):
        for j in range(i + 1, num_landmark):
            A_coarse[i, j] = A[np.ix_(members[i], members[j])].sum()
    A_coarse += A_coarse.T
    # 5. Partition ball-graph
    x_coarse = np.asarray(algorithms.compute_spectral_wcut(A=A_coarse, w=node_weights, K=K))
    # NOTE(review): other code in this notebook indexes the solver result as
    # x_coarse[k], i.e. as a (K, num_nodes) indicator matrix. For such a result
    # np.where(x_coarse == 1)[0] would yield cluster indices instead of node
    # indices, so use the first row in that case (a flat vector is used as is).
    if x_coarse.ndim > 1:
        x_coarse = x_coarse[0]
    # 6. Project partition to input graph
    x = np.zeros(n)
    for i in np.where(x_coarse == 1)[0]:
        x[members[i]] = 1
    return x

In [102]:
import copy

def graph_growing_partition(A, num_landmarks, choose_neighbor_fn, num_meta_landmarks=False, K=2):
    """Compute K-way partition coarsening the input graph into growing regions.

    Landmark nodes repeatedly absorb one neighbor each (chosen by
    `choose_neighbor_fn`) until only the landmark regions are left; the
    coarsened graph is then partitioned with
    ``algorithms.compute_spectral_wcut`` and the result is projected back.

    Parameters
    ----------
    A : 2-D array
        Adjacency matrix of the input graph. It is deep-copied, the caller's
        matrix is not modified.
    num_landmarks : int
        Number of regions to grow.
    choose_neighbor_fn : callable
        Called as ``choose_neighbor_fn(base_node=..., forbidden_neighbors=..., A=...)``;
        returns the index of the node to merge into `base_node` or ``None``.
    num_meta_landmarks : int or False, optional
        If ``False`` the landmarks are sampled uniformly. Otherwise
        ``landmark_fn`` is used to derive the landmarks from this many
        meta landmarks.
    K : int, optional
        Number of clusters requested from the spectral partitioner.

    Returns
    -------
    x : numpy.ndarray
        For ``K == 2`` the first row of the projected indicator (length ``n``),
        otherwise the full ``(K, n)`` indicator array.
    node_sets : list of set
        Original node indices contained in every coarse node.
    """
    import warnings

    A = copy.deepcopy(A)  # Don't want to destroy input matrix
    n = A.shape[0]
    # 1. Choose landmark points
    if num_meta_landmarks is False:
        landmarks = np.random.choice(n, size=num_landmarks, replace=False)
        node_sets = [{v} for v in np.arange(n)]
    else:
        A, node_sets, landmarks = landmark_fn(A=A, num_meta_landmarks=num_meta_landmarks,
                                              num_landmarks=num_landmarks)
    # 2. Loop until converged
    while len(node_sets) > num_landmarks:
        merged_any = False
        # 3. Choose neighbors for all landmark points
        for i in range(len(landmarks)):
            neighbor = choose_neighbor_fn(base_node=landmarks[i], forbidden_neighbors=landmarks, A=A)
            # 4. Merge nodes to obtain new graph
            if neighbor is not None:
                A, node_sets, landmarks = merge_edge(base_node=landmarks[i], node_to_merge=neighbor, A=A,
                                                     node_sets=node_sets, landmarks=landmarks)
                merged_any = True
        if not merged_any:
            # No landmark has an admissible neighbor left, so the remaining
            # nodes can never be absorbed. Stop here instead of looping forever
            # and partition the coarsened graph as it is.
            warnings.warn('Some nodes cannot be reached from any landmark; '
                          'stopping the coarsening early.')
            break
    # 5. Partition coarsened graph
    x_coarse = algorithms.compute_spectral_wcut(A=A, w=np.array([len(V) for V in node_sets]), K=K)
    # 6. Project partition to input graph
    x = np.zeros((K, n))
    for k in range(K):
        for i in np.where(x_coarse[k] == 1)[0]:
            x[k, list(node_sets[i])] = 1
    if K == 2:
        # For a bipartition the first indicator row is sufficient
        x = x[0]
    return x, node_sets

In [87]:
# TODO: after building A_meta, how to handle isolated vertices? One possibility: restrict to LCC, throw away
# isolated meta landmarks

def landmark_fn(A, num_meta_landmarks, num_landmarks, sizes=None):
    """Derive landmarks by clustering randomly sampled meta landmarks.

    `num_meta_landmarks` nodes are sampled, connected by a similarity built from
    direct edges and common neighbors, and split into `num_landmarks` groups
    with ``algorithms.compute_spectral_wcut``. Every group is merged into its
    member with the smallest node index.

    Parameters
    ----------
    A : 2-D numpy array
        Adjacency matrix. A copy is modified; the caller's matrix is left as is.
    num_meta_landmarks : int
        Number of nodes sampled as meta landmarks.
    num_landmarks : int
        Number of groups the meta landmarks are partitioned into.
    sizes : sequence of int, optional
        Sizes of consecutive ground-truth blocks. If given, the number of
        meta landmarks of every group that fall into each block is printed.

    Returns
    -------
    A : numpy.ndarray
        Adjacency matrix after merging, with the rows/columns of merged-away
        nodes removed and a zero diagonal.
    node_sets : list of set
        Original node indices represented by every row of the returned `A`.
    landmarks : list of int
        Positions in `node_sets` whose set contains a meta landmark.

    Raises
    ------
    ValueError
        If the partitioner leaves one of the groups empty.
    """
    A = A.copy()  # The merge below adds rows/columns in place
    n = A.shape[0]
    # 1. Choose meta landmarks
    meta_landmarks = np.random.choice(n, size=num_meta_landmarks, replace=False)
    # 2. Compute their edge weights
    # NOTE(review): a meta landmark without any edge has d == 0, which makes the
    # divisions below produce inf/nan -- see the TODO on isolated vertices.
    A_sub = A[meta_landmarks]
    d = np.sum(A_sub, axis=1, keepdims=True)
    common_neighbors = A_sub @ A_sub.T
    A_meta = A[np.ix_(meta_landmarks, meta_landmarks)] * (1 / d + 1 / d.T)  # Connections of order 1
    A_meta += common_neighbors / (d + d.T + common_neighbors)  # Connections of order 2
    np.fill_diagonal(A_meta, 0)
    # 3. Partition landmarks
    X_meta = algorithms.compute_spectral_wcut(A=A_meta, K=num_landmarks)
    groups = [meta_landmarks[X_meta[k] == 1] for k in range(num_landmarks)]
    # 4. Merge partition: always keep node with smallest index
    node_sets = [{v} for v in np.arange(n)]
    nodes_to_remove = []
    for k, group in enumerate(groups):  # Iterate over all sets to merge
        if len(group) == 0:
            raise ValueError(f'Meta landmark group {k} is empty; cannot derive a landmark from it.')
        l_min = np.min(group)  # Index to keep
        for l in group:  # Iterate over indices to merge
            if l != l_min:
                node_sets[l_min] = node_sets[l_min].union({l})
                A[l_min] += A[l]
                A[:, l_min] += A[l]
                nodes_to_remove.append(l)
    np.fill_diagonal(A, 0)
    # Remove merged nodes from node sets (they are still singletons at their own index)
    removed = set(nodes_to_remove)
    node_sets = [node_set for idx, node_set in enumerate(node_sets) if idx not in removed]
    # Find indices of new landmarks
    meta_set = set(meta_landmarks)
    landmarks = [i for i, node_set in enumerate(node_sets) if meta_set & node_set]
    # Remove merged nodes from adjacency matrix. A boolean mask keeps the
    # remaining rows in ascending order, i.e. aligned with `node_sets`.
    keep = np.ones(n, dtype=bool)
    keep[np.asarray(nodes_to_remove, dtype=int)] = False
    A = A[np.ix_(keep, keep)]
    # For evaluation: test how the grouped meta_landmarks distribute over ground-truth cluster
    if sizes is not None:
        print('Evaluate grouping of meta landmarks with ground truth')
        blocks = []
        pos = 0
        for i in range(len(sizes)):
            blocks.append(set(np.arange(pos, pos + sizes[i])))
            pos += sizes[i]
        for k in range(num_landmarks):
            nodes_per_block = np.zeros(len(sizes))
            group = set(groups[k])
            for i, block in enumerate(blocks):
                nodes_per_block[i] = len(block.intersection(group))
            print(f'Nodes per block: {nodes_per_block}')
    return A, node_sets, landmarks

In [81]:
# Scratch check for the ground-truth evaluation: split the index range
# 0..sum(sizes)-1 into consecutive blocks of the given sizes.
v = {13, 2, 10, 20, 5, 33}
sizes = [10, 15, 17]
blocks, pos = [], 0
for i, size in enumerate(sizes):
    stop = pos + size
    blocks.append(set(np.arange(pos, stop)))
    pos = stop

In [83]:
# Count, for every ground-truth block, how many nodes of `v` it contains
nodes_per_block = np.zeros(len(sizes))
for j in range(len(blocks)):
    block = blocks[j]
    nodes_per_block[j] = len(v & block)
    

In [84]:
# Display the per-block counts computed in the previous cell
nodes_per_block

array([2., 3., 1.])

In [80]:
# Display the ground-truth blocks (consecutive index ranges built from `sizes`)
blocks

[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
 {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
 {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41}]

# Test landmark merging

In [69]:
# Hand-checkable example for the merge step of landmark_fn: a fixed grouping
# `X_meta` of the meta landmarks replaces the spectral partitioning.
A = np.array([[0, 1, 1, 0, 0, 0, 0, 0, 0],
              [1, 0, 0, 0, 1, 0, 0, 0, 0],
              [1, 0, 0, 0, 1, 0, 0, 0, 0],
              [0, 0, 0, 0, 1, 0, 0, 0, 0],
              [0, 1, 1, 1, 0, 1, 0, 0, 0],
              [0, 0, 0, 0, 1, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 0, 0, 1, 0],
              [0, 0, 0, 0, 0, 1, 1, 0, 1],
              [0, 0, 0, 0, 0, 0, 0, 1, 0]])
# Every entry has to match its transposed counterpart, not just some of them
assert np.array_equal(A, A.T), 'A not symmetrical.'
X_meta = np.array([[1, 1, 0, 0, 0],
                   [0, 0, 1, 1, 1]])
meta_landmarks = np.array([2, 5, 3, 4, 8])
num_landmarks = 2
n = A.shape[0]

node_sets = [{v} for v in np.arange(n)]
nodes_to_remove = []
for k in range(num_landmarks): # Iterate over all sets to merge
    l_min = np.min(meta_landmarks[X_meta[k]==1]) # Index to keep
    for l in meta_landmarks[X_meta[k]==1]: # Iterate over indices to merge
        if l != l_min:
            node_sets[l_min] = node_sets[l_min].union({l})
            A[l_min] += A[l]
            A[:, l_min] += A[l]
            nodes_to_remove.append(l)
np.fill_diagonal(A, 0)
# Remove merged nodes from nodeset
for node in nodes_to_remove:
    node_sets.remove({node})
# Find indices of new landmarks
landmarks = []
for i in range(len(node_sets)):
    if len(set(meta_landmarks).intersection(node_sets[i])) != 0:
        landmarks.append(i)
# Remove merged nodes from adjacency matrix; ascending order keeps the rows of
# A aligned with the entries of node_sets
removed_nodes = set(nodes_to_remove)
nodes_to_keep = [node for node in range(n) if node not in removed_nodes]
A = A[np.ix_(nodes_to_keep, nodes_to_keep)]
print(nodes_to_keep)
print(A)
print(node_sets)
print(landmarks)

[0, 1, 2, 3, 6, 7]
[[0 1 1 0 0 0]
 [1 0 0 1 0 0]
 [1 0 0 2 0 1]
 [0 1 2 0 0 1]
 [0 0 0 0 0 1]
 [0 0 1 1 1 0]]
[{0}, {1}, {2, 5}, {8, 3, 4}, {6}, {7}]
[2, 3]


In [70]:
def choose_neighbor_uniform(base_node, forbidden_neighbors, A):
    """Pick one admissible neighbor of `base_node` uniformly at random.

    Admissible neighbors are the nodes ``j`` with ``A[base_node, j] > 0`` that
    are not listed in `forbidden_neighbors`. Candidates are kept in ascending
    index order so that the draw is reproducible for a fixed random state.

    Parameters
    ----------
    base_node : int
        Row index of the node whose neighbor is chosen.
    forbidden_neighbors : sequence of int
        Node indices that must not be returned.
    A : 2-D numpy array
        Adjacency matrix.

    Returns
    -------
    int or None
        Index of the chosen neighbor, ``None`` if there is no admissible one.
    """
    adjacent = np.flatnonzero(A[base_node] > 0)
    allowed_neighbors = adjacent[~np.isin(adjacent, forbidden_neighbors)]
    if len(allowed_neighbors) == 0:
        return None
    return np.random.choice(a=allowed_neighbors)

def choose_neighbor_weighted(base_node, forbidden_neighbors, A):
    """Pick one admissible neighbor with probability proportional to its edge weight.

    Admissible neighbors are the nodes ``j`` with ``A[base_node, j] > 0`` that
    are not listed in `forbidden_neighbors`. Candidates are kept in ascending
    index order so that the draw is reproducible for a fixed random state.

    Parameters
    ----------
    base_node : int
        Row index of the node whose neighbor is chosen.
    forbidden_neighbors : sequence of int
        Node indices that must not be returned.
    A : 2-D numpy array
        Adjacency matrix; ``A[base_node, j]`` is used as sampling weight.

    Returns
    -------
    int or None
        Index of the chosen neighbor, ``None`` if there is no admissible one.
    """
    adjacent = np.flatnonzero(A[base_node] > 0)
    allowed_neighbors = adjacent[~np.isin(adjacent, forbidden_neighbors)]
    if len(allowed_neighbors) == 0:
        return None
    weights = A[base_node, allowed_neighbors]
    return np.random.choice(a=allowed_neighbors, p=weights / weights.sum())

def choose_neighbor_maxweight(base_node, forbidden_neighbors, A):
    """Pick the admissible neighbor with the largest edge weight.

    Admissible neighbors are the nodes ``j`` with ``A[base_node, j] > 0`` that
    are not listed in `forbidden_neighbors`. The candidates are shuffled before
    the arg-max so that ties are broken at random.

    Parameters
    ----------
    base_node : int
        Row index of the node whose neighbor is chosen.
    forbidden_neighbors : sequence of int
        Node indices that must not be returned.
    A : 2-D numpy array
        Adjacency matrix; ``A[base_node, j]`` is the edge weight.

    Returns
    -------
    int or None
        Index of the chosen neighbor, ``None`` if there is no admissible one.
    """
    adjacent = np.flatnonzero(A[base_node] > 0)
    # Boolean indexing returns a fresh array, so shuffling it in place is safe
    allowed_neighbors = adjacent[~np.isin(adjacent, forbidden_neighbors)]
    if len(allowed_neighbors) == 0:
        return None
    np.random.shuffle(allowed_neighbors)
    weights = A[base_node, allowed_neighbors]
    return allowed_neighbors[np.argmax(weights)]

def merge_edge(base_node, node_to_merge, A, node_sets, landmarks):
    """Contract the edge (base_node, node_to_merge) into `base_node`.

    The row of `node_to_merge` is added to the row and to the column of
    `base_node` in place, the self loop is cleared and the row/column of
    `node_to_merge` is dropped. `node_sets` is updated in place; landmark
    indices above `node_to_merge` are shifted down by one.

    Returns the reduced adjacency matrix, the node sets and the new list of
    landmark indices.
    """
    # Fold the absorbed node's edges into the base node
    absorbed_row = A[node_to_merge]
    A[base_node] += absorbed_row
    A[:, base_node] += absorbed_row
    A[base_node, base_node] = 0
    # Drop the absorbed node's row, then its column
    A = np.delete(np.delete(A, node_to_merge, axis=0), node_to_merge, axis=1)
    # The base node now represents both node sets
    node_sets[base_node] = node_sets[base_node] | node_sets[node_to_merge]
    node_sets.pop(node_to_merge)
    # Indices behind the removed node move up by one position
    shifted_landmarks = []
    for l in landmarks:
        shifted_landmarks.append(l - (l > node_to_merge))
    return A, node_sets, shifted_landmarks

# Test on SBM

In [114]:
# Stochastic block model with one large and two small blocks.
# Seed both the graph generator and numpy's global random state so that the
# experiments below give the same numbers after Restart & Run All.
SEED = 0
np.random.seed(SEED)
sizes = np.array([150, 50, 50])
p_within = .3    # edge probability inside a block
p_between = .05  # edge probability between different blocks
p = np.full((len(sizes), len(sizes)), p_between)
np.fill_diagonal(p, p_within)
G = nx.stochastic_block_model(sizes=sizes, p=p, seed=SEED)
A = nx.to_numpy_array(G)

In [89]:
def diff_fractions(l):
    """Return the absolute difference between the first two entries of `l`."""
    first, second = l[0], l[1]
    gap = first - second
    return np.abs(gap)

In [90]:
# Compare the three neighbor-selection strategies on the SBM adjacency matrix `A`
# for several numbers of landmarks (uniformly sampled landmarks, default K).
# `diff` collects diff_fractions(...) of the evaluate.evaluate_SBM_partition result;
# only a single repetition is run per setting, so the printed mean is one sample.
names = ['Uniform', 'Weighted', 'Max']
for j, choose_neighbor_fn in enumerate([choose_neighbor_uniform, choose_neighbor_weighted, choose_neighbor_maxweight]):
    print(f'Choosing strategy: {names[j]}')
    for num_landmarks in [2, 20, 50, 100]:
        diff = []
        for i in range(1):
            x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                                   choose_neighbor_fn=choose_neighbor_fn)
            diff.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
        print(f'num_landmarks={num_landmarks}: frac_diff={np.array(diff).mean()}')

Choosing strategy: Uniform
num_landmarks=2: frac_diff=0.21333333333333337
num_landmarks=20: frac_diff=0.18666666666666665
num_landmarks=50: frac_diff=0.3466666666666667
num_landmarks=100: frac_diff=0.7533333333333333
Choosing strategy: Weighted
num_landmarks=2: frac_diff=0.1333333333333333




num_landmarks=20: frac_diff=0.2666666666666667
num_landmarks=50: frac_diff=0.45333333333333325
num_landmarks=100: frac_diff=0.64
Choosing strategy: Max
num_landmarks=2: frac_diff=0.0
num_landmarks=20: frac_diff=0.053333333333333344
num_landmarks=50: frac_diff=0.6933333333333332
num_landmarks=100: frac_diff=0.7


In [96]:
# Max-weight strategy with few landmarks, averaged over 10 random repetitions.
# The second print evaluates `node_sets` of the last repetition only, because
# `node_sets` is overwritten in every pass of the inner loop.
for num_landmarks in [2, 5, 10]:
    avg = []
    for _ in range(10):
        x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                               choose_neighbor_fn=choose_neighbor_maxweight,
                                               K=2)
        avg.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
    print(f'num_landmarks: {num_landmarks}, fractions={np.array(avg).mean()}')
    print(f'Node sets after coarsening: {evaluate.evaluate_regions(node_sets=node_sets, sizes=sizes)}')

num_landmarks: 2, fractions=0.272
Node sets after coarsening: [[0.77, 0.23], [0.73, 0.27]]
num_landmarks: 5, fractions=0.32533333333333336
Node sets after coarsening: [[1.0, 0.0], [1.0, 0.0], [0.4, 0.6], [0.95, 0.05], [0.4, 0.6]]
num_landmarks: 10, fractions=0.4960000000000001
Node sets after coarsening: [[0.85, 0.15], [0.85, 0.15], [0.9, 0.1], [0.85, 0.15], [0.8, 0.2], [0.8, 0.2], [0.85, 0.15], [0.85, 0.15], [0.75, 0.25], [0.0, 1.0]]


In [98]:
# Print the signature and docstring of graph_growing_partition
help(graph_growing_partition)

Help on function graph_growing_partition in module __main__:

graph_growing_partition(A, num_landmarks, choose_neighbor_fn, num_meta_landmarks=False, K=2)
    Compute K-way partition coarsening the input graph into growing regions.



In [115]:
# Landmarks derived from meta landmarks (landmark_fn is used because
# num_meta_landmarks is passed): one run per combination, printing the raw
# result of evaluate.evaluate_SBM_partition instead of an averaged difference.
for num_meta_landmarks in [10, 20, 30, 40, 50, 60]:
    print(f'num_meta_landmarks: {num_meta_landmarks}')
    for num_landmarks in [2, 3, 10]:
        x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                               choose_neighbor_fn=choose_neighbor_maxweight,
                                               K=2, num_meta_landmarks=num_meta_landmarks)
        fractions = evaluate.evaluate_SBM_partition(x=x, sizes=sizes)
        print(f'num_landmarks: {num_landmarks}, fractions={fractions}')
#         print(f'Node sets after coarsening: {evaluate.evaluate_regions(node_sets=node_sets, sizes=sizes)}')

num_meta_landmarks: 10
num_landmarks: 2, fractions=[0.7, 0.48, 0.0]
num_landmarks: 3, fractions=[0.7933333333333333, 1.0, 0.0]
num_landmarks: 10, fractions=[0.11333333333333333, 0.2, 0.96]
num_meta_landmarks: 20
num_landmarks: 2, fractions=[0.6866666666666666, 0.12, 0.44]
num_landmarks: 3, fractions=[0.02, 0.58, 0.98]
num_landmarks: 10, fractions=[0.9533333333333334, 0.66, 1.0]
num_meta_landmarks: 30
num_landmarks: 2, fractions=[0.7266666666666667, 0.52, 0.0]
num_landmarks: 3, fractions=[0.24666666666666667, 0.54, 0.28]
num_landmarks: 10, fractions=[0.9866666666666667, 0.58, 1.0]
num_meta_landmarks: 40
num_landmarks: 2, fractions=[0.26666666666666666, 0.46, 1.0]
num_landmarks: 3, fractions=[0.22, 0.26, 0.52]
num_landmarks: 10, fractions=[0.9333333333333333, 0.32, 1.0]
num_meta_landmarks: 50
num_landmarks: 2, fractions=[0.7333333333333333, 0.12, 0.6]
num_landmarks: 3, fractions=[0.16, 1.0, 0.0]
num_landmarks: 10, fractions=[0.9, 0.4, 0.98]
num_meta_landmarks: 60
num_landmarks: 2, fracti

In [109]:
# Same sweep over meta landmarks as above, but averaging diff_fractions(...)
# over 10 random repetitions for every (num_meta_landmarks, num_landmarks) pair.
for num_meta_landmarks in [10, 20, 30, 40, 50, 60]:
    print(f'num_meta_landmarks: {num_meta_landmarks}')
    for num_landmarks in [2, 3, 4, 5, 10]:
        avg = []
        for _ in range(10):
            x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                                   choose_neighbor_fn=choose_neighbor_maxweight,
                                                   K=2, num_meta_landmarks=num_meta_landmarks)
            avg.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
        print(f'num_landmarks: {num_landmarks}, fractions={np.array(avg).mean()}')
#         print(f'Node sets after coarsening: {evaluate.evaluate_regions(node_sets=node_sets, sizes=sizes)}')

num_meta_landmarks: 10
num_landmarks: 2, fractions=0.5453333333333333
num_landmarks: 3, fractions=0.4126666666666667
num_landmarks: 4, fractions=0.484
num_landmarks: 5, fractions=0.5773333333333334
num_landmarks: 10, fractions=0.5706666666666667
num_meta_landmarks: 20
num_landmarks: 2, fractions=0.6853333333333333
num_landmarks: 3, fractions=0.3986666666666666
num_landmarks: 4, fractions=0.42733333333333334
num_landmarks: 5, fractions=0.5213333333333334
num_landmarks: 10, fractions=0.6799999999999999
num_meta_landmarks: 30
num_landmarks: 2, fractions=0.7046666666666667
num_landmarks: 3, fractions=0.5193333333333332
num_landmarks: 4, fractions=0.3186666666666667
num_landmarks: 5, fractions=0.27466666666666667
num_landmarks: 10, fractions=0.32533333333333336
num_meta_landmarks: 40
num_landmarks: 2, fractions=0.7233333333333334
num_landmarks: 3, fractions=0.6013333333333334
num_landmarks: 4, fractions=0.39599999999999996
num_landmarks: 5, fractions=0.23466666666666666
num_landmarks: 10, f

In [106]:
# Scratch check: np.random.shuffle permutes an integer array in place.
# Note that this rebinds `x`, the name also used for partition results.
x = np.arange(10)
np.random.shuffle(x)
print(x)

[7 0 4 1 8 9 6 3 5 2]


In [117]:
# Max-weight strategy without meta landmarks over a wider range of landmark
# counts; the printed value is the mean of diff_fractions(...) over 2 repetitions.
for num_landmarks in [3, 10, 50, 100]:
    avg = []
    for _ in range(2):
        x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                               choose_neighbor_fn=choose_neighbor_maxweight,
                                               K=2)
        avg.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
    print(f'num_landmarks: {num_landmarks}, fractions={np.array(avg).mean()}')

num_landmarks: 3, fractions=0.4446666666666667
num_landmarks: 10, fractions=0.39866666666666667
num_landmarks: 50, fractions=0.8306666666666667
num_landmarks: 100, fractions=0.728


In [68]:
# Evaluate whatever `node_sets` and `sizes` currently hold in the kernel (hidden state:
# both are set by other cells, and this cell's execution count is lower than theirs).
# NOTE(review): the recorded output lists two values per region while `sizes` in the SBM
# cell has three entries -- presumably produced with an earlier `sizes`; re-run to confirm.
evaluate.evaluate_regions(node_sets=node_sets, sizes=sizes)

[[0.7575, 0.2425], [0.78, 0.22], [0.755, 0.245], [0.7575, 0.2425], [0.7, 0.3]]