In [1]:
import numpy as np
import networkx as nx
import algorithms
import evaluate

In [2]:
def list_minus(x, y):
    return list(set(x) - set(y))

# TODO: parallelize assignment sampling
# TODO: ensure that coarse graph is not disconnected (i.e. every node is within some r-ball)

def landmark_partition(A, num_landmark, radius, affinity_fn, K=2):
    """ Compute k-way partition coarsening the input graph into balls """
    n = A.shape[0]
    # 1. Choose landmark points
    L = np.random.choice(n, size=num_landmark, replace=False)
    # 2. Compute distances within radius r
    D = np.zeros((n, num_landmark))
    for i in range(num_landmark):
        U_old = []
        U = [L[i]]
        for r in range(1, radius+1):
            U_comp = np.array(list_minus(np.arange(n), U)) # Complement of U
            A_sub = A[np.ix_(U, U_comp)] # Submatrix from U to its complement          
            neighbors = list_minus(U_comp[np.max(A_sub, axis=0)>0], U_old) # Seen nodes cannot be new neighbors
            U, U_old = neighbors, U
            if len(U) == 0:
                break            
            D[list(U), i] = affinity_fn(r) 


    # Overwrite rows for landmark points, such that they get assigned deterministically (weight probto infty)
    # (Alternative: don't sample for landmark points at all)
    D[L] = np.eye(num_landmark)
    # TODO: Ensure that every point is within some ball
    assert np.min(np.max(D, axis=1)) > 0, 'Too few/ small balls: there are points in no ball'
    
    # 3. Assign vertices to landmark points
    P = 1 / D.sum(axis=1, keepdims=True) * D
    cell_ixs = list(np.zeros(n))
    # TODO: parallelize
    for i in range(n):
        cell_ixs[i] = np.random.choice(a=L, p=P[i])
        
    # 4. Define coarsened-graph
    node_weights = np.zeros(num_landmark)
    A_coarse = np.zeros((num_landmark, num_landmark))
    for i in range(num_landmark):
        node_weights[i] = cell_ixs.count(L[i])
    for i in range(num_landmark):
        for j in range(i+1, num_landmark):
            A_coarse[i, j] = A[np.ix_(cell_ixs==L[i], cell_ixs==L[j])].sum()
    A_coarse += A_coarse.T
    # 5. Partition ball-graph
    x_coarse = algorithms.compute_spectral_wcut(A=A_coarse, w=node_weights, K=K)
    # 6. Project partition to input graph
    x = np.zeros(n)
    for i in np.where(x_coarse==1)[0]:
        x[np.array(cell_ixs)==L[i]] = 1
    return x

In [3]:
import copy

def graph_growing_partition(A, num_landmarks, choose_neighbor_fn, num_meta_landmarks=False,K=2):
    """Compute K-way partition coarsening the input graph into growing regions."""
    A = copy.deepcopy(A) # Don't want to destroy input matrix
    n = A.shape[0]
    # 1. Choose landmark points
    if num_meta_landmarks is False:
        landmarks = np.random.choice(n, size=num_landmarks, replace=False)        
        node_sets = [{v} for v in np.arange(n)]        
    else:
        A, node_sets, landmarks = landmark_fn(A=A, num_meta_landmarks=num_meta_landmarks, 
                                              num_landmarks=num_landmarks)        
    # 2. Loop until converged    
    while len(node_sets) > num_landmarks:
    # 3. Choose neighbors for all landmark points
        for i in range(num_landmarks):
            neighbor = choose_neighbor_fn(base_node=landmarks[i], forbidden_neighbors=landmarks, A=A)
    # 4. Merge nodes to obtain new graph
            if neighbor is not None:
                A, node_sets, landmarks = merge_edge(base_node=landmarks[i], node_to_merge=neighbor, A=A, 
                                                node_sets=node_sets, landmarks=landmarks)
    # 5. Partition coarsened graph
    x_coarse = algorithms.compute_spectral_wcut(A=A, w=np.array([len(V) for V in node_sets]), K=K)
    # 6. Project partition to input graph
#     if K == 2:
#         x = np.zeros(n)
#         for i in np.where(x_coarse==1)[0]:
#             x[list(node_sets[i])] = 1
#     else:
    x = np.zeros((K, n))
    for k in range(K):
        for i in np.where(x_coarse[k]==1)[0]:
            x[k, list(node_sets[i])] = 1
    if K==2:
        x = x[0]
    return x, node_sets

In [4]:
def landmark_fn(A, num_meta_landmarks, num_landmarks):
    n = A.shape[0]
    # 1. Choose meta landmarks
    meta_landmarks = np.random.choice(n, size=num_meta_landmarks, replace=False)  
    # 2. Compute their edge weights
    A_sub = A[np.ix_(meta_landmarks, meta_landmarks)]
    d = np.sum(A_sub, axis=1, keepdims=True)
    common_neighbors = A_sub @ A_sub.T
    A_meta = common_neighbors / (d + d.T + common_neighbors)
    # 3. Partition landmarks
    X_meta = algorithms.compute_spectral_wcut(A=A_meta, K=num_landmarks)
    # 4. Merge partition: always keep node with smallest index
    # Node sets
    node_sets = [{v} for v in np.arange(n)]
    nodes_to_remove = []
    for k in range(num_landmarks):
        l_min = np.min(meta_landmarks[X_meta[k]==1])
        for l in meta_landmarks[X_meta[k]==1]:
            if l != l_min:
                node_sets[l_min] = node_sets[l_min].union({l})
                nodes_to_remove.append({l})
    for node in nodes_to_remove:
        node_sets.remove(node)
    #     
    
    return

In [22]:
n = 13
X_meta = np.array([[0,0,1,1,0,0],
                   [1,0,0,0,1,0],
                   [0,1,0,0,0,1]])
meta_landmarks = np.array([7,5,3,9,10,12])
num_landmarks = 3

node_sets = [{v} for v in np.arange(n)]
nodes_to_remove = []
for k in range(num_landmarks):
    l_min = np.min(meta_landmarks[X_meta[k]==1])
    for l in meta_landmarks[X_meta[k]==1]:
        if l != l_min:
            node_sets[l_min] = node_sets[l_min].union({l})
            nodes_to_remove.append({l})
for node in nodes_to_remove:
    node_sets.remove(node)
print(node_sets)

[{0}, {1}, {2}, {9, 3}, {4}, {12, 5}, {6}, {10, 7}, {8}, {11}]


In [18]:
np.min([1,2,3])

1

In [19]:
x = [{v} for v in np.arange(5)]
print(x)
x.remove({0})
print(x)

[{0}, {1}, {2}, {3}, {4}]


TypeError: remove() takes exactly one argument (2 given)

In [5]:
def choose_neighbor_uniform(base_node, forbidden_neighbors, A):
    allowed_neighbors = list_minus(np.where(A[base_node] > 0)[0], forbidden_neighbors)
    neighbor = None
    if len(allowed_neighbors) > 0:
        neighbor = np.random.choice(a=allowed_neighbors)
    return neighbor

def choose_neighbor_weighted(base_node, forbidden_neighbors, A):
    allowed_neighbors = list_minus(np.where(A[base_node] > 0)[0], forbidden_neighbors)
    neighbor = None
    if len(allowed_neighbors) > 0:
        weights = A[base_node, allowed_neighbors]
        neighbor = np.random.choice(a=allowed_neighbors, p=weights / weights.sum())
    return neighbor

def choose_neighbor_maxweight(base_node, forbidden_neighbors, A):
    allowed_neighbors = list_minus(np.where(A[base_node] > 0)[0], forbidden_neighbors)
    neighbor = None
    if len(allowed_neighbors) > 0:
        np.random.shuffle(allowed_neighbors)
        weights = A[base_node, allowed_neighbors]
        neighbor = allowed_neighbors[np.argmax(weights)]
    return neighbor

def merge_edge(base_node, node_to_merge, A, node_sets, landmarks):
    # Update adjacency matrix
    A[base_node] += A[node_to_merge]
    A[:, base_node] += A[node_to_merge]
    A[base_node, base_node] = 0
    A = np.delete(A, node_to_merge, axis=0)
    A = np.delete(A, node_to_merge, axis=1)
    # Update node sets
    node_sets[base_node] = node_sets[base_node].union(node_sets[node_to_merge])
    del node_sets[node_to_merge]
    # Update landmarks
    landmarks = [l - (l > node_to_merge) for l in landmarks]
    return A, node_sets, landmarks

# Test on SBM

In [6]:
sizes = np.array([150, 50])
p_within = .3
p_between = .05
p = p_between * np.ones((len(sizes), len(sizes)))
np.fill_diagonal(p, p_within)
G = nx.stochastic_block_model(sizes=sizes, p=p)
A = nx.to_numpy_array(G)

In [42]:
import matplotlib.pyplot as plt
nx.draw(G, with_labels=True)

  if cb.is_numlike(alpha):


In [7]:
def diff_fractions(l):
    return(np.abs(l[0] - l[1]))

In [8]:
names = ['Uniform', 'Weighted', 'Max']
for j, choose_neighbor_fn in enumerate([choose_neighbor_uniform, choose_neighbor_weighted, choose_neighbor_maxweight]):
    print(f'Choosing strategy: {names[j]}')
    for num_landmarks in [2, 20, 50, 100]:
        diff = []
        for i in range(1):
            x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                                   choose_neighbor_fn=choose_neighbor_fn)
            diff.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
        print(f'num_landmarks={num_landmarks}: frac_diff={np.array(diff).mean()}')

Choosing strategy: Uniform
num_landmarks=2: frac_diff=0.08000000000000007
num_landmarks=20: frac_diff=0.41333333333333333
num_landmarks=50: frac_diff=0.5333333333333334
num_landmarks=100: frac_diff=0.8
Choosing strategy: Weighted
num_landmarks=2: frac_diff=0.3466666666666667
num_landmarks=20: frac_diff=0.17333333333333334




num_landmarks=50: frac_diff=0.2666666666666667
num_landmarks=100: frac_diff=0.7
Choosing strategy: Max
num_landmarks=2: frac_diff=0.0
num_landmarks=20: frac_diff=0.6
num_landmarks=50: frac_diff=0.45333333333333325
num_landmarks=100: frac_diff=0.6599999999999999


In [9]:
for num_landmarks in [2, 5, 10, 20, 50]:
    avg = []
    for _ in range(10):
        x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                               choose_neighbor_fn=choose_neighbor_maxweight,
                                               K=2)
        avg.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
    print(f'num_landmarks: {num_landmarks}, fractions={np.array(avg).mean()}')

num_landmarks: 2, fractions=0.06933333333333334
num_landmarks: 5, fractions=0.3226666666666667
num_landmarks: 10, fractions=0.43133333333333335
num_landmarks: 20, fractions=0.5846666666666667
num_landmarks: 50, fractions=0.6120000000000001


In [106]:
x = np.array(np.arange(10))
np.random.shuffle(x)
print(x)

[7 0 4 1 8 9 6 3 5 2]


In [117]:
for num_landmarks in [3, 10, 50, 100]:
    avg = []
    for _ in range(2):
        x, node_sets = graph_growing_partition(A=A, num_landmarks=num_landmarks, 
                                               choose_neighbor_fn=choose_neighbor_maxweight,
                                               K=2)
        avg.append(diff_fractions(evaluate.evaluate_SBM_partition(x=x, sizes=sizes)))
    print(f'num_landmarks: {num_landmarks}, fractions={np.array(avg).mean()}')

num_landmarks: 3, fractions=0.4446666666666667
num_landmarks: 10, fractions=0.39866666666666667
num_landmarks: 50, fractions=0.8306666666666667
num_landmarks: 100, fractions=0.728


In [68]:
evaluate.evaluate_regions(node_sets=node_sets, sizes=sizes)

[[0.7575, 0.2425], [0.78, 0.22], [0.755, 0.245], [0.7575, 0.2425], [0.7, 0.3]]