In [1]:
import numpy as np
import networkx as nx
from sklearn.decomposition import TruncatedSVD as TSVD
from graspy.embed import AdjacencySpectralEmbed

from tqdm import tqdm_notebook as tqdm

In [2]:
def gaussian_covariance(X, Y, bandwidth = 0.5):
    """Gaussian kernel matrix between the rows of X and the rows of Y.

    Entry (i, j) of the result is
    exp(-0.5 * ||X[i] - Y[j]||^2 / bandwidth**2).

    Parameters
    ----------
    X, Y : 2-D array-likes with the same number of columns.
    bandwidth : kernel width; defaults to 0.5.

    Returns
    -------
    Array of shape (len(X), len(Y)).
    """
    # Broadcasting (n, 1, d) against (1, m, d) yields every pairwise difference.
    pairwise = np.expand_dims(X, axis=1) - np.expand_dims(Y, axis=0)
    sq_dists = (pairwise**2).sum(axis=2)
    return np.exp(-0.5 * sq_dists / bandwidth**2)

In [128]:
def statistic(X, Y):
    """Two-sample kernel U-statistic between the rows of X and the rows of Y.

    Computes

        mean_{i != j} k(X_i, X_j) - 2 * mean_{i, j} k(X_i, Y_j)
            + mean_{i != j} k(Y_i, Y_j)

    with the Gaussian kernel from ``gaussian_covariance`` (bandwidth 0.5).

    Parameters
    ----------
    X : array of shape (N, d), N >= 2.
    Y : array of shape (M, d), M >= 2.

    Returns
    -------
    float
    """
    N, _ = X.shape
    M, _ = Y.shape
    # The Gaussian kernel is 1 on the diagonal, so subtracting the identity
    # removes the i == j terms before averaging over the N*(N-1) pairs.
    x_stat = np.sum(gaussian_covariance(X, X, 0.5) - np.eye(N))/(N*(N-1))
    y_stat = np.sum(gaussian_covariance(Y, Y, 0.5) - np.eye(M))/(M*(M-1))
    xy_stat = np.sum(gaussian_covariance(X, Y, 0.5))/(N*M)
    # Fix: the Y within-sample term was replaced by a second copy of the X
    # term, and the cross term was missing its factor of 2.
    return x_stat - 2*xy_stat + y_stat

def statistic2(m, N):
    """Combine the blocks of a pooled matrix into the two-sample statistic.

    ``m`` is treated as a 2x2 block matrix whose first ``N`` rows/columns
    belong to the first sample and the remaining ones to the second sample.

    Parameters
    ----------
    m : square 2-D array.
    N : size of the first sample (split point for rows and columns).

    Returns
    -------
    sum(first-sample block) - 2 * sum(cross block) + sum(second-sample block)
    """
    x_block = m[:N,:N]
    y_block = m[N:,N:]
    xy_block = m[:N,N:]
    # Fix: the second-sample block was never used; the first-sample block was
    # summed twice instead.
    return np.sum(x_block) - 2*np.sum(xy_block) + np.sum(y_block)

def gen_stat(X, Y):
    """Return the normalised kernel blocks that make up the two-sample statistic.

    Parameters
    ----------
    X : array of shape (N, d), N >= 2.
    Y : array of shape (M, d), M >= 2.

    Returns
    -------
    x_stat : (N, N) array, (K(X, X) - I) / (N * (N - 1))
    y_stat : (M, M) array, (K(Y, Y) - I) / (M * (M - 1))
    xy_stat : (N, M) array, K(X, Y) / (N * M)

    where K is ``gaussian_covariance`` with bandwidth 0.5. Summing each block
    gives the corresponding term computed in ``statistic``.
    """
    N, _ = X.shape
    M, _ = Y.shape
    # Fix: parenthesise before dividing. Previously only the identity matrix
    # was divided by N*(N-1), so the kernel was left unnormalised and its
    # diagonal was not zeroed.
    x_stat = (gaussian_covariance(X, X, 0.5) - np.eye(N))/(N*(N-1))
    y_stat = (gaussian_covariance(Y, Y, 0.5) - np.eye(M))/(M*(M-1))
    xy_stat = gaussian_covariance(X, Y, 0.5)/(N*M)
    return x_stat, y_stat, xy_stat

In [185]:
def ASE2(A):
    """Two-column spectral embedding of A built from a truncated SVD.

    Fits ``TSVD`` with its default settings on ``A``, takes the first two
    rows of ``components_`` and scales each by the square root of the
    matching singular value.

    Returns
    -------
    Array with one row per column of ``A`` and two columns.
    """
    svd = TSVD()
    svd.fit(A)
    components = svd.components_
    singular_values = svd.singular_values_
    leading = np.array([components[0, :], components[1, :]])
    scaling = np.diag(singular_values[:2]**(1/2))
    return leading.T @ scaling

def ASE(A): #too SLOW! Takes like 100 hrs... can't validate using this.
    """Embed A with ``AdjacencySpectralEmbed`` using the 'randomized' algorithm.

    Returns whatever ``fit_transform`` produces for ``A``, unchanged.
    """
    embedder = AdjacencySpectralEmbed(algorithm='randomized')
    return embedder.fit_transform(A)

In [186]:
# Sanity check of the shuffling used by the resampling routines below:
# shuffle each sample on its own, then shuffle the pooled sample and keep
# the first N entries.
N = 10
x = np.arange(N)
y = 10 * np.arange(1, N + 1)
z = np.concatenate((x, y))
print(x[np.random.choice(np.arange(N), size=N, replace=False)])
print(y[np.random.choice(np.arange(N), size=N, replace=False)])
print(z[np.random.choice(np.arange(2 * N), size=2 * N, replace=False)][:N])

[7 3 6 8 5 4 0 2 1 9]
[ 30 100  60  40  10  90  70  50  20  80]
[  5  20  60   3   2   9 100  10  90  40]


In [187]:
def bootstrap(X, Y, M = 200, alpha = 0.05):
    """Estimate a critical value for ``statistic`` by permuting the pooled sample.

    The rows of X and Y are pooled, shuffled without replacement ``M`` times,
    and split back into a first group of ``len(X)`` rows and a second group
    holding the rest; ``statistic`` is evaluated on every split.

    Parameters
    ----------
    X : array of shape (N, d).
    Y : array of shape (M2, d).
    M : number of resamples.
    alpha : significance level.

    Returns
    -------
    The sorted resampled statistic at index ceil((1 - alpha) * M), clamped to
    the last element.
    """
    N, _ = X.shape
    M2, _ = Y.shape
    Z = np.concatenate((X,Y))

    statistics = np.zeros(M)
    for i in range(M):
        # Shuffle the pooled rows, then split at the original size of X.
        order = np.random.choice(np.arange(0,N+M2), size = int(N+M2), replace = False)
        bs_Z = Z[order]
        statistics[i] = statistic(bs_Z[:N,:], bs_Z[N:,:])

    sorted_ = np.sort(statistics)
    # Fix: ceil((1 - alpha) * M) equals M whenever alpha < 1/M, which indexed
    # one past the end of the array. Clamp to the largest resampled value.
    rej_ind = min(int(np.ceil((1 - alpha)*M)), M - 1)
    return sorted_[rej_ind]

def bootstrap2(X, Y, M = 200, alpha = 0.05):
    """Critical value by permutation, reusing kernel blocks computed once.

    The blocks from ``gen_stat`` are assembled into one pooled
    (N + M2) x (N + M2) matrix. Each resample applies the same random
    permutation to its rows and columns and evaluates ``statistic2`` on it.

    Parameters
    ----------
    X : array of shape (N, d).
    Y : array of shape (M2, d).
    M : number of resamples.
    alpha : significance level.

    Returns
    -------
    The sorted resampled statistic at index ceil((1 - alpha) * M), clamped to
    the last element.
    """
    N, _ = X.shape
    M2, _ = Y.shape

    xm, ym, xym = gen_stat(X, Y)
    # Fix: the lower-left block must be the transpose of the cross block.
    # The previous nested concatenate placed xym there untransposed and
    # failed outright when N != M2.
    m = np.block([[xm, xym], [xym.T, ym]])
    # NOTE(review): each block carries its own normalisation from gen_stat,
    # so after a permutation some entries sit in a block with a different
    # scale than the one applied to them -- confirm this is acceptable.
    statistics = np.zeros(M)
    for i in range(M):
        ind = np.random.choice(np.arange(0,N+M2), size = int(N+M2), replace = False)
        # Permute rows and columns together so the diagonal stays on the diagonal.
        statistics[i] = statistic2(m[ind,:][:,ind],N)

    sorted_ = np.sort(statistics)
    # Fix: ceil((1 - alpha) * M) equals M whenever alpha < 1/M, which indexed
    # one past the end of the array. Clamp to the largest resampled value.
    rej_ind = min(int(np.ceil((1 - alpha)*M)), M - 1)
    return sorted_[rej_ind]

In [188]:
def gen_data(n, eps):
    """Set up the two-block model and draw two graphs from the baseline model.

    Parameters
    ----------
    n : total number of vertices; blocks get int(0.4 * n) and int(0.6 * n).
    eps : amount added to the within-block probabilities in ``probsB``.

    Returns
    -------
    sizes : list with the two block sizes.
    probsA : 2x2 baseline block-probability matrix.
    probsB : 2x2 matrix with ``eps`` added on the diagonal.
    A1, A2 : adjacency matrices of two graphs, both sampled from ``probsA``
        (``probsB`` is returned but not sampled from here).
    """
    pi = [0.4, 0.6]
    sizes = [int(fraction*n) for fraction in pi]

    within, between = 0.5, 0.2
    probsA = np.array([
        [within, between],
        [between, within]])
    probsB = np.array([
        [within + eps, between],
        [between, within + eps]])

    # Sample and convert one graph at a time to keep the draw order fixed.
    A1, A2 = (
        nx.to_numpy_array(nx.stochastic_block_model(sizes, probsA))
        for _ in range(2)
    )
    return sizes, probsA, probsB, A1, A2

In [189]:
def median_heuristic(X1, X2):
    """Flip the sign of columns of X1 so their medians agree in sign with X2's.

    For each column, the product of the two column medians is computed; where
    that product is not strictly positive the corresponding column of X1 is
    multiplied by -1.

    Returns
    -------
    (X1 with the sign flips applied, X2 unchanged)
    """
    median_product = np.median(X1, axis=0) * np.median(X2, axis=0)
    signs = np.where(median_product > 0, 1, -1)
    # A (1, d) row of signs broadcasts over every row of X1.
    return signs.reshape(1, -1) * X1, X2

In [190]:
#np.random.seed(1)
# One-off run: embed a pair of graphs from gen_data, align the embeddings,
# and print the critical value estimated by bootstrap2.
n, eps = 100, 0.1
iters = 1000
sizes, probsA, probsB, A1, A2 = gen_data(n, eps)
X1_hat, X2_hat = median_heuristic(ASE(A1), ASE(A2))
critical_value = bootstrap2(X1_hat, X2_hat)
print(critical_value)

NameError: name 'AdjacencySpectralEmbed' is not defined

In [191]:
def estimated_power(n, eps, M, alpha, iters):
    """Estimate the rejection rate using a single, fixed critical value.

    The critical value comes from ``bootstrap`` applied to the aligned
    embeddings of the graph pair returned by ``gen_data``. Then, ``iters``
    times, one graph is drawn from ``probsA`` and one from ``probsB``; the
    pair is embedded, aligned and scored with ``statistic``.

    Parameters
    ----------
    n, eps : forwarded to ``gen_data``.
    M, alpha : forwarded to ``bootstrap``.
    iters : number of simulated graph pairs.

    Returns
    -------
    Fraction of the ``iters`` pairs whose statistic exceeds the critical value.
    """
    sizes, probsA, probsB, A1, A2 = gen_data(n, eps)

    ref_X, ref_Y = median_heuristic(ASE(A1), ASE(A2))
    critical_value = bootstrap(ref_X, ref_Y, M, alpha)

    rejections = 0
    for _ in range(iters):
        # Draw both graphs before embedding either one.
        A = nx.to_numpy_array(nx.stochastic_block_model(sizes, probsA))
        B = nx.to_numpy_array(nx.stochastic_block_model(sizes, probsB))
        X_hat, Y_hat = median_heuristic(ASE(A), ASE(B))
        if statistic(X_hat, Y_hat) > critical_value:
            rejections += 1
    return rejections/iters

In [192]:
def estimated_power2(n, eps, M, alpha, iters):
    """Estimate the rejection rate with a critical value recomputed per sample.

    For each of ``iters`` repetitions, one graph is drawn from ``probsA`` and
    one from ``probsB``; both are embedded and aligned, a critical value is
    obtained from ``bootstrap`` on that pair, and the pair's own ``statistic``
    is compared against it.

    Parameters
    ----------
    n, eps : forwarded to ``gen_data``.
    M, alpha : forwarded to ``bootstrap``.
    iters : number of simulated graph pairs.

    Returns
    -------
    Fraction of the ``iters`` pairs whose statistic exceeds its critical value.
    """
    # Only the sizes and probability matrices are needed, and they do not
    # change between iterations, so gen_data is called once instead of
    # sampling two unused graphs on every pass.
    sizes, probsA, probsB, _, _ = gen_data(n, eps)

    rejections = 0
    for i in range(iters):
        G3 = nx.stochastic_block_model(sizes, probsA)
        A = nx.to_numpy_array(G3)
        G4 = nx.stochastic_block_model(sizes, probsB)
        B = nx.to_numpy_array(G4)
        X_hat = ASE(A)
        Y_hat = ASE(B)
        X_hat, Y_hat = median_heuristic(X_hat, Y_hat)
        critical_value = bootstrap(X_hat, Y_hat, M, alpha)

        # Fix: score the pair generated in this iteration. The previous code
        # read the notebook-level globals X1_hat / X2_hat instead.
        U = statistic(X_hat, Y_hat)
        if U > critical_value:
            rejections += 1
    return rejections/iters

In [193]:
def monte_carlo(ns, eps, M = 200, alpha = 0.05, iters = 1000):
    """Tabulate ``estimated_power`` over a grid of sizes and perturbations.

    Parameters
    ----------
    ns : sequence of graph sizes (one table row each).
    eps : sequence of perturbation values (one table column each).
    M, alpha, iters : forwarded to ``estimated_power``.

    Returns
    -------
    Array of shape (len(ns), len(eps)) with the estimated rejection rates.
    """
    n_rows, n_cols = len(ns), len(eps)
    powers = np.zeros(shape = (n_rows, n_cols))
    # The progress bar advances once per graph size.
    for row in tqdm(range(n_rows)):
        for col in range(n_cols):
            powers[row, col] = estimated_power(ns[row], eps[col], M, alpha, iters)
    return powers

In [1]:
# Larger grid of sizes, currently disabled:
# monte_ns = [100, 200, 500, 1000]
monte_ns, monte_eps = [100], [0.02]
power_table = monte_carlo(monte_ns, monte_eps)

NameError: name 'monte_carlo' is not defined

In [2]:
# Print the hard-coded reference table next to the simulated power table.
print("Paper results")
print(np.array([[.06,.09,.27],[.09,.17,.83],[.1,.43,1],[.14,1,1]]))
print("Sim results")
# Fix: the built-in round() raises TypeError on an ndarray; np.round rounds
# elementwise.
print(np.round(power_table,2))

Paper results
[[0.06 0.09 0.27]
 [0.09 0.17 0.83]
 [0.1  0.43 1.  ]
 [0.14 1.   1.  ]]
Sim results


NameError: name 'power_table' is not defined