In [1]:
import numpy as np
import networkx as nx
from graspy.embed import AdjacencySpectralEmbed

from tqdm import tqdm_notebook as tqdm

SyntaxError: invalid syntax (nonpar.py, line 80)

In [None]:
def gaussian_covariance(X, Y, bandwidth = 0.5):
    """Gaussian kernel matrix between the rows of X and the rows of Y.

    Parameters
    ----------
    X, Y : 2-D arrays whose rows are points; their column counts must
        broadcast against each other.
    bandwidth : kernel width; squared distances are divided by bandwidth**2.

    Returns
    -------
    Array with one row per row of X and one column per row of Y, holding
    exp(-0.5 * squared_distance / bandwidth**2) for each pair.
    """
    # Insert singleton axes so the subtraction broadcasts over every (row of X, row of Y) pair.
    left = np.asanyarray(X)[:, np.newaxis]
    right = np.asanyarray(Y)[np.newaxis]
    squared_distance = np.square(left - right).sum(axis=2)
    return np.exp(-0.5 * squared_distance / bandwidth**2)

In [None]:
def statistic(X, Y, bandwidth = 0.5):
    """Two-sample kernel statistic between the rows of X and the rows of Y.

    Computes (mean off-diagonal kernel within X) - 2 * (mean kernel between
    X and Y) + (mean off-diagonal kernel within Y), using the Gaussian
    kernel from ``gaussian_covariance``.

    Parameters
    ----------
    X, Y : 2-D arrays, one point per row. Each needs at least two rows,
        otherwise the within-sample normaliser N*(N-1) is zero.
    bandwidth : Gaussian kernel width passed to ``gaussian_covariance``.

    Returns
    -------
    Scalar value of the statistic.
    """
    N, _ = X.shape
    M, _ = Y.shape
    # A Gaussian kernel matrix of a sample with itself has ones on the
    # diagonal; subtracting the identity removes the i == j terms.
    x_stat = np.sum(gaussian_covariance(X, X, bandwidth) - np.eye(N)) / (N * (N - 1))
    y_stat = np.sum(gaussian_covariance(Y, Y, bandwidth) - np.eye(M)) / (M * (M - 1))
    xy_stat = np.sum(gaussian_covariance(X, Y, bandwidth)) / (N * M)
    # The cross term is counted twice and the Y term must be included.
    return x_stat - 2 * xy_stat + y_stat

def statistic2(m, N):
    """Combine the blocks of a square matrix into a two-sample statistic.

    The matrix is split at index N into a top-left block, a bottom-right
    block and a top-right (cross) block; the result is
    sum(top-left) - 2 * sum(top-right) + sum(bottom-right).

    Parameters
    ----------
    m : 2-D array. NOTE(review): presumably the already-normalised kernel
        blocks of the pooled sample, confirm against the caller.
    N : number of leading rows/columns belonging to the first sample.

    Returns
    -------
    Scalar value of the statistic.
    """
    x_stat = m[:N, :N]
    y_stat = m[N:, N:]
    xy_stat = m[:N, N:]
    # The last term uses the second sample's block, not the first again.
    return np.sum(x_stat) - 2 * np.sum(xy_stat) + np.sum(y_stat)

def gen_stat(X, Y, bandwidth = 0.5):
    """Return the three normalised kernel blocks used by the two-sample statistic.

    Parameters
    ----------
    X, Y : 2-D arrays, one point per row. Each needs at least two rows,
        otherwise the within-sample normaliser N*(N-1) is zero.
    bandwidth : Gaussian kernel width passed to ``gaussian_covariance``.

    Returns
    -------
    (x_stat, y_stat, xy_stat) : the within-X kernel matrix with its diagonal
        ones removed, divided by N*(N-1); the same for Y with M*(M-1); and
        the X-versus-Y kernel matrix divided by N*M.
    """
    N, _ = X.shape
    M, _ = Y.shape
    # Parenthesise the difference so the whole matrix is normalised, not just
    # the identity; this matches the normalisation applied to xy_stat below.
    x_stat = (gaussian_covariance(X, X, bandwidth) - np.eye(N)) / (N * (N - 1))
    y_stat = (gaussian_covariance(Y, Y, bandwidth) - np.eye(M)) / (M * (M - 1))
    xy_stat = gaussian_covariance(X, Y, bandwidth) / (N * M)
    return x_stat, y_stat, xy_stat

In [None]:
def ASE(A):
    """Embed adjacency matrix A with graspy's AdjacencySpectralEmbed.

    Uses the 'randomized' algorithm and two embedding components, and
    returns whatever ``fit_transform`` produces for A.

    Original author's note: too slow, a full run takes on the order of
    100 hours, so the results cannot be validated with this embedding.
    """
    embedder = AdjacencySpectralEmbed(n_components=2, algorithm='randomized')
    return embedder.fit_transform(A)

In [None]:
def bootstrap(X, Y, M = 200, alpha = 0.05):
    """Estimate the rejection threshold of ``statistic`` by shuffling the pooled sample.

    The rows of X and Y are pooled, randomly reordered M times, and each
    reordering is split back into groups of the original sizes on which
    ``statistic`` is evaluated. Despite the name, rows are drawn without
    replacement, so every draw is a permutation of the pooled rows.

    Parameters
    ----------
    X, Y : 2-D arrays, one point per row, with the same number of columns.
    M : number of random reorderings; must be at least 1.
    alpha : significance level in [0, 1].

    Returns
    -------
    The sorted statistic at position ceil((1 - alpha) * M), clamped to the
    last available position.

    Raises
    ------
    ValueError : if M is smaller than 1.
    """
    if M < 1:
        raise ValueError("M must be at least 1")

    N, _ = X.shape
    M2, _ = Y.shape
    Z = np.concatenate((X, Y))
    total = N + M2

    statistics = np.zeros(M)
    for i in range(M):
        shuffled = Z[np.random.permutation(total)]
        statistics[i] = statistic(shuffled[:N, :], shuffled[N:, :])

    sorted_ = np.sort(statistics)
    rej_ind = int(np.ceil((1 - alpha) * M))
    # ceil((1 - alpha) * M) reaches M for small alpha or small M, which is one
    # past the end of the array; clamp so the largest statistic is used instead.
    rej_ind = min(max(rej_ind, 0), M - 1)
    return sorted_[rej_ind]

In [None]:
def median_heuristic(X1, X2):
    """Flip the sign of columns of X1 so their medians agree in sign with X2.

    For every column, the medians of X1 and X2 are multiplied together. A
    column of X1 is kept when that product is strictly positive and negated
    otherwise (including when the product is exactly zero).

    Returns
    -------
    (X1_aligned, X2) : a new array holding the sign-adjusted X1, and X2
        itself, unmodified.
    """
    median_product = np.median(X1, axis=0) * np.median(X2, axis=0)
    column_signs = np.where(median_product > 0, 1, -1)
    # Shape the signs as a single row so they broadcast down every row of X1.
    return column_signs.reshape(1, -1) * X1, X2

In [None]:
def gen_data(n, eps):
    """Build the block model settings and draw two reference graphs.

    Parameters
    ----------
    n : total number of vertices requested; the two block sizes are
        int(0.4 * n) and int(0.6 * n).
    eps : amount added to the within-block probabilities of probsB.

    Returns
    -------
    (sizes, probsA, probsB, A1, A2) : the block sizes, the baseline block
        probability matrix, the perturbed one, and the adjacency matrices
        of two stochastic block model graphs.

    NOTE(review): both A1 and A2 are sampled from probsA; probsB is only
    returned, not sampled here. Confirm that this is intended.
    """
    sizes = [int(share * n) for share in (0.4, 0.6)]

    probsA = np.array([[0.5, 0.2],
                       [0.2, 0.5]])

    within_b = 0.5 + eps
    probsB = np.array([[within_b, 0.2],
                       [0.2, within_b]])

    # Each graph is sampled and converted before the next one is sampled.
    reference_adjacencies = [
        nx.to_numpy_array(nx.stochastic_block_model(sizes, probsA))
        for _ in range(2)
    ]
    A1, A2 = reference_adjacencies
    return sizes, probsA, probsB, A1, A2

In [None]:
def estimated_power(n, eps, M, alpha, iters):
    """Estimate how often the test rejects for one (n, eps) setting.

    A critical value is computed once from the two reference graphs returned
    by ``gen_data``. Then, ``iters`` times, one graph is drawn from probsA and
    one from probsB, both are embedded and sign-aligned, and the statistic is
    compared with the critical value.

    Parameters
    ----------
    n, eps : forwarded to ``gen_data``.
    M, alpha : forwarded to ``bootstrap``.
    iters : number of simulated graph pairs.

    Returns
    -------
    Fraction of the ``iters`` pairs whose statistic exceeded the critical value.
    """
    sizes, probsA, probsB, A1, A2 = gen_data(n, eps)

    ref_X, ref_Y = median_heuristic(ASE(A1), ASE(A2))
    critical_value = bootstrap(ref_X, ref_Y, M, alpha)

    def _sample_adjacency(block_probs):
        # Draw one stochastic block model graph and return its adjacency matrix.
        return nx.to_numpy_array(nx.stochastic_block_model(sizes, block_probs))

    rejections = 0
    for _ in range(iters):
        # Both graphs are sampled before either is embedded.
        A = _sample_adjacency(probsA)
        B = _sample_adjacency(probsB)
        X_hat, Y_hat = median_heuristic(ASE(A), ASE(B))
        if statistic(X_hat, Y_hat) > critical_value:
            rejections += 1
    return rejections / iters

In [None]:
def monte_carlo(ns, eps, M = 200, alpha = 0.05, iters = 1000):
    """Fill a table of estimated power over every (n, eps) combination.

    Parameters
    ----------
    ns : sequence of graph sizes, one table row each.
    eps : sequence of perturbation sizes, one table column each.
    M, alpha, iters : forwarded unchanged to ``estimated_power``.

    Returns
    -------
    Array of shape (len(ns), len(eps)); entry [i, j] is
    estimated_power(ns[i], eps[j], M, alpha, iters).
    """
    n_rows, n_cols = len(ns), len(eps)
    powers = np.zeros(shape = (n_rows, n_cols))
    # The progress bar advances once per graph size.
    for row in tqdm(range(n_rows)):
        for col, epsilon in enumerate(eps):
            powers[row, col] = estimated_power(ns[row], epsilon, M, alpha, iters)
    return powers

In [None]:
# Simulation grid: graph sizes (rows) and perturbation sizes (columns).
# Only n = 100 and 200 are run; 500 and 1000 are commented out of the list below.
monte_ns = [100, 200]#, 500, 1000]
monte_eps = [0.02, 0.05, 0.1]
# Uses monte_carlo's defaults for M, alpha and iters.
power_table = monte_carlo(ns = monte_ns, eps = monte_eps)

In [None]:
# Print hard-coded reference values next to the simulated power table.
# NOTE(review): the reference table is 4 x 3, presumably n = 100, 200, 500, 1000
# by eps = 0.02, 0.05, 0.1; confirm against the paper. power_table has one row
# per entry of monte_ns, so only its rows line up with the leading reference rows.
print("Paper results")
print(np.array([[.06,.09,.27],[.09,.17,.83],[.1,.43,1],[.14,1,1]]))
print("Sim results")
print(power_table)