In [10]:
from graphstats import sbm
import graspy
import numpy as np
from graspy.embed import AdjacencySpectralEmbed as ASE

The goal of this notebook is to propose a hypothesis test for whether two graphs with a known vertex alignment share the same latent positions. This is a more restricted setting than the one presented in Semi-par, so it should be possible to leverage the known vertex alignment to construct a more powerful test. 

Consider the following setting.
Let $ A^{(1)}, A^{(2)} $ be observed adjacency matrices on a common vertex set $ V $ with $ |V| = n $, where it is assumed that $ A^{(1)} \sim RDPG(X_{1}) $ and $ A^{(2)} \sim RDPG(X_{2}) $. Let $ X_{1}, X_{2} \in \mathbb{R}^{n \times d} $ be the stacked latent positions, where the $ i^{th} $ row of $ X_{j} $ is the latent position of node $ i $ that generated $ A^{(j)} $. In this setting, a natural question to ask is whether or not $ X_{1}X_{1}^{T} = X_{2}X_{2}^{T} $. Note that $ X_{i}X_{i}^{T} \in \mathbb{R}^{n \times n} $.

Equivalently, we test
\begin{align*}
H_{0}: X_{1}X_{1}^{T} &= X_{2}X_{2}^{T} \\
& \text{vs.} \\
H_{1}: X_{1}X_{1}^{T} &\neq X_{2}X_{2}^{T}
\end{align*}

I propose a simple procedure in the vertex aligned setting. Let $ S \subset V $ and let $ A^{(i)}_{S, j} $ denote the adjacency matrix in which the adjacencies of the vertices in $ S $ in graph $ i $ are replaced by the adjacencies of the vertices in $ S $ in graph $ j $. In matrix notation, let $ a_{k,\ell}^{i} $ denote the $ (k, \ell) $th entry of $ A^{(i)} $. Then the $ (k, \ell) $th entry of $ A^{(i)}_{S,j} $ is $ a_{k,\ell}^{j} $ if $ k $ or $ \ell $ is in $ S $ and $ a_{k, \ell}^{i} $ otherwise.

The testing procedure goes as follows. Fix $ K \in \mathbb{N} $ with $ K \le \frac{|V|}{2} $. Let $ \hat{X}_{i} = ASE(A^{(i)}) $ and let $ \hat{P}_{i} = \hat{X}_{i} \hat{X}_{i}^{T} $. For each $ i \in \{1, 2\} $, let $ B^{1}, B^{2} \sim RDPG(\hat{P}_{i}) $ be independent graphs on $ V $. Let $ S $ be a random subset of $ V $ of size $ K $. Now let $ B^{1}_{S, 2} $ be defined as above, consider $ \hat{Y}_{1} = ASE(B^{1}) $ and $ \hat{Y}_{2} = ASE(B^{1}_{S, 2}) $, and let $ T = ||\hat{Y}_{1}\hat{Y}_{1}^{T} - \hat{Y}_{2}\hat{Y}_{2}^{T}||_{F}$. Repeating this many times for each $ i $ yields two estimated distributions of $ T $ under the null, one generated from $ \hat{P}_{1} $ and one from $ \hat{P}_{2} $. We reject if the observed statistic $ ||\hat{P}_{1} - \hat{P}_{2}||_{F} $ exceeds the $ (1 - \alpha) $ quantile of both estimated distributions.

This notebook will:

- implement this testing procedure

- empirically show its validity

- empirically show its consistency

- compare its power to that of Semi-par

In [15]:
## Implementation
# Simulate two vertex-aligned two-block SBM graphs, embed each with ASE, and
# compute the observed statistic T = ||P1 - P2|| between the estimated
# probability matrices.
# NOTE(review): no random seed is set in this cell, so A1, A2 and T change on
# every run.
n = 200  # number of vertices in each graph
# Random count drawn from Binomial(n, 0.5); S/n is used below as the first
# entry of pi.
S = np.random.binomial(n, 0.5)

# presumably the block-membership proportions expected by sbm.adj_matrix --
# TODO confirm against graphstats
pi = [S/n, 1 - S/n]
# Perturbation added to the diagonal of B2; with eps = 0, B1 and B2 are equal.
eps = 0
B1 = np.array([
    [0.55, 0.25],
    [0.25, 0.45]
])

B2 = np.array([
    [0.55 + eps, 0.25],
    [0.25, 0.45 + eps]
])

# Both graphs use the same n and pi; A1 is sampled before A2, so the order of
# these two calls fixes how the global RNG stream is consumed.
A1 = sbm.adj_matrix(n, pi, B1)
A2 = sbm.adj_matrix(n, pi, B2)

# Embed each graph with its own ASE instance (default settings).
ase_object1 = ASE()
X_hat1 = ase_object1.fit_transform(A1)
ase_object2 = ASE()
X_hat2 = ase_object2.fit_transform(A2)

# Estimated edge-probability matrices X_hat @ X_hat.T.
P1 = X_hat1 @ X_hat1.T
P2 = X_hat2 @ X_hat2.T

# Observed test statistic; np.linalg.norm with default arguments is the
# Frobenius norm for a 2-D array.
T = np.linalg.norm(P1 - P2)

In [17]:
def switcheroo(A, B, k, seed=None):
    """
    Return a copy of A in which the adjacencies of k random vertices are
    replaced by the corresponding adjacencies from B.

    A random subset S of k distinct vertices is drawn; entry (i, j) of the
    result equals B[i, j] when i or j is in S and A[i, j] otherwise.

    A - square adjacency matrix, shape (n, n)
    B - adjacency matrix on the same (aligned) vertices, same shape as A
    k - the size of the random subset to switch
    seed - optional non-negative integer; when given, S is drawn from a
           private RandomState(seed) so the global RNG state is untouched.
           When None, S is drawn from the global numpy RNG.

    Returns a new (n, n) array; A and B are not modified.
    Raises ValueError if A and B are not square matrices of the same shape,
    or (from numpy) if k is larger than the number of vertices.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    if A.ndim != 2 or A.shape[0] != A.shape[1] or A.shape != B.shape:
        raise ValueError("A and B must be square matrices of the same shape")

    # Use the size of A rather than a notebook-level global.
    num_vertices = A.shape[0]

    # Seeding with an arbitrary (possibly negative) integer would make
    # np.random.seed raise, so only build a seeded generator on request.
    rng = np.random if seed is None else np.random.RandomState(seed)
    S = rng.choice(num_vertices, size=k, replace=False)

    # Swap both the rows and the columns of S so the result stays symmetric
    # whenever A and B are symmetric.
    mixed = A.copy()
    mixed[S, :] = B[S, :]
    mixed[:, S] = B[:, S]

    return mixed

def generate_null_distribution(P, n, k, b, seed=None):
    """
    Simulate b draws of the test statistic T under the null.

    Each iteration samples two graphs from P, mixes k random vertices of the
    second into the first via switcheroo, embeds the unmixed and the mixed
    graph with ASE, and records the norm of the difference between the two
    estimated probability matrices.

    P - probability matrix
    n - number of nodes
    k - the size of the random subset to switch
    b - number of iterations
    seed - optional non-negative integer used to seed the global numpy RNG
           before sampling; when None the RNG state is left as is.

    Returns a list of b floats (empty when b == 0).
    """
    # Only reseed on request: a randomly drawn seed may be negative, and
    # np.random.seed rejects negative values.
    if seed is not None:
        np.random.seed(seed)

    # Loop-invariant second argument to sbm.adj_matrix.
    # NOTE(review): this appears to treat every vertex as its own block with
    # P as the block matrix -- confirm against the graphstats sbm API.
    uniform_pi = np.ones(n)/n

    statistics = []
    for _ in range(b):
        tempA1 = sbm.adj_matrix(n, uniform_pi, P)
        tempA2 = sbm.adj_matrix(n, uniform_pi, P)

        # NOTE(review): mix is passed to ASE below, so switcheroo must return
        # an adjacency matrix here.
        mix = switcheroo(tempA1, tempA2, k)

        X_hat1 = ASE().fit_transform(tempA1)
        X_hat2 = ASE().fit_transform(mix)

        P1 = X_hat1 @ X_hat1.T
        P2 = X_hat2 @ X_hat2.T

        # Default np.linalg.norm on a 2-D array is the Frobenius norm.
        statistics.append(np.linalg.norm(P1 - P2))

    return statistics

In [None]:
## Validity

In [None]:
## Consistency

In [None]:
## vs semi-par