
Parallelize bootstrap for latent position test #710

Merged 3 commits on Apr 9, 2021
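For context, a minimal usage sketch of the new `workers` argument (the SBM parameters are illustrative, and this assumes a graspologic build that includes this change):

```python
import numpy as np

from graspologic.inference import latent_position_test
from graspologic.simulations import sbm

np.random.seed(8888)

# Two graphs sampled from the same 2-block SBM, so the null should hold
B = np.array([[0.5, 0.2], [0.2, 0.5]])
A1 = sbm([50, 50], B)
A2 = sbm([50, 50], B)

# workers=-1 spreads the bootstrap simulations across all available cores
result = latent_position_test(A1, A2, n_components=2, n_bootstraps=100, workers=-1)
print(result.p_value)  # expected to be large (> 0.05) under the null
```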
53 changes: 37 additions & 16 deletions graspologic/inference/latent_position_test.py
@@ -1,20 +1,28 @@
 # Copyright (c) Microsoft Corporation and contributors.
 # Licensed under the MIT License.
 
+from collections import namedtuple
+
 import numpy as np
+from joblib import Parallel, delayed
 from scipy.linalg import orthogonal_procrustes
 
 from ..align import OrthogonalProcrustes
 from ..embed import AdjacencySpectralEmbed, OmnibusEmbed, select_dimension
 from ..simulations import rdpg
 from ..utils import import_graph, is_symmetric
-from collections import namedtuple
 
 lpt_result = namedtuple("lpt_result", ("p_value", "sample_T_statistic", "misc_stats"))
 
 
 def latent_position_test(
-    A1, A2, embedding="ase", n_components=None, test_case="rotation", n_bootstraps=500
+    A1,
+    A2,
+    embedding="ase",
+    n_components=None,
+    test_case="rotation",
+    n_bootstraps=500,
+    workers=1,
 ):
     r"""
     Two-sample hypothesis test for the problem of determining whether two random
@@ -68,6 +76,10 @@ def latent_position_test(
     n_bootstraps : int, optional (default 500)
         Number of bootstrap simulations to run to generate the null distribution
 
+    workers : int, optional (default=1)
+        Number of workers to use. If more than 1, parallelizes the bootstrap
+        simulations. Supply -1 to use all cores available.
+
     Returns
     ----------
     p_value : float
@@ -121,6 +133,10 @@
             "test_case must be one of 'rotation', 'scalar-rotation',"
             + "'diagonal-rotation'"
         )
+    # check workers argument
+    if not isinstance(workers, int):
+        msg = "workers must be an int, not {}".format(type(workers))
+        raise TypeError(msg)
 
     A1 = import_graph(A1)
     A2 = import_graph(A2)
@@ -135,12 +151,19 @@
         n_components = max(num_dims1, num_dims2)
     X_hats = _embed(A1, A2, embedding, n_components)
     sample_T_statistic = _difference_norm(X_hats[0], X_hats[1], embedding, test_case)
-    null_distribution_1 = _bootstrap(
-        X_hats[0], embedding, n_components, n_bootstraps, test_case
+
+    # Compute null distributions
+    null_distribution_1 = Parallel(n_jobs=workers)(
+        delayed(_bootstrap)(X_hats[0], embedding, n_components, n_bootstraps, test_case)
+        for _ in range(n_bootstraps)
     )
-    null_distribution_2 = _bootstrap(
-        X_hats[1], embedding, n_components, n_bootstraps, test_case
+    null_distribution_1 = np.array(null_distribution_1)
+
+    null_distribution_2 = Parallel(n_jobs=workers)(
+        delayed(_bootstrap)(X_hats[1], embedding, n_components, n_bootstraps, test_case)
+        for _ in range(n_bootstraps)
     )
+    null_distribution_2 = np.array(null_distribution_2)
 
     # using exact mc p-values (see, for example, Phipson and Smyth, 2010)
     p_value_1 = (
@@ -165,16 +188,14 @@
 def _bootstrap(
     X_hat, embedding, n_components, n_bootstraps, test_case, rescale=False, loops=False
 ):
-    t_bootstrap = np.zeros(n_bootstraps)
-    for i in range(n_bootstraps):
-        A1_simulated = rdpg(X_hat, rescale=rescale, loops=loops)
-        A2_simulated = rdpg(X_hat, rescale=rescale, loops=loops)
-        X1_hat_simulated, X2_hat_simulated = _embed(
-            A1_simulated, A2_simulated, embedding, n_components, check_lcc=False
-        )
-        t_bootstrap[i] = _difference_norm(
-            X1_hat_simulated, X2_hat_simulated, embedding, test_case
-        )
+    A1_simulated = rdpg(X_hat, rescale=rescale, loops=loops)
+    A2_simulated = rdpg(X_hat, rescale=rescale, loops=loops)
+    X1_hat_simulated, X2_hat_simulated = _embed(
+        A1_simulated, A2_simulated, embedding, n_components, check_lcc=False
+    )
+    t_bootstrap = _difference_norm(
+        X1_hat_simulated, X2_hat_simulated, embedding, test_case
+    )
     return t_bootstrap
 

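The core pattern the change adopts is joblib's `Parallel`/`delayed` fan-out: `_bootstrap` now performs a single draw per call, and the loop over `n_bootstraps` moves into the `Parallel` generator, so each task is independent of the others. A self-contained sketch of the same pattern, with a stand-in statistic rather than graspologic's `_bootstrap`:

```python
import numpy as np
from joblib import Parallel, delayed


def one_bootstrap_draw(seed):
    # Stand-in for a single _bootstrap call: simulate one draw under the
    # null and return its test statistic
    rng = np.random.default_rng(seed)
    return np.abs(rng.normal(size=100).mean())


n_bootstraps = 500

# n_jobs=-1 uses all available cores, mirroring workers=-1 above
null_distribution = np.array(
    Parallel(n_jobs=-1)(
        delayed(one_bootstrap_draw)(seed) for seed in range(n_bootstraps)
    )
)
```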
13 changes: 13 additions & 0 deletions tests/test_latentpositiontest.py
@@ -42,6 +42,8 @@ def test_bad_kwargs(self):
             latent_position_test(A1, A2, embedding=6)
         with pytest.raises(TypeError):
             latent_position_test(A1, A2, test_case=6)
+        with pytest.raises(TypeError):
+            latent_position_test(A1, A2, workers="oops")
 
     def test_n_bootstraps(self):
         np.random.seed(1234556)
@@ -113,11 +115,22 @@ def test_SBM_epsilon(self):
         A2 = sbm(2 * [b_size], B1)
         A3 = sbm(2 * [b_size], B2)
 
+        # non-parallel test
         lpt_null = latent_position_test(A1, A2, n_components=2, n_bootstraps=100)
         lpt_alt = latent_position_test(A1, A3, n_components=2, n_bootstraps=100)
         self.assertTrue(lpt_null[0] > 0.05)
         self.assertTrue(lpt_alt[0] <= 0.05)
 
+        # parallel test
+        lpt_null = latent_position_test(
+            A1, A2, n_components=2, n_bootstraps=100, workers=-1
+        )
+        lpt_alt = latent_position_test(
+            A1, A3, n_components=2, n_bootstraps=100, workers=-1
+        )
+        self.assertTrue(lpt_null[0] > 0.05)
+        self.assertTrue(lpt_alt[0] <= 0.05)
+
 
 if __name__ == "__main__":
     unittest.main()
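As an aside, the `# using exact mc p-values` comment in the diff refers to Phipson and Smyth (2010), who recommend adding one to both the count of extreme null statistics and the number of draws so that a Monte Carlo p-value is never exactly zero. A minimal sketch of that computation (the helper name is illustrative, not graspologic's):

```python
import numpy as np


def exact_mc_p_value(null_distribution, observed_statistic):
    # Phipson & Smyth (2010): p = (b + 1) / (m + 1), where b counts the
    # bootstrap statistics at least as extreme as the observed one and
    # m is the number of bootstrap draws
    b = np.count_nonzero(null_distribution >= observed_statistic)
    m = len(null_distribution)
    return (b + 1) / (m + 1)
```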