Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Similarity Augmented Graph Matching #560

Merged
merged 38 commits into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
00287d6
wraps scipy code, gets rid of skp function
asaadeldin11 Oct 14, 2020
465b9d7
Update match.rst
asaadeldin11 Oct 16, 2020
b3e4804
remove mentions of skp file, init_method -> init
asaadeldin11 Oct 16, 2020
7362247
Update gmp.py
asaadeldin11 Oct 18, 2020
024c2ac
Merge branch 'dev' into qap_in
bdpedigo Oct 22, 2020
bc34dd4
allow gm to handle similarity matrices
asaadeldin11 Oct 26, 2020
bb2778e
Update gmp.py
asaadeldin11 Oct 26, 2020
ae40309
add tests
asaadeldin11 Nov 2, 2020
ae9c91a
Merge branch 'dev' into sim_aug
asaadeldin11 Nov 2, 2020
ec60cac
score calc takes into account similarity matrix
asaadeldin11 Nov 3, 2020
80c543c
Merge branch 'dev' into sim_aug
asaadeldin11 Nov 17, 2020
69813bf
black
asaadeldin11 Nov 17, 2020
c954f25
Merge branch 'dev' into sim_aug
asaadeldin11 Nov 30, 2020
12e469f
bug fixes
asaadeldin11 Jan 25, 2021
9dedb7a
Merge branch 'dev' into sim_aug
asaadeldin11 Jan 26, 2021
e94dcc6
add documentation
asaadeldin11 Jan 28, 2021
9e38266
Merge branch 'dev' into sim_aug
asaadeldin11 Jan 28, 2021
7cccbcb
Merge branch 'dev' into sim_aug
bdpedigo Feb 3, 2021
62ba84c
Merge branch 'dev' into sim_aug
bdpedigo Feb 7, 2021
2096e22
update sim tests such that they would fail with vanilla gm
asaadeldin11 Feb 7, 2021
c30a3f7
fix `S` documentation and checks
asaadeldin11 Feb 8, 2021
74e276f
format changes
asaadeldin11 Feb 8, 2021
015d63f
Update qap.py
asaadeldin11 Feb 15, 2021
cb3dbd7
Docs updates
asaadeldin11 Feb 16, 2021
39f9f75
Merge branch 'dev' into sim_aug
bdpedigo Mar 14, 2021
e211ec6
'S' documentation changes
asaadeldin11 Mar 19, 2021
a6b6d58
Update gmp.py
asaadeldin11 Mar 19, 2021
e988aab
fix calc_score
asaadeldin11 Mar 19, 2021
4c670cc
Small doc change
asaadeldin11 Mar 29, 2021
053be34
Update gmp.py
asaadeldin11 Mar 29, 2021
54f0a07
fix S size and padding
asaadeldin11 Apr 1, 2021
f405536
resolve merge conflict
asaadeldin11 Apr 5, 2021
411f500
Merge branch 'dev' into sim_aug
asaadeldin11 Apr 5, 2021
4d94a49
Update graspologic/match/qap.py
bdpedigo Apr 5, 2021
df1ff41
Update graspologic/match/gmp.py
bdpedigo Apr 5, 2021
0ce54a5
Update graspologic/match/gmp.py
bdpedigo Apr 5, 2021
4557612
fix failed tests
asaadeldin11 Apr 5, 2021
9dd7e34
Merge branch 'dev' into sim_aug
bdpedigo Apr 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 24 additions & 8 deletions graspologic/match/gmp.py
Expand Up @@ -52,7 +52,7 @@ class GraphMatch(BaseEstimator):
Gives users the option to shuffle the nodes of A matrix to avoid results
from inputs that were already matched.

eps : float (default = 0.1)
eps : float (default = 0.03)
A positive, threshold stopping criteria such that FW continues to iterate
while Frobenius norm of :math:`(P_{i}-P_{i+1}) > \\text{eps}`

Expand All @@ -76,7 +76,8 @@ class GraphMatch(BaseEstimator):

perm_inds_ : array, size (n,) where n is the number of vertices in the fitted graphs.
The indices of the optimal permutation (with the fixed seeds given) on the nodes of B,
to best minimize the objective function :math:`f(P) = \\text{trace}(A^T PBP^T )`.
to best minimize the objective function :math:`f(P) = \\text{trace}(A^T PBP^T)
+ \\text{trace}(S^T P)`.


score_ : float
Expand Down Expand Up @@ -107,7 +108,7 @@ def __init__(
init="barycenter",
max_iter=30,
shuffle_input=True,
eps=0.1,
eps=0.03,
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
gmp=True,
padding="adopted",
):
Expand All @@ -121,9 +122,9 @@ def __init__(
else:
msg = '"n_init" must be a positive integer'
raise TypeError(msg)
if init == "rand":
if isinstance(init, str) and init == "rand":
self.init = "randomized"
elif init == "barycenter":
elif isinstance(init, str) and init == "barycenter":
self.init = "barycenter"
elif not isinstance(init, str):
self.init = init
Expand Down Expand Up @@ -159,7 +160,7 @@ def __init__(
msg = '"padding" parameter must be of type string'
raise TypeError(msg)

def fit(self, A, B, seeds_A=[], seeds_B=[]):
def fit(self, A, B, seeds_A=[], seeds_B=[], S=None, rng=None):
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
"""
Fits the model with two assigned adjacency matrices

Expand All @@ -178,6 +179,12 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
An array where each entry is an index of a node in `B` The elements of
``seeds_A`` and ``seeds_B`` are vertices which are known to be matched, that is,
``seeds_A[i]`` is matched to vertex ``seeds_B[i]``.
S : 2d-array, square
A square similarity matrix. Should be same shape as ``A`` and ``B``.
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved

Note: the scale of `S` may effect the weight placed on the term
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
:math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)`
during the optimization process.

Returns
-------
Expand All @@ -196,6 +203,8 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
options = {
"maximize": self.gmp,
"partial_match": partial_match,
"S": S,
"rng": rng,
"P0": self.init,
"shuffle_input": self.shuffle_input,
"maxiter": self.max_iter,
Expand All @@ -212,7 +221,7 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
self.n_iter_ = res.nit
return self

def fit_predict(self, A, B, seeds_A=[], seeds_B=[]):
def fit_predict(self, A, B, seeds_A=[], seeds_B=[], S=None):
bdpedigo marked this conversation as resolved.
Show resolved Hide resolved
"""
Fits the model with two assigned adjacency matrices, returning optimal
permutation indices
Expand All @@ -233,12 +242,19 @@ def fit_predict(self, A, B, seeds_A=[], seeds_B=[]):
``seeds_A`` and ``seeds_B`` are vertices which are known to be matched, that is,
``seeds_A[i]`` is matched to vertex ``seeds_B[i]``.

S : 2d-array, square
A square similarity matrix. Should be same shape as ``A`` and ``B``.

Note: the scale of `S` may effect the weight placed on the term
:math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)`
during the optimization process.

Returns
-------
perm_inds_ : 1-d array, some shuffling of [0, n_vert)
The optimal permutation indices to minimize the objective function
"""
self.fit(A, B, seeds_A, seeds_B)
self.fit(A, B, seeds_A, seeds_B, S)
return self.perm_inds_


Expand Down
33 changes: 27 additions & 6 deletions graspologic/match/qap.py
Expand Up @@ -169,9 +169,9 @@ def quadratic_assignment(A, B, method="faq", options=None):
return res


def _calc_score(A, B, perm):
def _calc_score(A, B, S, perm):
# equivalent to objective function but avoids matmul
return np.sum(A * B[perm][:, perm])
return np.sum(A * B[perm][:, perm]) + np.sum(S[np.arange(len(S)), perm])
bdpedigo marked this conversation as resolved.
Show resolved Hide resolved


def _common_input_validation(A, B, partial_match):
Expand Down Expand Up @@ -217,6 +217,7 @@ def _quadratic_assignment_faq(
B,
maximize=False,
partial_match=None,
S=None,
rng=None,
P0="barycenter",
shuffle_input=False,
Expand Down Expand Up @@ -278,6 +279,11 @@ def _quadratic_assignment_faq(
matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
``partial_match`` is an array of size ``(m , 2)``, where ``m`` is
not greater than the number of nodes, :math:`n`.
S : 2d-array, square
A square similarity matrix. Should be same shape as ``A`` and ``B``.
bdpedigo marked this conversation as resolved.
Show resolved Hide resolved
Note: the scale of `S` may effect the weight placed on the term
:math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)`
during the optimization process.
P0 : 2d-array, "barycenter", or "randomized" (default = "barycenter")
The initial (guess) permutation matrix or search "position"
`P0`.
Expand Down Expand Up @@ -372,13 +378,23 @@ def _quadratic_assignment_faq(
# ValueError check
A, B, partial_match = _common_input_validation(A, B, partial_match)

if S is None:
S = np.zeros(A.shape)
S = np.atleast_2d(S)

msg = None
if isinstance(P0, str) and P0 not in {"barycenter", "randomized"}:
msg = "Invalid 'P0' parameter string"
elif maxiter <= 0:
msg = "'maxiter' must be a positive integer"
elif tol <= 0:
msg = "'tol' must be a positive float"
elif S.shape[0] != S.shape[1]:
msg = "`S` must be square"
elif S.ndim != 2:
msg = "`S` must have exactly two dimensions"
elif S.shape != A.shape:
msg = "`S`, `A`, and `B` matrices must be of equal size"
if msg is not None:
raise ValueError(msg)

Expand All @@ -389,7 +405,7 @@ def _quadratic_assignment_faq(

# check outlier cases
if n == 0 or partial_match.shape[0] == n:
score = _calc_score(A, B, partial_match[:, 1])
score = _calc_score(A, B, S, partial_match[:, 1])
res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
return OptimizeResult(res)

Expand All @@ -398,6 +414,7 @@ def _quadratic_assignment_faq(
obj_func_scalar = -1

nonseed_B = np.setdiff1d(range(n), partial_match[:, 1])
perm_S = np.copy(nonseed_B)
if shuffle_input:
nonseed_B = rng.permutation(nonseed_B)
# shuffle_input to avoid results from inputs that were already matched
Expand All @@ -406,9 +423,12 @@ def _quadratic_assignment_faq(
perm_A = np.concatenate([partial_match[:, 0], nonseed_A])
perm_B = np.concatenate([partial_match[:, 1], nonseed_B])

S = S[:, perm_B]

# definitions according to Seeded Graph Matching [2].
A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
S22 = S[perm_S, n_seeds:]

# [1] Algorithm 1 Line 1 - choose initialization
if isinstance(P0, str):
Expand All @@ -429,7 +449,7 @@ def _quadratic_assignment_faq(
_check_init_input(P0, n_unseed)
P = P0

const_sum = A21 @ B21.T + A12.T @ B12
const_sum = A21 @ B21.T + A12.T @ B12 + S22

# [1] Algorithm 1 Line 2 - loop while stopping criteria not met
for n_iter in range(1, maxiter + 1):
Expand All @@ -451,8 +471,9 @@ def _quadratic_assignment_faq(
BR22 = B22 @ R.T
b22a = (AR22 * B22.T[cols]).sum()
b22b = (A22 * BR22[cols]).sum()
s = (S22 * R).sum()
a = (AR22.T * BR22).sum()
b = b21 + b12 + b22a + b22b
b = b21 + b12 + b22a + b22b + s
# critical point of ax^2 + bx + c is at x = -d/(2*e)
# if a * obj_func_scalar > 0, it is a minimum
# if minimum is not in [0, 1], only endpoints need to be considered
Expand All @@ -476,7 +497,7 @@ def _quadratic_assignment_faq(
unshuffled_perm = np.zeros(n, dtype=int)
unshuffled_perm[perm_A] = perm_B[perm]

score = _calc_score(A, B, unshuffled_perm)
score = _calc_score(A, B, S, unshuffled_perm)

res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}

Expand Down
43 changes: 42 additions & 1 deletion tests/test_match.py
Expand Up @@ -6,7 +6,9 @@
import math
import random
from graspologic.match import GraphMatch as GMP
from graspologic.simulations import er_np
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
from graspologic.simulations import er_np, sbm_corr
from graspologic.embed import AdjacencySpectralEmbed
from graspologic.align import SignFlips

np.random.seed(0)

Expand Down Expand Up @@ -59,6 +61,22 @@ def test_SGM_inputs(self):
GMP().fit(
np.identity(3), np.identity(3), -1 * np.arange(2), -1 * np.arange(2)
)
with pytest.raises(ValueError):
GMP().fit(
np.random.random((4, 4)),
np.random.random((4, 4)),
np.arange(2),
np.arange(2),
np.random.random((3, 4)),
)
with pytest.raises(ValueError):
GMP().fit(
np.random.random((4, 4)),
np.random.random((4, 4)),
np.arange(2),
np.arange(2),
np.random.random((3, 3)),
)

def _get_AB(self):
# adjacency matrices from QAPLIB instance chr12c
Expand Down Expand Up @@ -140,3 +158,26 @@ def test_padding(self):
res = gmp_adopted.fit(G1, G2)

assert 1.0 == (sum(res.perm_inds_ == np.arange(n)) / n)

def test_sim(self):
n = 150
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
asaadeldin11 marked this conversation as resolved.
Show resolved Hide resolved
rho = 0.9
n_per_block = int(n / 3)
n_blocks = 3
block_members = np.array(n_blocks * [n_per_block])
block_probs = np.array(
[[0.2, 0.01, 0.01], [0.01, 0.1, 0.01], [0.01, 0.01, 0.2]]
)
directed = False
loops = False
A1, A2 = sbm_corr(
block_members, block_probs, rho, directed=directed, loops=loops
)
ase = AdjacencySpectralEmbed(n_components=3, algorithm="truncated")
x1 = ase.fit_transform(A1)
x2 = ase.fit_transform(A2)
xh1 = SignFlips().fit_transform(x1, x2)
S = xh1 @ x2.T
res = self.barygm.fit(A1, A2, S=S)

assert 0.7 <= (sum(res.perm_inds_ == np.arange(n)) / n)