graspologic-org · bdpedigo · Apr 7, 2021 · Oct 14, 2020 · Oct 16, 2020 · Oct 16, 2020
diff --git a/graspologic/match/gmp.py b/graspologic/match/gmp.py
@@ -52,7 +52,7 @@ class GraphMatch(BaseEstimator):
         Gives users the option to shuffle the nodes of A matrix to avoid results
         from inputs that were already matched.
 
-    eps : float (default = 0.1)
+    eps : float (default = 0.03)
         A positive, threshold stopping criteria such that FW continues to iterate
         while Frobenius norm of :math:`(P_{i}-P_{i+1}) > \\text{eps}`
 
@@ -76,7 +76,8 @@ class GraphMatch(BaseEstimator):
 
     perm_inds_ : array, size (n,) where n is the number of vertices in the fitted graphs.
         The indices of the optimal permutation (with the fixed seeds given) on the nodes of B,
-        to best minimize the objective function :math:`f(P) = \\text{trace}(A^T PBP^T )`.
+        to best minimize the objective function :math:`f(P) = \\text{trace}(A^T PBP^T)
+        + \\text{trace}(S^T P)`.
 
 
     score_ : float
@@ -107,7 +108,7 @@ def __init__(
         init="barycenter",
         max_iter=30,
         shuffle_input=True,
-        eps=0.1,
+        eps=0.03,
         gmp=True,
         padding="adopted",
     ):
@@ -121,9 +122,9 @@ def __init__(
         else:
             msg = '"n_init" must be a positive integer'
             raise TypeError(msg)
-        if init == "rand":
+        if isinstance(init, str) and init == "rand":
             self.init = "randomized"
-        elif init == "barycenter":
+        elif isinstance(init, str) and init == "barycenter":
             self.init = "barycenter"
         elif not isinstance(init, str):
             self.init = init
@@ -159,7 +160,7 @@ def __init__(
             msg = '"padding" parameter must be of type string'
             raise TypeError(msg)
 
-    def fit(self, A, B, seeds_A=[], seeds_B=[]):
+    def fit(self, A, B, seeds_A=[], seeds_B=[], S=None, rng=None):
         """
         Fits the model with two assigned adjacency matrices
 
@@ -178,6 +179,12 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
             An array where each entry is an index of a node in `B` The elements of
             ``seeds_A`` and ``seeds_B`` are vertices which are known to be matched, that is,
             ``seeds_A[i]`` is matched to vertex ``seeds_B[i]``.
+        S : 2d-array, square
+            A square similarity matrix. Should be same shape as ``A`` and ``B``.
+
+            Note: the scale of `S` may effect the weight placed on the term
+            :math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)`
+            during the optimization process.
 
         Returns
         -------
@@ -196,6 +203,8 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
         options = {
             "maximize": self.gmp,
             "partial_match": partial_match,
+            "S": S,
+            "rng": rng,
             "P0": self.init,
             "shuffle_input": self.shuffle_input,
             "maxiter": self.max_iter,
@@ -212,7 +221,7 @@ def fit(self, A, B, seeds_A=[], seeds_B=[]):
         self.n_iter_ = res.nit
         return self
 
-    def fit_predict(self, A, B, seeds_A=[], seeds_B=[]):
+    def fit_predict(self, A, B, seeds_A=[], seeds_B=[], S=None):
         """
         Fits the model with two assigned adjacency matrices, returning optimal
         permutation indices
@@ -233,12 +242,19 @@ def fit_predict(self, A, B, seeds_A=[], seeds_B=[]):
             ``seeds_A`` and ``seeds_B`` are vertices which are known to be matched, that is,
             ``seeds_A[i]`` is matched to vertex ``seeds_B[i]``.
 
+        S : 2d-array, square
+            A square similarity matrix. Should be same shape as ``A`` and ``B``.
+
+            Note: the scale of `S` may effect the weight placed on the term
+            :math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)`
+            during the optimization process.
+
         Returns
         -------
         perm_inds_ : 1-d array, some shuffling of [0, n_vert)
             The optimal permutation indices to minimize the objective function
         """
-        self.fit(A, B, seeds_A, seeds_B)
+        self.fit(A, B, seeds_A, seeds_B, S)
         return self.perm_inds_
 
 

diff --git a/graspologic/match/qap.py b/graspologic/match/qap.py
@@ -169,9 +169,9 @@ def quadratic_assignment(A, B, method="faq", options=None):
     return res
 
 
-def _calc_score(A, B, perm):
+def _calc_score(A, B, S, perm):
     # equivalent to objective function but avoids matmul
-    return np.sum(A * B[perm][:, perm])
+    return np.sum(A * B[perm][:, perm]) + np.sum(S[np.arange(len(S)), perm])
 
 
 def _common_input_validation(A, B, partial_match):
@@ -217,6 +217,7 @@ def _quadratic_assignment_faq(
     B,
     maximize=False,
     partial_match=None,
+    S=None,
     rng=None,
     P0="barycenter",
     shuffle_input=False,
@@ -278,6 +279,11 @@ def _quadratic_assignment_faq(
         matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
         ``partial_match`` is an array of size ``(m , 2)``, where ``m`` is
         not greater than the number of nodes, :math:`n`.
+    S : 2d-array, square
+        A square similarity matrix. Should be same shape as ``A`` and ``B``.   
+        Note: the scale of `S` may effect the weight placed on the term 
+        :math:`\\text{trace}(S^T P)` relative to :math:`\\text{trace}(A^T PBP^T)` 
+        during the optimization process.
     P0 : 2d-array, "barycenter", or "randomized" (default = "barycenter")
         The initial (guess) permutation matrix or search "position"
         `P0`.
@@ -372,13 +378,23 @@ def _quadratic_assignment_faq(
     # ValueError check
     A, B, partial_match = _common_input_validation(A, B, partial_match)
 
+    if S is None:
+        S = np.zeros(A.shape)
+    S = np.atleast_2d(S)
+
     msg = None
     if isinstance(P0, str) and P0 not in {"barycenter", "randomized"}:
         msg = "Invalid 'P0' parameter string"
     elif maxiter <= 0:
         msg = "'maxiter' must be a positive integer"
     elif tol <= 0:
         msg = "'tol' must be a positive float"
+    elif S.shape[0] != S.shape[1]:
+        msg = "`S` must be square"
+    elif S.ndim != 2:
+        msg = "`S` must have exactly two dimensions"
+    elif S.shape != A.shape:
+        msg = "`S`, `A`, and `B` matrices must be of equal size"
     if msg is not None:
         raise ValueError(msg)
 
@@ -389,7 +405,7 @@ def _quadratic_assignment_faq(
 
     # check outlier cases
     if n == 0 or partial_match.shape[0] == n:
-        score = _calc_score(A, B, partial_match[:, 1])
+        score = _calc_score(A, B, S, partial_match[:, 1])
         res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
         return OptimizeResult(res)
 
@@ -398,6 +414,7 @@ def _quadratic_assignment_faq(
         obj_func_scalar = -1
 
     nonseed_B = np.setdiff1d(range(n), partial_match[:, 1])
+    perm_S = np.copy(nonseed_B)
     if shuffle_input:
         nonseed_B = rng.permutation(nonseed_B)
         # shuffle_input to avoid results from inputs that were already matched
@@ -406,9 +423,12 @@ def _quadratic_assignment_faq(
     perm_A = np.concatenate([partial_match[:, 0], nonseed_A])
     perm_B = np.concatenate([partial_match[:, 1], nonseed_B])
 
+    S = S[:, perm_B]
+
     # definitions according to Seeded Graph Matching [2].
     A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
     B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
+    S22 = S[perm_S, n_seeds:]
 
     # [1] Algorithm 1 Line 1 - choose initialization
     if isinstance(P0, str):
@@ -429,7 +449,7 @@ def _quadratic_assignment_faq(
         _check_init_input(P0, n_unseed)
         P = P0
 
-    const_sum = A21 @ B21.T + A12.T @ B12
+    const_sum = A21 @ B21.T + A12.T @ B12 + S22
 
     # [1] Algorithm 1 Line 2 - loop while stopping criteria not met
     for n_iter in range(1, maxiter + 1):
@@ -451,8 +471,9 @@ def _quadratic_assignment_faq(
         BR22 = B22 @ R.T
         b22a = (AR22 * B22.T[cols]).sum()
         b22b = (A22 * BR22[cols]).sum()
+        s = (S22 * R).sum()
         a = (AR22.T * BR22).sum()
-        b = b21 + b12 + b22a + b22b
+        b = b21 + b12 + b22a + b22b + s
         # critical point of ax^2 + bx + c is at x = -d/(2*e)
         # if a * obj_func_scalar > 0, it is a minimum
         # if minimum is not in [0, 1], only endpoints need to be considered
@@ -476,7 +497,7 @@ def _quadratic_assignment_faq(
     unshuffled_perm = np.zeros(n, dtype=int)
     unshuffled_perm[perm_A] = perm_B[perm]
 
-    score = _calc_score(A, B, unshuffled_perm)
+    score = _calc_score(A, B, S, unshuffled_perm)
 
     res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}
 

diff --git a/tests/test_match.py b/tests/test_match.py
@@ -6,7 +6,9 @@
 import math
 import random
 from graspologic.match import GraphMatch as GMP
-from graspologic.simulations import er_np
+from graspologic.simulations import er_np, sbm_corr
+from graspologic.embed import AdjacencySpectralEmbed
+from graspologic.align import SignFlips
 
 np.random.seed(0)
 
@@ -59,6 +61,22 @@ def test_SGM_inputs(self):
             GMP().fit(
                 np.identity(3), np.identity(3), -1 * np.arange(2), -1 * np.arange(2)
             )
+        with pytest.raises(ValueError):
+            GMP().fit(
+                np.random.random((4, 4)),
+                np.random.random((4, 4)),
+                np.arange(2),
+                np.arange(2),
+                np.random.random((3, 4)),
+            )
+        with pytest.raises(ValueError):
+            GMP().fit(
+                np.random.random((4, 4)),
+                np.random.random((4, 4)),
+                np.arange(2),
+                np.arange(2),
+                np.random.random((3, 3)),
+            )
 
     def _get_AB(self):
         # adjacency matrices from QAPLIB instance chr12c
@@ -140,3 +158,26 @@ def test_padding(self):
         res = gmp_adopted.fit(G1, G2)
 
         assert 1.0 == (sum(res.perm_inds_ == np.arange(n)) / n)
+
+    def test_sim(self):
+        n = 150
+        rho = 0.9
+        n_per_block = int(n / 3)
+        n_blocks = 3
+        block_members = np.array(n_blocks * [n_per_block])
+        block_probs = np.array(
+            [[0.2, 0.01, 0.01], [0.01, 0.1, 0.01], [0.01, 0.01, 0.2]]
+        )
+        directed = False
+        loops = False
+        A1, A2 = sbm_corr(
+            block_members, block_probs, rho, directed=directed, loops=loops
+        )
+        ase = AdjacencySpectralEmbed(n_components=3, algorithm="truncated")
+        x1 = ase.fit_transform(A1)
+        x2 = ase.fit_transform(A2)
+        xh1 = SignFlips().fit_transform(x1, x2)
+        S = xh1 @ x2.T
+        res = self.barygm.fit(A1, A2, S=S)
+
+        assert 0.7 <= (sum(res.perm_inds_ == np.arange(n)) / n)