Fix custom initialization handling for GraphMatch #737

Merged: 18 commits, Apr 1, 2021
42 changes: 20 additions & 22 deletions graspologic/match/gmp.py
@@ -2,12 +2,9 @@
# Licensed under the MIT License.

import numpy as np
import math
from scipy.optimize import linear_sum_assignment
from scipy.optimize import minimize_scalar
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.utils import column_or_1d
from sklearn.utils import check_array, column_or_1d

from .qap import quadratic_assignment


@@ -32,17 +29,23 @@ class GraphMatch(BaseEstimator):
the FAQ algorithm will undergo.

init : string (default = 'barycenter') or 2d-array
The initial position chosen

If 2d-array, `init` must be :math:`m' x m'`, where :math:`m' = n - m`,
and it must be doubly stochastic: each of its rows and columns must
sum to 1.
The initial position chosen.

"barycenter" : the non-informative “flat doubly stochastic matrix,”
:math:`J = 1 \\times 1^T / n`, i.e. the barycenter of the feasible region
:math:`J = 1 \\times 1^T / n`, i.e. the barycenter of the feasible region. This can
be thought of as the doubly stochastic matrix from which every permutation is
equally likely.

"rand" : some random point near :math:`J, (J+K)/2`, where K is some random
doubly stochastic matrix
doubly stochastic matrix.

If 2d-array, ``init`` must be :math:`m' \\times m'`, where :math:`m'` is the number of
nonseeded vertices of ``B``. This initial position represents a permutation or
assignment of the nonseeded vertices of ``B``, and thus must be doubly
stochastic (all of its rows and columns must sum to 1). Note that if using
seeds, this permutation/assignment is taken with respect to the nonseeded
vertices in the order in which they were input, even when
``shuffle_input = True``.

max_iter : int, positive (default = 30)
Integer specifying the max number of Frank-Wolfe iterations.
@@ -58,7 +61,7 @@ class GraphMatch(BaseEstimator):

gmp : bool (default = True)
Gives users the option to solve QAP rather than the Graph Matching Problem
(GMP). This is accomplished through trivial negation of the objective function.
(GMP). This is accomplished through a negation of the objective function.

padding : string (default = 'adopted')
Allows user to specify padding scheme if `A` and `B` are not of equal size.
@@ -116,19 +119,14 @@ def __init__(
else:
msg = '"n_init" must be a positive integer'
raise TypeError(msg)
if type(n_init) is int and n_init > 0:
self.n_init = n_init
else:
msg = '"n_init" must be a positive integer'
raise TypeError(msg)
if init == "rand":
if isinstance(init, np.ndarray):
self.init = init
elif init == "rand":
self.init = "randomized"
elif init == "barycenter":
self.init = "barycenter"
elif not isinstance(init, str):
self.init = init
else:
msg = 'Invalid "init_method" parameter string'
msg = 'Invalid "init" parameter string'
raise ValueError(msg)
if max_iter > 0 and type(max_iter) is int:
self.max_iter = max_iter
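For context on how the custom-initialization path documented in the `init` docstring above is meant to be used, here is a minimal sketch. The random graphs and seed value are illustrative assumptions; only the `GraphMatch` arguments and the `perm_inds_` attribute appearing in this diff are relied on:

```python
import numpy as np

from graspologic.match import GraphMatch

rng = np.random.default_rng(0)
n = 10
p_true = rng.permutation(n)

A = rng.integers(0, 2, size=(n, n))  # a random directed graph
B = A[np.ix_(p_true, p_true)]        # B is A with its vertices relabeled by p_true

# A permutation matrix is a valid custom init: it is doubly stochastic,
# since every row and every column sums to exactly 1.
perm_guess = rng.permutation(n)
custom_init = np.eye(n)[perm_guess]

gm = GraphMatch(n_init=1, init=custom_init)
gm.fit(A, B)
print(gm.perm_inds_)                 # recovered alignment of A onto B
```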
198 changes: 10 additions & 188 deletions graspologic/match/qap.py
@@ -3,12 +3,11 @@
# original code can be found here
# https://github.com/scipy/scipy/blob/master/scipy/optimize/_qap.py

import numpy as np
import operator
from scipy.optimize import linear_sum_assignment, OptimizeResult

import numpy as np
from scipy._lib._util import check_random_state
import itertools
from scipy.optimize import OptimizeResult, linear_sum_assignment


def quadratic_assignment(A, B, method="faq", options=None):
@@ -162,7 +161,7 @@ def quadratic_assignment(A, B, method="faq", options=None):
options = {}

method = method.lower()
methods = {"faq": _quadratic_assignment_faq, "2opt": _quadratic_assignment_2opt}
methods = {"faq": _quadratic_assignment_faq}
if method not in methods:
raise ValueError(f"method {method} must be in {methods}.")
res = methods[method](A, B, **options)
@@ -424,10 +423,15 @@ def _quadratic_assignment_faq(
# Sinkhorn balancing
K = _doubly_stochastic(K)
P = J * 0.5 + K * 0.5
else:
elif isinstance(P0, np.ndarray):
P0 = np.atleast_2d(P0)
_check_init_input(P0, n_unseed)
P = P0
invert_inds = np.argsort(nonseed_B)
perm_nonseed_B = np.argsort(invert_inds)
P = P0[:, perm_nonseed_B]
else:
msg = "`init` must either be of type str or np.ndarray."
raise TypeError(msg)

const_sum = A21 @ B21.T + A12.T @ B12

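A note on the double `argsort` introduced above: for any 1d array, `np.argsort(np.argsort(x))` gives the rank of each entry, i.e. the inverse of the permutation that sorts `x`. This is what lets a user-supplied `P0`, expressed over the nonseeded vertices of `B` in input order, be translated into the sorted internal ordering. A standalone illustration with made-up values:

```python
import numpy as np

nonseed_B = np.array([4, 0, 3])      # nonseeded vertices of B, in input order
invert_inds = np.argsort(nonseed_B)  # permutation that sorts nonseed_B -> [1, 2, 0]
ranks = np.argsort(invert_inds)      # rank of each vertex when sorted -> [2, 0, 1]

# Reordering the columns of a user-supplied P0 by `ranks` re-expresses it
# in the sorted vertex order that the solver uses internally.
P0 = np.eye(3)
P = P0[:, ranks]
```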
@@ -529,185 +533,3 @@ def _doubly_stochastic(P, tol=1e-3):
P_eps = r[:, None] * P * c

return P_eps
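For readers unfamiliar with the Sinkhorn balancing used to build the random doubly stochastic matrix `K` in the `"randomized"` branch above, here is a minimal standalone sketch. It is not the library's `_doubly_stochastic` (whose full body is elided from this diff), just the classic alternating row/column normalization:

```python
import numpy as np

def sinkhorn_sketch(P, tol=1e-3, max_iter=1000):
    """Alternately normalize rows and columns of a positive matrix
    until it is approximately doubly stochastic."""
    P = np.asarray(P, dtype=float)
    for _ in range(max_iter):
        P = P / P.sum(axis=1, keepdims=True)  # rows sum to 1
        P = P / P.sum(axis=0, keepdims=True)  # columns sum to 1
        if (np.abs(P.sum(axis=1) - 1) < tol).all():
            return P
    return P

rng = np.random.default_rng(0)
n = 5
K = sinkhorn_sketch(rng.uniform(size=(n, n)))  # random doubly stochastic matrix
J = np.ones((n, n)) / n                        # the barycenter
P0 = 0.5 * J + 0.5 * K                         # the "rand" initialization above
```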


def _quadratic_assignment_2opt(
A,
B,
maximize=False,
partial_match=None,
rng=None,
partial_guess=None,
**unknown_options,
):
r"""
Solve the quadratic assignment problem (approximately).
This function solves the Quadratic Assignment Problem (QAP) and the
Graph Matching Problem (GMP) using the 2-opt algorithm [3]_.
Quadratic assignment solves problems of the following form:

.. math::

    \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
    \mbox{s.t. } & {P \in \mathcal{P}}\\

where :math:`\mathcal{P}` is the set of all permutation matrices,
and :math:`A` and :math:`B` are square matrices.
Graph matching tries to *maximize* the same objective function.
This algorithm can be thought of as finding the alignment of the
nodes of two graphs that minimizes the number of induced edge
disagreements, or, in the case of weighted graphs, the sum of squared
edge weight differences.
Note that the quadratic assignment problem is NP-hard and
computationally intractable in general. Therefore, the results
given are approximations, not guaranteed to be exact solutions.
Parameters
----------
A : 2d-array, square
The square matrix :math:`A` in the objective function above.
B : 2d-array, square
The square matrix :math:`B` in the objective function above.
method : str in {'faq', '2opt'} (default: 'faq')
The algorithm used to solve the problem. This is the method-specific
documentation for '2opt'.
:ref:`'faq' <optimize.qap-faq>` is also available.
Options
-------
maximize : bool (default = False)
Setting `maximize` to ``True`` solves the Graph Matching Problem (GMP)
rather than the Quadratic Assignment Problem (QAP).
rng : {None, int, `~np.random.RandomState`, `~np.random.Generator`}
This parameter defines the object to use for drawing random
variates.
If `rng` is ``None`` the `~np.random.RandomState` singleton is
used.
If `rng` is an int, a new ``RandomState`` instance is used,
seeded with `rng`.
If `rng` is already a ``RandomState`` or ``Generator``
instance, then that object is used.
Default is None.
partial_match : 2d-array of integers, optional, (default = None)
Allows the user to fix part of the matching between the two
matrices. In the literature, a partial match is also known as a
"seed".
Each row of `partial_match` specifies the indices of a pair of
corresponding nodes, that is, node ``partial_match[i, 0]`` of `A` is
matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
``partial_match`` is an array of size ``(m , 2)``, where ``m`` is
not greater than the number of nodes.
partial_guess : 2d-array of integers, optional, (default = None)
Allows the user to provide a guess for the matching between the two
matrices. Unlike `partial_match`, `partial_guess` does not fix the
indices; they are still free to be optimized.
Each row of `partial_guess` specifies the indices of a pair of
corresponding nodes, that is, node ``partial_guess[i, 0]`` of `A` is
matched to node ``partial_guess[i, 1]`` of `B`. Accordingly,
``partial_guess`` is an array of size ``(m , 2)``, where ``m`` is
less than or equal to the number of nodes.
Returns
-------
res : OptimizeResult
A :class:`scipy.optimize.OptimizeResult` containing the following
fields.
col_ind : 1-D array
An array of column indices corresponding with the best
permutation of the nodes of `B` found.
fun : float
The corresponding value of the objective function.
nit : int
The number of iterations performed during optimization.
Notes
-----
This is a greedy algorithm that works similarly to bubble sort: beginning
with an initial permutation, it iteratively swaps pairs of indices to
improve the objective function until no such improvements are possible.
References
----------
.. [3] "2-opt," Wikipedia.
https://en.wikipedia.org/wiki/2-opt
"""

_check_unknown_options(unknown_options)
rng = check_random_state(rng)
A, B, partial_match = _common_input_validation(A, B, partial_match)

N = len(A)
# check outlier cases
if N == 0 or partial_match.shape[0] == N:
score = _calc_score(A, B, partial_match[:, 1])
res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
return OptimizeResult(res)

if partial_guess is None:
partial_guess = np.array([[], []]).T
partial_guess = np.atleast_2d(partial_guess).astype(int)

msg = None
if partial_guess.shape[0] > A.shape[0]:
msg = "`partial_guess` can have only as " "many entries as there are nodes"
elif partial_guess.shape[1] != 2:
msg = "`partial_guess` must have two columns"
elif partial_guess.ndim != 2:
msg = "`partial_guess` must have exactly two dimensions"
elif (partial_guess < 0).any():
msg = "`partial_guess` must contain only positive indices"
elif (partial_guess >= len(A)).any():
msg = "`partial_guess` entries must be less than number of nodes"
elif not len(set(partial_guess[:, 0])) == len(partial_guess[:, 0]) or not len(
set(partial_guess[:, 1])
) == len(partial_guess[:, 1]):
msg = "`partial_guess` column entries must be unique"
if msg is not None:
raise ValueError(msg)

fixed_rows = None
if partial_match.size or partial_guess.size:
# use partial_match and partial_guess for initial permutation,
# but randomly permute the rest.
guess_rows = np.zeros(N, dtype=bool)
guess_cols = np.zeros(N, dtype=bool)
fixed_rows = np.zeros(N, dtype=bool)
fixed_cols = np.zeros(N, dtype=bool)
perm = np.zeros(N, dtype=int)

rg, cg = partial_guess.T
guess_rows[rg] = True
guess_cols[cg] = True
perm[guess_rows] = cg

# match overrides guess
rf, cf = partial_match.T
fixed_rows[rf] = True
fixed_cols[cf] = True
perm[fixed_rows] = cf

random_rows = ~fixed_rows & ~guess_rows
random_cols = ~fixed_cols & ~guess_cols
perm[random_rows] = rng.permutation(np.arange(N)[random_cols])
else:
perm = rng.permutation(np.arange(N))

best_score = _calc_score(A, B, perm)

i_free = np.arange(N)
if fixed_rows is not None:
i_free = i_free[~fixed_rows]

better = operator.gt if maximize else operator.lt
n_iter = 0
done = False
while not done:
# equivalent to nested for loops i in range(N), j in range(i, N)
for i, j in itertools.combinations_with_replacement(i_free, 2):
n_iter += 1
perm[i], perm[j] = perm[j], perm[i]
score = _calc_score(A, B, perm)
if better(score, best_score):
best_score = score
break
# faster to swap back than to create a new list every time
perm[i], perm[j] = perm[j], perm[i]
else: # no swaps made
done = True

res = {"col_ind": perm, "fun": best_score, "nit": n_iter}

return OptimizeResult(res)
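`_calc_score`, called throughout the 2-opt loop above, is not shown in this diff. Consistent with the objective in the docstring, :math:`\text{trace}(A^T P B P^T) = \sum_{i,j} A_{ij} B_{\pi(i)\pi(j)}`, a sketch of it would be:

```python
import numpy as np

def calc_score_sketch(A, B, perm):
    # trace(A^T P B P^T) equals the sum over (i, j) of A[i, j] * B[perm[i], perm[j]]
    return np.sum(A * B[perm][:, perm])
```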
40 changes: 40 additions & 0 deletions tests/test_match.py
@@ -140,3 +140,43 @@ def test_padding(self):
res = gmp_adopted.fit(G1, G2)

assert 1.0 == (sum(res.perm_inds_ == np.arange(n)) / n)

def test_custom_init(self):
A, B = self._get_AB()
n = len(A)
pi = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
custom_init = np.eye(n)
custom_init = custom_init[pi]

gm = GMP(n_init=1, init=custom_init, max_iter=30, shuffle_input=True, gmp=False)
gm.fit(A, B)

assert (gm.perm_inds_ == pi).all()
assert gm.score_ == 11156
# we had thought about doing the test
# `assert gm.n_iter_ == 1`
# but GM doesn't necessarily converge in 1 iteration here. Starting from the
# optimal permutation matrix is not the same as starting from the optimum over
# our convex relaxation (the doubly stochastic matrices); we do, however,
# recover the correct permutation after a small number of iterations.

def test_custom_init_seeds(self):
A, B = self._get_AB()
n = len(A)
pi_original = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
pi = np.array([5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1

# vertex 6 of B is seeded, so nonseeded B indices above 6 shift down by one
pi[pi > 6] -= 1

# seed vertex 0 in A to vertex 6 in B (vertex 7 in the 1-indexed arrays above)
seeds_A = [0]
seeds_B = [6]
custom_init = np.eye(n - 1)
custom_init = custom_init[pi]

gm = GMP(n_init=1, init=custom_init, max_iter=30, shuffle_input=True, gmp=False)
gm.fit(A, B, seeds_A=seeds_A, seeds_B=seeds_B)

assert (gm.perm_inds_ == pi_original).all()
assert gm.score_ == 11156
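To make the index bookkeeping in `test_custom_init_seeds` concrete: with vertex 6 of `B` seeded, the remaining 11 vertices of `B` are renumbered 0 through 10 by deleting index 6, so every original index above 6 shifts down by one; that is what `pi[pi > 6] -= 1` encodes. A quick standalone check of that relabeling (this mirrors the test's intent, not library code):

```python
import numpy as np

seeds_B = [6]
nonseed_B = np.setdiff1d(np.arange(12), seeds_B)  # [0, 1, 2, 3, 4, 5, 7, ..., 11]

# Each nonseeded vertex is relabeled to its position within nonseed_B.
relabel = {orig: new for new, orig in enumerate(nonseed_B)}
assert relabel[5] == 5 and relabel[7] == 6 and relabel[11] == 10
```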