Fix custom initialization handling for GraphMatch #737

Merged: 18 commits, Apr 1, 2021
42 changes: 20 additions & 22 deletions graspologic/match/gmp.py
@@ -2,12 +2,9 @@
# Licensed under the MIT License.

import numpy as np
import math
from scipy.optimize import linear_sum_assignment
from scipy.optimize import minimize_scalar
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.utils import column_or_1d
from sklearn.utils import check_array, column_or_1d

from .qap import quadratic_assignment


@@ -32,17 +29,23 @@ class GraphMatch(BaseEstimator):
the FAQ algorithm will undergo.

init : string (default = 'barycenter') or 2d-array
The initial position chosen

If 2d-array, `init` must be :math:`m' x m'`, where :math:`m' = n - m`,
and it must be doubly stochastic: each of its rows and columns must
sum to 1.
The initial position chosen.

"barycenter" : the non-informative “flat doubly stochastic matrix,”
:math:`J = 1 \\times 1^T / n`, i.e. the barycenter of the feasible region
:math:`J = 1 \\times 1^T / n`, i.e. the barycenter of the feasible region. This can
be thought of as the doubly stochastic matrix from which every permutation is
equally likely.

"rand" : some random point near :math:`J, (J+K)/2`, where K is some random
doubly stochastic matrix
doubly stochastic matrix.

If 2d-array, ``init`` must be :math:`m' \\times m'`, where :math:`m'` is the number of
nonseeded vertices of ``B``. This initial position represents a permutation or
assignment of the nonseeded vertices of ``B``, and thus must be doubly
stochastic (all of its rows and columns must sum to 1). Note that if using
seeds, this permutation/assignment is taken with respect to the nonseeded
vertices in the order in which they were input, even when
``shuffle_input = True``.

max_iter : int, positive (default = 30)
Integer specifying the max number of Frank-Wolfe iterations.
@@ -58,7 +61,7 @@ class GraphMatch(BaseEstimator):

gmp : bool (default = True)
Gives users the option to solve QAP rather than the Graph Matching Problem
(GMP). This is accomplished through trivial negation of the objective function.
(GMP). This is accomplished through a negation of the objective function.

padding : string (default = 'adopted')
Allows user to specify padding scheme if `A` and `B` are not of equal size.
@@ -116,19 +119,14 @@ def __init__(
else:
msg = '"n_init" must be a positive integer'
raise TypeError(msg)
if type(n_init) is int and n_init > 0:
self.n_init = n_init
else:
msg = '"n_init" must be a positive integer'
raise TypeError(msg)
if init == "rand":
if isinstance(init, np.ndarray):
self.init = init
elif init == "rand":
self.init = "randomized"
elif init == "barycenter":
self.init = "barycenter"
elif not isinstance(init, str):
self.init = init
else:
msg = 'Invalid "init_method" parameter string'
msg = 'Invalid "init" parameter string'
raise ValueError(msg)
if max_iter > 0 and type(max_iter) is int:
self.max_iter = max_iter
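For context on how the custom-initialization path documented in the `init` docstring above is meant to be used, here is a minimal sketch. The random graphs and seed value are illustrative assumptions; only the `GraphMatch` arguments and the `perm_inds_` attribute appearing in this diff are relied on:

```python
import numpy as np

from graspologic.match import GraphMatch

rng = np.random.default_rng(0)
n = 10
p_true = rng.permutation(n)

A = rng.integers(0, 2, size=(n, n))  # a random directed graph
B = A[np.ix_(p_true, p_true)]        # B is A with its vertices relabeled by p_true

# A permutation matrix is a valid custom init: it is doubly stochastic,
# since every row and every column sums to exactly 1.
perm_guess = rng.permutation(n)
custom_init = np.eye(n)[perm_guess]

gm = GraphMatch(n_init=1, init=custom_init)
gm.fit(A, B)
print(gm.perm_inds_)                 # recovered alignment of A onto B
```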
198 changes: 10 additions & 188 deletions graspologic/match/qap.py
@@ -3,12 +3,11 @@
# original code can be found here
# https://github.com/scipy/scipy/blob/master/scipy/optimize/_qap.py

import numpy as np
import operator
from scipy.optimize import linear_sum_assignment, OptimizeResult

import numpy as np
from scipy._lib._util import check_random_state
import itertools
from scipy.optimize import OptimizeResult, linear_sum_assignment


def quadratic_assignment(A, B, method="faq", options=None):
@@ -162,7 +161,7 @@ def quadratic_assignment(A, B, method="faq", options=None):
options = {}

method = method.lower()
methods = {"faq": _quadratic_assignment_faq, "2opt": _quadratic_assignment_2opt}
methods = {"faq": _quadratic_assignment_faq}
if method not in methods:
raise ValueError(f"method {method} must be in {methods}.")
res = methods[method](A, B, **options)
@@ -424,10 +423,15 @@ def _quadratic_assignment_faq(
# Sinkhorn balancing
K = _doubly_stochastic(K)
P = J * 0.5 + K * 0.5
else:
elif isinstance(P0, np.ndarray):
P0 = np.atleast_2d(P0)
_check_init_input(P0, n_unseed)
P = P0
invert_inds = np.argsort(nonseed_B)
perm_nonseed_B = np.argsort(invert_inds)
P = P0[:, perm_nonseed_B]
else:
msg = "`init` must either be of type str or np.ndarray."
raise TypeError(msg)

const_sum = A21 @ B21.T + A12.T @ B12

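A note on the double `argsort` introduced above: for any 1d array, `np.argsort(np.argsort(x))` gives the rank of each entry, i.e. the inverse of the permutation that sorts `x`. This is what lets a user-supplied `P0`, expressed over the nonseeded vertices of `B` in input order, be translated into the sorted internal ordering. A standalone illustration with made-up values:

```python
import numpy as np

nonseed_B = np.array([4, 0, 3])      # nonseeded vertices of B, in input order
invert_inds = np.argsort(nonseed_B)  # permutation that sorts nonseed_B -> [1, 2, 0]
ranks = np.argsort(invert_inds)      # rank of each vertex when sorted -> [2, 0, 1]

# Reordering the columns of a user-supplied P0 by `ranks` re-expresses it
# in the sorted vertex order that the solver uses internally.
P0 = np.eye(3)
P = P0[:, ranks]
```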
@@ -529,185 +533,3 @@ def _doubly_stochastic(P, tol=1e-3):
P_eps = r[:, None] * P * c

return P_eps
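For readers unfamiliar with the Sinkhorn balancing used to build the random doubly stochastic matrix `K` in the `"randomized"` branch above, here is a minimal standalone sketch. It is not the library's `_doubly_stochastic` (whose full body is elided from this diff), just the classic alternating row/column normalization:

```python
import numpy as np

def sinkhorn_sketch(P, tol=1e-3, max_iter=1000):
    """Alternately normalize rows and columns of a positive matrix
    until it is approximately doubly stochastic."""
    P = np.asarray(P, dtype=float)
    for _ in range(max_iter):
        P = P / P.sum(axis=1, keepdims=True)  # rows sum to 1
        P = P / P.sum(axis=0, keepdims=True)  # columns sum to 1
        if (np.abs(P.sum(axis=1) - 1) < tol).all():
            return P
    return P

rng = np.random.default_rng(0)
n = 5
K = sinkhorn_sketch(rng.uniform(size=(n, n)))  # random doubly stochastic matrix
J = np.ones((n, n)) / n                        # the barycenter
P0 = 0.5 * J + 0.5 * K                         # the "rand" initialization above
```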


def _quadratic_assignment_2opt(
A,
B,
maximize=False,
partial_match=None,
rng=None,
partial_guess=None,
**unknown_options,
):
r"""
Solve the quadratic assignment problem (approximately).
This function solves the Quadratic Assignment Problem (QAP) and the
Graph Matching Problem (GMP) using the 2-opt algorithm [3]_.
Quadratic assignment solves problems of the following form:

.. math::

    \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
    \mbox{s.t. } & {P \in \mathcal{P}}\\

where :math:`\mathcal{P}` is the set of all permutation matrices,
and :math:`A` and :math:`B` are square matrices.
Graph matching tries to *maximize* the same objective function.
This algorithm can be thought of as finding the alignment of the
nodes of two graphs that minimizes the number of induced edge
disagreements, or, in the case of weighted graphs, the sum of squared
edge weight differences.
Note that the quadratic assignment problem is NP-hard and
computationally intractable in general. Therefore, the results
given are approximations, not guaranteed to be exact solutions.
Parameters
----------
A : 2d-array, square
The square matrix :math:`A` in the objective function above.
B : 2d-array, square
The square matrix :math:`B` in the objective function above.
method : str in {'faq', '2opt'} (default: 'faq')
The algorithm used to solve the problem. This is the method-specific
documentation for '2opt'.
:ref:`'faq' <optimize.qap-faq>` is also available.
Options
-------
maximize : bool (default = False)
Setting `maximize` to ``True`` solves the Graph Matching Problem (GMP)
rather than the Quadratic Assignment Problem (QAP).
rng : {None, int, `~np.random.RandomState`, `~np.random.Generator`}
This parameter defines the object to use for drawing random
variates.
If `rng` is ``None`` the `~np.random.RandomState` singleton is
used.
If `rng` is an int, a new ``RandomState`` instance is used,
seeded with `rng`.
If `rng` is already a ``RandomState`` or ``Generator``
instance, then that object is used.
Default is None.
partial_match : 2d-array of integers, optional, (default = None)
Allows the user to fix part of the matching between the two
matrices. In the literature, a partial match is also known as a
"seed".
Each row of `partial_match` specifies the indices of a pair of
corresponding nodes, that is, node ``partial_match[i, 0]`` of `A` is
matched to node ``partial_match[i, 1]`` of `B`. Accordingly,
``partial_match`` is an array of size ``(m , 2)``, where ``m`` is
not greater than the number of nodes.
partial_guess : 2d-array of integers, optional, (default = None)
Allows the user to provide a guess for the matching between the two
matrices. Unlike `partial_match`, `partial_guess` does not fix the
indices; they are still free to be optimized.
Each row of `partial_guess` specifies the indices of a pair of
corresponding nodes, that is, node ``partial_guess[i, 0]`` of `A` is
matched to node ``partial_guess[i, 1]`` of `B`. Accordingly,
``partial_guess`` is an array of size ``(m , 2)``, where ``m`` is
less than or equal to the number of nodes.
Returns
-------
res : OptimizeResult
A :class:`scipy.optimize.OptimizeResult` containing the following
fields.
col_ind : 1-D array
An array of column indices corresponding with the best
permutation of the nodes of `B` found.
fun : float
The corresponding value of the objective function.
nit : int
The number of iterations performed during optimization.
Notes
-----
This is a greedy algorithm that works similarly to bubble sort: beginning
with an initial permutation, it iteratively swaps pairs of indices to
improve the objective function until no such improvements are possible.
References
----------
.. [3] "2-opt," Wikipedia.
https://en.wikipedia.org/wiki/2-opt
"""

_check_unknown_options(unknown_options)
rng = check_random_state(rng)
A, B, partial_match = _common_input_validation(A, B, partial_match)

N = len(A)
# check outlier cases
if N == 0 or partial_match.shape[0] == N:
score = _calc_score(A, B, partial_match[:, 1])
res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
return OptimizeResult(res)

if partial_guess is None:
partial_guess = np.array([[], []]).T
partial_guess = np.atleast_2d(partial_guess).astype(int)

msg = None
if partial_guess.shape[0] > A.shape[0]:
msg = "`partial_guess` can have only as " "many entries as there are nodes"
elif partial_guess.shape[1] != 2:
msg = "`partial_guess` must have two columns"
elif partial_guess.ndim != 2:
msg = "`partial_guess` must have exactly two dimensions"
elif (partial_guess < 0).any():
msg = "`partial_guess` must contain only positive indices"
elif (partial_guess >= len(A)).any():
msg = "`partial_guess` entries must be less than number of nodes"
elif not len(set(partial_guess[:, 0])) == len(partial_guess[:, 0]) or not len(
set(partial_guess[:, 1])
) == len(partial_guess[:, 1]):
msg = "`partial_guess` column entries must be unique"
if msg is not None:
raise ValueError(msg)

fixed_rows = None
if partial_match.size or partial_guess.size:
# use partial_match and partial_guess for initial permutation,
# but randomly permute the rest.
guess_rows = np.zeros(N, dtype=bool)
guess_cols = np.zeros(N, dtype=bool)
fixed_rows = np.zeros(N, dtype=bool)
fixed_cols = np.zeros(N, dtype=bool)
perm = np.zeros(N, dtype=int)

rg, cg = partial_guess.T
guess_rows[rg] = True
guess_cols[cg] = True
perm[guess_rows] = cg

# match overrides guess
rf, cf = partial_match.T
fixed_rows[rf] = True
fixed_cols[cf] = True
perm[fixed_rows] = cf

random_rows = ~fixed_rows & ~guess_rows
random_cols = ~fixed_cols & ~guess_cols
perm[random_rows] = rng.permutation(np.arange(N)[random_cols])
else:
perm = rng.permutation(np.arange(N))

best_score = _calc_score(A, B, perm)

i_free = np.arange(N)
if fixed_rows is not None:
i_free = i_free[~fixed_rows]

better = operator.gt if maximize else operator.lt
n_iter = 0
done = False
while not done:
# equivalent to nested for loops i in range(N), j in range(i, N)
for i, j in itertools.combinations_with_replacement(i_free, 2):
n_iter += 1
perm[i], perm[j] = perm[j], perm[i]
score = _calc_score(A, B, perm)
if better(score, best_score):
best_score = score
break
# faster to swap back than to create a new list every time
perm[i], perm[j] = perm[j], perm[i]
else: # no swaps made
done = True

res = {"col_ind": perm, "fun": best_score, "nit": n_iter}

return OptimizeResult(res)
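`_calc_score`, called throughout the 2-opt loop above, is not shown in this diff. Consistent with the objective in the docstring, :math:`\text{trace}(A^T P B P^T) = \sum_{i,j} A_{ij} B_{\pi(i)\pi(j)}`, a sketch of it would be:

```python
import numpy as np

def calc_score_sketch(A, B, perm):
    # trace(A^T P B P^T) equals the sum over (i, j) of A[i, j] * B[perm[i], perm[j]]
    return np.sum(A * B[perm][:, perm])
```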
40 changes: 40 additions & 0 deletions tests/test_match.py
@@ -140,3 +140,43 @@ def test_padding(self):
res = gmp_adopted.fit(G1, G2)

assert 1.0 == (sum(res.perm_inds_ == np.arange(n)) / n)

def test_custom_init(self):
A, B = self._get_AB()
n = len(A)
pi = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
custom_init = np.eye(n)
custom_init = custom_init[pi]

gm = GMP(n_init=1, init=custom_init, max_iter=30, shuffle_input=True, gmp=False)
gm.fit(A, B)

assert (gm.perm_inds_ == pi).all()
assert gm.score_ == 11156
# we had thought about doing the test
# `assert gm.n_iter_ == 1`
# but GM doesn't necessarily converge in 1 iteration here. Starting from the
# optimal permutation matrix is not the same as starting from the optimum over
# our convex relaxation (the doubly stochastic matrices); we do, however,
# recover the correct permutation after a small number of iterations.

def test_custom_init_seeds(self):
A, B = self._get_AB()
n = len(A)
pi_original = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
pi = np.array([5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1

# vertex 6 of B is seeded, so nonseeded B indices above 6 shift down by one
pi[pi > 6] -= 1

# seed vertex 0 in A to vertex 6 in B (vertex 7 in the 1-indexed arrays above)
seeds_A = [0]
seeds_B = [6]
custom_init = np.eye(n - 1)
custom_init = custom_init[pi]

gm = GMP(n_init=1, init=custom_init, max_iter=30, shuffle_input=True, gmp=False)
gm.fit(A, B, seeds_A=seeds_A, seeds_B=seeds_B)

assert (gm.perm_inds_ == pi_original).all()
assert gm.score_ == 11156
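To make the index bookkeeping in `test_custom_init_seeds` concrete: with vertex 6 of `B` seeded, the remaining 11 vertices of `B` are renumbered 0 through 10 by deleting index 6, so every original index above 6 shifts down by one; that is what `pi[pi > 6] -= 1` encodes. A quick standalone check of that relabeling (this mirrors the test's intent, not library code):

```python
import numpy as np

seeds_B = [6]
nonseed_B = np.setdiff1d(np.arange(12), seeds_B)  # [0, 1, 2, 3, 4, 5, 7, ..., 11]

# Each nonseeded vertex is relabeled to its position within nonseed_B.
relabel = {orig: new for new, orig in enumerate(nonseed_B)}
assert relabel[5] == 5 and relabel[7] == 6 and relabel[11] == 10
```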