dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)

Deps like these: || ( foo bar ) || ( bar baz ) Translate to disjunctive normal form (DNF): || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) ) Using DNF, if none of the packages are currently installed, then the ( bar bar ) choice will be automatically preferred since it is satisfied by the fewest number of packages. If the ( foo baz ) choice is already satisfied, then that choice will be preferred instead. Since DNF results in exponential explosion of the formula, only use DNF for the parts of the dependencies that have overlapping atoms. In order to simplify the implementation of the dnf_convert function, this patch also fixes _expand_new_virtuals to normalize results in the same way as use_reduce (with no redundant nested lists). Bug: https://bugs.gentoo.org/632026 Reviewed-by: Manuel Rüger <mrueg@gentoo.org> Reviewed-by: Alec Warner <antarus@gentoo.org>
gentoo · Nov 14, 2017 · 9fdaf9b · 9fdaf9b
1 parent 0936d87
commit 9fdaf9b
Show file tree

Hide file tree

Showing 5 changed files with 440 additions and 7 deletions.
diff --git a/pym/portage/dep/_dnf.py b/pym/portage/dep/_dnf.py
@@ -0,0 +1,90 @@
+# Copyright 2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from __future__ import unicode_literals
+
+import itertools
+
+
+def dnf_convert(dep_struct):
+	"""
+	Convert dep_struct to disjunctive normal form (DNF), where dep_struct
+	is either a conjunction or disjunction of the form produced by
+	use_reduce(opconvert=True).
+	"""
+	# Normalize input to have a top-level conjunction.
+	if isinstance(dep_struct, list):
+		if dep_struct and dep_struct[0] == '||':
+			dep_struct = [dep_struct]
+	else:
+		dep_struct = [dep_struct]
+
+	conjunction = []
+	disjunctions = []
+
+	for x in dep_struct:
+		if isinstance (x, list):
+			assert x and x[0] == '||', \
+				'Normalization error, nested conjunction found in %s' % (dep_struct,)
+			if any(isinstance(element, list) for element in x):
+				x_dnf = ['||']
+				for element in x[1:]:
+					if isinstance(element, list):
+						# Due to normalization, a disjunction must not be
+						# nested directly in another disjunction, so this
+						# must be a conjunction.
+						assert element, 'Normalization error, empty conjunction found in %s' % (x,)
+						assert element[0] != '||', \
+							'Normalization error, nested disjunction found in %s' % (x,)
+						element = dnf_convert(element)
+						if contains_disjunction(element):
+							assert (len(element) == 1 and
+								element[0] and element[0][0] == '||'), \
+								'Normalization error, expected single disjunction in %s' % (element,)
+							x_dnf.extend(element[0][1:])
+						else:
+							x_dnf.append(element)
+					else:
+						x_dnf.append(element)
+				x = x_dnf
+			disjunctions.append(x)
+		else:
+			conjunction.append(x)
+
+	if disjunctions and (conjunction or len(disjunctions) > 1):
+		dnf_form = ['||']
+		for x in itertools.product(*[x[1:] for x in disjunctions]):
+			normalized = conjunction[:]
+			for element in x:
+				if isinstance(element, list):
+					normalized.extend(element)
+				else:
+					normalized.append(element)
+			dnf_form.append(normalized)
+		result = [dnf_form]
+	else:
+		result = conjunction + disjunctions
+
+	return result
+
+
+def contains_disjunction(dep_struct):
+	"""
+	Search for a disjunction contained in dep_struct, where dep_struct
+	is either a conjunction or disjunction of the form produced by
+	use_reduce(opconvert=True). If dep_struct is a disjunction, then
+	this only returns True if there is a nested disjunction. Due to
+	normalization, recursion is only needed when dep_struct is a
+	disjunction containing a conjunction. If dep_struct is a conjunction,
+	then it is assumed that normalization has elevated any nested
+	disjunctions to the top-level.
+	"""
+	is_disjunction = dep_struct and dep_struct[0] == '||'
+	for x in dep_struct:
+		if isinstance(x, list):
+			assert x, 'Normalization error, empty conjunction found in %s' % (dep_struct,)
+			if x[0] == '||':
+				return True
+			elif is_disjunction and contains_disjunction(x):
+				return True
+	return False
diff --git a/pym/portage/dep/dep_check.py b/pym/portage/dep/dep_check.py
@@ -6,14 +6,20 @@
 __all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']
 
 import collections
+import itertools
 import logging
 import operator
 
 import portage
 from portage.dep import Atom, match_from_list, use_reduce
+from portage.dep._dnf import (
+	dnf_convert as _dnf_convert,
+	contains_disjunction as _contains_disjunction,
+)
 from portage.exception import InvalidDependString, ParseError
 from portage.localization import _
 from portage.util import writemsg, writemsg_level
+from portage.util.digraph import digraph
 from portage.util.SlotObject import SlotObject
 from portage.versions import vercmp, _pkg_str
 
@@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
 	atom because it wouldn't necessarily make sense to block all the components
 	of a compound virtual.  When more than one new-style virtual is matched,
 	the matches are sorted from highest to lowest versions and the atom is
-	expanded to || ( highest match ... lowest match )."""
+	expanded to || ( highest match ... lowest match ).
+
+	The result is normalized in the same way as use_reduce, having a top-level
+	conjuction, and no redundant nested lists.
+	"""
 	newsplit = []
 	mytrees = trees[myroot]
 	portdb = mytrees["porttree"].dbapi
@@ -54,14 +64,38 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
 		portdb = trees[myroot]["bintree"].dbapi
 	pprovideddict = mysettings.pprovideddict
 	myuse = kwargs["myuse"]
+	is_disjunction = mysplit and mysplit[0] == '||'
 	for x in mysplit:
 		if x == "||":
 			newsplit.append(x)
 			continue
 		elif isinstance(x, list):
-			newsplit.append(_expand_new_virtuals(x, edebug, mydbapi,
+			assert x, 'Normalization error, empty conjunction found in %s' % (mysplit,)
+			if is_disjunction:
+				assert x[0] != '||', \
+					'Normalization error, nested disjunction found in %s' % (mysplit,)
+			else:
+				assert x[0] == '||', \
+					'Normalization error, nested conjunction found in %s' % (mysplit,)
+			x_exp = _expand_new_virtuals(x, edebug, mydbapi,
 				mysettings, myroot=myroot, trees=trees, use_mask=use_mask,
-				use_force=use_force, **kwargs))
+				use_force=use_force, **kwargs)
+			if is_disjunction:
+				if len(x_exp) == 1:
+					x = x_exp[0]
+					if isinstance(x, list):
+						# Due to normalization, a conjunction must not be
+						# nested directly in another conjunction, so this
+						# must be a disjunction.
+						assert x and x[0] == '||', \
+							'Normalization error, nested conjunction found in %s' % (x_exp,)
+						newsplit.extend(x[1:])
+					else:
+						newsplit.append(x)
+				else:
+					newsplit.append(x_exp)
+			else:
+				newsplit.extend(x_exp)
 			continue
 
 		if not isinstance(x, Atom):
@@ -101,6 +135,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
 					a.append(Atom(x.replace(x.cp, y.cp, 1)))
 				if not a:
 					newsplit.append(x)
+				elif is_disjunction:
+					newsplit.extend(a)
 				elif len(a) == 1:
 					newsplit.append(a[0])
 				else:
@@ -218,11 +254,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
 			newsplit.append(x)
 			if atom_graph is not None:
 				atom_graph.add((x, id(x)), graph_parent)
+		elif is_disjunction:
+			newsplit.extend(a)
 		elif len(a) == 1:
-			newsplit.append(a[0])
+			newsplit.extend(a[0])
 		else:
 			newsplit.append(['||'] + a)
 
+	# For consistency with related functions like use_reduce, always
+	# normalize the result to have a top-level conjunction.
+	if is_disjunction:
+		newsplit = [newsplit]
+
 	return newsplit
 
 def dep_eval(deplist):
@@ -612,9 +655,9 @@ def mydbapi_match_pkgs(atom):
 	for choices in choice_bins:
 		if len(choices) < 2:
 			continue
-		# Prefer choices with all_installed_slots for bug #480736.
-		choices.sort(key=operator.attrgetter('all_installed_slots'),
-			reverse=True)
+		# Prefer choices with all_installed_slots for bug #480736, and
+		# choices with a smaller number of packages for bug #632026.
+		choices.sort(key=lambda x: (not x.all_installed_slots, len(x.slot_map)))
 		for choice_1 in choices[1:]:
 			cps = set(choice_1.cp_map)
 			for choice_2 in choices:
@@ -741,6 +784,9 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
 	except ParseError as e:
 		return [0, "%s" % (e,)]
 
+	if mysettings.local_config: # if not repoman
+		mysplit = _overlap_dnf(mysplit)
+
 	mysplit2 = dep_wordreduce(mysplit,
 		mysettings, mydbapi, mode, use_cache=use_cache)
 	if mysplit2 is None:
@@ -755,6 +801,82 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
 
 	return [1, selected_atoms]
 
+
+def _overlap_dnf(dep_struct):
+	"""
+	Combine overlapping || groups using disjunctive normal form (DNF), in
+	order to minimize the number of packages chosen to satisfy cases like
+	"|| ( foo bar ) || ( bar baz )" as in bug #632026. Non-overlapping
+	groups are excluded from the conversion, since DNF leads to exponential
+	explosion of the formula.
+	"""
+	if not _contains_disjunction(dep_struct):
+		return dep_struct
+
+	# map atom.cp to disjunctions
+	cp_map = collections.defaultdict(list)
+	# graph atom.cp, with edges connecting atoms in the same disjunction
+	overlap_graph = digraph()
+	# map id(disjunction) to index in dep_struct, for deterministic output
+	order_map = {}
+	order_key = lambda x: order_map[id(x)]
+	result = []
+	for i, x in enumerate(dep_struct):
+		if isinstance(x, list):
+			assert x and x[0] == '||', \
+				'Normalization error, nested conjunction found in %s' % (dep_struct,)
+			order_map[id(x)] = i
+			prev_cp = None
+			for atom in _iter_flatten(x):
+				if isinstance(atom, Atom) and not atom.blocker:
+					cp_map[atom.cp].append(x)
+					overlap_graph.add(atom.cp, parent=prev_cp)
+					prev_cp = atom.cp
+			if prev_cp is None: # only contains blockers
+				result.append(x)
+		else:
+			result.append(x)
+
+	# group together disjunctions having atom.cp overlap
+	traversed = set()
+	for cp in overlap_graph:
+		if cp in traversed:
+			continue
+		disjunctions = {}
+		stack = [cp]
+		while stack:
+			cp = stack.pop()
+			traversed.add(cp)
+			for x in cp_map[cp]:
+				disjunctions[id(x)] = x
+			for other_cp in itertools.chain(overlap_graph.child_nodes(cp),
+				overlap_graph.parent_nodes(cp)):
+				if other_cp not in traversed:
+					stack.append(other_cp)
+
+		if len(disjunctions) > 1:
+			# convert overlapping disjunctions to DNF
+			result.extend(_dnf_convert(
+				sorted(disjunctions.values(), key=order_key)))
+		else:
+			# pass through non-overlapping disjunctions
+			result.append(disjunctions.popitem()[1])
+
+	return result
+
+
+def _iter_flatten(dep_struct):
+	"""
+	Yield nested elements of dep_struct.
+	"""
+	for x in dep_struct:
+		if isinstance(x, list):
+			for x in _iter_flatten(x):
+				yield x
+		else:
+			yield x
+
+
 def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
 	"Reduces the deplist to ones and zeros"
 	deplist=mydeplist[:]

diff --git a/pym/portage/tests/dep/test_dnf_convert.py b/pym/portage/tests/dep/test_dnf_convert.py
@@ -0,0 +1,48 @@
+# Copyright 2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from portage.tests import TestCase
+from portage.dep import use_reduce
+from portage.dep._dnf import dnf_convert
+
+class DNFConvertTestCase(TestCase):
+
+	def testDNFConvert(self):
+
+		test_cases = (
+			(
+				'|| ( A B ) || ( C D )',
+				[['||', ['A', 'C'], ['A', 'D'], ['B', 'C'], ['B', 'D']]],
+			),
+			(
+				'|| ( A B ) || ( B C )',
+				[['||', ['A', 'B'], ['A', 'C'], ['B', 'B'], ['B', 'C']]],
+			),
+			(
+				'|| ( A ( B C D ) )',
+				[['||', 'A', ['B', 'C', 'D']]],
+			),
+			(
+				'|| ( A ( B C D ) ) E',
+				[['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
+			),
+			(
+				'|| ( A ( B C ) ) || ( D E ) F',
+				[['||', ['F', 'A', 'D'], ['F', 'A', 'E'], ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
+			),
+			(
+				'|| ( A ( B C || ( D E ) ) ( F G ) H )',
+				[['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], ['F', 'G'], 'H']],
+			),
+			(
+				'|| ( A ( B C || ( D E ) ) F )',
+				[['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], 'F']],
+			),
+			(
+				'|| ( A ( C || ( D E ) || ( F G ) ) H )',
+				[['||', 'A', ['C', 'D', 'F'], ['C', 'D', 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
+			),
+		)
+
+		for dep_str, result in test_cases:
+			self.assertEqual(dnf_convert(use_reduce(dep_str, opconvert=True)), result)
diff --git a/pym/portage/tests/dep/test_overlap_dnf.py b/pym/portage/tests/dep/test_overlap_dnf.py
@@ -0,0 +1,28 @@
+# Copyright 2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from portage.tests import TestCase
+from portage.dep import Atom, use_reduce
+from portage.dep.dep_check import _overlap_dnf
+
+class OverlapDNFTestCase(TestCase):
+
+	def testOverlapDNF(self):
+
+		test_cases = (
+			(
+				'|| ( cat/A cat/B ) cat/E || ( cat/C cat/D )',
+				['cat/E', ['||', 'cat/A', 'cat/B'], ['||', 'cat/C', 'cat/D']],
+			),
+			(
+				'|| ( cat/A cat/B ) cat/D || ( cat/B cat/C )',
+				['cat/D', ['||', ['cat/A', 'cat/B'], ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
+			),
+			(
+				'|| ( cat/A cat/B ) || ( cat/C cat/D )  || ( ( cat/B cat/E ) cat/F )',
+				[['||', ['cat/A', 'cat/B', 'cat/E'], ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']], ['||', 'cat/C', 'cat/D']],
+			),
+		)
+
+		for dep_str, result in test_cases:
+			self.assertEqual(_overlap_dnf(use_reduce(dep_str, token_class=Atom, opconvert=True)), result)