Skip to content

Commit

Permalink
dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)
Browse files Browse the repository at this point in the history
Deps like these:

  || ( foo bar ) || ( bar baz )

Translate to disjunctive normal form (DNF):

  || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) )

Using DNF, if none of the packages are currently installed,
then the ( bar bar ) choice will be automatically preferred
since it is satisfied by the fewest packages (bar alone).
If the ( foo baz ) choice is already satisfied, then that
choice will be preferred instead.

Since DNF results in exponential explosion of the formula,
only use DNF for the parts of the dependencies that have
overlapping atoms.

In order to simplify the implementation of the dnf_convert
function, this patch also fixes _expand_new_virtuals to
normalize results in the same way as use_reduce (with no
redundant nested lists).

Bug: https://bugs.gentoo.org/632026
Reviewed-by: Manuel Rüger <mrueg@gentoo.org>
Reviewed-by: Alec Warner <antarus@gentoo.org>
  • Loading branch information
zmedico committed Nov 14, 2017
1 parent 0936d87 commit 9fdaf9b
Show file tree
Hide file tree
Showing 5 changed files with 440 additions and 7 deletions.
90 changes: 90 additions & 0 deletions pym/portage/dep/_dnf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

from __future__ import unicode_literals

import itertools


def dnf_convert(dep_struct):
    """
    Rewrite dep_struct in disjunctive normal form (DNF).

    dep_struct is a conjunction or a disjunction shaped like the output
    of use_reduce(opconvert=True): a disjunction is a list whose first
    element is '||', any other list is a conjunction, and a non-list
    value is treated as a single atom.

    The return value is always a list representing a top-level
    conjunction. When the input holds at least one disjunction together
    with either plain atoms or further disjunctions, the result is a
    one-element list containing a single '||' list whose alternatives
    are flat conjunctions. Otherwise the plain atoms are returned,
    followed by the (at most one) disjunction.

    Violations of the expected normalization raise AssertionError.
    """
    # Normalize input to have a top-level conjunction.
    if not isinstance(dep_struct, list):
        dep_struct = [dep_struct]
    elif dep_struct and dep_struct[0] == '||':
        dep_struct = [dep_struct]

    atoms = []
    choice_groups = []

    for node in dep_struct:
        if not isinstance(node, list):
            atoms.append(node)
            continue

        assert node and node[0] == '||', \
            'Normalization error, nested conjunction found in %s' % (dep_struct,)

        if any(isinstance(member, list) for member in node):
            flattened = ['||']
            for member in node[1:]:
                if not isinstance(member, list):
                    flattened.append(member)
                    continue

                # Due to normalization, a disjunction must not be
                # nested directly in another disjunction, so this
                # must be a conjunction.
                assert member, 'Normalization error, empty conjunction found in %s' % (node,)
                assert member[0] != '||', \
                    'Normalization error, nested disjunction found in %s' % (node,)

                converted = dnf_convert(member)
                if contains_disjunction(converted):
                    assert (len(converted) == 1 and
                        converted[0] and converted[0][0] == '||'), \
                        'Normalization error, expected single disjunction in %s' % (converted,)
                    # Splice the alternatives of the nested DNF into
                    # this disjunction.
                    flattened.extend(converted[0][1:])
                else:
                    flattened.append(converted)
            node = flattened

        choice_groups.append(node)

    # Nothing to distribute: either there is no disjunction at all, or
    # there is exactly one and no atoms to combine it with.
    if not choice_groups or (not atoms and len(choice_groups) <= 1):
        return atoms + choice_groups

    # Distribute: each combination picks one alternative per disjunction
    # and is prefixed with the atoms that are always required.
    dnf_form = ['||']
    for combo in itertools.product(*(group[1:] for group in choice_groups)):
        merged = list(atoms)
        for part in combo:
            if isinstance(part, list):
                merged.extend(part)
            else:
                merged.append(part)
        dnf_form.append(merged)

    return [dnf_form]


def contains_disjunction(dep_struct):
    """
    Report whether dep_struct has a disjunction nested inside it.

    dep_struct is a conjunction or disjunction shaped like the output of
    use_reduce(opconvert=True). A disjunction passed in directly does
    not count by itself; only a '||' list found among its elements (or
    deeper) yields True. Recursion is performed only while inside a
    disjunction, to look into its nested conjunctions. For a conjunction
    it is assumed that normalization has already lifted every nested
    disjunction to the top level, so only direct children are examined.

    Raises AssertionError if an empty nested list is encountered.
    """
    inside_disjunction = bool(dep_struct) and dep_struct[0] == '||'
    for child in dep_struct:
        if not isinstance(child, list):
            continue
        assert child, 'Normalization error, empty conjunction found in %s' % (dep_struct,)
        if child[0] == '||' or (inside_disjunction and contains_disjunction(child)):
            return True
    return False
136 changes: 129 additions & 7 deletions pym/portage/dep/dep_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@
__all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']

import collections
import itertools
import logging
import operator

import portage
from portage.dep import Atom, match_from_list, use_reduce
from portage.dep._dnf import (
dnf_convert as _dnf_convert,
contains_disjunction as _contains_disjunction,
)
from portage.exception import InvalidDependString, ParseError
from portage.localization import _
from portage.util import writemsg, writemsg_level
from portage.util.digraph import digraph
from portage.util.SlotObject import SlotObject
from portage.versions import vercmp, _pkg_str

Expand All @@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
atom because it wouldn't necessarily make sense to block all the components
of a compound virtual. When more than one new-style virtual is matched,
the matches are sorted from highest to lowest versions and the atom is
expanded to || ( highest match ... lowest match )."""
expanded to || ( highest match ... lowest match ).
The result is normalized in the same way as use_reduce, having a top-level
conjunction, and no redundant nested lists.
"""
newsplit = []
mytrees = trees[myroot]
portdb = mytrees["porttree"].dbapi
Expand All @@ -54,14 +64,38 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
portdb = trees[myroot]["bintree"].dbapi
pprovideddict = mysettings.pprovideddict
myuse = kwargs["myuse"]
is_disjunction = mysplit and mysplit[0] == '||'
for x in mysplit:
if x == "||":
newsplit.append(x)
continue
elif isinstance(x, list):
newsplit.append(_expand_new_virtuals(x, edebug, mydbapi,
assert x, 'Normalization error, empty conjunction found in %s' % (mysplit,)
if is_disjunction:
assert x[0] != '||', \
'Normalization error, nested disjunction found in %s' % (mysplit,)
else:
assert x[0] == '||', \
'Normalization error, nested conjunction found in %s' % (mysplit,)
x_exp = _expand_new_virtuals(x, edebug, mydbapi,
mysettings, myroot=myroot, trees=trees, use_mask=use_mask,
use_force=use_force, **kwargs))
use_force=use_force, **kwargs)
if is_disjunction:
if len(x_exp) == 1:
x = x_exp[0]
if isinstance(x, list):
# Due to normalization, a conjunction must not be
# nested directly in another conjunction, so this
# must be a disjunction.
assert x and x[0] == '||', \
'Normalization error, nested conjunction found in %s' % (x_exp,)
newsplit.extend(x[1:])
else:
newsplit.append(x)
else:
newsplit.append(x_exp)
else:
newsplit.extend(x_exp)
continue

if not isinstance(x, Atom):
Expand Down Expand Up @@ -101,6 +135,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
a.append(Atom(x.replace(x.cp, y.cp, 1)))
if not a:
newsplit.append(x)
elif is_disjunction:
newsplit.extend(a)
elif len(a) == 1:
newsplit.append(a[0])
else:
Expand Down Expand Up @@ -218,11 +254,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
newsplit.append(x)
if atom_graph is not None:
atom_graph.add((x, id(x)), graph_parent)
elif is_disjunction:
newsplit.extend(a)
elif len(a) == 1:
newsplit.append(a[0])
newsplit.extend(a[0])
else:
newsplit.append(['||'] + a)

# For consistency with related functions like use_reduce, always
# normalize the result to have a top-level conjunction.
if is_disjunction:
newsplit = [newsplit]

return newsplit

def dep_eval(deplist):
Expand Down Expand Up @@ -612,9 +655,9 @@ def mydbapi_match_pkgs(atom):
for choices in choice_bins:
if len(choices) < 2:
continue
# Prefer choices with all_installed_slots for bug #480736.
choices.sort(key=operator.attrgetter('all_installed_slots'),
reverse=True)
# Prefer choices with all_installed_slots for bug #480736, and
# choices with a smaller number of packages for bug #632026.
choices.sort(key=lambda x: (not x.all_installed_slots, len(x.slot_map)))
for choice_1 in choices[1:]:
cps = set(choice_1.cp_map)
for choice_2 in choices:
Expand Down Expand Up @@ -741,6 +784,9 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
except ParseError as e:
return [0, "%s" % (e,)]

if mysettings.local_config: # if not repoman
mysplit = _overlap_dnf(mysplit)

mysplit2 = dep_wordreduce(mysplit,
mysettings, mydbapi, mode, use_cache=use_cache)
if mysplit2 is None:
Expand All @@ -755,6 +801,82 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,

return [1, selected_atoms]


def _overlap_dnf(dep_struct):
    """
    Merge overlapping || groups by converting them to disjunctive normal
    form (DNF), so that cases like "|| ( foo bar ) || ( bar baz )" (bug
    #632026) can be satisfied with fewer packages. Groups that share no
    atom.cp with any other group are passed through unconverted, because
    DNF makes the formula grow exponentially.

    dep_struct is expected to be a top-level conjunction; every nested
    list must be a '||' disjunction, otherwise AssertionError is raised.
    If dep_struct contains no disjunction it is returned unchanged (the
    same object). Otherwise a new list is returned: plain elements and
    blocker-only disjunctions first, in their original order, followed
    by one entry per group of connected disjunctions.
    """
    if not _contains_disjunction(dep_struct):
        return dep_struct

    # atom.cp -> disjunctions mentioning that cp
    groups_by_cp = collections.defaultdict(list)
    # Graph of atom.cp values; consecutive atoms of one disjunction are
    # linked, so connected cps identify overlapping disjunctions.
    cp_graph = digraph()
    # id(disjunction) -> index in dep_struct, for deterministic output
    position = {}
    merged = []

    for index, node in enumerate(dep_struct):
        if not isinstance(node, list):
            merged.append(node)
            continue

        assert node and node[0] == '||', \
            'Normalization error, nested conjunction found in %s' % (dep_struct,)
        position[id(node)] = index

        last_cp = None
        for token in _iter_flatten(node):
            if not isinstance(token, Atom) or token.blocker:
                continue
            groups_by_cp[token.cp].append(node)
            cp_graph.add(token.cp, parent=last_cp)
            last_cp = token.cp

        if last_cp is None:
            # Only blockers inside: nothing to overlap with, keep as-is.
            merged.append(node)

    # Walk each connected component of the cp graph and collect the
    # disjunctions that belong to it.
    visited = set()
    for start_cp in cp_graph:
        if start_cp in visited:
            continue

        component = {}
        pending = [start_cp]
        while pending:
            current = pending.pop()
            visited.add(current)
            for group in groups_by_cp[current]:
                component[id(group)] = group
            neighbours = itertools.chain(
                cp_graph.child_nodes(current),
                cp_graph.parent_nodes(current))
            pending.extend(
                other for other in neighbours if other not in visited)

        if len(component) > 1:
            # Overlapping disjunctions: combine them through DNF, fed in
            # their original dep_struct order.
            ordered = sorted(component.values(),
                key=lambda group: position[id(group)])
            merged.extend(_dnf_convert(ordered))
        else:
            # A lone disjunction is passed through untouched.
            merged.append(component.popitem()[1])

    return merged


def _iter_flatten(dep_struct):
"""
Yield nested elements of dep_struct.
"""
for x in dep_struct:
if isinstance(x, list):
for x in _iter_flatten(x):
yield x
else:
yield x


def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
"Reduces the deplist to ones and zeros"
deplist=mydeplist[:]
Expand Down
48 changes: 48 additions & 0 deletions pym/portage/tests/dep/test_dnf_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

from portage.tests import TestCase
from portage.dep import use_reduce
from portage.dep._dnf import dnf_convert

class DNFConvertTestCase(TestCase):
    """Check dnf_convert against hand-written DNF results."""

    def testDNFConvert(self):
        """
        Parse each dependency string with use_reduce(opconvert=True) and
        compare the dnf_convert output with the expected structure.
        """
        # Pairs of (dependency string, expected dnf_convert result).
        expectations = (
            (
                '|| ( A B ) || ( C D )',
                [['||', ['A', 'C'], ['A', 'D'], ['B', 'C'], ['B', 'D']]],
            ),
            (
                '|| ( A B ) || ( B C )',
                [['||', ['A', 'B'], ['A', 'C'], ['B', 'B'], ['B', 'C']]],
            ),
            (
                '|| ( A ( B C D ) )',
                [['||', 'A', ['B', 'C', 'D']]],
            ),
            (
                '|| ( A ( B C D ) ) E',
                [['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
            ),
            (
                '|| ( A ( B C ) ) || ( D E ) F',
                [['||', ['F', 'A', 'D'], ['F', 'A', 'E'], ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
            ),
            (
                '|| ( A ( B C || ( D E ) ) ( F G ) H )',
                [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], ['F', 'G'], 'H']],
            ),
            (
                '|| ( A ( B C || ( D E ) ) F )',
                [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], 'F']],
            ),
            (
                '|| ( A ( C || ( D E ) || ( F G ) ) H )',
                [['||', 'A', ['C', 'D', 'F'], ['C', 'D', 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
            ),
        )

        for dep_string, expected in expectations:
            parsed = use_reduce(dep_string, opconvert=True)
            self.assertEqual(dnf_convert(parsed), expected)
28 changes: 28 additions & 0 deletions pym/portage/tests/dep/test_overlap_dnf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

from portage.tests import TestCase
from portage.dep import Atom, use_reduce
from portage.dep.dep_check import _overlap_dnf

class OverlapDNFTestCase(TestCase):
    """Check that _overlap_dnf only converts overlapping || groups."""

    def testOverlapDNF(self):
        """
        Parse each dependency string into Atom tokens with
        use_reduce(opconvert=True) and compare the _overlap_dnf output
        with the expected structure.
        """
        # Pairs of (dependency string, expected _overlap_dnf result).
        expectations = (
            (
                '|| ( cat/A cat/B ) cat/E || ( cat/C cat/D )',
                ['cat/E', ['||', 'cat/A', 'cat/B'], ['||', 'cat/C', 'cat/D']],
            ),
            (
                '|| ( cat/A cat/B ) cat/D || ( cat/B cat/C )',
                ['cat/D', ['||', ['cat/A', 'cat/B'], ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
            ),
            (
                '|| ( cat/A cat/B ) || ( cat/C cat/D ) || ( ( cat/B cat/E ) cat/F )',
                [['||', ['cat/A', 'cat/B', 'cat/E'], ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']], ['||', 'cat/C', 'cat/D']],
            ),
        )

        for dep_string, expected in expectations:
            parsed = use_reduce(dep_string, token_class=Atom, opconvert=True)
            self.assertEqual(_overlap_dnf(parsed), expected)
Loading

0 comments on commit 9fdaf9b

Please sign in to comment.