Skip to content

Commit

Permalink
Re-add per-sign maximal subsets for matching.
Browse files Browse the repository at this point in the history
  • Loading branch information
carljm committed Dec 6, 2020
1 parent d1ce74d commit b5576e2
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 16 deletions.
34 changes: 23 additions & 11 deletions beancount_import/matching.py
Expand Up @@ -704,7 +704,7 @@ def get_aggregate_posting_candidates(
5. Subsets may not sum to zero, or contain any sub-subsets that sum to zero.
6. To limit the computational cost, subsets are limited to at most 4
elements, except that all maximal subsets are also returned.
elements, except that all same-sign maximal subsets are also returned.
The returned subsets are not, in general, disjoint.
Expand All @@ -730,15 +730,26 @@ def get_aggregate_posting_candidates(
def posting_set_id(postings):
    """Return an identity-based key for a collection of postings.

    The key is a tuple holding the ``id()`` of every element of
    ``postings``, in iteration order.  Elements are therefore told apart
    by object identity rather than by equality, and the same elements in
    a different order yield a different key.

    Args:
      postings: Iterable of objects to build the key from.

    Returns:
      A tuple of integers, one per element of ``postings``.
    """
    return tuple(id(posting) for posting in postings)

def add_subset(account, currency, subset):
def partition(predicate, postings):
    """Split ``postings`` into two lists according to ``predicate``.

    Args:
      predicate: Callable invoked exactly once per element; its result is
        interpreted as a truth value.
      postings: Iterable of elements to split.

    Returns:
      A ``(matching, rest)`` tuple of lists.  ``matching`` holds the
      elements for which ``predicate`` was truthy, ``rest`` holds all
      others.  Both lists keep the relative order of the input.
    """
    matching = []
    rest = []
    for posting in postings:
        # Single pass: works with one-shot iterators and never evaluates
        # the predicate twice for the same element.
        (matching if predicate(posting) else rest).append(posting)
    return matching, rest

def add_subset(account, currency, subset, check_zero=True):
total = sum(x.units.number for x in subset)
if total == ZERO:
sum_to_zero.add(posting_set_id(subset))
return
for subsubset_size in range(2, len(subset)):
for subsubset in itertools.combinations(subset, subsubset_size):
if posting_set_id(subsubset) in sum_to_zero:
return
if check_zero:
if total == ZERO:
sum_to_zero.add(posting_set_id(subset))
return
for subsubset_size in range(2, len(subset)):
for subsubset in itertools.combinations(subset, subsubset_size):
if posting_set_id(subsubset) in sum_to_zero:
return
aggregate_posting = Posting(
account=account,
units=Amount(currency=currency, number=total),
Expand All @@ -751,8 +762,9 @@ def add_subset(account, currency, subset):
for (account, currency), posting_list in possible_sets.items():
if len(posting_list) == 1:
continue
if len(posting_list) > max_subset_size:
add_subset(account, currency, posting_list)
for samesign_list in partition(lambda p: p.units.number > ZERO, posting_list):
if len(samesign_list) > max_subset_size:
add_subset(account, currency, samesign_list, check_zero=False)
for subset_size in range(
2, min(len(posting_list) + 1, max_subset_size + 1)):
for subset in itertools.combinations(posting_list, subset_size):
Expand Down
64 changes: 59 additions & 5 deletions beancount_import/matching_test.py
Expand Up @@ -786,11 +786,11 @@ def test_match_grouped_differing_signs_sum_zero():
Expenses:FIXME 1.35 USD
note1: "B"
Expenses:FIXME 2.90 USD
note1: "D"
note1: "C"
Expenses:FIXME -1.35 USD
note1: "F"
note1: "D"
Expenses:FIXME -2.90 USD
note1: "G"
note1: "E"
""",
journal="""
2020-12-05 * "Narration"
Expand All @@ -807,14 +807,68 @@ def test_match_grouped_differing_signs_sum_zero():
note3: "A"
Assets:Bank -1.35 USD
cleared: TRUE
note1: "F"
note1: "D"
note2: "A"
Expenses:Foo 1.35 USD
note1: "B"
note3: "B"
Expenses:FIXME 2.90 USD
note1: "D"
note1: "C"
Expenses:FIXME -2.90 USD
note1: "E"
""",
)

def test_match_grouped_maximal_differing_signs():
# Maximal matching groups are still per-sign.
#
# The pending entry has five positive Expenses:FIXME postings
# (1 + 2 + 3 + 4 + 5 = 15 USD) and a single -15 USD posting.  The expected
# result assigns all five positive postings to Expenses:Foo, i.e. they are
# matched as one group against the journal's single 15 USD posting, while
# the -15 USD posting is merged with the cleared Assets:Bank posting.
#
# NOTE(review): five postings presumably exceeds the matcher's ordinary
# subset-size limit, so this relies on the same-sign maximal subset being
# offered as a candidate -- confirm against
# get_aggregate_posting_candidates.
assert_match(
# Pending entry: five positive postings and one negative posting, all on
# Expenses:FIXME, each tagged with its own note1 value.
pending_candidate="""
2020-12-05 * "Narration"
note1: "A"
Expenses:FIXME 1 USD
note1: "B"
Expenses:FIXME 2 USD
note1: "C"
Expenses:FIXME 3 USD
note1: "D"
Expenses:FIXME 4 USD
note1: "E"
Expenses:FIXME 5 USD
note1: "F"
Expenses:FIXME -15 USD
note1: "G"
""",
# Existing journal entry: a cleared -15 USD bank posting and a single
# 15 USD expense posting, tagged with note2 values.
journal="""
2020-12-05 * "Narration"
note2: "A"
Assets:Bank -15 USD
cleared: TRUE
note2: "B"
Expenses:Foo 15 USD
note2: "C"
""",
# Expected merged entry: note1 metadata comes from the pending entry and
# note2 metadata from the journal entry; every positive posting carries
# the journal expense posting's note2 value ("C").
matches="""
2020-12-05 * "Narration"
note1: "A"
note2: "A"
Assets:Bank -15 USD
cleared: TRUE
note1: "G"
note2: "B"
Expenses:Foo 1 USD
note1: "B"
note2: "C"
Expenses:Foo 2 USD
note1: "C"
note2: "C"
Expenses:Foo 3 USD
note1: "D"
note2: "C"
Expenses:Foo 4 USD
note1: "E"
note2: "C"
Expenses:Foo 5 USD
note1: "F"
note2: "C"
""",
)

0 comments on commit b5576e2

Please sign in to comment.