From b5576e2f34f4446411d7994abc139b4b40bf04d4 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Sun, 6 Dec 2020 09:31:33 -0700 Subject: [PATCH] Re-add per-sign maximal subsets for matching. --- beancount_import/matching.py | 34 ++++++++++------ beancount_import/matching_test.py | 64 ++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 16 deletions(-) diff --git a/beancount_import/matching.py b/beancount_import/matching.py index 5157fd12..c01f451f 100644 --- a/beancount_import/matching.py +++ b/beancount_import/matching.py @@ -704,7 +704,7 @@ def get_aggregate_posting_candidates( 5. Subsets may not sum to zero, or contain any sub-subsets that sum to zero. 6. To limit the computational cost, subsets are limited to at most 4 - elements, except that all maximal subsets are also returned. + elements, except that all same-sign maximal subsets are also returned. The returned subsets are not, in general, disjoint. @@ -730,15 +730,26 @@ def get_aggregate_posting_candidates( def posting_set_id(postings): return tuple(id(x) for x in postings) - def add_subset(account, currency, subset): + def partition(predicate, postings): + t = [] + f = [] + for p in postings: + if predicate(p): + t.append(p) + else: + f.append(p) + return t, f + + def add_subset(account, currency, subset, check_zero=True): total = sum(x.units.number for x in subset) - if total == ZERO: - sum_to_zero.add(posting_set_id(subset)) - return - for subsubset_size in range(2, len(subset)): - for subsubset in itertools.combinations(subset, subsubset_size): - if posting_set_id(subsubset) in sum_to_zero: - return + if check_zero: + if total == ZERO: + sum_to_zero.add(posting_set_id(subset)) + return + for subsubset_size in range(2, len(subset)): + for subsubset in itertools.combinations(subset, subsubset_size): + if posting_set_id(subsubset) in sum_to_zero: + return aggregate_posting = Posting( account=account, units=Amount(currency=currency, number=total), @@ -751,8 +762,9 @@ def add_subset(account, currency, subset): for (account, currency), posting_list in possible_sets.items(): if len(posting_list) == 1: continue - if len(posting_list) > max_subset_size: - add_subset(account, currency, posting_list) + for samesign_list in partition(lambda p: p.units.number > ZERO, posting_list): + if len(samesign_list) > max_subset_size: + add_subset(account, currency, samesign_list, check_zero=False) for subset_size in range( 2, min(len(posting_list) + 1, max_subset_size + 1)): for subset in itertools.combinations(posting_list, subset_size): diff --git a/beancount_import/matching_test.py b/beancount_import/matching_test.py index ee008f1b..57bf370b 100755 --- a/beancount_import/matching_test.py +++ b/beancount_import/matching_test.py @@ -786,11 +786,11 @@ def test_match_grouped_differing_signs_sum_zero(): Expenses:FIXME 1.35 USD note1: "B" Expenses:FIXME 2.90 USD - note1: "D" + note1: "C" Expenses:FIXME -1.35 USD - note1: "F" + note1: "D" Expenses:FIXME -2.90 USD - note1: "G" + note1: "E" """, journal=""" 2020-12-05 * "Narration" @@ -807,14 +807,68 @@ def test_match_grouped_differing_signs_sum_zero(): note3: "A" Assets:Bank -1.35 USD cleared: TRUE - note1: "F" + note1: "D" note2: "A" Expenses:Foo 1.35 USD note1: "B" note3: "B" Expenses:FIXME 2.90 USD - note1: "D" + note1: "C" Expenses:FIXME -2.90 USD + note1: "E" + """, + ) + +def test_match_grouped_maximal_differing_signs(): + # Maximal matching groups are still per-sign. + assert_match( + pending_candidate=""" + 2020-12-05 * "Narration" + note1: "A" + Expenses:FIXME 1 USD + note1: "B" + Expenses:FIXME 2 USD + note1: "C" + Expenses:FIXME 3 USD + note1: "D" + Expenses:FIXME 4 USD + note1: "E" + Expenses:FIXME 5 USD + note1: "F" + Expenses:FIXME -15 USD note1: "G" """, + journal=""" + 2020-12-05 * "Narration" + note2: "A" + Assets:Bank -15 USD + cleared: TRUE + note2: "B" + Expenses:Foo 15 USD + note2: "C" + """, + matches=""" + 2020-12-05 * "Narration" + note1: "A" + note2: "A" + Assets:Bank -15 USD + cleared: TRUE + note1: "G" + note2: "B" + Expenses:Foo 1 USD + note1: "B" + note2: "C" + Expenses:Foo 2 USD + note1: "C" + note2: "C" + Expenses:Foo 3 USD + note1: "D" + note2: "C" + Expenses:Foo 4 USD + note1: "E" + note2: "C" + Expenses:Foo 5 USD + note1: "F" + note2: "C" + """, )