Skip to content

Commit

Permalink
Improve ultipro_google import: regression test, efficiency improvemen…
Browse files Browse the repository at this point in the history
…t, and employer contribution matching (#133)

* ultipro_google: Add regression test.

The test does not pass on Windows due to how the parser handles
newlines.

Install poppler-utils during the Linux build workflow to make pdftotext
available.

* ultipro_google: Memoize row_name to account translation.

This will be a bit faster for large imports than doing an O(n) lookup
for every posting, where n is the number of potential accounts, because
row_names are repeated often.

* ultipro_google: Support employer matched deductions.

This is done by creating a corresponding earnings posting.
  • Loading branch information
m-d-brown committed Nov 15, 2021
1 parent 253ef80 commit e73fb56
Show file tree
Hide file tree
Showing 10 changed files with 305 additions and 36 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/build.yml
Expand Up @@ -38,6 +38,9 @@ jobs:
working-directory: frontend
- run: npm run tsc
working-directory: frontend
- name: Install pdftotext (on Linux)
if: ${{ runner.os == 'Linux' }}
run: sudo apt-get install poppler-utils
- name: Install Python packaging/test tools
run: python -m pip install tox wheel
- name: Show package version
Expand Down
112 changes: 76 additions & 36 deletions beancount_import/source/ultipro_google.py
Expand Up @@ -99,6 +99,7 @@
import datetime
import os
import collections
import functools
import re
from beancount.core.number import D, ZERO
from beancount.core.data import Open, Transaction, Posting, Amount, Entries, Directive, EMPTY_SET
Expand Down Expand Up @@ -132,19 +133,15 @@ def __init__(self,


def make_import_result(parse_result: ultipro_google_statement.ParseResult,
accounts: Rules, config: Config,
account_pattern_for_row_name, config: Config,
info: dict) -> ImportResult:
"""Generate journal entries based on a payroll statement.
:param all_values: parsed payroll statement.
:param errors: errors from parsing payroll statement.
:param accounts: maps section names to lists of rules specifying the account
corresponding to a line entry in the statement. For the 'Earnings',
'Deductions', 'Taxes', and 'Net Pay Distribution' sections, the rules
are specified as (description_regex, account) pairs. The
description_regex is matched against the textual description for the
line entry (it must match the entire string).. All account names are
first transformed by calling format with the year parameter set to the
:param account_pattern_for_row_name: A function that takes (row_name,
section) and returns an account pattern. The pattern is later
transformed by calling format with the year parameter set to the
appropriate year.
:param config: specifies the configuration.
Expand Down Expand Up @@ -177,32 +174,52 @@ def make_import_result(parse_result: ultipro_google_statement.ParseResult,
txn.meta[config.period_start_date_key] = start_date
txn.meta[config.period_end_date_key] = end_date

for section in ['Earnings', 'Deductions', 'Taxes', 'Net Pay Distribution']:
if section == 'Net Pay Distribution':
field_name = 'Amount'
else:
field_name = 'Current'
cur_accounts = accounts[section]
def add_posting(section, row_name, value):
    """Append one posting for a statement row to the transaction.

    The account comes from `account_pattern_for_row_name(row_name, section)`
    with the `year` placeholder filled in; the amount is `value` in
    `currency`.  The posting's metadata records "<section>: <row_name>"
    under `config.desc_key`.
    """
    pattern = account_pattern_for_row_name(row_name, section)
    description = '%s: %s' % (section, row_name)
    posting = Posting(
        account=pattern.format(year=year),
        units=Amount(currency=currency, number=value),
        cost=None,
        price=None,
        flag=None,
        meta={config.desc_key: description},
    )
    txn.postings.append(posting)

for section, field_names, sign in [
(
'Earnings',
[('Current', False)],
-1, # Earnings are recorded as negative amounts
),
(
'Deductions',
[('Current', False),
('Current:Employer', True)],
1),
(
'Taxes',
[('Current', False)],
1,
),
(
'Net Pay Distribution',
[('Amount', False)],
1,
),
]:
for row_name, fields in all_values[section]:
value = fields[field_name]
if section == 'Earnings':
value = -value
if value == ZERO:
continue
account = FIXME_ACCOUNT
for row_re, account_pattern in cur_accounts:
if re.fullmatch(row_re, row_name) is not None:
account = account_pattern.format(year=year)
break
txn.postings.append(
Posting(
account=account,
units=Amount(currency=currency, number=value),
cost=None,
meta={config.desc_key: '%s: %s' % (section, row_name)},
price=None,
flag=None,
))
for field_name, employer_match in field_names:
value = fields.get(field_name)
if value is None or value == ZERO:
continue
value *= sign
if employer_match:
row_name += ' Employer Match'
add_posting(section, row_name, value)
if employer_match:
assert section == 'Deductions'
add_posting('Earnings', row_name, -value)

return ImportResult(date=txn.date, entries=[txn], info=info)

Expand Down Expand Up @@ -249,11 +266,11 @@ def _preprocess_entries(self, entries: Entries):

def _get_import_result(self,
parse_result: ultipro_google_statement.ParseResult,
rules: Rules, path: str):
account_pattern_for_row_name, path: str):
return make_import_result(
config=self,
parse_result=parse_result,
accounts=rules,
account_pattern_for_row_name=account_pattern_for_row_name,
info=dict(
type='application/pdf',
filename=path,
Expand Down Expand Up @@ -307,11 +324,34 @@ def prepare(self, journal, results: SourceResults):
rules = self.rules.copy()
rules.setdefault('Net Pay Distribution', []).extend(net_pay_rules)

# This cache exists only for the duration of the
# self._get_import_result calls that follow.
@functools.lru_cache(maxsize=None)
def account_pattern_for_row_name(row_name, section):
    """Return the account pattern for a statement row.

    Looks the row up in `rules`, which maps section names to lists of
    (description_regex, account_pattern) pairs.  The first pair whose
    description_regex matches the entire `row_name` wins; if no pair
    matches, FIXME_ACCOUNT is returned.  The returned pattern is meant to
    be passed through format(year=...) by the caller.
    """
    matching_patterns = (
        pattern
        for regex, pattern in rules[section]
        if re.fullmatch(regex, row_name) is not None)
    return next(matching_patterns, FIXME_ACCOUNT)

parsed_statements.sort(key=lambda x: (x[0], x[1]))
for pay_date, _, parse_result, filename in parsed_statements:
results.add_pending_entry(
self._get_import_result(
parse_result=parse_result, rules=rules, path=path))
parse_result,
account_pattern_for_row_name,
path))

for seen_key, entries in documents_seen_in_journal.items():
num_expected = (1 if seen_key in documents_seen_in_directory else 0)
Expand Down
58 changes: 58 additions & 0 deletions beancount_import/source/ultipro_google_test.py
@@ -0,0 +1,58 @@
import os
import shutil
import sys

import pytest

from . import ultipro_google
from .source_test import check_source_example

# Resolved path of testdata/source/ultipro_google, located two directory
# levels above the directory containing this test module.
testdata_dir = os.path.realpath(
os.path.join(
os.path.dirname(__file__), '..', '..', 'testdata', 'source', 'ultipro_google'))

@pytest.mark.skipif(
    shutil.which('pdftotext') is None,
    reason='the pdftotext program must be available')
@pytest.mark.skipif(
    sys.platform.startswith('win'),
    reason='parsing does not work with Windows newlines')
@pytest.mark.parametrize('name', ['test_basic'])
def test_source(name: str):
    """Run check_source_example on the example directory called `name`.

    The source specification mirrors the example in ultipro_google.py.
    """
    example_dir = os.path.join(testdata_dir, name)

    # Per-section (description_regex, account_pattern) rules.
    earnings_rules = [
        ('Regular Pay', 'Income:Google:Salary'),
        ('Annual Bonus', 'Income:Google:Annual-Bonus'),
        ('HSA ER Seed', 'Income:Google:HSA'),
    ]
    deductions_rules = [
        ('Dental', 'Expenses:Health:Dental:Insurance'),
        ('Medical', 'Expenses:Health:Medical:Insurance'),
    ]
    taxes_rules = [
        ('Federal Income Tax',
         'Income:Expenses:Taxes:TY{year:04d}:Federal:Income'),
        ('Employee Medicare',
         'Income:Expenses:Taxes:TY{year:04d}:Federal:Medicare'),
        ('Social Security Employee Tax',
         'Income:Expenses:Taxes:TY{year:04d}:Federal:Social-Security'),
        ('CA State Income Tax',
         'Income:Expenses:Taxes:TY{year:04d}:California:Income'),
        ('CA Private Disability Employee',
         'Income:Expenses:Taxes:TY{year:04d}:California:Disability'),
    ]
    net_pay_rules = [
        ('x+1234', 'Assets:Checking:My-Bank'),
    ]

    source_spec = {
        'module': 'beancount_import.source.ultipro_google',
        'company_name': 'Google',
        'key_prefix': 'google_payroll',
        'currency': 'USD',
        'directory': example_dir,
        'rules': {
            'Earnings': earnings_rules,
            'Deductions': deductions_rules,
            'Taxes': taxes_rules,
            'Net Pay Distribution': net_pay_rules,
        },
    }

    check_source_example(
        example_dir=example_dir,
        source_spec=source_spec,
        replacements=[(testdata_dir, '<testdata>')])
Binary file not shown.
55 changes: 55 additions & 0 deletions testdata/source/ultipro_google/test_basic/20210108-1.txt
@@ -0,0 +1,55 @@
Google LLC
1600 Amphitheatre Parkway
Mountain View, CA 94043
650-253-0000
Pay Statement
Period Start Date 12/21/2020
Period End Date 01/03/2021
Pay Date 01/08/2021
Document 98765432
Net Pay $600.00
Pay Details
Bean Counter
123 Import St
New York, NY 10001
USA
Employee Number 000123456
SSN XXX-XX-XXXX
Job Accountant
Pay Rate $100.0000
Pay Frequency Biweekly
Pay Group Salaried Employees
Location US-NYC-NYC
Cost Center 111 - Finance - Accounting
Earnings
Pay Type Hours Pay Rate Current YTD
Dom Part $50.00 $100.00
Regular Pay 8.0000 $100.0000 $800.00
Regular Pay 72.0000 $100.0000 $7,200.00 $16,000.00
Total Hours Worked 80.0000 Total Hours 80.0000
Deductions
Employee Employer
Deduction Based On Pre-Tax Current YTD Current YTD
401K Pretax $8,000.00 Yes $6,000.00 $12,000.00 $3,000.00 $6,000.00
Dental $1.11 Yes $1.11 $2.22 $0.00 $0.00
Dmstc Part - NR $50.00 No $50.00 $100.00 $0.00 $0.00
Medical $2.22 Yes $2.22 $4.44 $0.00 $0.00
Vision $3.33 Yes $3.33 $6.66 $0.00 $0.00
Taxes
Tax Based On Current YTD
Federal Income Tax $2,000.00 $200.00 $400.00
Employee Medicare $8,000.00 $50.00 $100.00
Social Security Employee Tax $8,000.00 $50.00 $150.00
NY State Income Tax $3,000.00 $100.00 $100.00
Paid Time Off
Plan Current Balance
Vacation 0.0000 0.0000
Net Pay Distribution
Account Number Account Type Amount
xxxxxxxx1234 Checking $600.00
Total $600.00
Pay Summary
Gross FIT Taxable Wages Taxes Deductions Net Pay
Current $8,050.00 $2,000.00 $400.00 $6,056.66 $600.00
YTD $16,000 $4,000.00 $800.00 $9,000.00 $1200.00

Empty file.
111 changes: 111 additions & 0 deletions testdata/source/ultipro_google/test_basic/import_results.beancount
@@ -0,0 +1,111 @@
;; date: 2021-01-08
;; info: {"filename": "<testdata>/test_basic/20210108-1.pdf", "type": "application/pdf"}

; features: [
; {
; "amount": "-50.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Earnings: Dom Part"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "6000.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Deductions: 401K Pretax"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "3000.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Deductions: 401K Pretax Employer Match"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "-3000.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Earnings: 401K Pretax Employer Match"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "50.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Deductions: Dmstc Part - NR"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "3.33 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Deductions: Vision"
; ]
; },
; "source_account": ""
; },
; {
; "amount": "100.00 USD",
; "date": "2021-01-08",
; "key_value_pairs": {
; "google_payroll_desc": [
; "Taxes: NY State Income Tax"
; ]
; },
; "source_account": ""
; }
; ]
2021-01-08 * "Google" "Payroll"
associated_data0: "{\"description\": \"Google payroll statement\", \"meta\": [\"google_payroll_document\", \"98765432\"], \"path\": \"<testdata>/test_basic/20210108-1.pdf\", \"type\": \"application/pdf\"}"
google_payroll_document: "98765432"
google_payroll_pay_date: 2021-01-08
google_payroll_period_end_date: 2021-01-03
google_payroll_period_start_date: 2020-12-21
Expenses:FIXME -50.00 USD
google_payroll_desc: "Earnings: Dom Part"
Income:Google:Salary -800.00 USD
google_payroll_desc: "Earnings: Regular Pay"
Income:Google:Salary -7200.00 USD
google_payroll_desc: "Earnings: Regular Pay"
Expenses:FIXME 6000.00 USD
google_payroll_desc: "Deductions: 401K Pretax"
Expenses:FIXME 3000.00 USD
google_payroll_desc: "Deductions: 401K Pretax Employer Match"
Expenses:FIXME -3000.00 USD
google_payroll_desc: "Earnings: 401K Pretax Employer Match"
Expenses:Health:Dental:Insurance 1.11 USD
google_payroll_desc: "Deductions: Dental"
Expenses:FIXME 50.00 USD
google_payroll_desc: "Deductions: Dmstc Part - NR"
Expenses:Health:Medical:Insurance 2.22 USD
google_payroll_desc: "Deductions: Medical"
Expenses:FIXME 3.33 USD
google_payroll_desc: "Deductions: Vision"
Income:Expenses:Taxes:TY2021:Federal:Income 200.00 USD
google_payroll_desc: "Taxes: Federal Income Tax"
Income:Expenses:Taxes:TY2021:Federal:Medicare 50.00 USD
google_payroll_desc: "Taxes: Employee Medicare"
Income:Expenses:Taxes:TY2021:Federal:Social-Security 50.00 USD
google_payroll_desc: "Taxes: Social Security Employee Tax"
Expenses:FIXME 100.00 USD
google_payroll_desc: "Taxes: NY State Income Tax"
Assets:Checking:My-Bank 600.00 USD
google_payroll_desc: "Net Pay Distribution: xxxxxxxx1234"
Empty file.

0 comments on commit e73fb56

Please sign in to comment.