Merge 204193a into bd39126

jbms · Jun 14, 2020 · 74e0eef · 74e0eef
2 parents bd39126 + 204193a
commit 74e0eef
Show file tree

Hide file tree

Showing 25 changed files with 1,548 additions and 1,488 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.3
+current_version = 1.4.0
 tag = True
 commit = True
 message = chore: update package version to {new_version}

diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,5 @@ beancount_import/frontend_dist
 .tox
 .coverage
 htmlcov/
+.*.ofx
+*~
diff --git a/Makefile b/Makefile
@@ -0,0 +1,58 @@
+## -*- mode: make -*-
+##
+## Development helper targets for beancount-import.  Requires GNU make.
+
+# Tools and project name; each can be overridden on the command line,
+# e.g. `make PYTHON=python3 test`.
+GIT = git
+PYTHON = python
+MYPY = mypy
+PIP = pip
+PROJECT = beancount-import
+
+# OS specific section
+# A ';' in PATH is taken to mean native Windows; otherwise ask uname and
+# collapse the Cygwin/MSYS/MinGW variants to a short name.
+ifeq '$(findstring ;,$(PATH))' ';'
+    detected_OS := Windows
+else
+    detected_OS := $(shell uname 2>/dev/null || echo Unknown)
+    detected_OS := $(patsubst CYGWIN%,Cygwin,$(detected_OS))
+    detected_OS := $(patsubst MSYS%,MSYS,$(detected_OS))
+    detected_OS := $(patsubst MINGW%,MSYS,$(detected_OS))
+endif
+
+# RM_EGGS deletes, with a confirmation prompt per file, the
+# $(PROJECT).egg-link and $(PROJECT)-nspkg.pth files found below
+# $(CONDA_PREFIX).
+# NOTE(review): if CONDA_PREFIX is unset, the Unix variant runs a bare `cd`
+# and therefore searches the home directory instead -- confirm this is
+# acceptable.
+ifeq ($(detected_OS),Windows)
+    RM_EGGS = pushd $(CONDA_PREFIX) && del /s/p $(PROJECT).egg-link $(PROJECT)-nspkg.pth
+else
+    RM_EGGS = cd $(CONDA_PREFIX) && find . \( -name $(PROJECT).egg-link -o -name $(PROJECT)-nspkg.pth \) -exec rm -i {} \;
+endif
+
+# None of these targets produce a file of the same name.
+.PHONY: clean install test dist distclean upload_test upload tag
+
+# Remove build output, egg links, byte-code files and the pytest cache.
+# The last command is allowed to fail (leading '-') because .pytest_cache may
+# not exist.
+clean:
+	$(PYTHON) setup.py clean --all
+	$(RM_EGGS)
+	$(PYTHON) -Bc "import pathlib; [p.unlink() for p in pathlib.Path('.').rglob('*.py[co]')]"
+	$(PYTHON) -Bc "import pathlib; [p.rmdir() for p in pathlib.Path('.').rglob('__pycache__')]"
+	-$(PYTHON) -Bc "import shutil; shutil.rmtree('.pytest_cache')"
+
+# Install the package in editable mode after cleaning.
+install: clean
+	$(PIP) install -e .
+
+# Run the test suite, stopping at the first failure.  The mypy check is
+# currently disabled: its recipe line is a shell comment.
+test:
+	#$(MYPY) --show-error-codes src
+	$(PYTHON) -m pytest --exitfirst
+
+# Build the source and wheel distributions and validate them with twine.
+dist: install test
+	$(PYTHON) setup.py sdist bdist_wheel
+	$(PYTHON) -m twine check dist/*
+
+# Upload the distributions to the PyPI test instance.
+upload_test: dist
+	$(PYTHON) -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+
+# Upload the distributions to PyPI.
+upload: dist
+	$(PYTHON) -m twine upload dist/*
+
+# VERSION is whatever __about__.py prints (presumably the package version --
+# TODO confirm).  It is expanded lazily, so the script only runs when a rule
+# actually uses $(VERSION) or $(TAG).  The shell function is GNU make specific.
+VERSION = $(shell $(PYTHON) __about__.py)
+
+TAG = v$(VERSION)
+
+# Create an annotated tag for the current version and push it to origin.
+tag:
+	$(GIT) tag -a $(TAG) -m "$(TAG)"
+	$(GIT) push origin $(TAG)
diff --git a/README.md b/README.md
@@ -12,6 +12,12 @@ each other and with existing transactions.
 - Pluggable data source architecture, including existing support for OFX (cash,
   investment, and retirement accounts), Mint.com, Amazon.com, and Venmo.
 
+- Thanks to the existing OFX support and the
+  [ofxstatement](https://github.com/kedder/ofxstatement) tool, it is easy to
+  import data from any financial source for which an ofxstatement plugin
+  exists. Plugins are available for many European financial institutions, as
+  well as for the generic SWIFT MT940 format.
+
 - Robustly associates imported transactions with the source data, to
   automatically avoid duplicates.
 
@@ -124,15 +130,22 @@ The currently supported set of data sources is:
   [Morgan Stanley StockPlan Connect](https://stockplanconnect.com).
 - [beancount_import.source.ultipro_google](beancount_import/source/ultipro_google.py):
   imports Google employee Ultipro payroll statements.
+- [beancount_import.source.icscards](beancount_import/source/icscards.py):
+  imports ICSCards PDF files (after converting them to an OFX file).
 
 Refer to the individual data source documentation for details on configuration.
 
 # Usage
 
 To run Beancount-import, create a Python script that invokes the
 `beancount_import.webserver.main` function.  Refer to the examples
-[fresh](examples/fresh/run.py) and
-[manually_entered](examples/manually_entered/run.py).
+[fresh](examples/fresh/run.py),
+[manually_entered](examples/manually_entered/run.py) and
+[convert2ofx](examples/convert2ofx/run.py).
+
+The convert2ofx example requires the ofxstatement-dutch plugin; to install
+it, follow the instructions in the
+[ofxstatement-dutch README](https://github.com/gpaulissen/ofxstatement-dutch).
 
 ## Errors
 

diff --git a/beancount_import/reconcile.py b/beancount_import/reconcile.py
@@ -860,7 +860,7 @@ def _make_candidates_from_import_result(self, next_pending):
 
     def get_next_candidates(self, skip_ids: Optional[Dict[str, int]] = None):
         if self.pending_data:
-            if skip_ids is None:
+            if skip_ids is None:  # pragma: no cover
                 skip_ids = collections.Counter()
             new_skip_ids = collections.Counter()  # type: Dict[str, int]
             for i, pending in enumerate(self.pending_data):

diff --git a/beancount_import/source/mint.py b/beancount_import/source/mint.py
@@ -43,7 +43,7 @@
 expression like the following to specify the Mint source:
 
     dict(module='beancount_import.source.mint',
-         directory=os.path.join(journal_dir, 'data', 'mint', 'mint.csv'),
+         filename=os.path.join(journal_dir, 'data', 'mint', 'mint.csv'),
          balances_directory=os.path.join(journal_dir, 'data', 'mint'),
     )
 

diff --git a/beancount_import/source/ofx.py b/beancount_import/source/ofx.py
@@ -31,6 +31,20 @@
 excluded.  Therefore, if downloading manually, you should just ensure that there
 are no gaps in the date ranges selected; overlap will not cause any problems.
 
+Converting to OFX file
+======================
+
+Thanks to the ofxstatement project on GitHub, converters to OFX exist for
+several formats.  The module beancount_import.source.ofx contains a function
+convert2ofx that converts files to OFX.  Its parameters are input_file_type,
+filenames and force.  Each input file is converted to a hidden file in the
+same directory, named by prefixing a dot and appending .ofx, so x.pdf becomes
+.x.pdf.ofx.  An existing converted file is reused if it is newer than its
+input file, unless force is true.  There is no need to store these converted
+OFX files in your repository.
+
+To use the function:
+
+    from beancount_import.source.ofx import convert2ofx
+
 Specifying the source to beancount_import
 =========================================
 
@@ -41,16 +55,56 @@
          ofx_filenames=(
              glob.glob(os.path.join(journal_dir, 'data/institution1/*/*.ofx'))
              + glob.glob(os.path.join(journal_dir, 'data/institution2/*/*.ofx'))
+             + convert2ofx('mt940', glob.glob(os.path.join(journal_dir, 'data/institution3/*/*.mt940')))
          ),
          cache_filename=os.path.join(journal_dir, 'data/ofx_cache.pickle'),
+         checknum_numeric=lambda ofx_filename: False,
+         check_balance=lambda ofx_filename: False,
     )
 
 where `journal_dir` refers to the financial/ directory.
 
 The `cache_filename` key is optional, but is recommended to speed up parsing if
 you have a large amount of OFX data.  When using the `cache_filename` option,
 adding and deleting OFX files is fine, but if you modify existing OFX files, you
-must delete the cahe file manually.
+must delete the cache file manually.
+
+The `checknum_numeric` key is optional and controls numeric conversion of the
+CHECKNUM tag in the OFX file. The OFX standard says that CHECKNUM is just an
+alphanumeric string, but the default behaviour of beancount-import is to try
+to convert it to a number. The value of the `checknum_numeric` key is a
+callable that takes the name of the OFX file being processed and returns True
+to convert CHECKNUM to a number or False to keep it as a string.
+
+Emit balance yes or no?
+-----------------------
+The `check_balance` key is optional but can be used to emit balances only if
+they are known to be correct. Its value is a callable that takes the name of
+the OFX file being processed. If it returns False, no checks are done (old
+behaviour) and any balance seen is emitted. If it returns True, the following
+algorithm is used to determine the balance.
+
+We will use the OFX tags BALAMT, DTASOF and DTEND to explain how the balance
+is determined. BALAMT is the balance amount at date/time DTASOF. DTASOF is by
+definition the current date/time (date as of now), not the date/time of the
+closing balance of all transactions listed!  According to OFX, DTEND should be
+the EXCLUSIVE end date/time for the list of transactions retrieved, but some
+banks use it as an INCLUSIVE date (without a time component).
+Please note that some financial systems may include the time and some may not.
+Only the date parts of DTEND and DTASOF are compared.
+These are the relevant cases:
+1) DTEND is less than DTASOF.
+There may be financial transactions between DTEND and DTASOF that are not
+listed in the file, so we do not know whether BALAMT also holds at another
+time, for example at the start of day DTASOF. Hence we cannot reliably
+determine the balance at the start of day DTASOF, and we do NOT emit it.
+2) DTEND equals DTASOF.
+We can now calculate the balance at the beginning of day DTASOF by deducting
+the transactions on day DTEND. We can NOT calculate the balance for the day
+after DTASOF since there may be transactions later on day DTASOF. But that is
+not important: one balance is okay.
+3) DTEND is missing or DTEND is greater than DTASOF.
+Like case 2, but to be sure we also deduct transactions dated later than
+DTASOF.
 
 Specifying individual accounts
 ==============================
@@ -415,11 +469,13 @@
 
 import pickle
 import re
-from typing import Set, Tuple, Any, Dict, Union, List, Optional, NamedTuple
+from typing import Set, Tuple, Any, Dict, Union, List, Optional, NamedTuple, Callable
 import os
 import collections
 import datetime
 import tempfile
+import sys
+from subprocess import check_call, STDOUT
 
 import bs4
 from atomicwrites import atomic_write
@@ -596,12 +652,19 @@ def get_securities(soup: bs4.BeautifulSoup) -> List[SecurityInfo]:
 # Tolerance allowed in transaction balancing.  In units of base currency used, e.g. USD.
 TOLERANCE = 0.05
 
+CHECKNUM_NUMERIC = True   # True is old behavior, not conform OFX
+
+CHECK_BALANCE = False     # False is old behavior
+
+
 class ParsedOfxStatement(object):
-    def __init__(self, seen_fitids, filename, securities_map, org, stmtrs):
+    def __init__(self, seen_fitids, filename, securities_map, org, stmtrs,
+                 checknum_numeric=CHECKNUM_NUMERIC, check_balance=CHECK_BALANCE):
         filename = os.path.abspath(filename)
         self.filename = filename
         self.securities_map = securities_map
         self.org = org
+        self.checknum_numeric = checknum_numeric
         account_id = self.account_id = find_child(stmtrs, 'acctid')
         self.broker_id = find_child(stmtrs, 'brokerid') or ''
 
@@ -619,6 +682,24 @@ def __init__(self, seen_fitids, filename, securities_map, org, stmtrs):
         cash_activity_dates = self.cash_activity_dates = set()
 
         self.ofx_id = account_ofx_id = (org, self.broker_id, account_id)
+
+        if check_balance:
+            dtend = stmtrs.find(re.compile('banktranlist'))
+            if dtend:  # pragma: no cover
+                # Use find_child and not dtend.find().get_text()
+                dtend = find_child(dtend, 'dtend')
+                if dtend:  # pragma: no cover
+                    # The dtend text should be a date/time starting with %Y%m%d but some OFX files
+                    # do not conform to a time but the date part is correct.
+                    # Please note that just the date component is enough.
+                    try:
+                        dtend = parse_ofx_time(dtend[:8] + "000000").date()
+                        assert dtend is not None, "dtend should not be None"
+                    except ValueError as e:  # pragma: no cover
+                        sys.stderr.write("The DTEND tag (%s) can not be converted to a date\n" % (dtend))
+                        dtend = None
+        else:
+            pass
 
         for invtranlist in stmtrs.find_all(re.compile('invtranlist|banktranlist')):
             for tran in invtranlist.find_all(
@@ -676,10 +757,21 @@ def __init__(self, seen_fitids, filename, securities_map, org, stmtrs):
             bal_amount_str = find_child(bal, 'balamt')
             if not bal_amount_str.strip(): continue
             bal_amount = D(bal_amount_str)
-            date = find_child(bal, 'dtasof', parse_ofx_time).date()
+            dtasof = find_child(bal, 'dtasof', parse_ofx_time).date()
+            if check_balance:
+                # See above (Emit balance yes or no?)
+                # Case 1
+                if dtend is not None and dtend < dtasof:  # pragma: no cover
+                    continue
+                # Cases 2 and 3
+                for raw in raw_transactions:
+                    if raw.date >= dtasof:  # include > dtasof for case 3
+                        bal_amount -= raw.total
+            else:
+                pass            
             raw_cash_balance_entries.append(
                 RawCashBalanceEntry(
-                    date=date, number=bal_amount, filename=filename))
+                    date=dtasof, number=bal_amount, filename=filename))
 
 
         for invposlist in stmtrs.find_all('invposlist'):
@@ -838,9 +930,14 @@ def get_subaccount_cash(inv401ksource: Optional[str] = None) -> str:
                 posting_meta[OFX_NAME_KEY] = name
 
             if raw.checknum:
-                stripped_checknum = raw.checknum.lstrip('0')
-                if stripped_checknum:
-                    posting_meta[CHECK_KEY] = D(stripped_checknum)
+                # GJP 2020-01-18
+                # The CHECKNUM field is not numeric as described in the OFX 2.2 specification
+                if self.checknum_numeric:
+                    stripped_checknum = raw.checknum.lstrip('0')
+                    if stripped_checknum:
+                        posting_meta[CHECK_KEY] = D(stripped_checknum)
+                else:
+                    posting_meta[CHECK_KEY] = raw.checknum
 
             cash_transfer_transaction_amount = None
             if raw.trantype == 'INCOME' or raw.trantype == 'INVBANKTRAN' or raw.trantype == 'STMTTRN':
@@ -1147,7 +1244,8 @@ def get_subaccount_cash(inv401ksource: Optional[str] = None) -> str:
 
 
 class ParsedOfxFile(object):
-    def __init__(self, seen_fitids, filename):
+    def __init__(self, seen_fitids, filename,
+                 checknum_numeric=CHECKNUM_NUMERIC, check_balance=CHECK_BALANCE):
         self.filename = filename
         parsed_statements = self.parsed_statements = []
 
@@ -1169,7 +1267,9 @@ def __init__(self, seen_fitids, filename):
                     filename=filename,
                     securities_map=securities_map,
                     org=org,
-                    stmtrs=stmtrs))
+                    stmtrs=stmtrs,
+                    checknum_numeric=checknum_numeric,
+                    check_balance=check_balance))
 
 
 def get_account_map(accounts):
@@ -1324,6 +1424,8 @@ class OfxSource(Source):
     def __init__(self,
                  ofx_filenames: List[str],
                  cache_filename: Optional[str] = None,
+                 checknum_numeric: Callable[[str], bool] = lambda ofx_filename: CHECKNUM_NUMERIC,
+                 check_balance: Callable[[str], bool] = lambda ofx_filename: CHECK_BALANCE,
                  **kwargs) -> None:
         super().__init__(**kwargs)
         self.ofx_filenames = [os.path.realpath(x) for x in ofx_filenames]
@@ -1355,7 +1457,10 @@ def __init__(self,
                 continue
             self.log_status('ofx: loading %s' % filename)
             self.parsed_files.append(
-                ParsedOfxFile(self.source_fitids, filename))
+                ParsedOfxFile(self.source_fitids,
+                              filename,
+                              checknum_numeric(filename),
+                              check_balance(filename)))
 
         if cache_filename is not None:
             cache_data = {
@@ -1401,6 +1506,28 @@ def name(self):
 def load(spec, log_status):
     return OfxSource(log_status=log_status, **spec)
 
+def convert2ofx(input_file_type: str,
+                filenames: List[str],
+                force: Optional[bool] = False) -> List[str]:  # pragma: no cover
+    """Converts files to OFX using the external `ofxstatement` program.
+
+    Each input file `<dir>/<name>` is converted to the hidden file
+    `<dir>/.<name>.ofx`.  The conversion is skipped if that file already exists
+    and is newer than the input file, unless `force` is true.
+
+    Args:
+      input_file_type: Value passed to the `-t` option of
+        `ofxstatement convert`.
+      filenames: Paths of the files to convert.
+      force: If true, always convert, even if a newer OFX file exists.
+
+    Returns:
+      The real paths of the OFX files, in the same order as `filenames`.
+
+    Raises:
+      subprocess.CalledProcessError: If `ofxstatement` exits with a non-zero
+        status.
+      OSError: If `ofxstatement` cannot be started.
+    """
+    ofx_filenames = []
+    for filename in filenames:
+        input_path = os.path.realpath(filename)
+        head, tail = os.path.split(input_path)
+        ofx_path = os.path.join(head, '.' + tail + '.ofx')
+        up_to_date = False
+        if not force:
+            try:
+                up_to_date = (
+                    os.stat(ofx_path).st_mtime > os.stat(input_path).st_mtime)
+            except OSError:
+                # One of the files could not be examined (typically the OFX
+                # file does not exist yet): fall through and convert.
+                pass
+
+        if not up_to_date:
+            # Run ofxstatement, sending its stderr to our stdout.
+            check_call(
+                ['ofxstatement', 'convert', '-t', input_file_type,
+                 input_path, ofx_path],
+                stderr=STDOUT)
+        ofx_filenames.append(ofx_path)
+    return ofx_filenames
 
 if __name__ == '__main__':
     import argparse

diff --git a/beancount_import/source/ofx_test.py b/beancount_import/source/ofx_test.py
@@ -35,13 +35,15 @@
     ('test_amex', 'amex.ofx'),
 ]
 
-
 @pytest.mark.parametrize('name,ofx_filename', examples)
 def test_source(name: str, ofx_filename: str):
+    # Examples listed here are run with CHECKNUM kept as a string and with
+    # balance checking enabled; all other examples use the old behaviour.
+    new_behaviour_examples = {
+        'test_amex',
+        'test_bank_medium',
+        'test_checking_emptyledgerbal',
+        'test_suncorp',
+    }
     check_source_example(
         example_dir=os.path.join(testdata_dir, name),
         source_spec={
             'module': 'beancount_import.source.ofx',
             'ofx_filenames': [os.path.join(testdata_dir, ofx_filename)],
+            # Both callables receive the OFX filename but decide on the
+            # example name instead.
+            'checknum_numeric': lambda _ofx_filename: name not in new_behaviour_examples,
+            'check_balance': lambda _ofx_filename: name in new_behaviour_examples,
         },
         replacements=[(testdata_dir, '<testdata>')])