Skip to content

Commit

Permalink
Merge cbbe06d into 9eb4361
Browse files Browse the repository at this point in the history
  • Loading branch information
dumbPy committed Aug 7, 2020
2 parents 9eb4361 + cbbe06d commit 754a75b
Show file tree
Hide file tree
Showing 15 changed files with 591 additions and 0 deletions.
153 changes: 153 additions & 0 deletions beancount_import/source/generic_importer_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""This module implements a Source Subclass for wrapping
`beancount.ingest.importer.ImporterProtocol` subclasses importers.
The importers are considered athoritative of the account they represent.
The Transaction.narration set by each importer is copied to Posting.meta[source_desc]
This helps in predicting postings for similar transaction while allowing the
user to change the Transaction description and payee from UI
(see readme.md for more on source_desc)
This `source_desc` meta is also used for check cleared postings and should not be
changed manually
Author: Sufiyan Adhikari(github.com/dumbPy)
"""

import os
from glob import glob
from collections import defaultdict
import itertools
from typing import Hashable, List, Dict, Optional

from beancount.core.data import Transaction, Posting, Directive
from beancount.core.amount import Amount
from beancount.ingest.importer import ImporterProtocol
from beancount.ingest.cache import get_file

from ..matching import FIXME_ACCOUNT, SimpleInventory
from . import ImportResult, SourceResults
from ..journal_editor import JournalEditor
from .description_based_source import DescriptionBasedSource, get_pending_and_invalid_entries
from .mint import _get_key_from_posting


class ImporterSource(DescriptionBasedSource):
    """Source that wraps one `ImporterProtocol` importer for a single account.

    Files found below `directory` that the importer identifies are extracted
    in `prepare`; the resulting entries are handed to
    `get_pending_and_invalid_entries` to be compared against the journal.
    """

    def __init__(self,
                 directory: str,
                 account: str,
                 importer: ImporterProtocol,
                 **kwargs) -> None:
        """Collect the files under `directory` that `importer` can handle.

        Args:
            directory: Root directory searched recursively for statement
                files. A leading ``~`` is expanded.
            account: The account this source reports to the results and uses
                to locate the source posting of each imported transaction.
            importer: The wrapped importer; its `identify`, `extract` and
                `name` methods are used.
            **kwargs: Forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Search the *expanded* directory: globbing the raw argument would
        # silently match nothing for paths such as "~/statements".
        pattern = os.path.join(self.directory, '**', '*')
        # get _FileMemo object for each regular file
        candidates = [
            get_file(path)
            for path in glob(pattern, recursive=True)
            if os.path.isfile(path)
        ]
        # keep only the files this importer identifies as its own
        self.files = [f for f in candidates if self.importer.identify(f)]

    @property
    def name(self) -> str:
        """Name of this source, as reported by the wrapped importer."""
        return self.importer.name()

    def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
        """Extract entries from every file and register pending/invalid ones.

        Entries are grouped by the key from `_get_key_from_imported_entry`.
        Within one file, repeated keys are all kept; across files, only the
        occurrences exceeding the number already collected for that key are
        added, so overlapping statements do not produce duplicates.
        """
        results.add_account(self.account)

        entries: Dict[Hashable, List[Directive]] = defaultdict(list)
        for f in self.files:
            f_entries = self.importer.extract(f, existing_entries=journal.entries)
            # collect all entries in current statement, grouped by hash
            hashed_entries: Dict[Hashable, List[Directive]] = defaultdict(list)
            for entry in f_entries:
                # NOTE(review): the key is computed before the Transaction
                # check in _add_description, so a non-Transaction directive
                # without `postings` would fail here -- confirm importers
                # only emit transactions.
                key_ = self._get_key_from_imported_entry(entry)
                self._add_description(entry)
                hashed_entries[key_].append(entry)
            # deduplicate across statements
            for key_, same_key_entries in hashed_entries.items():
                # skip the existing entries from other statements. add remaining
                already_seen = len(entries[key_])
                entries[key_].extend(same_key_entries[already_seen:])

        get_pending_and_invalid_entries(
            raw_entries=list(itertools.chain.from_iterable(entries.values())),
            journal_entries=journal.all_entries,
            account_set={self.account},
            get_key_from_posting=_get_key_from_posting,
            get_key_from_raw_entry=self._get_key_from_imported_entry,
            make_import_result=self._make_import_result,
            results=results)

    def _add_description(self, entry: Transaction) -> None:
        """Tag the first posting on `self.account` with source metadata.

        Sets ``source_desc`` (the transaction narration) and ``date`` (the
        transaction date) in the posting's meta. If the posting's meta is not
        a dict, the posting is replaced in place by a copy whose meta holds
        just those two keys. Non-Transaction entries are left untouched.
        """
        if not isinstance(entry, Transaction):
            return None
        postings: List[Posting] = entry.postings
        for index, posting in enumerate(postings):
            if posting.account != self.account:
                continue
            if isinstance(posting.meta, dict):
                posting.meta["source_desc"] = entry.narration
                posting.meta["date"] = entry.date
            else:
                # No mutable meta to update: swap in a rebuilt posting at the
                # same position.
                postings[index] = Posting(
                    posting.account, posting.units, posting.cost,
                    posting.price, posting.flag,
                    {"source_desc": entry.narration, "date": entry.date})
            # Only the first matching posting is tagged.
            break
        return None

    def _get_source_posting(self, entry: Transaction) -> Optional[Posting]:
        """Return the first posting of `entry` on `self.account`, or None."""
        for posting in entry.postings:
            if posting.account == self.account:
                return posting
        return None

    def _get_key_from_imported_entry(self, entry: Transaction) -> Hashable:
        """Return the deduplication/matching key for an imported entry.

        The key is ``(account, date, units of the source posting, narration)``.

        Raises:
            ValueError: If `entry` has no posting on `self.account`.
        """
        source_posting = self._get_source_posting(entry)
        if source_posting is None:
            raise ValueError(f"entry has no postings for {self.account}")
        return (self.account,
                entry.date,
                source_posting.units,
                entry.narration)

    def _make_import_result(self, imported_entry: Directive):
        """Wrap `imported_entry` in an `ImportResult`.

        Transactions are first balanced with `balance_amounts`. The
        ``filename`` meta key is removed after the info has been computed.
        """
        if isinstance(imported_entry, Transaction):
            balance_amounts(imported_entry)
        result = ImportResult(
            date=imported_entry.date,
            info=get_info(imported_entry),
            entries=[imported_entry])
        # delete filename since it is used by beancount-import to determine if the
        # entry is from journal.
        imported_entry.meta.pop('filename')
        return result


def get_info(raw_entry: Directive) -> dict:
    """Describe where `raw_entry` came from.

    Returns a dict with the mimetype of the source file (``type``), the
    source file path (``filename``) and the entry's ``lineno`` meta value
    (``line``). Both values are read from ``raw_entry.meta``.
    """
    meta = raw_entry.meta
    source_path = meta['filename']
    return {
        'type': get_file(source_path).mimetype(),
        'filename': source_path,
        'line': meta['lineno'],
    }

def balance_amounts(txn: Transaction) -> None:
    """Add FIXME account postings for the remaining amount to balance `txn`.

    The units of all existing postings are accumulated in a
    `SimpleInventory`; for every currency left in it, a posting on
    `FIXME_ACCOUNT` carrying the negated amount is appended to
    ``txn.postings``.
    """
    residual = SimpleInventory()
    for existing in txn.postings:
        residual += existing.units
    txn.postings.extend(
        Posting(
            account=FIXME_ACCOUNT,
            units=Amount(currency=currency, number=-residual[currency]),
            cost=None,
            price=None,
            flag=None,
            meta={},
        )
        for currency in residual
    )


def load(spec, log_status):
    """Build an `ImporterSource` from `spec`.

    Every item of `spec` is passed as a keyword argument to the constructor,
    together with `log_status`.
    """
    source = ImporterSource(log_status=log_status, **spec)
    return source
38 changes: 38 additions & 0 deletions beancount_import/source/generic_importer_source_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

import pytest

from .source_test import check_source_example
from beancount.ingest.importers.csv import Importer as CSVImporter, Col

# Resolved path of testdata/source/generic_importer, two levels above this
# module's directory.
testdata_dir = os.path.realpath(
    os.path.join(
        os.path.dirname(__file__),
        os.pardir, os.pardir,
        'testdata', 'source', 'generic_importer',
    )
)

# Names of the example sub-directories, one parametrized test case each.
examples = [
    'test_basic',
    'test_invalid',
    'test_training_examples',
]

# CSV importer shared by all examples: maps the three CSV columns and is given
# the account, the currency and the expected header line of the statement.
importer = CSVImporter(
    {
        Col.DATE: 'Date',
        Col.NARRATION1: 'Description',
        Col.AMOUNT: 'Amount',
    },
    'Assets:Bank',
    'USD',
    '"Date","Description","Amount"',
)


@pytest.mark.parametrize('name', examples)
def test_source(name: str):
    """Run `check_source_example` on the example directory called `name`."""
    spec = {
        'module': 'beancount_import.source.generic_importer_source',
        'directory': testdata_dir,
        'account': 'Assets:Bank',
        'importer': importer,
    }
    example_path = os.path.join(testdata_dir, name)
    # The absolute fixture path is replaced by a placeholder in the results.
    path_replacements = [(testdata_dir, '<testdata>')]
    check_source_example(
        example_dir=example_path,
        source_spec=spec,
        replacements=path_replacements,
    )
7 changes: 7 additions & 0 deletions testdata/source/generic_importer/generic_statement.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"Date","Description","Amount"
2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-,-1
2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-,-1
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1
2020-01-02,ATM-WD Some Random ATM Machine,500
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1
2020-01-05,Transfer to 1234567890123,300
1 change: 1 addition & 0 deletions testdata/source/generic_importer/test_basic/accounts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Bank
113 changes: 113 additions & 0 deletions testdata/source/generic_importer/test_basic/import_results.beancount
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
;; date: 2020-01-01
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 1, "type": "text/csv"}

; features: [
; {
; "amount": "-1 USD",
; "date": "2020-01-01",
; "key_value_pairs": {
; "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
date: 2020-01-01
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Expenses:FIXME 1 USD

;; date: 2020-01-01
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 2, "type": "text/csv"}

; features: [
; {
; "amount": "-1 USD",
; "date": "2020-01-01",
; "key_value_pairs": {
; "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
date: 2020-01-01
source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
Expenses:FIXME 1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 3, "type": "text/csv"}

; features: [
; {
; "amount": "1 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
date: 2020-01-02
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Expenses:FIXME -1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 5, "type": "text/csv"}

; features: [
; {
; "amount": "1 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
date: 2020-01-02
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Expenses:FIXME -1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 4, "type": "text/csv"}

; features: [
; {
; "amount": "500 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "ATM-WD Some Random ATM Machine"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "ATM-WD Some Random ATM Machine"
Assets:Bank 500 USD
date: 2020-01-02
source_desc: "ATM-WD Some Random ATM Machine"
Expenses:FIXME -500 USD

;; date: 2020-01-05
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 6, "type": "text/csv"}

; features: [
; {
; "amount": "300 USD",
; "date": "2020-01-05",
; "key_value_pairs": {
; "desc": "Transfer to 1234567890123"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-05 * "Transfer to 1234567890123"
Assets:Bank 300 USD
date: 2020-01-05
source_desc: "Transfer to 1234567890123"
Expenses:FIXME -300 USD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1900-01-01 open Assets:Bank
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
1 change: 1 addition & 0 deletions testdata/source/generic_importer/test_invalid/accounts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Bank
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

68 changes: 68 additions & 0 deletions testdata/source/generic_importer/test_invalid/journal.beancount
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
1900-01-01 open Assets:Bank
1900-01-01 open Assets:Cash
1900-01-01 open Expenses:Misc
1900-01-01 open Liabilities:JohnDoe


2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
count: 1
date: 2020-01-01
cleared: TRUE
invalid0: "1 extra"
Expenses:Misc 1 USD

2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
count: 2
date: 2020-01-01
cleared: TRUE
invalid0: "1 extra"
Expenses:Misc 1 USD

2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
count: 3
date: 2020-01-01
cleared: TRUE
Expenses:Misc 1 USD

2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
date: 2020-01-02
cleared: TRUE
Expenses:Misc -1 USD

2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
date: 2020-01-02
cleared: TRUE
Expenses:Misc -1 USD

2020-01-02 * "ATM-WD Some Random ATM Machine"
Assets:Bank 500 USD
source_desc: "ATM-WD Some Random ATM Machine"
date: 2020-01-02
cleared: TRUE
Assets:Cash -500 USD

2020-01-05 * "Transfer to 1234567890123"
Assets:Bank 300 USD
source_desc: "Transfer to 1234567890123"
date: 2020-01-05
cleared: TRUE
Liabilities:JohnDoe -300 USD

2020-01-06 * "Transfer to 1234567890321"
info: "doesn't exist in statement hence invalid"
Assets:Bank 111.11 USD
source_desc: "Transfer to 1234567890123"
date: 2020-01-05
cleared: TRUE
invalid1: "1 extra"
Liabilities:JohnDoe -111.11 USD

0 comments on commit 754a75b

Please sign in to comment.