Skip to content

Commit

Permalink
Merge bc19ed6 into 9eb4361
Browse files Browse the repository at this point in the history
  • Loading branch information
dumbPy committed Aug 7, 2020
2 parents 9eb4361 + bc19ed6 commit e18cd18
Show file tree
Hide file tree
Showing 15 changed files with 602 additions and 0 deletions.
164 changes: 164 additions & 0 deletions beancount_import/source/generic_importer_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""This module implements a Source Subclass for wrapping
`beancount.ingest.importer.ImporterProtocol` subclasses importers.
The importers are considered athoritative of the account they represent.
The Transaction.narration set by each importer is copied to Posting.meta[source_desc]
This helps in predicting postings for similar transaction while allowing the
user to change the Transaction description and payee from UI
(see readme.md for more on source_desc)
This `source_desc` meta is also used for check cleared postings and should not be
changed manually
Author: Sufiyan Adhikari(github.com/dumbPy)
"""

import os
import hashlib
from glob import glob
from typing import List
from collections import defaultdict
import itertools
import datetime

from beancount.core.data import Transaction, Posting, Directive
from beancount.core.amount import Amount
from beancount.ingest.importer import ImporterProtocol
from beancount.core.compare import hash_entry
from beancount.ingest.cache import get_file

from ..matching import FIXME_ACCOUNT, SimpleInventory
from . import ImportResult, SourceResults
from ..journal_editor import JournalEditor
from .description_based_source import DescriptionBasedSource, get_pending_and_invalid_entries
from .mint import _get_key_from_posting


class ImporterSource(DescriptionBasedSource):
    """Source that wraps a `beancount.ingest.importer.ImporterProtocol` importer.

    The importer is treated as authoritative for ``account``: the narration of
    each extracted transaction is copied into the ``source_desc`` meta of the
    posting on that account, which beancount-import uses both for training
    predictions and for deciding whether a posting is cleared.
    """

    def __init__(self,
                 directory: str,
                 account: str,
                 importer: ImporterProtocol,
                 **kwargs) -> None:
        """
        Args:
            directory: directory searched recursively for statement files.
            account: the account this importer is authoritative for.
            importer: an `ImporterProtocol` subclass instance used to
                identify and extract the statement files.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Build a _FileMemo cache object for every regular file under the
        # directory. Glob the tilde-expanded `self.directory` (the raw
        # `directory` would silently match nothing for '~/...' paths).
        files = [get_file(f) for f in
                 filter(os.path.isfile,
                        glob(os.path.join(self.directory, '**', '*'),
                             recursive=True))]
        # Keep only the files this importer recognizes.
        self.files = [f for f in files if self.importer.identify(f)]

    @property
    def name(self):
        return self.importer.name()

    def prepare(self, journal: 'JournalEditor', results: SourceResults) -> None:
        """Extract entries from all matched files and report pending/invalid ones."""
        results.add_account(self.account)
        entries = defaultdict(list)
        for f in self.files:
            f_entries = self.importer.extract(f, existing_entries=journal.entries)
            # Group the entries of the current statement by their matching key.
            hashed_entries = defaultdict(list)
            for entry in f_entries:
                key_ = self._get_key_from_imported_entry(entry)
                self._add_description(entry)
                hashed_entries[key_].append(entry)
            # Deduplicate across overlapping statements: if `n` entries with
            # this key were already collected from earlier statements, only
            # the occurrences beyond `n` in this statement are new.
            for key_ in hashed_entries:
                n = len(entries[key_])
                entries[key_].extend(hashed_entries[key_][n:])

        get_pending_and_invalid_entries(
            raw_entries=list(itertools.chain.from_iterable(entries.values())),
            journal_entries=journal.all_entries,
            account_set={self.account},
            get_key_from_posting=_get_key_from_posting,
            get_key_from_raw_entry=self._get_key_from_imported_entry,
            make_import_result=self._make_import_result,
            results=results)

    def _add_description(self, entry: Transaction):
        """Copy the narration and date into the source posting's meta.

        Only the first posting on ``self.account`` is touched; a posting whose
        meta is not a dict is replaced by a copy carrying a fresh meta dict.
        """
        if not isinstance(entry, Transaction):
            return None
        postings = entry.postings  # type: ignore
        for i, posting in enumerate(postings):
            if posting.account != self.account:
                continue
            if isinstance(posting.meta, dict):
                posting.meta["source_desc"] = entry.narration
                posting.meta["date"] = entry.date
            else:
                # Posting is a NamedTuple; _replace builds a copy with the
                # new meta while leaving every other field untouched.
                postings[i] = posting._replace(
                    meta={"source_desc": entry.narration, "date": entry.date})
            break

    def _get_source_posting(self, entry: Transaction):
        """Return the first posting on ``self.account``, or None if absent."""
        for posting in entry.postings:
            if posting.account == self.account:
                return posting
        return None

    def is_posting_cleared(self, posting: Posting) -> bool:
        """Given that this source is authoritative for the account of a particular
        posting, return whether that posting is cleared.
        All postings which have the `source_desc` meta key are considered cleared.
        """
        if posting.account != self.account:
            return False
        return super().is_posting_cleared(posting)

    def _get_key_from_imported_entry(self, entry: Transaction):
        """Matching key for an imported entry: (account, date, units, narration)."""
        source_posting = self._get_source_posting(entry)
        if source_posting is None:
            # Fail loudly with context instead of an opaque AttributeError.
            raise ValueError(
                "{} importer: transaction on {} has no posting to {}".format(
                    self.name, entry.date, self.account))
        return (self.account,
                entry.date,
                source_posting.units,
                entry.narration)

    def _make_import_result(self, imported_entry: Directive):
        """Wrap a directive in an ImportResult, balancing transactions first."""
        if isinstance(imported_entry, Transaction):
            balance_amounts(imported_entry)
        result = ImportResult(
            date=imported_entry.date, info=get_info(imported_entry),
            entries=[imported_entry])
        # Drop 'filename' (after get_info has read it): beancount-import uses
        # its presence to decide whether an entry came from the journal.
        imported_entry.meta.pop('filename', None)
        return result

def _get_key_from_posting(entry: Transaction, posting: Posting,
                          source_postings: List[Posting], source_desc: str,
                          posting_date: datetime.date):
    """Matching key for a journal posting on the source account.

    ``entry`` and ``source_postings`` are part of the callback signature
    required by `get_pending_and_invalid_entries` but are unused here.
    """
    return posting.account, posting_date, posting.units, source_desc

def get_info(raw_entry: Directive) -> dict:
    """Build the UI display info (mimetype, file, line) from the directive's meta."""
    filename = raw_entry.meta['filename']
    return {
        'type': get_file(filename).mimetype(),
        'filename': filename,
        'line': raw_entry.meta['lineno'],
    }

def balance_amounts(txn: Transaction) -> None:
    """Append a FIXME posting per unbalanced currency so that `txn` balances."""
    residual = SimpleInventory()
    for posting in txn.postings:
        residual += posting.units
    # One balancing posting per currency left in the residual inventory.
    for currency in residual:
        txn.postings.append(Posting(
            account=FIXME_ACCOUNT,
            units=Amount(currency=currency, number=-residual[currency]),
            cost=None,
            price=None,
            flag=None,
            meta={},
        ))


def load(spec, log_status):
    """Entry point used by beancount-import to instantiate this source.

    `spec` supplies the ImporterSource keyword arguments
    (directory, account, importer).
    """
    return ImporterSource(log_status=log_status, **spec)
38 changes: 38 additions & 0 deletions beancount_import/source/generic_importer_source_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

import pytest

from .source_test import check_source_example
from beancount.ingest.importers.csv import Importer as CSVImporter, Col

# Directory holding the example statements and expected golden outputs.
testdata_dir = os.path.realpath(
    os.path.join(os.path.dirname(__file__),
                 '..', '..', 'testdata', 'source', 'generic_importer'))

# One sub-directory per golden-test scenario.
examples = ['test_basic', 'test_invalid', 'test_training_examples']

# CSV importer configured for the generic_statement.csv fixture.
importer = CSVImporter(
    {Col.DATE: 'Date', Col.NARRATION1: 'Description', Col.AMOUNT: 'Amount'},
    'Assets:Bank',
    'USD',
    '"Date","Description","Amount"',
)


@pytest.mark.parametrize('name', examples)
def test_source(name: str):
    """Run the golden-data check for one example directory."""
    spec = {
        'module': 'beancount_import.source.generic_importer_source',
        'directory': testdata_dir,
        'account': 'Assets:Bank',
        'importer': importer,
    }
    check_source_example(example_dir=os.path.join(testdata_dir, name),
                         source_spec=spec,
                         replacements=[(testdata_dir, '<testdata>')])
7 changes: 7 additions & 0 deletions testdata/source/generic_importer/generic_statement.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"Date","Description","Amount"
2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-,-1
2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-,-1
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1
2020-01-02,ATM-WD Some Random ATM Machine,500
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-,1
2020-01-05,Transfer to 1234567890123,300
1 change: 1 addition & 0 deletions testdata/source/generic_importer/test_basic/accounts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Bank
113 changes: 113 additions & 0 deletions testdata/source/generic_importer/test_basic/import_results.beancount
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
;; date: 2020-01-01
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 1, "type": "text/csv"}

; features: [
; {
; "amount": "-1 USD",
; "date": "2020-01-01",
; "key_value_pairs": {
; "desc": "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-01 * "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
date: 2020-01-01
source_desc: "by debit card-OTHPG 063441 GOOGLE CLOUD INDIA PVTTHANE-"
Expenses:FIXME 1 USD

;; date: 2020-01-01
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 2, "type": "text/csv"}

; features: [
; {
; "amount": "-1 USD",
; "date": "2020-01-01",
; "key_value_pairs": {
; "desc": "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-01 * "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
Assets:Bank -1 USD
date: 2020-01-01
source_desc: "by debit card-OTHPG 063444 GOOGLE CLOUD INDIA PVTTHANE-"
Expenses:FIXME 1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 3, "type": "text/csv"}

; features: [
; {
; "amount": "1 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
date: 2020-01-02
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Expenses:FIXME -1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 5, "type": "text/csv"}

; features: [
; {
; "amount": "1 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Assets:Bank 1 USD
date: 2020-01-02
source_desc: "BULK POSTING- 00000008237 250120 GOOGLE CLOUD INDIA PVT-"
Expenses:FIXME -1 USD

;; date: 2020-01-02
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 4, "type": "text/csv"}

; features: [
; {
; "amount": "500 USD",
; "date": "2020-01-02",
; "key_value_pairs": {
; "desc": "ATM-WD Some Random ATM Machine"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-02 * "ATM-WD Some Random ATM Machine"
Assets:Bank 500 USD
date: 2020-01-02
source_desc: "ATM-WD Some Random ATM Machine"
Expenses:FIXME -500 USD

;; date: 2020-01-05
;; info: {"filename": "<testdata>/generic_statement.csv", "line": 6, "type": "text/csv"}

; features: [
; {
; "amount": "300 USD",
; "date": "2020-01-05",
; "key_value_pairs": {
; "desc": "Transfer to 1234567890123"
; },
; "source_account": "Assets:Bank"
; }
; ]
2020-01-05 * "Transfer to 1234567890123"
Assets:Bank 300 USD
date: 2020-01-05
source_desc: "Transfer to 1234567890123"
Expenses:FIXME -300 USD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1900-01-01 open Assets:Bank
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
1 change: 1 addition & 0 deletions testdata/source/generic_importer/test_invalid/accounts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Bank
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

0 comments on commit e18cd18

Please sign in to comment.