Skip to content

Commit

Permalink
Merge pull request quentinsf#93 from kalafut/incremental_pr
Browse files Browse the repository at this point in the history
Add --incremental and --skip-dupes options
  • Loading branch information
petdr committed Oct 17, 2016
2 parents 1d66c3b + 54d45f0 commit 3ae0843
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 9 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Options can either be used from command line or in configuration file.
--desc STR CSV column number matching description
--effective-date INT CSV column number matching effective date
--encoding STR text encoding of CSV input file
--incremental append output as transactions are processed
--ledger-date-format STR
date format for ledger output file
--ledger-decimal-comma
Expand All @@ -89,7 +90,9 @@ Options can either be used from command line or in configuration file.
--accounts-file FILE file which holds a list of allowed accounts
--quiet, -q do not prompt if account can be deduced
--reverse reverse the order of entries in the CSV file
--skip-dupes skip transactions that have already been imported
--skip-lines INT number of lines to skip from CSV file
--skip-older-than skip entries more than X days old
--tags, -t prompt for transaction tags
--template-file FILE file which holds the template
-h, --help show this help message and exit
Expand Down Expand Up @@ -219,6 +222,10 @@ is the text encoding of the CSV input file. Default is `utf-8`. The encoding
should be specified if the CSV file contains non-ASCII characters (typically in
the transaction description) in an encoding other than UTF-8.

**`--incremental`**

appends output as transactions are processed. The default flow is to process all CSV input and then output the result. When `--incremental` is specified, output is written after every transaction. This allows one to stop (ctrl-c) and restart to progressively process a CSV file (`--skip-dupes` is a useful companion option). This option cannot be used with `--reverse`.

**`--ledger-date-format STR`**

describes the date format to be used when creating ledger entries. If
Expand Down Expand Up @@ -283,6 +290,10 @@ is `False`.

will print ledger entries in reverse of their order in the CSV file.

**`--skip-dupes`**

will skip transactions if the exact CSV line already appears as a `; CSV: ...` comment in the current ledger file (which means your output template will need to include this comment). This can help if you download statements without using a precise date range. A useful pattern is to include CSV comments for both "sides" of a transaction if you download from multiple sources that resolve to a single transaction (e.g. paying a credit card from checking).

**`--skip-lines INT`**

is the number of lines to skip from the beginning of the CSV file.
Expand Down
65 changes: 56 additions & 9 deletions icsv2ledger.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def get_locale_currency_symbol():
'quiet': False,
'reverse': False,
'skip_lines': str(1),
'skip_dupes': False,
'incremental': False,
'tags': False,
'delimiter': ',',
'csv_decimal_comma': False,
Expand Down Expand Up @@ -233,7 +235,7 @@ def parse_args_and_config_file():
parser.add_argument(
'outfile',
nargs='?',
type=FileType('w', encoding='utf-8'),
type=FileType('a', encoding='utf-8'),
default=sys.stdout,
help=('output filename or stdout in Ledger syntax'
' (default: {0})'.format('stdout')))
Expand Down Expand Up @@ -265,6 +267,16 @@ def parse_args_and_config_file():
type=int,
help=('number of lines to skip from CSV file'
' (default: {0})'.format(DEFAULTS.skip_lines)))
parser.add_argument(
'--skip-dupes',
action='store_true',
help=('skip transactions that have already been imported'
' (default: {0})'.format(DEFAULTS.skip_dupes)))
parser.add_argument(
'--incremental',
action='store_true',
help=('append output as transactions are processed'
' (default: {0})'.format(DEFAULTS.incremental)))
parser.add_argument(
'--reverse',
action='store_true',
Expand Down Expand Up @@ -388,6 +400,10 @@ def parse_args_and_config_file():
file=sys.stderr)
sys.exit(1)

if args.incremental and args.reverse:
print('reverse cannot be used in incremental mode')
sys.exit(1)

if args.encoding != args.infile.encoding:
args.infile = io.TextIOWrapper(args.infile.detach(),
encoding=args.encoding)
Expand Down Expand Up @@ -486,8 +502,8 @@ def journal_entry(self, transaction_index, payee, account, tags):
uuid_regex = re.compile(r"UUID:", re.IGNORECASE)
uuid = [v for v in tags if uuid_regex.match(v)]
if uuid:
uuid = uuid[0]
tags.remove(uuid)
uuid = uuid[0]
tags.remove(uuid)
format_data = {
'date': self.date,
'effective_date': self.effective_date,
Expand All @@ -509,7 +525,12 @@ def journal_entry(self, transaction_index, payee, account, tags):
'md5sum': self.md5sum,
'csv': self.raw_csv}
format_data.update(self.addons)
return template.format(**format_data)

# generate and clean output
output_lines = template.format(**format_data).split('\n')
output = '\n'.join([x.rstrip() for x in output_lines])

return output

def get_field_at_index(fields, index, csv_decimal_comma, ledger_decimal_comma):
"""
Expand Down Expand Up @@ -551,6 +572,16 @@ def get_field_at_index(fields, index, csv_decimal_comma, ledger_decimal_comma):
return value


def csv_from_ledger(ledger_file):
    """Collect the raw CSV lines recorded as comments in a ledger file.

    Each line of ``ledger_file`` that consists of a ``;`` or ``#`` comment
    marker followed by ``CSV:`` contributes the text after ``CSV:`` (with
    surrounding whitespace stripped) to the result.

    Args:
        ledger_file: Path of the ledger file to scan.

    Returns:
        A set of the CSV strings found; empty if the file has no such
        comments.
    """
    pattern = re.compile(r"^\s*[;#]\s*CSV:\s*(.*?)\s*$")
    csv_comments = set()
    # Read explicitly as UTF-8: the output file is written with that encoding,
    # so relying on the platform default could mis-decode non-ASCII comments.
    with open(ledger_file, encoding='utf-8') as f:
        for line in f:
            m = pattern.match(line)
            if m:
                csv_comments.add(m.group(1))
    return csv_comments


def payees_from_ledger(ledger_file):
return from_ledger(ledger_file, 'payees')
Expand Down Expand Up @@ -710,6 +741,7 @@ def main():
if options.ledger_file:
possible_accounts = accounts_from_ledger(options.ledger_file)
possible_payees = payees_from_ledger(options.ledger_file)
csv_comments = csv_from_ledger(options.ledger_file)

# Read mappings
mappings = []
Expand Down Expand Up @@ -781,11 +813,15 @@ def process_input_output(in_file, out_file):
Process them.
Write Ledger lines either to filename or stdout.
"""
if not options.incremental:
out_file.truncate(0)

csv_lines = in_file.readlines()
if in_file.name == '<stdin>':
reset_stdin()
ledger_lines = process_csv_lines(csv_lines)
print(*ledger_lines, sep='\n', file=out_file)
for line in process_csv_lines(csv_lines):
print(line, sep='\n', file=out_file)
out_file.flush()

def process_csv_lines(csv_lines):
dialect = None
Expand All @@ -803,12 +839,23 @@ def process_csv_lines(csv_lines):
if len(row) == 0:
continue

# Skip any lines already in the ledger file
if options.skip_dupes and csv_lines[options.skip_lines + i].strip() in csv_comments:
continue

entry = Entry(row, csv_lines[options.skip_lines + i],
options)
if (options.skip_older_than < 0) or (entry.days_old <= options.skip_older_than):
payee, account, tags = get_payee_and_account(entry)
ledger_lines.append(
entry.journal_entry(i + 1, payee, account, tags))
try:
payee, account, tags = get_payee_and_account(entry)
except KeyboardInterrupt:
print()
sys.exit(0)
line = entry.journal_entry(i + 1, payee, account, tags)
if options.incremental:
yield line
else:
ledger_lines.append(line)

if options.reverse:
ledger_lines.reverse()
Expand Down

0 comments on commit 3ae0843

Please sign in to comment.