From 54d45f0552a1ce04c7bfb8ffc8da9d2c601d9bd1 Mon Sep 17 00:00:00 2001 From: Jim Kalafut Date: Sun, 16 Oct 2016 15:08:11 -0700 Subject: [PATCH] Add --incremental and --skip-dupes options Closes #92 --- README.md | 11 +++++++++ icsv2ledger.py | 65 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0085788..696cbef 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Options can either be used from command line or in configuration file. --desc STR CSV column number matching description --effective-date INT CSV column number matching effective date --encoding STR text encoding of CSV input file + --incremental append output as transactions are processed --ledger-date-format STR date format for ledger output file --ledger-decimal-comma @@ -89,7 +90,9 @@ Options can either be used from command line or in configuration file. --accounts-file FILE file which holds a list of allowed accounts --quiet, -q do not prompt if account can be deduced --reverse reverse the order of entries in the CSV file + --skip-dupes skip transactions that have already been imported --skip-lines INT number of lines to skip from CSV file + --skip-older-than skip entries more than X days old --tags, -t prompt for transaction tags --template-file FILE file which holds the template -h, --help show this help message and exit @@ -219,6 +222,10 @@ is the text encoding of the CSV input file. Default is `utf-8`. The encoding should be specified if the CSV file contains non-ASCII characters (typically in the transaction description) in an encoding other than UTF-8. +**`--incremental`** + +appends output as transactions are processed. The default flow is to process all CSV input and then output the result. When `--incremental` is specified, output is written after every transaction. This allows one to stop (ctrl-c) and restart to progressively process a CSV file (`--skip-dupes` is a useful companion option). 
This option cannot be used with `--reverse`. + **`--ledger-date-format STR`** describes the date format to be used when creating ledger entries. If @@ -283,6 +290,10 @@ is `False`. will print ledger entries in reverse of their order in the CSV file. +**`--skip-dupes`** + +will skip transactions if the exact CSV line already appears as a `; CSV: ...` comment in the current ledger file (which means your output template will need this comment). This can help if you download statements without using a precise date range. A useful pattern is to include CSV comments for both "sides" of a transaction if you download from multiple sources that resolve to a single transaction (e.g. paying a credit card from checking). + **`--skip-lines INT`** is the number of lines to skip from the beginning of the CSV file. diff --git a/icsv2ledger.py b/icsv2ledger.py index efc9d2c..1e9e689 100755 --- a/icsv2ledger.py +++ b/icsv2ledger.py @@ -100,6 +100,8 @@ def get_locale_currency_symbol(): 'quiet': False, 'reverse': False, 'skip_lines': str(1), + 'skip_dupes': False, + 'incremental': False, 'tags': False, 'delimiter': ',', 'csv_decimal_comma': False, @@ -233,7 +235,7 @@ def parse_args_and_config_file(): parser.add_argument( 'outfile', nargs='?', - type=FileType('w', encoding='utf-8'), + type=FileType('a', encoding='utf-8'), default=sys.stdout, help=('output filename or stdout in Ledger syntax' ' (default: {0})'.format('stdout'))) @@ -265,6 +267,16 @@ def parse_args_and_config_file(): type=int, help=('number of lines to skip from CSV file' ' (default: {0})'.format(DEFAULTS.skip_lines))) + parser.add_argument( + '--skip-dupes', + action='store_true', + help=('skip transactions that have already been imported' + ' (default: {0})'.format(DEFAULTS.skip_dupes))) + parser.add_argument( + '--incremental', + action='store_true', + help=('append output as transactions are processed' + ' (default: {0})'.format(DEFAULTS.incremental))) parser.add_argument( '--reverse', action='store_true', @@ -388,6
+400,10 @@ def parse_args_and_config_file(): file=sys.stderr) sys.exit(1) + if args.incremental and args.reverse: + print('reverse cannot be used in incremental mode') + sys.exit(1) + if args.encoding != args.infile.encoding: args.infile = io.TextIOWrapper(args.infile.detach(), encoding=args.encoding) @@ -486,8 +502,8 @@ def journal_entry(self, transaction_index, payee, account, tags): uuid_regex = re.compile(r"UUID:", re.IGNORECASE) uuid = [v for v in tags if uuid_regex.match(v)] if uuid: - uuid = uuid[0] - tags.remove(uuid) + uuid = uuid[0] + tags.remove(uuid) format_data = { 'date': self.date, 'effective_date': self.effective_date, @@ -509,7 +525,12 @@ def journal_entry(self, transaction_index, payee, account, tags): 'md5sum': self.md5sum, 'csv': self.raw_csv} format_data.update(self.addons) - return template.format(**format_data) + + # generate and clean output + output_lines = template.format(**format_data).split('\n') + output = '\n'.join([x.rstrip() for x in output_lines]) + + return output def get_field_at_index(fields, index, csv_decimal_comma, ledger_decimal_comma): """ @@ -551,6 +572,16 @@ def get_field_at_index(fields, index, csv_decimal_comma, ledger_decimal_comma): return value +def csv_from_ledger(ledger_file): + pattern = re.compile(r"^\s*[;#]\s*CSV:\s*(.*?)\s*$") + csv_comments = set() + with open(ledger_file) as f: + for line in f: + m = pattern.match(line) + if m: + csv_comments.add(m.group(1)) + return csv_comments + def payees_from_ledger(ledger_file): return from_ledger(ledger_file, 'payees') @@ -710,6 +741,7 @@ def main(): if options.ledger_file: possible_accounts = accounts_from_ledger(options.ledger_file) possible_payees = payees_from_ledger(options.ledger_file) + csv_comments = csv_from_ledger(options.ledger_file) # Read mappings mappings = [] @@ -781,11 +813,15 @@ def process_input_output(in_file, out_file): Process them. Write Ledger lines either to filename or stdout. 
""" + if not options.incremental: + out_file.truncate(0) + csv_lines = in_file.readlines() if in_file.name == '': reset_stdin() - ledger_lines = process_csv_lines(csv_lines) - print(*ledger_lines, sep='\n', file=out_file) + for line in process_csv_lines(csv_lines): + print(line, sep='\n', file=out_file) + out_file.flush() def process_csv_lines(csv_lines): dialect = None @@ -803,12 +839,23 @@ def process_csv_lines(csv_lines): if len(row) == 0: continue + # Skip any lines already in the ledger file + if options.skip_dupes and csv_lines[options.skip_lines + i].strip() in csv_comments: + continue + entry = Entry(row, csv_lines[options.skip_lines + i], options) if (options.skip_older_than < 0) or (entry.days_old <= options.skip_older_than): - payee, account, tags = get_payee_and_account(entry) - ledger_lines.append( - entry.journal_entry(i + 1, payee, account, tags)) + try: + payee, account, tags = get_payee_and_account(entry) + except KeyboardInterrupt: + print() + sys.exit(0) + line = entry.journal_entry(i + 1, payee, account, tags) + if options.incremental: + yield line + else: + ledger_lines.append(line) if options.reverse: ledger_lines.reverse()