From 6e84cdb2a6878e1583be81a84b7a588c5e193af0 Mon Sep 17 00:00:00 2001 From: Ben Blount Date: Thu, 7 Jan 2021 18:21:35 -0800 Subject: [PATCH] Use WSL for pdftotext on Windows pdftotext is not distributed for Windows and I was unable to get it working as a native version even after substantial effort. In contrast, it's quite easy to get via WSL (Windows Subsystem for Linux), where you can install it in any of the available Linux distributions. To make that usable, the executable that is invoked can now be overridden by setting the PDFTOTEXT_BINARY environment variable (e.g. to a wrapper that runs pdftotext under WSL); when the variable is unset or empty, plain `pdftotext` is used as before. --- beancount_import/source/ultipro_google_statement.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/beancount_import/source/ultipro_google_statement.py b/beancount_import/source/ultipro_google_statement.py index 82827c55..09d8f83d 100644 --- a/beancount_import/source/ultipro_google_statement.py +++ b/beancount_import/source/ultipro_google_statement.py @@ -1,5 +1,5 @@ """Parses a Google employee PDF pay statement from Ultipro.""" - +import os from typing import NamedTuple, Dict, Any, List, Optional, Tuple, Union, Callable, Match import datetime import collections @@ -295,7 +295,11 @@ def parse_hours(x: Optional[str]) -> Optional[Decimal]: def parse_filename(path: str): - text = subprocess.check_output(['pdftotext', '-raw', path, '-']).decode() + PDFTOTEXT_ENV='PDFTOTEXT_BINARY' + pdftotext='pdftotext' + if os.getenv(PDFTOTEXT_ENV): + pdftotext=os.getenv(PDFTOTEXT_ENV) + text = subprocess.check_output([pdftotext, '-raw', path, '-']).decode() return parse(text)