From 6e84cdb2a6878e1583be81a84b7a588c5e193af0 Mon Sep 17 00:00:00 2001
From: Ben Blount <bblount@google.com>
Date: Thu, 7 Jan 2021 18:21:35 -0800
Subject: [PATCH] Use WSL for pdftotext on Windows

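pdftotext is not distributed for Windows, and I was unable to get a
native build working even after substantial effort. In contrast, it is
quite easy to get via WSL (Windows Subsystem for Linux), where you can
install it in any of the available Linux distributions.

To make that usable, parse_filename now checks the PDFTOTEXT_BINARY
environment variable and, when it is set to a non-empty value, runs that
program in place of the default `pdftotext` command.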
---
 beancount_import/source/ultipro_google_statement.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/beancount_import/source/ultipro_google_statement.py b/beancount_import/source/ultipro_google_statement.py
index 82827c55..09d8f83d 100644
--- a/beancount_import/source/ultipro_google_statement.py
+++ b/beancount_import/source/ultipro_google_statement.py
@@ -1,5 +1,5 @@
 """Parses a Google employee PDF pay statement from Ultipro."""
-
+import os
 from typing import NamedTuple, Dict, Any, List, Optional, Tuple, Union, Callable, Match
 import datetime
 import collections
@@ -295,7 +295,11 @@ def parse_hours(x: Optional[str]) -> Optional[Decimal]:
 
 
 def parse_filename(path: str):
-    text = subprocess.check_output(['pdftotext', '-raw', path, '-']).decode()
+    PDFTOTEXT_ENV='PDFTOTEXT_BINARY'
+    pdftotext='pdftotext'
+    if os.getenv(PDFTOTEXT_ENV):
+        pdftotext=os.getenv(PDFTOTEXT_ENV)
+    text = subprocess.check_output([pdftotext, '-raw', path, '-']).decode()
     return parse(text)