fix: Force utf-8 encoding when reading and writing text files
This may fix character encoding problems when the system default encoding is not
UTF-8.
jbms committed Mar 24, 2019
1 parent 131c357 commit f2eb320
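
The change is mechanical: every open() call that reads or writes text now passes an explicit encoding (and, where the importers write text, a fixed newline) instead of relying on the platform default. A minimal standalone sketch of the difference, using a placeholder file name not taken from the diff:

import locale

# open() with no encoding argument uses the locale's preferred encoding,
# which can be cp1252 or another non-UTF-8 codec depending on the system.
print(locale.getpreferredencoding(False))

# Passing encoding='utf-8' (plus a fixed newline for files the importers write)
# makes reads and writes behave identically on every platform.
with open('example.json', 'w', encoding='utf-8', newline='\n') as f:  # placeholder path
    f.write('{"payee": "Café"}\n')

with open('example.json', 'r', encoding='utf-8', newline='\n') as f:
    data = f.read()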
Showing 10 changed files with 18 additions and 17 deletions.
4 changes: 2 additions & 2 deletions beancount_import/source/amazon_invoice.py
@@ -333,7 +333,7 @@ def parse_invoice(path: str) -> Order:

 def parse_regular_order_invoice(path: str) -> Order:
     errors = []
-    with open(path, 'r') as f:
+    with open(path, 'rb') as f:
         soup = bs4.BeautifulSoup(f.read(), 'lxml')
     shipments = parse_shipments(soup)
     payment_table_header = soup.find(
@@ -462,7 +462,7 @@ def get_text_lines(parent_node):

 def parse_digital_order_invoice(path: str) -> Order:
     errors = []
-    with open(path, 'r') as f:
+    with open(path, 'rb') as f:
         soup = bs4.BeautifulSoup(f.read(), 'lxml')

     digital_order_pattern = 'Digital Order: (.*)'
6 changes: 3 additions & 3 deletions beancount_import/source/amazon_invoice_sanitize.py
@@ -70,8 +70,8 @@ def remove_tag(soup: bs4.BeautifulSoup, tag: str):

 def sanitize_invoice(input_path: str, output_path: str,
                      credit_card_digits: str):
-    with open(input_path, 'r') as f:
-        soup = bs4.BeautifulSoup(f.read(), 'lxml')
+    with open(input_path, 'rb') as fb:
+        soup = bs4.BeautifulSoup(fb.read(), 'lxml')
     comments = soup.find_all(text=lambda text: isinstance(text, bs4.Comment))
     remove_tag(soup, 'script')
     remove_tag(soup, 'style')
@@ -93,7 +93,7 @@ def sanitize_invoice(input_path: str, output_path: str,
     output_name, _ = sanitize_order_ids(
         os.path.basename(input_path), order_id_replacements)
     output_path = os.path.join(output_path, output_name)
-    with open(output_path, 'w') as f:
+    with open(output_path, 'w', encoding='utf-8', newline='\n') as f:
         f.write(new_output)


2 changes: 1 addition & 1 deletion beancount_import/source/google_purchases.py
@@ -260,7 +260,7 @@ def prepare(self, journal, results: SourceResults):
             path = os.path.join(self.directory,
                                 prefix + receipt_id + json_suffix)
             self.log_status('google_purchases: processing %s' % (path, ))
-            with open(path, 'r') as f:
+            with open(path, 'r', encoding='utf-8', newline='\n') as f:
                 receipt = json.load(f)
             if receipt_id in takeout_receipt_ids:
                 import_result = make_takeout_import_result(
6 changes: 3 additions & 3 deletions beancount_import/source/healthequity.py
Expand Up @@ -205,7 +205,7 @@ def load_cash_transactions(filename: str, account: str,
expected_field_names = ['Date', 'Transaction', 'Amount', 'Cash Balance']
transactions = []
filename = os.path.abspath(filename)
with open(filename, 'r', newline='') as csvfile:
with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
reader = csv.DictReader(csvfile)
if reader.fieldnames != expected_field_names:
raise RuntimeError(
@@ -236,7 +236,7 @@ def load_fund_transactions(filename: str,
     ]
     transactions = []  # type: List[FundTransaction]
     filename = os.path.abspath(filename)
-    with open(filename, 'r', newline='') as csvfile:
+    with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
         reader = csv.DictReader(csvfile)
         if reader.fieldnames != expected_field_names:
             raise RuntimeError(
@@ -268,7 +268,7 @@ def load_balances(filename: str, date: datetime.date,
     ]
     balances = []  # type: List[ImportedBalance]
     filename = os.path.abspath(filename)
-    with open(filename, 'r', newline='') as csvfile:
+    with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
         reader = csv.DictReader(csvfile)
         if reader.fieldnames != expected_field_names:
             raise RuntimeError(
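The CSV importers (here, and mint.py and venmo.py below) already pass newline='', which the csv module expects; the commit only adds the explicit encoding. A minimal sketch of that pattern, again with a placeholder file name:

import csv

# newline='' lets the csv module handle line endings itself; encoding='utf-8'
# keeps the parse independent of the locale default.
with open('transactions.csv', 'r', encoding='utf-8', newline='') as f:  # placeholder path
    for row in csv.DictReader(f):
        print(row)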
4 changes: 2 additions & 2 deletions beancount_import/source/mint.py
@@ -184,7 +184,7 @@ def load_transactions(filename: str, currency: str = 'USD') -> List[MintEntry]:
     try:
         entries = []
         filename = os.path.abspath(filename)
-        with open(filename, 'r', newline='') as csvfile:
+        with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
             reader = csv.DictReader(csvfile)
             if reader.fieldnames != expected_field_names:
                 raise RuntimeError(
@@ -232,7 +232,7 @@ def load_balances(filename: str) -> List[RawBalance]:
     ]
     balances = []
     filename = os.path.abspath(filename)
-    with open(filename, 'r', newline='') as csvfile:
+    with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
         reader = csv.DictReader(csvfile)
         if reader.fieldnames != expected_field_names:
             raise RuntimeError(
3 changes: 2 additions & 1 deletion beancount_import/source/ofx.py
@@ -1123,8 +1123,9 @@ def __init__(self, seen_fitids, filename):
         self.filename = filename
         parsed_statements = self.parsed_statements = []

-        with open(filename, 'r') as f:
+        with open(filename, 'rb') as f:
             contents = f.read()
+        # A byte string passed to BeautifulSoup is assumed to be UTF-8
         soup = bs4.BeautifulSoup(contents, 'html.parser')

         # Get the description of securities used in this file.
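The BeautifulSoup-based parsers (amazon_invoice.py above and ofx.py here) take a different route: the file is read in binary mode and decoding is left to bs4, which runs its own encoding detection on byte input rather than using the locale default. A minimal sketch using the stdlib parser and a placeholder file name:

import bs4

# Read raw bytes; bs4 detects the encoding (BOM, declared charset, fallback)
# and records what it chose on the parsed tree.
with open('invoice.html', 'rb') as f:  # placeholder path
    soup = bs4.BeautifulSoup(f.read(), 'html.parser')
print(soup.original_encoding)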
2 changes: 1 addition & 1 deletion beancount_import/source/paypal.py
@@ -613,7 +613,7 @@ def prepare(self, journal: JournalEditor, results: SourceResults):
             path = os.path.join(self.directory,
                                 txn_id + transaction_json_suffix)
             self.log_status('paypal: processing %s' % (path, ))
-            with open(path, 'r') as f:
+            with open(path, 'r', encoding='utf-8', newline='\n') as f:
                 txn = json.load(f)
             jsonschema.validate(txn, transaction_schema)
             results.add_pending_entry(
4 changes: 2 additions & 2 deletions beancount_import/source/paypal_sanitize.py
@@ -14,13 +14,13 @@

 def sanitize(input_path: str, output_directory: str):
     txn_id = os.path.splitext(os.path.basename(input_path))[0]
-    with open(input_path, 'r') as f:
+    with open(input_path, 'r', encoding='utf-8', newline='\n') as f:
         content = f.read()
     base_36_chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     new_txn_id = ''.join(base_36_chars[random.randint(0, 35)] for _ in txn_id)
     content = content.replace(txn_id, new_txn_id)
     output_path = os.path.join(output_directory, new_txn_id + '.json')
-    with open(output_path, 'w') as f:
+    with open(output_path, 'w', encoding='utf-8', newline='\n') as f:
         f.write(content)
     print('Wrote: %s' % output_path)

2 changes: 1 addition & 1 deletion beancount_import/source/venmo.py
@@ -197,7 +197,7 @@ def get_info(raw: Union[RawTransaction, RawBalance]):

 def load_csv(path: str, field_names: List[str]) -> List[Dict[str, Union[str,int]]]:
     path = os.path.abspath(path)
-    with open(path, 'r', newline='') as f:
+    with open(path, 'r', newline='', encoding='utf-8') as f:
         csv_reader = csv.DictReader(f)
         assert csv_reader.fieldnames == field_names
         return [add_line_and_filename(x, path, line_i + 1) for line_i, x in enumerate(csv_reader)]
2 changes: 1 addition & 1 deletion beancount_import/source/waveapps.py
@@ -190,7 +190,7 @@ def prepare(self, journal, results: SourceResults):
             path = os.path.join(self.receipt_directory,
                                 receipt_id + json_suffix)
             self.log_status('waveapps: processing %s' % (path, ))
-            with open(path, 'r') as f:
+            with open(path, 'r', newline='\n', encoding='utf-8') as f:
                 receipt = json.load(f)
             jsonschema.validate(receipt, schema)
             if receipt['status'] != 'Ready':
