From 46633b1f00204a310e630009f0420218186439e2 Mon Sep 17 00:00:00 2001
From: Ciro Santilli
Date: Sun, 21 Dec 2014 14:33:31 +0100
Subject: [PATCH] Don't raise exception on invalid UTF-8 output

This could be reproduced with:

    ./test/spec_tests.py --program kramdown --pattern Enti

using Kramdown version 1.5.0.

The main problem was that the uncaught exception aborted the run, so the
remaining tests were never carried out. After this commit, such a case is
simply counted as a failure.

The error message printed is of the form:

    Unicode error: 'utf8' codec can't decode byte 0xfc in position 8: invalid start byte
    '

# \xd3\x92 \xcf\xa0 \xef\xbf\xbd

\n' '

# \xd3\x92 \xcf\xa0 \xfc\x85\xb8\xb0\xa9\xb8

\n' --- test/spec_tests.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/test/spec_tests.py b/test/spec_tests.py index 23ae5029..99ac0dda 100755 --- a/test/spec_tests.py +++ b/test/spec_tests.py @@ -37,8 +37,13 @@ def do_test(test, normalize): [retcode, actual_html, err] = cmark.to_html(test['markdown']) if retcode == 0: expected_html = test['html'] + unicode_error = None if normalize: - passed = normalize_html(actual_html) == normalize_html(expected_html) + try: + passed = normalize_html(actual_html) == normalize_html(expected_html) + except UnicodeDecodeError, e: + unicode_error = e + passed = False else: passed = actual_html == expected_html if passed: @@ -46,11 +51,16 @@ def do_test(test, normalize): else: print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) sys.stdout.write(test['markdown']) - expected_html_lines = expected_html.splitlines(True) - actual_html_lines = actual_html.splitlines(True) - for diffline in unified_diff(expected_html_lines, actual_html_lines, - "expected HTML", "actual HTML"): - sys.stdout.write(diffline) + if unicode_error: + print "Unicode error: " + str(unicode_error) + print repr(expected_html) + print repr(actual_html) + else: + expected_html_lines = expected_html.splitlines(True) + actual_html_lines = actual_html.splitlines(True) + for diffline in unified_diff(expected_html_lines, actual_html_lines, + "expected HTML", "actual HTML"): + sys.stdout.write(diffline) sys.stdout.write('\n') return 'fail' else: