forked from commonmark/cmark
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for non-matching entities (#161)
* Add test to illustrate issue
* Provide some test fixes
* Don't neglect CounterClockwiseContourIntegral
* Fix ~10% of cases not matching: strncmp returns 0 if the first 'len' bytes of cmark_entities[i].entity match s. We check for equal length in the first `if` by testing whether cmark_entities[i].entity[len] == 0, but we neglect the case where cmp == 0 && cmark_entities[i].entity[len] != 0. This should be treated the same as cmp < 0, because strcmp("abc", "abcd") < 0.
* Don't depend on py3.3 in tests
- Loading branch information
Showing
5 changed files
with
75 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
import re | ||
import os | ||
import argparse | ||
import sys | ||
import platform | ||
import html | ||
from cmark import CMark | ||
|
||
def get_entities(path=None):
    """Parse the entity table out of the generated ``entities.inc`` file.

    Every row of interest has the shape
    ``{(unsigned char*)"name", {b0, b1, ..., bN}``: a quoted entity name
    followed by a brace-enclosed list of decimal byte values.

    Args:
        path: File to read. When omitted, ``../src/entities.inc`` relative
            to the directory containing this script is used.

    Returns:
        A list of ``(entity_name, expansion)`` tuples in file order, where
        ``expansion`` is the byte list decoded as UTF-8 with its last
        element removed.
    """
    if path is None:
        path = os.path.join(os.path.dirname(__file__), '..', 'src',
                            'entities.inc')
    # The leading brace is escaped so it can never be read as a quantifier.
    regex = r'^\{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
    # Pin the encoding so reading does not depend on the locale default.
    with open(path, encoding='utf-8') as f:
        code = f.read()
    entities = []
    for entity, byte_list in re.findall(regex, code, re.MULTILINE):
        # Split on bare commas (int() tolerates surrounding whitespace) and
        # drop the final element -- presumably the C string's NUL terminator.
        values = [int(item) for item in byte_list.split(',')][:-1]
        entities.append((entity, bytes(values).decode('utf-8')))
    return entities
|
||
if __name__ == "__main__":
    # Command-line driver: render every known entity through cmark and check
    # that the HTML output contains the expected expansion.
    parser = argparse.ArgumentParser(description='Run cmark tests.')
    parser.add_argument('--program', dest='program', nargs='?', default=None,
                        help='program to test')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                        default=None,
                        help='directory containing dynamic library')
    args = parser.parse_args(sys.argv[1:])

    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    entities = get_entities()

    passed = 0
    errored = 0
    failed = 0

    # Expected output fragments that differ from the raw decoded expansion.
    # The rendered HTML carries '"', '<' and '>' as character references, so
    # the raw characters would never be found in the output for these names.
    exceptions = {
        'quot': '&quot;',
        'QUOT': '&quot;',

        # These are broken, but I'm not too worried about them.
        # (\u20d2 is the combining overlay that follows the escaped bracket.)
        'nvlt': '&lt;\u20d2',
        'nvgt': '&gt;\u20d2',
    }

    print("Testing entities:")
    for entity, utf8 in entities:
        rc, actual, err = cmark.to_html("&{};".format(entity))
        check = exceptions.get(entity, utf8)

        if rc != 0:
            errored += 1
            print(entity, '[ERRORED (return code {})]'.format(rc))
            print(err)
        elif check in actual:
            print(entity, '[PASSED]')
            passed += 1
        else:
            print(entity, '[FAILED]')
            print(repr(actual))
            failed += 1

    print("{} passed, {} failed, {} errored".format(passed, failed, errored))
    # sys.exit rather than the site-provided exit() builtin, which is meant
    # for interactive use and is absent when the site module is not loaded.
    sys.exit(0 if failed == 0 and errored == 0 else 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters