Skip to content

Commit

Permalink
Fix for non-matching entities (#161)
Browse files Browse the repository at this point in the history
* Add test to illustrate issue

* Provide some test fixes

* Don't neglect CounterClockwiseContourIntegral

* Fix ~10% of cases not matching

strncmp returns 0 if the first 'len' bytes of cmark_entities[i].entity
match s. The first `if` confirms the lengths are equal by checking that
cmark_entities[i].entity[len] == 0, but we neglect the case where cmp ==
0 && cmark_entities[i].entity[len] != 0, i.e. s is only a proper prefix of
the table entry.  This should be treated the same as cmp < 0, because
strcmp("abc", "abcd") < 0.

* Don't depend on py3.3 in tests
  • Loading branch information
Yuki Izumi authored and jgm committed Nov 4, 2016
1 parent 14fe768 commit 64e1394
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/entities.inc
Expand Up @@ -6,7 +6,7 @@ struct cmark_entity_node {
};

#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 31
#define CMARK_ENTITY_MAX_LENGTH 32
#define CMARK_NUM_ENTITIES 2125

static const struct cmark_entity_node cmark_entities[] = {
Expand Down
2 changes: 1 addition & 1 deletion src/houdini_html_u.c
Expand Up @@ -16,7 +16,7 @@ static const unsigned char *S_lookup(int i, int low, int hi,
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
return (const unsigned char *)cmark_entities[i].bytes;
} else if (cmp < 0 && i > low) {
} else if (cmp <= 0 && i > low) {
j = i - ((i - low) / 2);
if (j == i)
j -= 1;
Expand Down
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Expand Up @@ -60,6 +60,10 @@ IF (PYTHONINTERP_FOUND)
"${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
)

# Register the entity round-trip test: runs entity_tests.py with the Python
# interpreter, passing the ../src directory (relative to the current binary
# dir) as --library-dir.
# NOTE(review): despite the "_executable" suffix, this invocation passes
# --library-dir rather than --program -- confirm the name is intentional.
add_test(entity_executable
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
"--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
)

ELSE(PYTHONINTERP_FOUND)

Expand Down
68 changes: 68 additions & 0 deletions test/entity_tests.py
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import os
import argparse
import sys
import platform
import html
from cmark import CMark

def get_entities():
    """Return ``(name, text)`` pairs parsed from ``../src/entities.inc``.

    Every line that starts with ``{(unsigned char*)"NAME", {v0, v1, ...}``
    contributes one pair: the entity name and the UTF-8 decoding of the
    listed integer values, with the last value dropped before decoding
    (presumably the C string's trailing NUL -- confirm against the table).
    """
    row_pattern = re.compile(r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}',
                             re.MULTILINE)
    inc_path = os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')
    with open(inc_path) as inc_file:
        source = inc_file.read()

    def decode(value_list):
        # Drop the final listed value, then interpret the rest as UTF-8 bytes.
        values = [int(token) for token in value_list.split(", ")[:-1]]
        return bytes(values).decode('utf-8')

    return [(name, decode(value_list))
            for name, value_list in row_pattern.findall(source)]

if __name__ == "__main__":
    # Command-line driver: render "&NAME;" for every entity returned by
    # get_entities() through cmark and check that the expected text shows up
    # in the HTML output. Exits with status 0 only when nothing failed or
    # errored, and 1 otherwise.
    parser = argparse.ArgumentParser(description='Run cmark tests.')
    parser.add_argument('--program', dest='program', nargs='?', default=None,
                        help='program to test')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                        default=None, help='directory containing dynamic library')
    args = parser.parse_args(sys.argv[1:])

    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    entities = get_entities()

    passed = 0
    errored = 0
    failed = 0

    # Entities whose expected output is not simply the decoded table text.
    exceptions = {
        'quot': '&quot;',
        'QUOT': '&quot;',

        # These are broken, but I'm not too worried about them.
        'nvlt': '&lt;⃒',
        'nvgt': '&gt;⃒',
    }

    print("Testing entities:")
    for entity, utf8 in entities:
        rc, actual, err = cmark.to_html("&{};".format(entity))
        check = exceptions.get(entity, utf8)

        if rc != 0:
            errored += 1
            print(entity, '[ERRORED (return code {})]'.format(rc))
            print(err)
        elif check in actual:
            # Pass when the expected text appears anywhere in the output.
            print(entity, '[PASSED]')
            passed += 1
        else:
            print(entity, '[FAILED]')
            print(repr(actual))
            failed += 1

    print("{} passed, {} failed, {} errored".format(passed, failed, errored))
    # sys.exit instead of the bare exit(): the latter is a convenience name
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist (e.g. under `python -S`).
    sys.exit(0 if failed == 0 and errored == 0 else 1)
2 changes: 1 addition & 1 deletion tools/make_entities_inc.py
Expand Up @@ -20,7 +20,7 @@
};
#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 31""")
#define CMARK_ENTITY_MAX_LENGTH 32""")

print("#define CMARK_NUM_ENTITIES " + str(len(entities)));

Expand Down

0 comments on commit 64e1394

Please sign in to comment.