From 5e385d0744764cc22c66af898ed11859df0b6225 Mon Sep 17 00:00:00 2001 From: Quentin Carbonneaux Date: Thu, 7 May 2020 12:26:37 +0200 Subject: [PATCH 1/3] Cleanup formatting in .. coqtop:: directives --- doc/tools/coqrst/coqdoc/main.py | 22 +++++----------------- doc/tools/coqrst/coqdomain.py | 14 +++++++++++--- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/doc/tools/coqrst/coqdoc/main.py b/doc/tools/coqrst/coqdoc/main.py index a3fc069e6c85..9a12d5e6f8c1 100644 --- a/doc/tools/coqrst/coqdoc/main.py +++ b/doc/tools/coqrst/coqdoc/main.py @@ -48,28 +48,16 @@ def coqdoc(coq_code, coqdoc_bin=None): finally: os.remove(filename) -def is_whitespace_string(elem): - return isinstance(elem, NavigableString) and elem.strip() == "" - -def strip_soup(soup, pred): - """Strip elements matching pred from front and tail of soup.""" - while soup.contents and pred(soup.contents[-1]): - soup.contents.pop() - - skip = 0 - for elem in soup.contents: - if not pred(elem): - break - skip += 1 - - soup.contents[:] = soup.contents[skip:] - def lex(source): """Convert source into a stream of (css_classes, token_string).""" coqdoc_output = coqdoc(source) soup = BeautifulSoup(coqdoc_output, "html.parser") root = soup.find(class_='code') - strip_soup(root, is_whitespace_string) + if root.children: + # strip the leading '\n' + first = next(root.children) + if isinstance(first, NavigableString) and first.string[0] == '\n': + first.string.replace_with(first.string[1:]) for elem in root.children: if isinstance(elem, NavigableString): yield [], elem diff --git a/doc/tools/coqrst/coqdomain.py b/doc/tools/coqrst/coqdomain.py index 9d51d2198aeb..253fc5283fce 100644 --- a/doc/tools/coqrst/coqdomain.py +++ b/doc/tools/coqrst/coqdomain.py @@ -905,9 +905,17 @@ def is_coqtop_block(node): return isinstance(node, nodes.Element) and 'coqtop_options' in node @staticmethod - def split_sentences(source): + def split_sentences(node): """Split Coq sentences in source. Could be improved.""" - return re.split(r"(?<=(? Date: Fri, 8 May 2020 13:29:50 +0200 Subject: [PATCH 2/3] Simplify splitting --- doc/tools/coqrst/coqdomain.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/doc/tools/coqrst/coqdomain.py b/doc/tools/coqrst/coqdomain.py index 253fc5283fce..df11960403cc 100644 --- a/doc/tools/coqrst/coqdomain.py +++ b/doc/tools/coqrst/coqdomain.py @@ -905,17 +905,13 @@ def is_coqtop_block(node): return isinstance(node, nodes.Element) and 'coqtop_options' in node @staticmethod - def split_sentences(node): - """Split Coq sentences in source. Could be improved.""" - lines = map(lambda s: s.rstrip(), node.rawsource.splitlines()) - out = [""] - for l in lines: - out[-1] = out[-1] + l + "\n" - if l.endswith("."): - out.append("") - if out[-1] == "": - out.pop() - return out + def split_lines(source): + """Split Coq input in chunks + + A chunk is a minimal sequence of consecutive lines of the input that + ends with a '.' + """ + return re.split(r"(?<=(? Date: Fri, 8 May 2020 19:12:50 +0200 Subject: [PATCH 3/3] Recursively look for the first string node --- doc/tools/coqrst/coqdoc/main.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/tools/coqrst/coqdoc/main.py b/doc/tools/coqrst/coqdoc/main.py index 9a12d5e6f8c1..de0d912c03b7 100644 --- a/doc/tools/coqrst/coqdoc/main.py +++ b/doc/tools/coqrst/coqdoc/main.py @@ -48,16 +48,22 @@ def coqdoc(coq_code, coqdoc_bin=None): finally: os.remove(filename) +def first_string_node(node): + """Return the first string node, or None if does not exist""" + while node.children: + node = next(node.children) + if isinstance(node, NavigableString): + return node + def lex(source): """Convert source into a stream of (css_classes, token_string).""" coqdoc_output = coqdoc(source) soup = BeautifulSoup(coqdoc_output, "html.parser") root = soup.find(class_='code') - if root.children: - # strip the leading '\n' - first = next(root.children) - if isinstance(first, NavigableString) and first.string[0] == '\n': - first.string.replace_with(first.string[1:]) + # strip the leading '\n' + first = first_string_node(root) + if first and first.string[0] == '\n': + first.string.replace_with(first.string[1:]) for elem in root.children: if isinstance(elem, NavigableString): yield [], elem