From e9508d203ace70bc0055f4671a5a4e7736016230 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 19:40:36 +0200 Subject: [PATCH 1/6] Add SyntaxTreeNode --- markdown_it/token.py | 205 ++++++++++++++++++++++++++++++++- markdown_it/utils.py | 11 ++ tests/test_syntax_tree_node.py | 15 +++ 3 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 tests/test_syntax_tree_node.py diff --git a/markdown_it/token.py b/markdown_it/token.py index 3bc5b659..4ef5a43d 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -1,7 +1,9 @@ -from typing import Any, List, Optional, Union +from typing import Any, List, Optional, Union, NamedTuple, Sequence, Tuple, Dict import attr +from .utils import _removesuffix + @attr.s(slots=True) class Token: @@ -166,3 +168,204 @@ def nest_tokens(tokens: List[Token]) -> List[Union[Token, NestedTokens]]: child.children = nest_tokens(nested_tokens[1:-1]) return output + + +class SyntaxTreeNode: + """A Markdown syntax tree node. + + A class that can be used to construct a tree representation of a linear + `markdown-it-py` token stream. + + Each node in the tree represents either: + - root of the Markdown document + - a single unnested `Token` + - a `Token` "_open" and "_close" token pair, and the tokens nested in + between + """ + + class _NesterTokens(NamedTuple): + opening: Token + closing: Token + + def __init__(self) -> None: + """Initialize a root node with no children. + + You probably need `SyntaxTreeNode.from_tokens` instead. + """ + # Only nodes representing an unnested token have self.token + self.token: Optional[Token] = None + + # Only containers have nester tokens + self.nester_tokens: Optional[SyntaxTreeNode._NesterTokens] = None + + # Root node does not have self.parent + self.parent: Optional["SyntaxTreeNode"] = None + + # Empty list unless a non-empty container, or unnested token that has + # children (i.e. inline or img) + self.children: List["SyntaxTreeNode"] = [] + + @staticmethod + def from_tokens(tokens: Sequence[Token]) -> "SyntaxTreeNode": + root = SyntaxTreeNode() + root._set_children_from_tokens(tokens) + return root + + def to_tokens(self) -> List[Token]: + def recursive_collect_tokens(node: "SyntaxTreeNode", token_list: list) -> None: + if node.type == "root": + for child in node.children: + recursive_collect_tokens(child, token_list) + elif node.token: + token_list.append(node.token) + else: + assert node.nester_tokens + token_list.append(node.nester_tokens.opening) + for child in node.children: + recursive_collect_tokens(child, token_list) + token_list.append(node.nester_tokens.closing) + + tokens: List[Token] = [] + recursive_collect_tokens(self, tokens) + return tokens + + @property + def siblings(self) -> Sequence["SyntaxTreeNode"]: + if not self.parent: + return [self] + return self.parent.children + + @property + def type(self) -> str: + if not self.token and not self.nester_tokens: + return "root" + if self.token: + return self.token.type + assert self.nester_tokens + return _removesuffix(self.nester_tokens.opening.type, "_open") + + @property + def next_sibling(self) -> Optional["SyntaxTreeNode"]: + self_index = self.siblings.index(self) + if self_index + 1 < len(self.siblings): + return self.siblings[self_index + 1] + return None + + @property + def previous_sibling(self) -> Optional["SyntaxTreeNode"]: + self_index = self.siblings.index(self) + if self_index - 1 >= 0: + return self.siblings[self_index - 1] + return None + + def _make_child( + self, + *, + token: Optional[Token] = None, + nester_tokens: Optional[_NesterTokens] = None, + ) -> "SyntaxTreeNode": + if token and nester_tokens or not token and not nester_tokens: + raise ValueError("must specify either `token` or `nester_tokens`") + child = SyntaxTreeNode() + if token: + child.token = token + else: + child.nester_tokens = nester_tokens + child.parent = self + self.children.append(child) + return child + + def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None: + """Convert the token stream to a tree structure.""" + reversed_tokens = list(reversed(tokens)) + while reversed_tokens: + token = reversed_tokens.pop() + + if token.nesting == 0: + child = self._make_child(token=token) + if token.children: + child._set_children_from_tokens(token.children) + continue + + assert token.nesting == 1 + + nested_tokens = [token] + nesting = 1 + while reversed_tokens and nesting != 0: + token = reversed_tokens.pop() + nested_tokens.append(token) + nesting += token.nesting + if nesting != 0: + raise ValueError(f"unclosed tokens starting {nested_tokens[0]}") + + child = self._make_child( + nester_tokens=SyntaxTreeNode._NesterTokens( + nested_tokens[0], nested_tokens[-1] + ) + ) + child._set_children_from_tokens(nested_tokens[1:-1]) + + # NOTE: + # The values of the properties defined below directly map to properties + # of the underlying `Token`s. A root node does not translate to a `Token` + # object, so calling these property getters on a root node will raise an + # `AttributeError`. + # + # There is no mapping for `Token.nesting` because getting a `bool` of + # `SyntaxTreeNode.nester_tokens` provides that data, and can be called on + # any node type, including root. + + def _attribute_token(self) -> Token: + if self.token: + return self.token + if self.nester_tokens: + return self.nester_tokens.opening + raise AttributeError("Root node does not have the accessed attribute") + + @property + def tag(self) -> str: + return self._attribute_token().tag + + @property + def attrs(self) -> Dict[str, Any]: + token_attrs = self._attribute_token().attrs + if token_attrs is None: + return {} + # Type ignore because `Token`s attribute types are not perfect + return dict(token_attrs) # type: ignore + + @property + def map(self) -> Optional[Tuple[int, int]]: + map_ = self._attribute_token().map + if map_: + # Type ignore because `Token`s attribute types are not perfect + return tuple(map_) # type: ignore + return None + + @property + def level(self) -> int: + return self._attribute_token().level + + @property + def content(self) -> str: + return self._attribute_token().content + + @property + def markup(self) -> str: + return self._attribute_token().markup + + @property + def info(self) -> str: + return self._attribute_token().info + + @property + def meta(self) -> dict: + return self._attribute_token().meta + + @property + def block(self) -> bool: + return self._attribute_token().block + + @property + def hidden(self) -> bool: + return self._attribute_token().hidden diff --git a/markdown_it/utils.py b/markdown_it/utils.py index 4ac02d27..013f4db3 100644 --- a/markdown_it/utils.py +++ b/markdown_it/utils.py @@ -37,3 +37,14 @@ def read_fixture_file(path: Union[str, Path]) -> List[list]: last_pos = i return tests + + +def _removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. + + Replace this with str.removesuffix() from stdlib when minimum Python + version is 3.9. + """ + if suffix and string.endswith(suffix): + return string[: -len(suffix)] + return string diff --git a/tests/test_syntax_tree_node.py b/tests/test_syntax_tree_node.py new file mode 100644 index 00000000..8565d4dc --- /dev/null +++ b/tests/test_syntax_tree_node.py @@ -0,0 +1,15 @@ +from markdown_it import MarkdownIt +from markdown_it.token import SyntaxTreeNode + +EXAMPLE_MARKDOWN = """ +## Heading here + +Some paragraph text and **emphasis here** +""" + + +def test_tree_to_tokens_conversion(): + mdit = MarkdownIt() + tokens = mdit.parse(EXAMPLE_MARKDOWN) + tokens_after_roundtrip = SyntaxTreeNode.from_tokens(tokens).to_tokens() + assert tokens == tokens_after_roundtrip From 4f1dd36970ce94eb21df51c244efb7d30155781c Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 20:07:54 +0200 Subject: [PATCH 2/6] Deprecate nest_tokens --- markdown_it/token.py | 212 +----------------- markdown_it/tree.py | 209 +++++++++++++++++ ...{test_syntax_tree_node.py => test_tree.py} | 2 +- 3 files changed, 218 insertions(+), 205 deletions(-) create mode 100644 markdown_it/tree.py rename tests/{test_syntax_tree_node.py => test_tree.py} (88%) diff --git a/markdown_it/token.py b/markdown_it/token.py index 4ef5a43d..989ef62e 100644 --- a/markdown_it/token.py +++ b/markdown_it/token.py @@ -1,9 +1,8 @@ -from typing import Any, List, Optional, Union, NamedTuple, Sequence, Tuple, Dict +from typing import Any, List, Optional, Union +import warnings import attr -from .utils import _removesuffix - @attr.s(slots=True) class Token: @@ -136,6 +135,12 @@ def nest_tokens(tokens: List[Token]) -> List[Union[Token, NestedTokens]]: ``NestedTokens`` contain the open and close tokens and a list of children of all tokens in between (recursively nested) """ + warnings.warn( + "`markdown_it.token.nest_tokens` and `markdown_it.token.NestedTokens`" + " are deprecated. Please migrate to `markdown_it.tree.SyntaxTreeNode`", + DeprecationWarning, + ) + output: List[Union[Token, NestedTokens]] = [] tokens = list(reversed(tokens)) @@ -168,204 +173,3 @@ def nest_tokens(tokens: List[Token]) -> List[Union[Token, NestedTokens]]: child.children = nest_tokens(nested_tokens[1:-1]) return output - - -class SyntaxTreeNode: - """A Markdown syntax tree node. - - A class that can be used to construct a tree representation of a linear - `markdown-it-py` token stream. - - Each node in the tree represents either: - - root of the Markdown document - - a single unnested `Token` - - a `Token` "_open" and "_close" token pair, and the tokens nested in - between - """ - - class _NesterTokens(NamedTuple): - opening: Token - closing: Token - - def __init__(self) -> None: - """Initialize a root node with no children. - - You probably need `SyntaxTreeNode.from_tokens` instead. - """ - # Only nodes representing an unnested token have self.token - self.token: Optional[Token] = None - - # Only containers have nester tokens - self.nester_tokens: Optional[SyntaxTreeNode._NesterTokens] = None - - # Root node does not have self.parent - self.parent: Optional["SyntaxTreeNode"] = None - - # Empty list unless a non-empty container, or unnested token that has - # children (i.e. inline or img) - self.children: List["SyntaxTreeNode"] = [] - - @staticmethod - def from_tokens(tokens: Sequence[Token]) -> "SyntaxTreeNode": - root = SyntaxTreeNode() - root._set_children_from_tokens(tokens) - return root - - def to_tokens(self) -> List[Token]: - def recursive_collect_tokens(node: "SyntaxTreeNode", token_list: list) -> None: - if node.type == "root": - for child in node.children: - recursive_collect_tokens(child, token_list) - elif node.token: - token_list.append(node.token) - else: - assert node.nester_tokens - token_list.append(node.nester_tokens.opening) - for child in node.children: - recursive_collect_tokens(child, token_list) - token_list.append(node.nester_tokens.closing) - - tokens: List[Token] = [] - recursive_collect_tokens(self, tokens) - return tokens - - @property - def siblings(self) -> Sequence["SyntaxTreeNode"]: - if not self.parent: - return [self] - return self.parent.children - - @property - def type(self) -> str: - if not self.token and not self.nester_tokens: - return "root" - if self.token: - return self.token.type - assert self.nester_tokens - return _removesuffix(self.nester_tokens.opening.type, "_open") - - @property - def next_sibling(self) -> Optional["SyntaxTreeNode"]: - self_index = self.siblings.index(self) - if self_index + 1 < len(self.siblings): - return self.siblings[self_index + 1] - return None - - @property - def previous_sibling(self) -> Optional["SyntaxTreeNode"]: - self_index = self.siblings.index(self) - if self_index - 1 >= 0: - return self.siblings[self_index - 1] - return None - - def _make_child( - self, - *, - token: Optional[Token] = None, - nester_tokens: Optional[_NesterTokens] = None, - ) -> "SyntaxTreeNode": - if token and nester_tokens or not token and not nester_tokens: - raise ValueError("must specify either `token` or `nester_tokens`") - child = SyntaxTreeNode() - if token: - child.token = token - else: - child.nester_tokens = nester_tokens - child.parent = self - self.children.append(child) - return child - - def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None: - """Convert the token stream to a tree structure.""" - reversed_tokens = list(reversed(tokens)) - while reversed_tokens: - token = reversed_tokens.pop() - - if token.nesting == 0: - child = self._make_child(token=token) - if token.children: - child._set_children_from_tokens(token.children) - continue - - assert token.nesting == 1 - - nested_tokens = [token] - nesting = 1 - while reversed_tokens and nesting != 0: - token = reversed_tokens.pop() - nested_tokens.append(token) - nesting += token.nesting - if nesting != 0: - raise ValueError(f"unclosed tokens starting {nested_tokens[0]}") - - child = self._make_child( - nester_tokens=SyntaxTreeNode._NesterTokens( - nested_tokens[0], nested_tokens[-1] - ) - ) - child._set_children_from_tokens(nested_tokens[1:-1]) - - # NOTE: - # The values of the properties defined below directly map to properties - # of the underlying `Token`s. A root node does not translate to a `Token` - # object, so calling these property getters on a root node will raise an - # `AttributeError`. - # - # There is no mapping for `Token.nesting` because getting a `bool` of - # `SyntaxTreeNode.nester_tokens` provides that data, and can be called on - # any node type, including root. - - def _attribute_token(self) -> Token: - if self.token: - return self.token - if self.nester_tokens: - return self.nester_tokens.opening - raise AttributeError("Root node does not have the accessed attribute") - - @property - def tag(self) -> str: - return self._attribute_token().tag - - @property - def attrs(self) -> Dict[str, Any]: - token_attrs = self._attribute_token().attrs - if token_attrs is None: - return {} - # Type ignore because `Token`s attribute types are not perfect - return dict(token_attrs) # type: ignore - - @property - def map(self) -> Optional[Tuple[int, int]]: - map_ = self._attribute_token().map - if map_: - # Type ignore because `Token`s attribute types are not perfect - return tuple(map_) # type: ignore - return None - - @property - def level(self) -> int: - return self._attribute_token().level - - @property - def content(self) -> str: - return self._attribute_token().content - - @property - def markup(self) -> str: - return self._attribute_token().markup - - @property - def info(self) -> str: - return self._attribute_token().info - - @property - def meta(self) -> dict: - return self._attribute_token().meta - - @property - def block(self) -> bool: - return self._attribute_token().block - - @property - def hidden(self) -> bool: - return self._attribute_token().hidden diff --git a/markdown_it/tree.py b/markdown_it/tree.py new file mode 100644 index 00000000..a88ebb0c --- /dev/null +++ b/markdown_it/tree.py @@ -0,0 +1,209 @@ +"""A tree representation of a linear markdown-it token stream. + +This module is not part of upstream JavaScript markdown-it. +""" +from typing import NamedTuple, Sequence, Tuple, Dict, List, Optional, Any + +from .token import Token +from .utils import _removesuffix + + +class SyntaxTreeNode: + """A Markdown syntax tree node. + + A class that can be used to construct a tree representation of a linear + `markdown-it-py` token stream. + + Each node in the tree represents either: + - root of the Markdown document + - a single unnested `Token` + - a `Token` "_open" and "_close" token pair, and the tokens nested in + between + """ + + class _NesterTokens(NamedTuple): + opening: Token + closing: Token + + def __init__(self) -> None: + """Initialize a root node with no children. + + You probably need `SyntaxTreeNode.from_tokens` instead. + """ + # Only nodes representing an unnested token have self.token + self.token: Optional[Token] = None + + # Only containers have nester tokens + self.nester_tokens: Optional[SyntaxTreeNode._NesterTokens] = None + + # Root node does not have self.parent + self.parent: Optional["SyntaxTreeNode"] = None + + # Empty list unless a non-empty container, or unnested token that has + # children (i.e. inline or img) + self.children: List["SyntaxTreeNode"] = [] + + @staticmethod + def from_tokens(tokens: Sequence[Token]) -> "SyntaxTreeNode": + root = SyntaxTreeNode() + root._set_children_from_tokens(tokens) + return root + + def to_tokens(self) -> List[Token]: + def recursive_collect_tokens(node: "SyntaxTreeNode", token_list: list) -> None: + if node.type == "root": + for child in node.children: + recursive_collect_tokens(child, token_list) + elif node.token: + token_list.append(node.token) + else: + assert node.nester_tokens + token_list.append(node.nester_tokens.opening) + for child in node.children: + recursive_collect_tokens(child, token_list) + token_list.append(node.nester_tokens.closing) + + tokens: List[Token] = [] + recursive_collect_tokens(self, tokens) + return tokens + + @property + def siblings(self) -> Sequence["SyntaxTreeNode"]: + if not self.parent: + return [self] + return self.parent.children + + @property + def type(self) -> str: + if not self.token and not self.nester_tokens: + return "root" + if self.token: + return self.token.type + assert self.nester_tokens + return _removesuffix(self.nester_tokens.opening.type, "_open") + + @property + def next_sibling(self) -> Optional["SyntaxTreeNode"]: + self_index = self.siblings.index(self) + if self_index + 1 < len(self.siblings): + return self.siblings[self_index + 1] + return None + + @property + def previous_sibling(self) -> Optional["SyntaxTreeNode"]: + self_index = self.siblings.index(self) + if self_index - 1 >= 0: + return self.siblings[self_index - 1] + return None + + def _make_child( + self, + *, + token: Optional[Token] = None, + nester_tokens: Optional[_NesterTokens] = None, + ) -> "SyntaxTreeNode": + if token and nester_tokens or not token and not nester_tokens: + raise ValueError("must specify either `token` or `nester_tokens`") + child = SyntaxTreeNode() + if token: + child.token = token + else: + child.nester_tokens = nester_tokens + child.parent = self + self.children.append(child) + return child + + def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None: + """Convert the token stream to a tree structure.""" + reversed_tokens = list(reversed(tokens)) + while reversed_tokens: + token = reversed_tokens.pop() + + if token.nesting == 0: + child = self._make_child(token=token) + if token.children: + child._set_children_from_tokens(token.children) + continue + + assert token.nesting == 1 + + nested_tokens = [token] + nesting = 1 + while reversed_tokens and nesting != 0: + token = reversed_tokens.pop() + nested_tokens.append(token) + nesting += token.nesting + if nesting != 0: + raise ValueError(f"unclosed tokens starting {nested_tokens[0]}") + + child = self._make_child( + nester_tokens=SyntaxTreeNode._NesterTokens( + nested_tokens[0], nested_tokens[-1] + ) + ) + child._set_children_from_tokens(nested_tokens[1:-1]) + + # NOTE: + # The values of the properties defined below directly map to properties + # of the underlying `Token`s. A root node does not translate to a `Token` + # object, so calling these property getters on a root node will raise an + # `AttributeError`. + # + # There is no mapping for `Token.nesting` because getting a `bool` of + # `SyntaxTreeNode.nester_tokens` provides that data, and can be called on + # any node type, including root. + + def _attribute_token(self) -> Token: + if self.token: + return self.token + if self.nester_tokens: + return self.nester_tokens.opening + raise AttributeError("Root node does not have the accessed attribute") + + @property + def tag(self) -> str: + return self._attribute_token().tag + + @property + def attrs(self) -> Dict[str, Any]: + token_attrs = self._attribute_token().attrs + if token_attrs is None: + return {} + # Type ignore because `Token`s attribute types are not perfect + return dict(token_attrs) # type: ignore + + @property + def map(self) -> Optional[Tuple[int, int]]: + map_ = self._attribute_token().map + if map_: + # Type ignore because `Token`s attribute types are not perfect + return tuple(map_) # type: ignore + return None + + @property + def level(self) -> int: + return self._attribute_token().level + + @property + def content(self) -> str: + return self._attribute_token().content + + @property + def markup(self) -> str: + return self._attribute_token().markup + + @property + def info(self) -> str: + return self._attribute_token().info + + @property + def meta(self) -> dict: + return self._attribute_token().meta + + @property + def block(self) -> bool: + return self._attribute_token().block + + @property + def hidden(self) -> bool: + return self._attribute_token().hidden diff --git a/tests/test_syntax_tree_node.py b/tests/test_tree.py similarity index 88% rename from tests/test_syntax_tree_node.py rename to tests/test_tree.py index 8565d4dc..c4b29ebe 100644 --- a/tests/test_syntax_tree_node.py +++ b/tests/test_tree.py @@ -1,5 +1,5 @@ from markdown_it import MarkdownIt -from markdown_it.token import SyntaxTreeNode +from markdown_it.tree import SyntaxTreeNode EXAMPLE_MARKDOWN = """ ## Heading here From de1a0551f4f7cd73e1d3c3506d89dfba06bcbe04 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 20:14:19 +0200 Subject: [PATCH 3/6] More accurate typehint --- markdown_it/tree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/markdown_it/tree.py b/markdown_it/tree.py index a88ebb0c..2ce07c77 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -50,7 +50,9 @@ def from_tokens(tokens: Sequence[Token]) -> "SyntaxTreeNode": return root def to_tokens(self) -> List[Token]: - def recursive_collect_tokens(node: "SyntaxTreeNode", token_list: list) -> None: + def recursive_collect_tokens( + node: "SyntaxTreeNode", token_list: List[Token] + ) -> None: if node.type == "root": for child in node.children: recursive_collect_tokens(child, token_list) From 55ef65e7b2050e3a6814df895d7e5f9c2a0bca1d Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 21:06:08 +0200 Subject: [PATCH 4/6] Add is_nested. Add docstrings --- markdown_it/tree.py | 49 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 2ce07c77..73eaecf5 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -12,7 +12,8 @@ class SyntaxTreeNode: """A Markdown syntax tree node. A class that can be used to construct a tree representation of a linear - `markdown-it-py` token stream. + `markdown-it-py` token stream. Use `SyntaxTreeNode.from_tokens` to + initialize instead of the `__init__` method. Each node in the tree represents either: - root of the Markdown document @@ -45,11 +46,17 @@ def __init__(self) -> None: @staticmethod def from_tokens(tokens: Sequence[Token]) -> "SyntaxTreeNode": + """Instantiate a `SyntaxTreeNode` from a token stream. + + This is the standard method for instantiating `SyntaxTreeNode`. + """ root = SyntaxTreeNode() root._set_children_from_tokens(tokens) return root def to_tokens(self) -> List[Token]: + """Recover the linear token stream.""" + def recursive_collect_tokens( node: "SyntaxTreeNode", token_list: List[Token] ) -> None: @@ -69,14 +76,35 @@ def recursive_collect_tokens( recursive_collect_tokens(self, tokens) return tokens + @property + def is_nested(self) -> bool: + """Is this node nested?. + + Returns `True` if the node represents a `Token` pair and tokens in the + sequence between them, where `Token.nesting` of the first `Token` in + the pair is 1 and nesting of the other `Token` is -1. + """ + return bool(self.nester_tokens) + @property def siblings(self) -> Sequence["SyntaxTreeNode"]: + """Get siblings of the node. + + Gets the whole group of siblings, including self. + """ if not self.parent: return [self] return self.parent.children @property def type(self) -> str: + """Get a string type of the represented syntax. + + - "root" for root nodes + - `Token.type` if the node represents an unnested token + - `Token.type` of the opening token, with "_open" suffix stripped, if + the node represents a nester token pair + """ if not self.token and not self.nester_tokens: return "root" if self.token: @@ -86,6 +114,10 @@ def type(self) -> str: @property def next_sibling(self) -> Optional["SyntaxTreeNode"]: + """Get the next node in the sequence of siblings. + + Returns `None` if this is the last sibling. + """ self_index = self.siblings.index(self) if self_index + 1 < len(self.siblings): return self.siblings[self_index + 1] @@ -93,6 +125,10 @@ def next_sibling(self) -> Optional["SyntaxTreeNode"]: @property def previous_sibling(self) -> Optional["SyntaxTreeNode"]: + """Get the previous node in the sequence of siblings. + + Returns `None` if this is the first sibling. + """ self_index = self.siblings.index(self) if self_index - 1 >= 0: return self.siblings[self_index - 1] @@ -104,6 +140,7 @@ def _make_child( token: Optional[Token] = None, nester_tokens: Optional[_NesterTokens] = None, ) -> "SyntaxTreeNode": + """Make and return a child node for `self`.""" if token and nester_tokens or not token and not nester_tokens: raise ValueError("must specify either `token` or `nester_tokens`") child = SyntaxTreeNode() @@ -116,7 +153,8 @@ def _make_child( return child def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None: - """Convert the token stream to a tree structure.""" + """Convert the token stream to a tree structure and set the resulting + nodes as children of `self`.""" reversed_tokens = list(reversed(tokens)) while reversed_tokens: token = reversed_tokens.pop() @@ -151,11 +189,12 @@ def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None: # object, so calling these property getters on a root node will raise an # `AttributeError`. # - # There is no mapping for `Token.nesting` because getting a `bool` of - # `SyntaxTreeNode.nester_tokens` provides that data, and can be called on - # any node type, including root. + # There is no mapping for `Token.nesting` because the `is_nested` property + # provides that data, and can be called on any node type, including root. def _attribute_token(self) -> Token: + """Return the `Token` that is used as the data source for the + properties defined below.""" if self.token: return self.token if self.nester_tokens: From 98875334271d8739b5d2900012c47b0c5827f756 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 21:24:15 +0200 Subject: [PATCH 5/6] Add property docstrings --- markdown_it/tree.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/markdown_it/tree.py b/markdown_it/tree.py index 73eaecf5..62d16c79 100644 --- a/markdown_it/tree.py +++ b/markdown_it/tree.py @@ -203,10 +203,12 @@ def _attribute_token(self) -> Token: @property def tag(self) -> str: + """html tag name, e.g. \"p\"""" return self._attribute_token().tag @property def attrs(self) -> Dict[str, Any]: + """Html attributes.""" token_attrs = self._attribute_token().attrs if token_attrs is None: return {} @@ -215,6 +217,7 @@ def attrs(self) -> Dict[str, Any]: @property def map(self) -> Optional[Tuple[int, int]]: + """Source map info. Format: `Tuple[ line_begin, line_end ]`""" map_ = self._attribute_token().map if map_: # Type ignore because `Token`s attribute types are not perfect @@ -223,28 +226,37 @@ def map(self) -> Optional[Tuple[int, int]]: @property def level(self) -> int: + """nesting level, the same as `state.level`""" return self._attribute_token().level @property def content(self) -> str: + """In a case of self-closing tag (code, html, fence, etc.), it + has contents of this tag.""" return self._attribute_token().content @property def markup(self) -> str: + """'*' or '_' for emphasis, fence string for fence, etc.""" return self._attribute_token().markup @property def info(self) -> str: + """fence infostring""" return self._attribute_token().info @property def meta(self) -> dict: + """A place for plugins to store an arbitrary data.""" return self._attribute_token().meta @property def block(self) -> bool: + """True for block-level tokens, false for inline tokens.""" return self._attribute_token().block @property def hidden(self) -> bool: + """If it's true, ignore this element when rendering. + Used for tight lists to hide paragraphs.""" return self._attribute_token().hidden From c96b2cbdf5ffe06a42ce881e030f03ad0b836a9c Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen Date: Thu, 25 Feb 2021 22:11:40 +0200 Subject: [PATCH 6/6] Add tests --- tests/test_tree.py | 47 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/tests/test_tree.py b/tests/test_tree.py index c4b29ebe..d6dadec9 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -4,12 +4,53 @@ EXAMPLE_MARKDOWN = """ ## Heading here -Some paragraph text and **emphasis here** +Some paragraph text and **emphasis here** and more text here. """ def test_tree_to_tokens_conversion(): - mdit = MarkdownIt() - tokens = mdit.parse(EXAMPLE_MARKDOWN) + tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) tokens_after_roundtrip = SyntaxTreeNode.from_tokens(tokens).to_tokens() assert tokens == tokens_after_roundtrip + + +def test_property_passthrough(): + tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) + heading_open = tokens[0] + tree = SyntaxTreeNode.from_tokens(tokens) + heading_node = tree.children[0] + assert heading_open.tag == heading_node.tag + assert tuple(heading_open.map) == heading_node.map + assert heading_open.level == heading_node.level + assert heading_open.content == heading_node.content + assert heading_open.markup == heading_node.markup + assert heading_open.info == heading_node.info + assert heading_open.meta == heading_node.meta + assert heading_open.block == heading_node.block + assert heading_open.hidden == heading_node.hidden + + +def test_type(): + tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) + tree = SyntaxTreeNode.from_tokens(tokens) + # Root type is "root" + assert tree.type == "root" + # "_open" suffix must be stripped from nested token type + assert tree.children[0].type == "heading" + # For unnested tokens, node type must remain same as token type + assert tree.children[0].children[0].type == "inline" + + +def test_sibling_traverse(): + tokens = MarkdownIt().parse(EXAMPLE_MARKDOWN) + tree = SyntaxTreeNode.from_tokens(tokens) + paragraph_inline_node = tree.children[1].children[0] + text_node = paragraph_inline_node.children[0] + assert text_node.type == "text" + strong_node = text_node.next_sibling + assert strong_node.type == "strong" + another_text_node = strong_node.next_sibling + assert another_text_node.type == "text" + assert another_text_node.next_sibling is None + assert another_text_node.previous_sibling.previous_sibling == text_node + assert text_node.previous_sibling is None