From 4dca63475cd0ab2de156e2a7c043a7df3731018b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 7 Mar 2020 00:30:29 +0000 Subject: [PATCH] Update from fork Update mirroring: Fork: ExecutableBookProject/mistletoe Branch: myst Commit: 4840fcd0aa4fb5976ce470640e444c34fc8b8291 --- .travis.yml | 2 - .vscode/settings.json | 1 + README.md | 20 +- contrib/github_wiki.py | 4 +- contrib/jira_renderer.py | 14 +- contrib/mathjax.py | 4 +- contrib/scheme.py | 25 +- contrib/toc_renderer.py | 6 +- docs/using/index.md | 22 +- docstring.fmt.mustache | 20 + makefile | 27 - mistletoe/__init__.py | 18 +- mistletoe/base_elements.py | 188 +++++ mistletoe/block_tokenizer.py | 64 +- mistletoe/{block_token.py => block_tokens.py} | 771 ++++++++++-------- mistletoe/latex_token.py | 4 +- .../{core_tokens.py => nested_tokenizer.py} | 44 +- mistletoe/parse_context.py | 64 ++ .../renderers}/__init__.py | 0 .../{base_renderer.py => renderers/base.py} | 29 +- .../{html_renderer.py => renderers/html.py} | 20 +- .../{ast_renderer.py => renderers/json.py} | 27 +- .../{latex_renderer.py => renderers/latex.py} | 6 +- mistletoe/span_tokenizer.py | 5 +- mistletoe/{span_token.py => span_tokens.py} | 91 +-- setup.py | 4 +- test/commonmark/test_commonmark.py | 4 +- test/test_ast_renderer.py | 52 -- test/test_base_elements.py | 31 + test/test_block_token.py | 214 ++--- test/test_ci.sh | 50 -- test/test_contrib/test_github_wiki.py | 20 +- test/test_contrib/test_jira_renderer.py | 16 +- test/test_contrib/test_mathjax.py | 4 +- test/test_contrib/test_toc_renderer.py | 13 +- test/test_latex_token.py | 6 +- ...ore_tokens.py => test_nested_tokenizer.py} | 4 +- .../test_html_renderer.py | 45 +- test/test_renderers/test_json_renderer.py | 103 +++ .../test_json_renderer/test_basic.yml | 207 +++++ .../test_link_references.yml | 22 + .../test_latex_renderer.py | 20 +- test/test_samples/__init__.py | 0 test/{ => test_samples}/benchmark.py | 8 +- test/{samples => test_samples}/jquery.md | 0 test/{samples => 
test_samples}/syntax.md | 19 +- test/test_samples/test_samples.py | 15 + .../test_samples/test_jquery.html | 228 ++++++ .../test_samples/test_syntax.html | 717 ++++++++++++++++ test/test_span_token.py | 55 +- 50 files changed, 2429 insertions(+), 904 deletions(-) create mode 100644 docstring.fmt.mustache delete mode 100644 makefile create mode 100644 mistletoe/base_elements.py rename mistletoe/{block_token.py => block_tokens.py} (55%) rename mistletoe/{core_tokens.py => nested_tokenizer.py} (93%) create mode 100644 mistletoe/parse_context.py rename {test/samples => mistletoe/renderers}/__init__.py (100%) rename mistletoe/{base_renderer.py => renderers/base.py} (86%) rename mistletoe/{html_renderer.py => renderers/html.py} (94%) rename mistletoe/{ast_renderer.py => renderers/json.py} (50%) rename mistletoe/{latex_renderer.py => renderers/latex.py} (96%) rename mistletoe/{span_token.py => span_tokens.py} (69%) delete mode 100644 test/test_ast_renderer.py create mode 100644 test/test_base_elements.py delete mode 100755 test/test_ci.sh rename test/{test_core_tokens.py => test_nested_tokenizer.py} (97%) rename test/{ => test_renderers}/test_html_renderer.py (74%) create mode 100644 test/test_renderers/test_json_renderer.py create mode 100644 test/test_renderers/test_json_renderer/test_basic.yml create mode 100644 test/test_renderers/test_json_renderer/test_link_references.yml rename test/{ => test_renderers}/test_latex_renderer.py (87%) create mode 100644 test/test_samples/__init__.py rename test/{ => test_samples}/benchmark.py (90%) rename test/{samples => test_samples}/jquery.md (100%) rename test/{samples => test_samples}/syntax.md (99%) create mode 100644 test/test_samples/test_samples.py create mode 100644 test/test_samples/test_samples/test_jquery.html create mode 100644 test/test_samples/test_samples/test_syntax.html diff --git a/.travis.yml b/.travis.yml index b8a42df..f70b39a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,6 @@ language: python cache: pip 
matrix: include: - - python: 3.5 - env: TEST_TYPE="pytest" - python: 3.6 env: TEST_TYPE="pytest" - python: 3.7 diff --git a/.vscode/settings.json b/.vscode/settings.json index 256ca0b..6f973de 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -14,4 +14,5 @@ "python.linting.flake8Enabled": true, "python.linting.enabled": true, "python.pythonPath": "/anaconda/envs/ebp/bin/python", + "autoDocstring.customTemplatePath": "docstring.fmt.mustache" } diff --git a/README.md b/README.md index 5d407d5..362d6c1 100644 --- a/README.md +++ b/README.md @@ -24,16 +24,16 @@ Unfortunately, mistletoe is not currently being actively maintained (as of June 8th 2019), and so this fork has been created to allow for a deployed release that can be utilised by EBP. Here is a working list of 'up-streamable' changes that would be desired of mistletoe that this version has begun to implement: -- Move testing from `unittest` to `pytest`: `pytest` is now the *de facto* testing architecture and vastly improves the usability/flexibility of testing. -- Introduce `pre-commit` code linting and formatting: This standardizes the code style across the package, and ensures that new commits and Pull Requests also conform to it. -- Introduce `ReadTheDocs` documentation -- Add a conda-forge distribution of the package -- Improve the AST API and documentation: I view [panflute](http://scorreia.com/software/panflute/index.html)'s implementation of the pandoc API in python, as the gold standard for how a pythonic AST API should be written and documented. Some tweaks to the current token class objects, and creating auto-generated RTD documentation, could achieve this. -- Storage of source line/column ranges: LSP and good rendering reporting of warnings/errors, requires access to the source line and column ranges of each parsed token. -- Asynchronous parsing: LSP requires documents to be parsed asynchronously. 
Currently, mistletoe contains a number of global state objects, which make parsing inherently not thread-safe. The simple solution to this is to store these items as `threading.local` objects. A related but slightly more complete solution is to introduce the idea of a 'scoped session', similar to that used by sqlalchemy for database access: [Contextual/Thread-local Sessions](https://docs.sqlalchemy.org/en/13/orm/contextual.html#unitofwork-contextual) -- Improve extensibility of block tokens: A Markdown parser is inherently a Finite State-Machine + Memory (a.k.a Push-down Automata (PDA)), with parsing tokens as states (for a good example of a python state-machine see [pytransitions](https://github.com/pytransitions/transitions)). The problem with extensibility, is that inherently states are interdependent; when introducing a new state/token you must provide logic to all the other tokens, w.r.t to when to transition to this new token. Currently, MyST Parser sub-classes nearly all the Mistletoe block tokens to implement the extensions it requires, but it would be ideal if there was a more systematic approach for this. -- Improve extensibility for span tokens: Mistletoe does allow for span token extensions to be added, at least in a simple way. However, as with block tokens above, there is often an interconnectivity to them, especially when considering nested span tokens. As of 7cc2c92, MyST-Parser now overrides some of Mistetoe's core logic to achieve correct parsing of Math tokens, but if possible this should be made more general. -- Improve rendering logic: Currently, there is no concept of recursive walk-throughs or 'visitor' patterns in the Misteltoe `BaseRenderer`, which is a better method for rendering tree like structures (as used by docutils/panflute). Also, the current token instantiating (within context managers) needs improvement (see [miyuchina/mistletoe#56](https://github.com/miyuchina/mistletoe/issues/56)). 
+- [x] Move testing from `unittest` to `pytest`: `pytest` is now the *de facto* testing architecture and vastly improves the usability/flexibility of testing. +- [x] Introduce `pre-commit` code linting and formatting: This standardizes the code style across the package, and ensures that new commits and Pull Requests also conform to it. +- [x] Introduce `ReadTheDocs` documentation +- [x] Add a conda-forge distribution of the package +- [ ] Improve the AST API and documentation: I view [panflute](http://scorreia.com/software/panflute/index.html)'s implementation of the pandoc API in python, as the gold standard for how a pythonic AST API should be written and documented. Some tweaks to the current token class objects, and creating auto-generated RTD documentation, could achieve this. +- [ ] Storage of source line/column ranges: LSP and good rendering reporting of warnings/errors, requires access to the source line and column ranges of each parsed token. +- [x] Asynchronous parsing: LSP requires documents to be parsed asynchronously. Currently, mistletoe contains a number of global state objects, which make parsing inherently not thread-safe. The simple solution to this is to store these items as `threading.local` objects. A related but slightly more complete solution is to introduce the idea of a 'scoped session', similar to that used by sqlalchemy for database access: [Contextual/Thread-local Sessions](https://docs.sqlalchemy.org/en/13/orm/contextual.html#unitofwork-contextual) +- [ ] Improve extensibility of block tokens: A Markdown parser is inherently a Finite State-Machine + Memory (a.k.a Push-down Automata (PDA)), with parsing tokens as states (for a good example of a python state-machine see [pytransitions](https://github.com/pytransitions/transitions)). The problem with extensibility, is that inherently states are interdependent; when introducing a new state/token you must provide logic to all the other tokens, w.r.t to when to transition to this new token. 
Currently, MyST Parser sub-classes nearly all the Mistletoe block tokens to implement the extensions it requires, but it would be ideal if there was a more systematic approach for this. +- [ ] Improve extensibility for span tokens: Mistletoe does allow for span token extensions to be added, at least in a simple way. However, as with block tokens above, there is often an interconnectivity to them, especially when considering nested span tokens. As of 7cc2c92, MyST-Parser now overrides some of Mistletoe's core logic to achieve correct parsing of Math tokens, but if possible this should be made more general. +- [ ] Improve rendering logic: Currently, there is no concept of recursive walk-throughs or 'visitor' patterns in the Mistletoe `BaseRenderer`, which is a better method for rendering tree like structures (as used by docutils/panflute). Also, the current token instantiating (within context managers) needs improvement (see [miyuchina/mistletoe#56](https://github.com/miyuchina/mistletoe/issues/56)).
[mistletoe]: https://github.com/miyuchina/mistletoe [ebp-link]: https://github.com/ExecutableBookProject diff --git a/contrib/github_wiki.py b/contrib/github_wiki.py index 605b647..a055e49 100644 --- a/contrib/github_wiki.py +++ b/contrib/github_wiki.py @@ -3,8 +3,8 @@ """ import re -from mistletoe.span_token import SpanToken -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.base_elements import SpanToken +from mistletoe.renderers.html import HTMLRenderer __all__ = ["GithubWiki", "GithubWikiRenderer"] diff --git a/contrib/jira_renderer.py b/contrib/jira_renderer.py index b0560c2..0fb8fd2 100644 --- a/contrib/jira_renderer.py +++ b/contrib/jira_renderer.py @@ -23,8 +23,8 @@ import html from itertools import chain -from mistletoe import block_token, span_token -from mistletoe.base_renderer import BaseRenderer +from mistletoe import block_tokens, span_tokens +from mistletoe.renderers.base import BaseRenderer class JIRARenderer(BaseRenderer): @@ -40,7 +40,7 @@ def __init__(self, *extras): extras (list): allows subclasses to add even more custom tokens. 
""" self.listTokens = [] - super().__init__(*chain([block_token.HTMLBlock, span_token.HTMLSpan], extras)) + super().__init__(*chain([block_tokens.HTMLBlock, span_tokens.HTMLSpan], extras)) def render_strong(self, token): template = "*{}*" @@ -125,15 +125,15 @@ def render_list_item(self, token): return result def render_inner(self, token): - if isinstance(token, block_token.List): - if token.start: + if isinstance(token, block_tokens.List): + if token.start_at: self.listTokens.append("#") else: self.listTokens.append("*") rendered = [self.render(child) for child in token.children] - if isinstance(token, block_token.List): + if isinstance(token, block_tokens.List): del self.listTokens[-1] return "".join(rendered) @@ -192,7 +192,7 @@ def render_html_block(token): return token.content def render_document(self, token): - self.footnotes.update(token.footnotes) + self.link_definitions.update(token.link_definitions) return self.render_inner(token) diff --git a/contrib/mathjax.py b/contrib/mathjax.py index ccbab37..a2858be 100644 --- a/contrib/mathjax.py +++ b/contrib/mathjax.py @@ -2,8 +2,8 @@ Provides MathJax support for rendering Markdown with LaTeX to html. 
""" -from mistletoe.html_renderer import HTMLRenderer -from mistletoe.latex_renderer import LaTeXRenderer +from mistletoe.renderers.html import HTMLRenderer +from mistletoe.renderers.latex import LaTeXRenderer class MathJaxRenderer(HTMLRenderer, LaTeXRenderer): diff --git a/contrib/scheme.py b/contrib/scheme.py index a598625..235a3f9 100644 --- a/contrib/scheme.py +++ b/contrib/scheme.py @@ -2,18 +2,18 @@ from functools import reduce import re -from mistletoe import BaseRenderer, span_token, block_token -from mistletoe.core_tokens import MatchObj +from mistletoe import BaseRenderer, base_elements +from span_tokenizer import tokenize_span +from mistletoe.nested_tokenizer import MatchObj +from mistletoe.parse_context import get_parse_context -class Program(block_token.BlockToken): +class Program(base_elements.BlockToken): def __init__(self, lines): - self.children = span_token.tokenize_inner( - "".join([line.strip() for line in lines]) - ) + self.children = tokenize_span("".join([line.strip() for line in lines])) -class Expr(span_token.SpanToken): +class Expr(base_elements.SpanToken): @classmethod def find(cls, string): matches = [] @@ -32,7 +32,7 @@ def __repr__(self): return "".format(self.children) -class Number(span_token.SpanToken): +class Number(base_elements.SpanToken): pattern = re.compile(r"(\d+)") parse_inner = False @@ -43,7 +43,7 @@ def __repr__(self): return "".format(self.number) -class Variable(span_token.SpanToken): +class Variable(base_elements.SpanToken): pattern = re.compile(r"([^\s()]+)") parse_inner = False @@ -54,7 +54,7 @@ def __repr__(self): return "".format(self.name) -class Whitespace(span_token.SpanToken): +class Whitespace(base_elements.SpanToken): parse_inner = False def __new__(self, _): @@ -76,8 +76,9 @@ def __init__(self): "Number": self.render_number, "Variable": self.render_variable, } - block_token._token_types.value = [] - span_token._token_types.value = [Expr, Number, Variable, Whitespace] + parse_context = get_parse_context() + 
parse_context.block_tokens = [] + parse_context.span_tokens = [Expr, Number, Variable, Whitespace] self.env = ChainMap( { diff --git a/contrib/toc_renderer.py b/contrib/toc_renderer.py index 0b22d99..d902a13 100644 --- a/contrib/toc_renderer.py +++ b/contrib/toc_renderer.py @@ -5,7 +5,7 @@ """ import re -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.renderers.html import HTMLRenderer class TOCRenderer(HTMLRenderer): @@ -30,9 +30,9 @@ def __init__(self, depth=5, omit_title=True, filter_conds=[], *extras): @property def toc(self): """ - Returns table of contents as a block_token.List instance. + Returns table of contents as a block_tokens.List instance. """ - from mistletoe.block_token import List + from mistletoe.block_tokens import List def get_indent(level): if self.omit_title: diff --git a/docs/using/index.md b/docs/using/index.md index 1d24d71..db6995f 100644 --- a/docs/using/index.md +++ b/docs/using/index.md @@ -44,7 +44,7 @@ and rendering to HTML. The function also accepts an additional argument ```python import mistletoe -from mistletoe.latex_renderer import LaTeXRenderer +from mistletoe.renderers.latex import LaTeXRenderer with open('foo.md', 'r') as fin: rendered = mistletoe.markdown(fin, LaTeXRenderer) @@ -60,7 +60,7 @@ from mistletoe import Document, HTMLRenderer with open('foo.md', 'r') as fin: with HTMLRenderer() as renderer: - rendered = renderer.render(Document(fin)) + rendered = renderer.render(Document.read(fin)) ``` ### From the command-line @@ -125,15 +125,15 @@ mistletoe is the fastest CommonMark compliant implementation in Python. Try the benchmarks yourself by running: ```sh -$ python3 test/benchmark.py # all results in seconds -Test document: test/samples/syntax.md +$ python3 test/test_samples/benchmark.py # all results in seconds +Test document: syntax.md Test iterations: 1000 Running tests with markdown, mistune, commonmark, mistletoe... 
============================================================== -markdown: 33.28557115700096 -mistune: 8.533771439999327 -commonmark: 84.54588776299897 -mistletoe: 23.5405140980001 +markdown: 40.270715949 +mistune: 11.054077996000004 +commonmark: 44.426582849 +mistletoe: 34.47910147500001 ``` We notice that Mistune is the fastest Markdown parser, @@ -331,7 +331,7 @@ of most of them for you. Simply pass your custom token class to `super().__init__()` does the trick: ```python -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.renderers.html import HTMLRenderer class GithubWikiRenderer(HTMLRenderer): def __init__(self): @@ -350,7 +350,7 @@ def render_github_wiki(self, token): Cleaning up, we have our new renderer class: ```python -from mistletoe.html_renderer import HTMLRenderer, escape_url +from mistletoe.renderers.html import HTMLRenderer, escape_url class GithubWikiRenderer(HTMLRenderer): def __init__(self): @@ -376,7 +376,7 @@ from contrib.github_wiki import GithubWikiRenderer with open('foo.md', 'r') as fin: with GithubWikiRenderer() as renderer: - rendered = renderer.render(Document(fin)) + rendered = renderer.render(Document.read(fin)) ``` For more info, take a look at the `base_renderer` module in mistletoe. diff --git a/docstring.fmt.mustache b/docstring.fmt.mustache new file mode 100644 index 0000000..717a457 --- /dev/null +++ b/docstring.fmt.mustache @@ -0,0 +1,20 @@ +{{! 
Sphinx Docstring Template }} +{{summaryPlaceholder}} + +{{extendedSummaryPlaceholder}} + +{{#args}} +:param {{var}}: {{descriptionPlaceholder}} +{{/args}} +{{#kwargs}} +:param {{var}}: {{descriptionPlaceholder}} +{{/kwargs}} +{{#exceptions}} +:raises {{type}}: {{descriptionPlaceholder}} +{{/exceptions}} +{{#returns}} +:return: {{descriptionPlaceholder}} +{{/returns}} +{{#yields}} +:yield: {{descriptionPlaceholder}} +{{/yields}} diff --git a/makefile b/makefile deleted file mode 100644 index 0ffdf4c..0000000 --- a/makefile +++ /dev/null @@ -1,27 +0,0 @@ -PYTHON_EXEC=python3 - -.PHONY: run test coverage integration benchmark docs - -run: - ${PYTHON_EXEC} -m mistletoe - -test: - ${PYTHON_EXEC} -m unittest - -coverage: - . venv/bin/activate && \ - ${PYTHON_EXEC} -m coverage run -m unittest && \ - coverage report && \ - deactivate - -integration: - ./test/test_ci.sh 1 - -benchmark: - ${PYTHON_EXEC} test/benchmark.py - -commonmark: - ${PYTHON_EXEC} -m test.commonmark - -docs: - ${PYTHON_EXEC} -m docs diff --git a/mistletoe/__init__.py b/mistletoe/__init__.py index 595d572..35204a0 100644 --- a/mistletoe/__init__.py +++ b/mistletoe/__init__.py @@ -2,19 +2,19 @@ Make mistletoe easier to import. """ -__version__ = "0.8.2" +__version__ = "0.9.0" __all__ = [ - "html_renderer", - "ast_renderer", - "block_token", + "renderers", + "base_elements", + "block_tokens", "block_tokenizer", - "span_token", + "span_tokens", "span_tokenizer", ] -from mistletoe.block_token import Document -from mistletoe.base_renderer import BaseRenderer # noqa: F401 -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.block_tokens import Document +from mistletoe.renderers.base import BaseRenderer # noqa: F401 +from mistletoe.renderers.html import HTMLRenderer def markdown(iterable, renderer=HTMLRenderer): @@ -23,4 +23,4 @@ def markdown(iterable, renderer=HTMLRenderer): Enables inline and block-level HTML tags. 
""" with renderer() as renderer: - return renderer.render(Document(iterable)) + return renderer.render(Document.read(iterable)) diff --git a/mistletoe/base_elements.py b/mistletoe/base_elements.py new file mode 100644 index 0000000..ceb00ac --- /dev/null +++ b/mistletoe/base_elements.py @@ -0,0 +1,188 @@ +from collections import namedtuple +from typing import List, Optional + +import attr + + +WalkItem = namedtuple("WalkItem", ["node", "parent", "depth"]) + + +class Token: + """Base class of all mistletoe tokens.""" + + def __getattr__(self, name): + # ensure certain attributes are always available + if name == "children": + return None + if name == "content": + return "" + + @property + def name(self) -> str: + """Return the name of the element.""" + return type(self).__name__ + + def __contains__(self, text: str): + """Return is text is contained in the element or its ancestors.""" + if self.children is None: + return text in self.content + return any(text in child for child in self.children) + + def __repr__(self): + """A base represent method, that can be overriden for more complex elements.""" + info = [] + if self.children is not None: + info.append("children={}".format(len(self.children))) + return "{}({})".format(self.name, ",".join(info)) + + def to_dict(self) -> dict: + """Convert instatiated attributes to a dict""" + try: + return attr.asdict(self) + except attr.exceptions.NotAnAttrsClassError: + return self.__dict__ + + def walk( + self, + tokens: Optional[List[str]] = None, + depth: Optional[int] = None, + include_self: bool = False, + ) -> WalkItem: + """Traverse the syntax tree, recursively yielding children. + + :param elements: filter children by certain token names. + :param depth: The depth to recurse into the tree. + :param include_self: whether to first yield this element. 
+ + :yield: A container for an element, its parent and depth + + """ + current_depth = 0 + if include_self: + yield WalkItem(self, None, current_depth) + next_children = [(self, c) for c in self.children or []] + if self.name == "Table" and getattr(self, "header", None) is not None: + # table headers row + next_children.append((self, self.header)) + while next_children and (depth is None or current_depth > depth): + current_depth += 1 + new_children = [] + for idx, (parent, child) in enumerate(next_children): + if tokens is None or child.name in tokens: + yield WalkItem(child, parent, current_depth) + new_children.extend([(child, c) for c in child.children or []]) + if self.child == "Table" and getattr(child, "header", None) is not None: + # table headers row + new_children.append((child, child.header)) + + next_children = new_children + + +class SpanContainer: + """This is a container for inline span text. + + We use it in order to delay the assessment of span text, when parsing a document, + so that all link definitions can be gathered first. + After the initial block parse, we walk through the document + and replace these span containers with the actual span tokens + (see `block_tokenizer.tokenize_main`). + """ + + def __init__(self, text): + self.text = text + + def expand(self): + from mistletoe.span_tokenizer import tokenize_span + + return tokenize_span(self.text) + + def __iter__(self): + for _ in []: + yield + + def __len__(self): + return 0 + + +class BlockToken(Token): + """Base class for block-level tokens. Recursively parse inner tokens. + + Naming conventions: + + * lines denotes a list of (possibly unparsed) input lines, and is + commonly used as the argument name for constructors. 
+ + * BlockToken.children is a list with all the inner tokens (thus if + a token has children attribute, it is not a leaf node; if a token + calls tokenize_span, it is the boundary between + span-level tokens and block-level tokens); + + * BlockToken.start takes a line from the document as argument, and + returns a boolean representing whether that line marks the start + of the current token. Every subclass of BlockToken must define a + start function (see block_tokenizer.tokenize). + + * BlockToken.read takes the rest of the lines in the document as an + iterator (including the start line), and consumes all the lines + that should be read into this token. + + Default to stop at an empty line. + + Note that BlockToken.read returns a token (or None). + + If BlockToken.read returns None, the read result is ignored, + but the token class is responsible for resetting the iterator + to a previous state. See block_tokenizer.FileWrapper.anchor, + block_tokenizer.FileWrapper.reset. + + """ + + @classmethod + def start(cls, line: str) -> bool: + """Takes a line from the document as argument, and + returns a boolean representing whether that line marks the start + of the current token. Every subclass of BlockToken must define a + start function (see `block_tokenizer.tokenize_main`). + """ + raise NotImplementedError + + @classmethod + def read(cls, lines) -> Optional[Token]: + """takes the rest of the lines in the document as an + iterator (including the start line), and consumes all the lines + that should be read into this token. + + The default is to stop at an empty line. + """ + line_buffer = [next(lines)] + for line in lines: + if line == "\n": + break + line_buffer.append(line) + return line_buffer + + +class SpanToken(Token): + """Base class for span-level tokens. + + - `pattern`: regex pattern to search for + - To parse child tokens, `parse_inner` should be set to `True`. 
+ - `parse_group` corresponds to the match group in which child tokens might occur + - `precedence`: Alter the relative order by which the span token is assessed. + """ + + pattern = None + parse_inner = True + parse_group = 1 + precedence = 5 + + def __init__(self, match): + if not self.parse_inner: + self.content = match.group(self.parse_group) + + @classmethod + def find(cls, string: str): + """Find all tokens, matching a pattern in the given string""" + if cls.pattern is not None: + return cls.pattern.finditer(string) + return [] diff --git a/mistletoe/block_tokenizer.py b/mistletoe/block_tokenizer.py index ac7a3f7..b3f08a8 100644 --- a/mistletoe/block_tokenizer.py +++ b/mistletoe/block_tokenizer.py @@ -1,6 +1,8 @@ """ Block-level tokenizer for mistletoe. """ +from mistletoe.base_elements import SpanContainer +from mistletoe.parse_context import get_parse_context class FileWrapper: @@ -42,9 +44,10 @@ def backstep(self): self._index -= 1 -def tokenize(iterable, token_types, start_line=0): - """ - Searches for token_types in iterable. +def tokenize_main( + iterable, token_types=None, start_line=0, expand_spans=True, store_definitions=False +): + """Searches for token_types in iterable. Args: iterable (list): user input lines to be parsed. @@ -53,47 +56,42 @@ def tokenize(iterable, token_types, start_line=0): Returns: block-level token instances. """ - return make_tokens(tokenize_block(iterable, token_types, start_line)) - + if token_types is None: + token_types = get_parse_context().block_tokens + tokens = tokenize_block( + iterable, + token_types=token_types, + start_line=start_line, + store_definitions=store_definitions, + ) + if expand_spans: + for token in tokens: + for result in list(token.walk(include_self=True)): + if isinstance(result.node.children, SpanContainer): + result.node.children = result.node.children.expand() + return tokens -def tokenize_block(iterable, token_types, start_line=0): - """ - Returns a list of pairs (token_type, read_result). 
- Footnotes are parsed here, but span-level parsing has not - started yet. - """ +def tokenize_block(iterable, token_types=None, start_line=0, store_definitions=False): + """Returns a list of parsed tokens.""" + if token_types is None: + token_types = get_parse_context().block_tokens lines = FileWrapper(iterable, start_line) - parse_buffer = ParseBuffer() + parsed_tokens = ParseBuffer() line = lines.peek() while line is not None: for token_type in token_types: if token_type.start(line): - result = token_type.read(lines) - if result is not None: - parse_buffer.append((token_type, result)) + token = token_type.read(lines) + if token is not None: + if store_definitions or token.name != "LinkDefinition": + parsed_tokens.append(token) break else: # unmatched newlines next(lines) - parse_buffer.loose = True + parsed_tokens.loose = True line = lines.peek() - return parse_buffer - - -def make_tokens(parse_buffer): - """ - Takes a list of pairs (token_type, read_result) and - applies token_type(read_result). - - Footnotes are already parsed before this point, - and span-level parsing is started here. - """ - tokens = [] - for token_type, result in parse_buffer: - token = token_type(result) - if token is not None: - tokens.append(token) - return tokens + return parsed_tokens class ParseBuffer(list): diff --git a/mistletoe/block_token.py b/mistletoe/block_tokens.py similarity index 55% rename from mistletoe/block_token.py rename to mistletoe/block_tokens.py index 1caf003..8d979dc 100644 --- a/mistletoe/block_token.py +++ b/mistletoe/block_tokens.py @@ -1,20 +1,24 @@ """ Built-in block-level token classes. 
""" - -import re from itertools import zip_longest -from threading import local +import re +from typing import Optional, Tuple +from typing import List as ListType + +import attr import mistletoe.block_tokenizer as tokenizer -from mistletoe import span_token -from mistletoe.core_tokens import ( +from mistletoe import span_tokens +from mistletoe.nested_tokenizer import ( follows, shift_whitespace, whitespace, is_control_char, normalize_label, ) +from mistletoe.parse_context import get_parse_context +from mistletoe.base_elements import Token, BlockToken, SpanContainer """ @@ -28,159 +32,124 @@ "ThematicBreak", "List", "Table", - "Footnote", + "LinkDefinition", "Paragraph", ] -""" -Stores a reference to the current document token. - -When parsing, footnote entries will be stored in the document by -accessing this pointer. -""" -# TODO make thread local -_root_node = None - - -def tokenize(lines, start_line=0): - """ - A wrapper around block_tokenizer.tokenize. Pass in all block-level - token constructors as arguments to block_tokenizer.tokenize. - - Doing so (instead of importing block_token module in block_tokenizer) - avoids cyclic dependency issues, and allows for future injections of - custom token classes. - - _token_types variable is at the bottom of this module. - - See also: block_tokenizer.tokenize, span_token.tokenize_inner. - """ - return tokenizer.tokenize(lines, _token_types.value, start_line) - - -def add_token(token_cls, position=0): - """ - Allows external manipulation of the parsing process. - This function is usually called in BaseRenderer.__enter__. - - Arguments: - token_cls (SpanToken): token to be included in the parsing process. - position (int): the position for the token class to be inserted into. - """ - _token_types.value.insert(position, token_cls) - - -def remove_token(token_cls): - """ - Allows external manipulation of the parsing process. - This function is usually called in BaseRenderer.__exit__. 
- - Arguments: - token_cls (BlockToken): token to be removed from the parsing process. - """ - _token_types.value.remove(token_cls) - - -def reset_tokens(): - """ - Resets global _token_types to all token classes in __all__. - """ - global _token_types - _token_types.value = [globals()[cls_name] for cls_name in __all__] - - -class BlockToken(object): - """ - Base class for block-level tokens. Recursively parse inner tokens. - - Naming conventions: +@attr.s(slots=True, kw_only=True) +class Document(BlockToken): + """Document container.""" - * lines denotes a list of (possibly unparsed) input lines, and is - commonly used as the argument name for constructors. + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + link_definitions: dict = attr.ib( + repr=lambda d: str(len(d)), metadata={"doc": "Mapping of keys to (url, title)"} + ) + front_matter: Optional["FrontMatter"] = attr.ib( + default=None, metadata={"doc": "Front matter YAML block"} + ) - * BlockToken.children is a list with all the inner tokens (thus if - a token has children attribute, it is not a leaf node; if a token - calls span_token.tokenize_inner, it is the boundary between - span-level tokens and block-level tokens); + @classmethod + def read( + cls, + lines, + start_line: int = 0, + reset_definitions=True, + store_definitions=False, + front_matter=False, + ): + """Read a document + + :param lines: Lines or string to parse + :param start_line: The initial line (used for nested parsing) + :param reset_definitions: remove any previously stored link_definitions + :param store_definitions: store LinkDefinitions or ignore them + :param front_matter: search for an initial YAML block front matter block + (note this is not strictly CommonMark compliant) + """ + if isinstance(lines, str): + lines = lines.splitlines(keepends=True) + lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] + # reset link definitions + if 
reset_definitions: + get_parse_context().link_definitions = {} + + front_matter_token = None + if front_matter and lines and lines[0].startswith("---"): + front_matter_token = FrontMatter.read(lines) + start_line += front_matter_token.position[1] + lines = lines[front_matter_token.position[1] :] + + children = tokenizer.tokenize_main( + lines, start_line=start_line, store_definitions=store_definitions + ) + return cls( + children=children, + front_matter=front_matter_token, + link_definitions=get_parse_context().link_definitions, + ) - * BlockToken.start takes a line from the document as argument, and - returns a boolean representing whether that line marks the start - of the current token. Every subclass of BlockToken must define a - start function (see block_tokenizer.tokenize). - * BlockToken.read takes the rest of the lines in the ducment as an - iterator (including the start line), and consumes all the lines - that should be read into this token. +@attr.s(slots=True, kw_only=True) +class FrontMatter(BlockToken): + """Front matter YAML block. - Default to stop at an empty line. + :: - Note that BlockToken.read does not have to return a list of lines. - Because the return value of this function will be directly - passed into the token constructor, we can return any relevant - parsing information, sometimes even ready-made tokens, - into the constructor. See block_tokenizer.tokenize. + --- + a: b + c: d + --- - If BlockToken.read returns None, the read result is ignored, - but the token class is responsible for resetting the iterator - to a previous state. See block_tokenizer.FileWrapper.anchor, - block_tokenizer.FileWrapper.reset. + NOTE: The content of the block should be valid YAML, + but its parsing (and hence syntax testing) is deferred to the renderers. + This is so that, given 'bad' YAML, + the rest of the of document will still be parsed, + and then the renderers can apply there own error reporting. - Attributes: - children (list): inner tokens. 
+ Not included in the parsing process, but called by `Document.read`. """ - def __init__(self, lines, tokenize_func): - self.children = tokenize_func(lines) - - def __contains__(self, text): - return any(text in child for child in self.children) + content: str = attr.ib( + repr=False, metadata={"doc": "Source text (should be valid YAML)"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - @staticmethod - def read(lines): - line_buffer = [next(lines)] - for line in lines: - if line == "\n": + @classmethod + def read(cls, lines): + assert lines and lines[0].startswith("---") + end_line = None + for i, line in enumerate(lines[1:]): + if line.startswith("---"): + end_line = i + 2 break - line_buffer.append(line) - return line_buffer + # TODO raise/report error if closing block not found + if end_line is None: + end_line = len(lines) - -class Document(BlockToken): - """ - Document token. - """ - - def __init__(self, lines): - if isinstance(lines, str): - lines = lines.splitlines(keepends=True) - lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] - self.footnotes = {} - global _root_node - _root_node = self - span_token._root_node = self - self.children = tokenize(lines) - span_token._root_node = None - _root_node = None + return cls(content="".join(lines[1 : end_line - 1]), position=(0, end_line)) +@attr.s(slots=True, kw_only=True) class Heading(BlockToken): - """ - Heading token. (["### some heading ###\\n"]) - Boundary between span-level and block-level tokens. + """Heading token. (["### some heading ###\\n"]) - Attributes: - level (int): heading level. - children (list): inner tokens. + Boundary between span-level and block-level tokens. 
""" - pattern = re.compile(r" {0,3}(#{1,6})(?:\n|\s+?(.*?)(?:\n|\s+?#+\s*?$))") - level = 0 - content = "" + level: int = attr.ib(metadata={"doc": "Heading level"}) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __init__(self, match): - self.level, content = match - super().__init__(content, span_token.tokenize_inner) + pattern = re.compile(r" {0,3}(#{1,6})(?:\n|\s+?(.*?)(?:\n|\s+?#+\s*?$))") @classmethod def start(cls, line): @@ -194,22 +163,30 @@ def start(cls, line): return True @classmethod - def read(cls, lines): + def read(cls, lines, expand_spans=False): next(lines) - return cls.level, cls.content + children = SpanContainer(cls.content) + if expand_spans: + children = children.expand() + return cls( + level=cls.level, children=children, position=(lines.lineno, lines.lineno) + ) +@attr.s(slots=True, kw_only=True) class SetextHeading(BlockToken): - """ - Setext headings. + """Setext headings. - Not included in the parsing process, but called by Paragraph.__new__. + Not included in the parsing process, but returned by `Paragraph.read`. """ - def __init__(self, lines): - self.level = 1 if lines.pop().lstrip().startswith("=") else 2 - content = "\n".join([line.strip() for line in lines]) - super().__init__(content, span_token.tokenize_inner) + level: int = attr.ib(metadata={"doc": "Heading level"}) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) @classmethod def start(cls, line): @@ -220,14 +197,16 @@ def read(cls, lines): raise NotImplementedError() +@attr.s(slots=True, kw_only=True) class Quote(BlockToken): - """ - Quote token. (["> # heading\\n", "> paragraph\\n"]) - """ + """Quote token. 
(`["> # heading\\n", "> paragraph\\n"]`).""" - def __init__(self, parse_buffer): - # span-level tokenizing happens here. - self.children = tokenizer.make_tokens(parse_buffer) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) @staticmethod def start(line): @@ -236,9 +215,21 @@ def start(line): return False return stripped.startswith(">") + @classmethod + def transition(cls, next_line): + return ( + next_line is None + or next_line.strip() == "" + or Heading.start(next_line) + or CodeFence.start(next_line) + or ThematicBreak.start(next_line) + or List.start(next_line) + ) + @classmethod def read(cls, lines): # first line + start_line = lines.lineno + 1 line = cls.convert_leading_tabs(next(lines).lstrip()).split(">", 1)[1] if len(line) > 0 and line[0] == " ": line = line[1:] @@ -251,14 +242,7 @@ def read(cls, lines): # loop next_line = lines.peek() - while ( - next_line is not None - and next_line.strip() != "" - and not Heading.start(next_line) - and not CodeFence.start(next_line) - and not ThematicBreak.start(next_line) - and not List.start(next_line) - ): + while not cls.transition(next_line): stripped = cls.convert_leading_tabs(next_line.lstrip()) prepend = 0 if stripped[0] == ">": @@ -280,12 +264,14 @@ def read(cls, lines): next(lines) next_line = lines.peek() - # block level tokens are parsed here, so that footnotes + # block level tokens are parsed here, so that link_definitions # in quotes can be recognized before span-level tokenizing. 
Paragraph.parse_setext = False - parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types.value) - Paragraph.parse_setext = True - return parse_buffer + try: + child_tokens = tokenizer.tokenize_block(line_buffer, start_line=start_line) + finally: + Paragraph.parse_setext = True + return cls(children=child_tokens, position=(start_line, lines.lineno)) @staticmethod def convert_leading_tabs(string): @@ -303,41 +289,47 @@ def convert_leading_tabs(string): return ">" + " " * count + string[i:] +@attr.s(slots=True, kw_only=True) class Paragraph(BlockToken): - """ - Paragraph token. (["some\\n", "continuous\\n", "lines\\n"]) + """Paragraph token. (`["some\\n", "continuous\\n", "lines\\n"]`) + Boundary between span-level and block-level tokens. """ - setext_pattern = re.compile(r" {0,3}(=|-)+ *$") - parse_setext = True # can be disabled by Quote - - def __new__(cls, lines): - if not isinstance(lines, list): - # setext heading token, return directly - return lines - return super().__new__(cls) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __init__(self, lines): - content = "".join([line.lstrip() for line in lines]).strip() - super().__init__(content, span_token.tokenize_inner) + _setext_pattern = re.compile(r" {0,3}(=|-)+ *$") + parse_setext = True # can be disabled by Quote @staticmethod def start(line): return line.strip() != "" @classmethod - def read(cls, lines): + def is_setext_heading(cls, line): + return cls._setext_pattern.match(line) + + @classmethod + def transition(cls, next_line): + return ( + next_line is None + or next_line.strip() == "" + or Heading.start(next_line) + or CodeFence.start(next_line) + or Quote.start(next_line) + ) + + @classmethod + def read(cls, lines, expand_spans=False): line_buffer = [next(lines)] + start_line = lines.lineno next_line = lines.peek() - while 
( - next_line is not None - and next_line.strip() != "" - and not Heading.start(next_line) - and not CodeFence.start(next_line) - and not Quote.start(next_line) - ): - + while not cls.transition(next_line): # check if next_line starts List list_pair = ListItem.parse_marker(next_line) if len(next_line) - len(next_line.lstrip()) < 4 and list_pair is not None: @@ -356,7 +348,15 @@ def read(cls, lines): # check if we see a setext underline if cls.parse_setext and cls.is_setext_heading(next_line): line_buffer.append(next(lines)) - return SetextHeading(line_buffer) + level = 1 if line_buffer.pop().lstrip().startswith("=") else 2 + children = SpanContainer( + "\n".join([line.strip() for line in line_buffer]) + ) + if expand_spans: + children = children.expand() + return SetextHeading( + children=children, level=level, position=(start_line, lines.lineno) + ) # check if we have a ThematicBreak (has to be after setext) if ThematicBreak.start(next_line): @@ -365,25 +365,27 @@ def read(cls, lines): # no other tokens, we're good line_buffer.append(next(lines)) next_line = lines.peek() - return line_buffer - @classmethod - def is_setext_heading(cls, line): - return cls.setext_pattern.match(line) + content = "".join([line.lstrip() for line in line_buffer]).strip() + children = SpanContainer(content) + if expand_spans: + children = children.expand() + return cls(children=children, position=(start_line, lines.lineno)) +@attr.s(slots=True, kw_only=True) class BlockCode(BlockToken): - """ - Indented code. + """Indented code.""" - Attributes: - children (list): contains a single span_token.RawText token. - language (str): always the empty string. 
- """ - - def __init__(self, lines): - self.language = "" - self.children = (span_token.RawText("".join(lines).strip("\n") + "\n"),) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + language: str = attr.ib( + default="", metadata={"doc": "The code language (for sytax highlighting)"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) @staticmethod def start(line): @@ -391,6 +393,7 @@ def start(line): @classmethod def read(cls, lines): + start_line = lines.lineno + 1 line_buffer = [] for line in lines: if line.strip() == "": @@ -400,7 +403,12 @@ def read(cls, lines): lines.backstep() break line_buffer.append(cls.strip(line)) - return line_buffer + + children = (span_tokens.RawText("".join(line_buffer).strip("\n") + "\n"),) + + return cls( + children=children, language="", position=(start_line, lines.lineno - 1) + ) @staticmethod def strip(string): @@ -417,24 +425,26 @@ def strip(string): return string +@attr.s(slots=True, kw_only=True) class CodeFence(BlockToken): - """ - Code fence. (["```sh\\n", "rm -rf /", ..., "```"]) - Boundary between span-level and block-level tokens. + """Code fence. (["```sh\\n", "rm -rf /", ..., "```"]) - Attributes: - children (list): contains a single span_token.RawText token. - language (str): language of code block (default to empty). + Boundary between span-level and block-level tokens. 
""" + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + language: str = attr.ib( + default="", metadata={"doc": "The code language (for sytax highlighting)"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) + pattern = re.compile(r"( {0,3})((?:`|~){3,}) *(\S*)") _open_info = None - def __init__(self, match): - lines, open_info = match - self.language = span_token.EscapeSequence.strip(open_info[2]) - self.children = (span_token.RawText("".join(lines)),) - @classmethod def start(cls, line): match_obj = cls.pattern.match(line) @@ -448,6 +458,7 @@ def start(cls, line): @classmethod def read(cls, lines): + start_line = lines.lineno + 1 next(lines) line_buffer = [] for line in lines: @@ -462,58 +473,75 @@ def read(cls, lines): if diff > cls._open_info[0]: stripped_line = " " * (diff - cls._open_info[0]) + stripped_line line_buffer.append(stripped_line) - return line_buffer, cls._open_info + language = span_tokens.EscapeSequence.strip(cls._open_info[2]) + children = (span_tokens.RawText("".join(line_buffer)),) -class List(BlockToken): - """ - List token. + return cls( + children=children, language=language, position=(start_line, lines.lineno) + ) - Attributes: - children (list): a list of ListItem tokens. - loose (bool): whether the list is loose. - start (NoneType or int): None if unordered, starting number if ordered. 
- """ - pattern = re.compile(r" {0,3}(?:\d{0,9}[.)]|[+\-*])(?:[ \t]*$|[ \t]+)") +@attr.s(slots=True, kw_only=True) +class List(BlockToken): + """List token (unordered or ordered)""" - def __init__(self, matches): - self.children = [ListItem(*match) for match in matches] - self.loose = any(item.loose for item in self.children) - leader = self.children[0].leader - self.start = None - if len(leader) != 1: - self.start = int(leader[:-1]) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + loose: bool = attr.ib( + metadata={"doc": "Whether list items are separated by blank lines"} + ) + start_at: Optional[int] = attr.ib( + metadata={"doc": "None if unordered, starting number if ordered."} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) + + _pattern = re.compile(r" {0,3}(?:\d{0,9}[.)]|[+\-*])(?:[ \t]*$|[ \t]+)") @classmethod def start(cls, line): - return cls.pattern.match(line) + return cls._pattern.match(line) @classmethod def read(cls, lines): + start_line = lines.lineno leader = None next_marker = None - matches = [] + children = [] while True: - output, next_marker = ListItem.read(lines, next_marker) - item_leader = output[2] + item = ListItem.read(lines, next_marker) + next_marker = item.next_marker + item_leader = item.leader if leader is None: leader = item_leader elif not cls.same_marker_type(leader, item_leader): lines.reset() break - matches.append(output) + children.append(item) if next_marker is None: break - if matches: + if children: # Only consider the last list item loose if there's more than one element - last_parse_buffer = matches[-1][0] + last_parse_buffer = children[-1] last_parse_buffer.loose = ( - len(last_parse_buffer) > 1 and last_parse_buffer.loose + len(last_parse_buffer.children) > 1 and last_parse_buffer.loose ) - return matches + loose = any(item.loose for item in children) + leader = children[0].leader + start = None 
+ if len(leader) != 1: + start = int(leader[:-1]) + return cls( + children=children, + loose=loose, + start_at=start, + position=(start_line, lines.lineno), + ) @staticmethod def same_marker_type(leader, other): @@ -524,38 +552,47 @@ def same_marker_type(leader, other): ) +@attr.s(slots=True, kw_only=True) class ListItem(BlockToken): - """ - List items. Not included in the parsing process, but called by List. + """List items. + + Not included in the parsing process, but called by List. """ - pattern = re.compile(r"\s*(\d{0,9}[.)]|[+\-*])(\s*$|\s+)") + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + loose: bool = attr.ib( + metadata={"doc": "Whether list items are separated by blank lines"} + ) + leader: str = attr.ib(metadata={"doc": "The prefix number or bullet point."}) + prepend = attr.ib(metadata={"doc": ""}) + next_marker = attr.ib(metadata={"doc": ""}) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __init__(self, parse_buffer, prepend, leader): - self.leader = leader - self.prepend = prepend - self.children = tokenizer.make_tokens(parse_buffer) - self.loose = parse_buffer.loose + _pattern = re.compile(r"\s*(\d{0,9}[.)]|[+\-*])(\s*$|\s+)") @staticmethod def in_continuation(line, prepend): return line.strip() == "" or len(line) - len(line.lstrip()) >= prepend @staticmethod - def other_token(line): + def transition(next_line): return ( - Heading.start(line) - or Quote.start(line) - or CodeFence.start(line) - or ThematicBreak.start(line) + Heading.start(next_line) + or Quote.start(next_line) + or CodeFence.start(next_line) + or ThematicBreak.start(next_line) ) @classmethod def parse_marker(cls, line): """ - Returns a pair (prepend, leader) iff the line has a valid leader. + Returns a pair (prepend, leader) if the line has a valid leader. 
""" - match_obj = cls.pattern.match(line) + match_obj = cls._pattern.match(line) if match_obj is None: return None # no valid leader leader = match_obj.group(1) @@ -580,6 +617,7 @@ def read(cls, lines, prev_marker=None): lines.anchor() prepend = -1 leader = None + start_line = lines.lineno line_buffer = [] # first line @@ -591,13 +629,22 @@ def read(cls, lines, prev_marker=None): line_buffer.append(line[prepend:]) next_line = lines.peek() if empty_first_line and next_line is not None and next_line.strip() == "": - parse_buffer = tokenizer.tokenize_block([next(lines)], _token_types.value) + child_tokens = tokenizer.tokenize_block( + [next(lines)], start_line=lines.lineno + ) next_line = lines.peek() if next_line is not None: marker_info = cls.parse_marker(next_line) if marker_info is not None: next_marker = marker_info - return (parse_buffer, prepend, leader), next_marker + return cls( + children=child_tokens, + loose=child_tokens.loose, + prepend=prepend, + leader=leader, + next_marker=next_marker, + position=(start_line, lines.lineno), + ) # loop newline = 0 @@ -613,7 +660,7 @@ def read(cls, lines, prev_marker=None): # not in continuation if not cls.in_continuation(next_line, prepend): # directly followed by another token - if cls.other_token(next_line): + if cls.transition(next_line): if newline: lines.backstep() del line_buffer[-newline:] @@ -638,32 +685,34 @@ def read(cls, lines, prev_marker=None): newline = newline + 1 if next_line.strip() == "" else 0 next_line = lines.peek() - # block-level tokens are parsed here, so that footnotes can be - # recognized before span-level parsing. 
- parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types.value) - return (parse_buffer, prepend, leader), next_marker + child_tokens = tokenizer.tokenize_block(line_buffer, start_line=start_line) + return cls( + children=child_tokens, + loose=child_tokens.loose, + prepend=prepend, + leader=leader, + next_marker=next_marker, + position=(start_line, lines.lineno), + ) -class Table(BlockToken): - """ - Table token. - Attributes: - has_header (bool): whether table has header row. - column_align (list): align options for each column (default to [None]). - children (list): inner tokens (TableRows). - """ +@attr.s(slots=True, kw_only=True) +class Table(BlockToken): + """Table token.""" - def __init__(self, lines): - if "---" in lines[1]: - self.column_align = [ - self.parse_align(column) for column in self.split_delimiter(lines[1]) - ] - self.header = TableRow(lines[0], self.column_align) - self.children = [TableRow(line, self.column_align) for line in lines[2:]] - else: - self.column_align = [None] - self.children = [TableRow(line) for line in lines] + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + header: Optional["TableRow"] = attr.ib(metadata={"doc": "The header row"}) + column_align: list = attr.ib( + metadata={ + "doc": "align options for columns (left=None (default), center=0, right=1)" + } + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) @staticmethod def split_delimiter(delimiter): @@ -694,8 +743,9 @@ def parse_align(column): def start(line): return "|" in line - @staticmethod - def read(lines): + @classmethod + def read(cls, lines): + start_line = lines.lineno lines.anchor() line_buffer = [next(lines)] while lines.peek() is not None and "|" in lines.peek(): @@ -703,54 +753,99 @@ def read(lines): if len(line_buffer) < 2 or "---" not in line_buffer[1]: lines.reset() return None - return line_buffer + if "---" in line_buffer[1]: + 
column_align = [ + cls.parse_align(column) + for column in cls.split_delimiter(line_buffer[1]) + ] + header = TableRow.read(line_buffer[0], column_align, lineno=start_line) + children = [ + TableRow.read(line, column_align, lineno=start_line + i) + for i, line in enumerate(line_buffer[2:], 2) + ] + else: + column_align = [None] + header = None + children = [ + TableRow.read(line, lineno=start_line + i) + for i, line in enumerate(line_buffer) + ] + return cls( + children=children, + column_align=column_align, + header=header, + position=(start_line, lines.lineno), + ) + +@attr.s(slots=True, kw_only=True) class TableRow(BlockToken): - """ - Table row token. + """Table row token.""" - Should only be called by Table.__init__(). - """ + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + row_align: list = attr.ib( + metadata={ + "doc": "align options for columns (left=None (default), center=0, right=1)" + } + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __init__(self, line, row_align=None): - self.row_align = row_align or [None] + @classmethod + def read(cls, line, row_align=None, lineno=0): + row_align = row_align or [None] cells = filter(None, line.strip().split("|")) - self.children = [ - TableCell(cell.strip() if cell else "", align) - for cell, align in zip_longest(cells, self.row_align) + children = [ + TableCell.read(cell.strip() if cell else "", align, lineno=lineno) + for cell, align in zip_longest(cells, row_align) ] + return cls(children=children, row_align=row_align, position=(lineno, lineno)) +@attr.s(slots=True, kw_only=True) class TableCell(BlockToken): - """ - Table cell token. - Boundary between span-level and block-level tokens. + """Table cell token. - Should only be called by TableRow.__init__(). + Boundary between span-level and block-level tokens. Attributes: align (bool): align option for current cell (default to None). 
children (list): inner (span-)tokens. """ - def __init__(self, content, align=None): - self.align = align - super().__init__(content, span_token.tokenize_inner) + children: ListType[Token] = attr.ib( + repr=lambda c: str(len(c)), metadata={"doc": "Child tokens list"} + ) + align: Optional[int] = attr.ib( + metadata={ + "doc": "align options for the cell (left=None (default), center=0, right=1)" + } + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) + @classmethod + def read(cls, content, align=None, expand_spans=False, lineno=0): + children = SpanContainer(content) + if expand_spans: + children = children.expand() + return cls(children=children, align=align, position=(lineno, lineno)) -class Footnote(BlockToken): - """ - Footnote token. - The constructor returns None, because the footnote information - is stored in Footnote.read. - """ +@attr.s(slots=True, kw_only=True) +class LinkDefinition(BlockToken): + """LinkDefinition token: `[ref]: url "title"`""" - label_pattern = re.compile(r"[ \n]{0,3}\[(.+?)\]", re.DOTALL) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __new__(cls, _): - return None + label_pattern = re.compile(r"[ \n]{0,3}\[(.+?)\]", re.DOTALL) @classmethod def start(cls, line): @@ -759,6 +854,7 @@ def start(cls, line): @classmethod def read(cls, lines): line_buffer = [] + start_line = lines.lineno + 1 next_line = lines.peek() while next_line is not None and next_line.strip() != "": line_buffer.append(next(lines)) @@ -772,8 +868,8 @@ def read(cls, lines): break offset, match = match_info matches.append(match) - cls.append_footnotes(matches, _root_node) - return matches or None + cls.append_link_definitions(matches) + return cls(position=(start_line, lines.lineno)) if matches else None @classmethod def match_reference(cls, lines, string, offset): @@ -896,56 +992,58 @@ def match_link_title(cls, string, offset): return None 
@staticmethod - def append_footnotes(matches, root): + def append_link_definitions(matches): for key, dest, title in matches: key = normalize_label(key) - dest = span_token.EscapeSequence.strip(dest.strip()) - title = span_token.EscapeSequence.strip(title) - if key not in root.footnotes: - root.footnotes[key] = dest, title + dest = span_tokens.EscapeSequence.strip(dest.strip()) + title = span_tokens.EscapeSequence.strip(title) + link_definitions = get_parse_context().link_definitions + if key not in link_definitions: + link_definitions[key] = dest, title @staticmethod def backtrack(lines, string, offset): lines._index -= string[offset + 1 :].count("\n") +@attr.s(slots=True, kw_only=True) class ThematicBreak(BlockToken): - """ - Thematic break token (a.k.a. horizontal rule.) - """ + """Thematic break token (a.k.a. horizontal rule.)""" - pattern = re.compile(r" {0,3}(?:([-_*])\s*?)(?:\1\s*?){2,}$") + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) - def __init__(self, _): - pass + _pattern = re.compile(r" {0,3}(?:([-_*])\s*?)(?:\1\s*?){2,}$") @classmethod def start(cls, line): - return cls.pattern.match(line) + return cls._pattern.match(line) - @staticmethod - def read(lines): - return [next(lines)] + @classmethod + def read(cls, lines): + next(lines) + return cls(position=(lines.lineno, lines.lineno)) +@attr.s(slots=True, kw_only=True) class HTMLBlock(BlockToken): - """ - Block-level HTML tokens. + """Block-level HTML token.""" - Attributes: - content (str): literal strings rendered as-is. 
- """ + content: str = attr.ib( + repr=False, metadata={"doc": "literal strings rendered as-is"} + ) + position: Tuple[int, int] = attr.ib( + metadata={"doc": "Line position in source text (start, end)"} + ) _end_cond = None multiblock = re.compile(r"<(script|pre|style)[ >\n]") predefined = re.compile(r"<\/?(.+?)(?:\/?>|[ \n])") custom_tag = re.compile( - r"(?:" + "|".join((span_token._open_tag, span_token._closing_tag)) + r")\s*$" + r"(?:" + "|".join((span_tokens._open_tag, span_tokens._closing_tag)) + r")\s*$" ) - def __init__(self, lines): - self.content = "".join(lines).rstrip("\n") - @classmethod def start(cls, line): stripped = line.lstrip() @@ -974,7 +1072,7 @@ def start(cls, line): return 5 # rule 6: predefined tags (see html_token._tags), read until newline match_obj = cls.predefined.match(stripped) - if match_obj is not None and match_obj.group(1).casefold() in span_token._tags: + if match_obj is not None and match_obj.group(1).casefold() in span_tokens._tags: cls._end_cond = None return 6 # rule 7: custom tags, read until newline @@ -987,6 +1085,7 @@ def start(cls, line): @classmethod def read(cls, lines): # note: stop condition can trigger on the starting line + start_line = lines.lineno line_buffer = [] for line in lines: line_buffer.append(line) @@ -996,9 +1095,7 @@ def read(cls, lines): elif line.strip() == "": line_buffer.pop() break - return line_buffer - - -_token_types = local() -_token_types.value = [] -reset_tokens() + return cls( + content="".join(line_buffer).rstrip("\n"), + position=(start_line, lines.lineno), + ) diff --git a/mistletoe/latex_token.py b/mistletoe/latex_token.py index ea3db6f..8bfe606 100644 --- a/mistletoe/latex_token.py +++ b/mistletoe/latex_token.py @@ -1,11 +1,11 @@ import re -import mistletoe.span_token as span_token +from mistletoe.base_elements import SpanToken __all__ = ["Math"] -class Math(span_token.SpanToken): +class Math(SpanToken): pattern = re.compile(r"(\${1,2})([^$]+?)\1") parse_inner = False parse_group = 0 
diff --git a/mistletoe/core_tokens.py b/mistletoe/nested_tokenizer.py similarity index 93% rename from mistletoe/core_tokens.py rename to mistletoe/nested_tokenizer.py index fa6ba4c..5eb3f94 100644 --- a/mistletoe/core_tokens.py +++ b/mistletoe/nested_tokenizer.py @@ -1,4 +1,7 @@ +"""Tokenize nested span tokens.""" import re +from threading import local +from mistletoe.parse_context import get_parse_context whitespace = {" ", "\t", "\n", "\x0b", "\x0c", "\r"} @@ -70,10 +73,11 @@ code_pattern = re.compile(r"(? ParseContext: + """Return the current `ParseContext`.""" + global THREAD + if not hasattr(THREAD, "context") or reset: + THREAD.context = ParseContext() + return THREAD.context + + +def set_parse_context(parse_context): + """Set an existing `ParseContext`.""" + global THREAD + THREAD.context = parse_context + + +def tokens_from_module(module): + """ + Helper method; takes a module and returns a list of all token classes + specified in module.__all__. Useful when custom tokens are defined in a + separate module. + """ + return [getattr(module, name) for name in module.__all__] diff --git a/test/samples/__init__.py b/mistletoe/renderers/__init__.py similarity index 100% rename from test/samples/__init__.py rename to mistletoe/renderers/__init__.py diff --git a/mistletoe/base_renderer.py b/mistletoe/renderers/base.py similarity index 86% rename from mistletoe/base_renderer.py rename to mistletoe/renderers/base.py index 4bdf05d..b15405b 100644 --- a/mistletoe/base_renderer.py +++ b/mistletoe/renderers/base.py @@ -4,7 +4,8 @@ import re import sys -from mistletoe import block_token, span_token +from mistletoe import base_elements +from mistletoe.parse_context import get_parse_context, set_parse_context class BaseRenderer(object): @@ -27,9 +28,9 @@ class BaseRenderer(object): >>> from mistletoe import Document >>> from some_renderer import SomeRenderer >>> with SomeRenderer() as renderer: - ... rendered = renderer.render(Document(fin)) + ... 
rendered = renderer.render(Document.read(fin)) - See mistletoe.html_renderer for an implementation example. + See mistletoe.renderers.html for an implementation example. Naming conventions: * The keys of self.render_map should exactly match the class @@ -70,19 +71,21 @@ def __init__(self, *extras): "ThematicBreak": self.render_thematic_break, "LineBreak": self.render_line_break, "Document": self.render_document, + "LinkDefinition": self.render_link_definition, } self._extras = extras - + parse_context = get_parse_context(reset=True) for token in extras: - if issubclass(token, span_token.SpanToken): - token_module = span_token + if issubclass(token, base_elements.SpanToken): + # insert at position 1 (since backslash escape should also be 1st) + parse_context.span_tokens.insert(1, token) else: - token_module = block_token - token_module.add_token(token) + parse_context.block_tokens.insert(0, token) render_func = getattr(self, self._cls_to_func(token.__name__)) self.render_map[token.__name__] = render_func - self.footnotes = {} + self.parse_context = parse_context.copy() + self.link_definitions = {} def render(self, token): """ @@ -109,22 +112,20 @@ def render_inner(self, token): Arguments: token: a branch node who has children attribute. """ - return "".join(map(self.render, token.children)) + return "".join(map(self.render, token.children or [])) def __enter__(self): """ Make renderer classes into context managers. """ + set_parse_context(self.parse_context) return self def __exit__(self, exception_type, exception_val, traceback): """ Make renderer classes into context managers. - - Reset block_token._token_types and span_token._token_types. 
""" - block_token.reset_tokens() - span_token.reset_tokens() + get_parse_context(reset=True) @classmethod def _cls_to_func(cls, cls_name): diff --git a/mistletoe/html_renderer.py b/mistletoe/renderers/html.py similarity index 94% rename from mistletoe/html_renderer.py rename to mistletoe/renderers/html.py index 1a91f2c..2093316 100644 --- a/mistletoe/html_renderer.py +++ b/mistletoe/renderers/html.py @@ -6,9 +6,9 @@ import sys from itertools import chain from urllib.parse import quote -from mistletoe.block_token import HTMLBlock -from mistletoe.span_token import HTMLSpan -from mistletoe.base_renderer import BaseRenderer +from mistletoe.block_tokens import HTMLBlock +from mistletoe.span_tokens import HTMLSpan +from mistletoe.renderers.base import BaseRenderer if sys.version_info < (3, 4): from mistletoe import _html as html @@ -17,11 +17,7 @@ class HTMLRenderer(BaseRenderer): - """ - HTML renderer class. - - See mistletoe.base_renderer module for more info. - """ + """HTML renderer class.""" def __init__(self, *extras): """ @@ -43,7 +39,7 @@ def __exit__(self, *args): html._charref = self._stdlib_charref def render_to_plain(self, token): - if hasattr(token, "children"): + if token.children is not None: inner = [self.render_to_plain(child) for child in token.children] return "".join(inner) return self.escape_html(token.content) @@ -133,9 +129,9 @@ def render_block_code(self, token): def render_list(self, token): template = "<{tag}{attr}>\n{inner}\n" - if token.start is not None: + if token.start_at is not None: tag = "ol" - attr = ' start="{}"'.format(token.start) if token.start != 1 else "" + attr = ' start="{}"'.format(token.start_at) if token.start_at != 1 else "" else: tag = "ul" attr = "" @@ -206,7 +202,7 @@ def render_html_block(token): return token.content def render_document(self, token): - self.footnotes.update(token.footnotes) + self.link_definitions.update(token.link_definitions) inner = "\n".join([self.render(child) for child in token.children]) return 
"{}\n".format(inner) if inner else "" diff --git a/mistletoe/ast_renderer.py b/mistletoe/renderers/json.py similarity index 50% rename from mistletoe/ast_renderer.py rename to mistletoe/renderers/json.py index 91e9c2d..7735b17 100644 --- a/mistletoe/ast_renderer.py +++ b/mistletoe/renderers/json.py @@ -3,23 +3,26 @@ """ import json -from mistletoe.base_renderer import BaseRenderer +from mistletoe.renderers.base import BaseRenderer -class ASTRenderer(BaseRenderer): - def render(self, token): +class JsonRenderer(BaseRenderer): + def render(self, token, as_string=True): """ - Returns the string representation of the AST. + Returns the JSON string representation of the AST. - Overrides super().render. Delegates the logic to get_ast. + Overrides super().render. Delegates the logic to ast_to_json. """ - return json.dumps(get_ast(token), indent=2) + "\n" + dct = ast_to_json(token) + if as_string: + return json.dumps(dct, indent=2) + "\n" + return dct def __getattr__(self, name): return lambda token: "" -def get_ast(token): +def ast_to_json(token): """ Recursively unrolls token attributes into dictionaries (token.children into lists). 
@@ -34,10 +37,10 @@ def get_ast(token): # # [1]: https://docs.python.org/3/whatsnew/3.6.html # [2]: https://github.com/syntax-tree/mdast - node["type"] = token.__class__.__name__ - node.update(token.__dict__) + node["type"] = token.name + node.update(token.to_dict()) if "header" in node: - node["header"] = get_ast(node["header"]) - if "children" in node: - node["children"] = [get_ast(child) for child in node["children"]] + node["header"] = ast_to_json(token.header) + if token.children is not None: + node["children"] = [ast_to_json(child) for child in token.children] return node diff --git a/mistletoe/latex_renderer.py b/mistletoe/renderers/latex.py similarity index 96% rename from mistletoe/latex_renderer.py rename to mistletoe/renderers/latex.py index 121057a..227e11f 100644 --- a/mistletoe/latex_renderer.py +++ b/mistletoe/renderers/latex.py @@ -4,7 +4,7 @@ from itertools import chain import mistletoe.latex_token as latex_token -from mistletoe.base_renderer import BaseRenderer +from mistletoe.renderers.base import BaseRenderer class LaTeXRenderer(BaseRenderer): @@ -89,7 +89,7 @@ def render_block_code(self, token): def render_list(self, token): self.packages["listings"] = [] template = "\\begin{{{tag}}}\n{inner}\\end{{{tag}}}\n" - tag = "enumerate" if token.start is not None else "itemize" + tag = "enumerate" if token.start_at is not None else "itemize" inner = self.render_inner(token) return template.format(tag=tag, inner=inner) @@ -155,7 +155,7 @@ def render_document(self, token): "{inner}" "\\end{{document}}\n" ) - self.footnotes.update(token.footnotes) + self.link_definitions.update(token.link_definitions) return template.format( inner=self.render_inner(token), packages=self.render_packages() ) diff --git a/mistletoe/span_tokenizer.py b/mistletoe/span_tokenizer.py index 373f6a2..8a4ebc4 100644 --- a/mistletoe/span_tokenizer.py +++ b/mistletoe/span_tokenizer.py @@ -1,9 +1,12 @@ """ Inline tokenizer for mistletoe. 
""" +from mistletoe.parse_context import get_parse_context -def tokenize(string, token_types): +def tokenize_span(string, token_types=None): + if token_types is None: + token_types = get_parse_context().span_tokens *token_types, fallback_token = token_types tokens = find_tokens(string, token_types, fallback_token) token_buffer = [] diff --git a/mistletoe/span_token.py b/mistletoe/span_tokens.py similarity index 69% rename from mistletoe/span_token.py rename to mistletoe/span_tokens.py index c53506a..0046296 100644 --- a/mistletoe/span_token.py +++ b/mistletoe/span_tokens.py @@ -1,13 +1,10 @@ """ Built-in span-level token classes. """ - import re -from threading import local - -import mistletoe.span_tokenizer as tokenizer -from mistletoe import core_tokens +from mistletoe import nested_tokenizer +from mistletoe.base_elements import SpanToken """ Tokens to be included in the parsing process, in the order specified. @@ -23,83 +20,16 @@ ] -_root_node = None - - -def tokenize_inner(content): - """ - A wrapper around span_tokenizer.tokenize. Pass in all span-level token - constructors as arguments to span_tokenizer.tokenize. - - Doing so (instead of importing span_token module in span_tokenizer) - avoids cyclic dependency issues, and allows for future injections of - custom token classes. - - _token_types variable is at the bottom of this module. - - See also: span_tokenizer.tokenize, block_token.tokenize. - """ - return tokenizer.tokenize(content, _token_types.value) - - -def add_token(token_cls, position=1): - """ - Allows external manipulation of the parsing process. - This function is called in BaseRenderer.__enter__. - - Arguments: - token_cls (SpanToken): token to be included in the parsing process. - """ - _token_types.value.insert(position, token_cls) - - -def remove_token(token_cls): - """ - Allows external manipulation of the parsing process. - This function is called in BaseRenderer.__exit__. 
- - Arguments: - token_cls (SpanToken): token to be removed from the parsing process. - """ - _token_types.value.remove(token_cls) - - -def reset_tokens(): - """ - Resets global _token_types to all token classes in __all__. - """ - global _token_types - _token_types.value = [globals()[cls_name] for cls_name in __all__] - - -class SpanToken: - parse_inner = True - parse_group = 1 - precedence = 5 - - def __init__(self, match): - if not self.parse_inner: - self.content = match.group(self.parse_group) - - def __contains__(self, text): - if hasattr(self, "children"): - return any(text in child for child in self.children) - return text in self.content - - @classmethod - def find(cls, string): - return cls.pattern.finditer(string) - - class CoreTokens(SpanToken): precedence = 3 def __new__(self, match): + # TODO this needs to be made more general (so tokens can be in diffent modules) return globals()[match.type](match) @classmethod def find(cls, string): - return core_tokens.find_core_tokens(string, _root_node) + return nested_tokenizer.find_nested_tokenizer(string) class Strong(SpanToken): @@ -129,8 +59,8 @@ def __init__(self, match): @classmethod def find(cls, string): - matches = core_tokens._code_matches - core_tokens._code_matches = [] + matches = nested_tokenizer._code_matches.value + nested_tokenizer._code_matches.value = [] return matches @@ -144,7 +74,7 @@ class Strikethrough(SpanToken): class Image(SpanToken): """ - Image tokens. ("![alt](src "title")") + Image tokens, with inline targets: "![alt](src "title")". Attributes: src (str): image source. @@ -158,7 +88,7 @@ def __init__(self, match): class Link(SpanToken): """ - Link tokens. ("[name](target)") + Link tokens, with inline targets: "[name](target)" Attributes: target (str): link target. 
@@ -330,8 +260,3 @@ class HTMLSpan(SpanToken): ) parse_inner = False parse_group = 0 - - -_token_types = local() -_token_types.value = [] -reset_tokens() diff --git a/setup.py b/setup.py index a1178dc..185dd67 100644 --- a/setup.py +++ b/setup.py @@ -29,11 +29,13 @@ "Topic :: Text Processing :: Markup", ], keywords="markdown lexer parser development", - python_requires="~=3.5", + python_requires="~=3.6", + install_requires=["attrs~=19.3"], extras_require={ "code_style": ["flake8<3.8.0,>=3.7.0", "black==19.10b0", "pre-commit==1.17.0"], "testing": ["coverage", "pytest>=3.6,<4", "pytest-cov"], "rtd": ["sphinx>=2,<3", "myst-parser", "pyyaml"], + "benchmark": ["commonmark~=0.9.1", "markdown~=3.2", "mistune~=0.8.4"], }, zip_safe=False, ) diff --git a/test/commonmark/test_commonmark.py b/test/commonmark/test_commonmark.py index 8e5a17c..5313c3a 100644 --- a/test/commonmark/test_commonmark.py +++ b/test/commonmark/test_commonmark.py @@ -4,7 +4,7 @@ import pytest from mistletoe import Document -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.renderers.html import HTMLRenderer with open(os.path.join(os.path.dirname(__file__), "commonmark.json"), "r") as fin: tests = json.load(fin) @@ -14,5 +14,5 @@ def test_commonmark(entry): test_case = entry["markdown"].splitlines(keepends=True) with HTMLRenderer() as renderer: - output = renderer.render(Document(test_case)) + output = renderer.render(Document.read(test_case)) assert entry["html"] == output diff --git a/test/test_ast_renderer.py b/test/test_ast_renderer.py deleted file mode 100644 index 08695fd..0000000 --- a/test/test_ast_renderer.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest -from mistletoe import Document, ast_renderer - - -class TestASTRenderer(unittest.TestCase): - def test(self): - self.maxDiff = None - d = Document(["# heading 1\n", "\n", "hello\n", "world\n"]) - output = ast_renderer.get_ast(d) - target = { - "type": "Document", - "footnotes": {}, - "children": [ - { - "type": "Heading", - 
"level": 1, - "children": [{"type": "RawText", "content": "heading 1"}], - }, - { - "type": "Paragraph", - "children": [ - {"type": "RawText", "content": "hello"}, - {"type": "LineBreak", "soft": True, "content": ""}, - {"type": "RawText", "content": "world"}, - ], - }, - ], - } - self.assertEqual(output, target) - - def test_footnotes(self): - self.maxDiff = None - d = Document(["[bar][baz]\n", "\n", "[baz]: spam\n"]) - target = { - "type": "Document", - "footnotes": {"baz": ("spam", "")}, - "children": [ - { - "type": "Paragraph", - "children": [ - { - "type": "Link", - "target": "spam", - "title": "", - "children": [{"type": "RawText", "content": "bar"}], - } - ], - } - ], - } - output = ast_renderer.get_ast(d) - self.assertEqual(output, target) diff --git a/test/test_base_elements.py b/test/test_base_elements.py new file mode 100644 index 0000000..086ec53 --- /dev/null +++ b/test/test_base_elements.py @@ -0,0 +1,31 @@ +from textwrap import dedent + +from mistletoe import Document + + +def test_walk(): + doc = Document.read( + dedent( + """\ + a **b** + + c [*d*](link) + """ + ) + ) + tree = [ + (t.node.name, t.parent.name if t.parent else None, t.depth) + for t in doc.walk(include_self=True) + ] + assert tree == [ + ("Document", None, 0), + ("Paragraph", "Document", 1), + ("Paragraph", "Document", 1), + ("RawText", "Paragraph", 2), + ("Strong", "Paragraph", 2), + ("RawText", "Paragraph", 2), + ("Link", "Paragraph", 2), + ("RawText", "Strong", 3), + ("Emphasis", "Link", 3), + ("RawText", "Emphasis", 4), + ] diff --git a/test/test_block_token.py b/test/test_block_token.py index 7c731d0..27899d3 100644 --- a/test/test_block_token.py +++ b/test/test_block_token.py @@ -1,21 +1,22 @@ import unittest from unittest.mock import patch, call -from mistletoe import block_token, span_token -from mistletoe.block_tokenizer import FileWrapper +from mistletoe import block_tokens, span_tokens +from mistletoe.parse_context import get_parse_context +from mistletoe.block_tokenizer 
import FileWrapper, tokenize_main class TestToken(unittest.TestCase): def setUp(self): self.addCleanup( - lambda: span_token._token_types.value.__setitem__(-1, span_token.RawText) + lambda: get_parse_context().span_tokens.__setitem__(-1, span_tokens.RawText) ) - patcher = patch("mistletoe.span_token.RawText") + patcher = patch("mistletoe.span_tokens.RawText") self.mock = patcher.start() - span_token._token_types.value[-1] = self.mock + get_parse_context().span_tokens[-1] = self.mock self.addCleanup(patcher.stop) def _test_match(self, token_cls, lines, arg, **kwargs): - token = next(iter(block_token.tokenize(lines))) + token = next(iter(tokenize_main(lines))) self.assertIsInstance(token, token_cls) self._test_token(token, arg, **kwargs) @@ -29,37 +30,37 @@ class TestATXHeading(TestToken): def test_match(self): lines = ["### heading 3\n"] arg = "heading 3" - self._test_match(block_token.Heading, lines, arg, level=3) + self._test_match(block_tokens.Heading, lines, arg, level=3) def test_children_with_enclosing_hashes(self): lines = ["# heading 3 ##### \n"] arg = "heading 3" - self._test_match(block_token.Heading, lines, arg, level=1) + self._test_match(block_tokens.Heading, lines, arg, level=1) def test_not_heading(self): lines = ["####### paragraph\n"] arg = "####### paragraph" - self._test_match(block_token.Paragraph, lines, arg) + self._test_match(block_tokens.Paragraph, lines, arg) def test_heading_in_paragraph(self): lines = ["foo\n", "# heading\n", "bar\n"] - token1, token2, token3 = block_token.tokenize(lines) - self.assertIsInstance(token1, block_token.Paragraph) - self.assertIsInstance(token2, block_token.Heading) - self.assertIsInstance(token3, block_token.Paragraph) + token1, token2, token3 = tokenize_main(lines) + self.assertIsInstance(token1, block_tokens.Paragraph) + self.assertIsInstance(token2, block_tokens.Heading) + self.assertIsInstance(token3, block_tokens.Paragraph) class TestSetextHeading(TestToken): def test_match(self): lines = ["some 
heading\n", "---\n"] arg = "some heading" - self._test_match(block_token.SetextHeading, lines, arg, level=2) + self._test_match(block_tokens.SetextHeading, lines, arg, level=2) def test_next(self): lines = ["some\n", "heading\n", "---\n", "\n", "foobar\n"] - tokens = iter(block_token.tokenize(lines)) - self.assertIsInstance(next(tokens), block_token.SetextHeading) - self.assertIsInstance(next(tokens), block_token.Paragraph) + tokens = iter(tokenize_main(lines)) + self.assertIsInstance(next(tokens), block_tokens.SetextHeading) + self.assertIsInstance(next(tokens), block_tokens.Paragraph) self.mock.assert_has_calls([call("some"), call("heading"), call("foobar")]) with self.assertRaises(StopIteration): next(tokens) @@ -67,70 +68,70 @@ def test_next(self): class TestQuote(unittest.TestCase): def test_match(self): - with patch("mistletoe.block_token.Paragraph"): - token = next(iter(block_token.tokenize(["> line 1\n", "> line 2\n"]))) - self.assertIsInstance(token, block_token.Quote) + with patch("mistletoe.block_tokens.Paragraph"): + token = next(iter(tokenize_main(["> line 1\n", "> line 2\n"]))) + self.assertIsInstance(token, block_tokens.Quote) def test_lazy_continuation(self): - with patch("mistletoe.block_token.Paragraph"): - token = next(iter(block_token.tokenize(["> line 1\n", "line 2\n"]))) - self.assertIsInstance(token, block_token.Quote) + with patch("mistletoe.block_tokens.Paragraph"): + token = next(iter(tokenize_main(["> line 1\n", "line 2\n"]))) + self.assertIsInstance(token, block_tokens.Quote) class TestCodeFence(TestToken): def test_match_fenced_code(self): lines = ["```sh\n", "rm dir\n", "mkdir test\n", "```\n"] arg = "rm dir\nmkdir test\n" - self._test_match(block_token.CodeFence, lines, arg, language="sh") + self._test_match(block_tokens.CodeFence, lines, arg, language="sh") def test_match_fenced_code_with_tilda(self): lines = ["~~~sh\n", "rm dir\n", "mkdir test\n", "~~~\n"] arg = "rm dir\nmkdir test\n" - self._test_match(block_token.CodeFence, lines, 
arg, language="sh") + self._test_match(block_tokens.CodeFence, lines, arg, language="sh") def test_mixed_code_fence(self): lines = ["~~~markdown\n", "```sh\n", "some code\n", "```\n", "~~~\n"] arg = "```sh\nsome code\n```\n" - self._test_match(block_token.CodeFence, lines, arg, language="markdown") + self._test_match(block_tokens.CodeFence, lines, arg, language="markdown") def test_fence_code_lazy_continuation(self): lines = ["```sh\n", "rm dir\n", "\n", "mkdir test\n", "```\n"] arg = "rm dir\n\nmkdir test\n" - self._test_match(block_token.CodeFence, lines, arg, language="sh") + self._test_match(block_tokens.CodeFence, lines, arg, language="sh") def test_no_wrapping_newlines_code_fence(self): lines = ["```\n", "hey", "```\n", "paragraph\n"] arg = "hey" - self._test_match(block_token.CodeFence, lines, arg, language="") + self._test_match(block_tokens.CodeFence, lines, arg, language="") def test_unclosed_code_fence(self): lines = ["```\n", "hey"] arg = "hey" - self._test_match(block_token.CodeFence, lines, arg, language="") + self._test_match(block_tokens.CodeFence, lines, arg, language="") class TestBlockCode(TestToken): def test_parse_indented_code(self): lines = [" rm dir\n", " mkdir test\n"] arg = "rm dir\nmkdir test\n" - self._test_match(block_token.BlockCode, lines, arg, language="") + self._test_match(block_tokens.BlockCode, lines, arg, language="") class TestParagraph(TestToken): def test_parse(self): lines = ["some\n", "continuous\n", "lines\n"] arg = "some" - self._test_match(block_token.Paragraph, lines, arg) + self._test_match(block_tokens.Paragraph, lines, arg) def test_read(self): lines = ["this\n", "```\n", "is some\n", "```\n", "code\n"] try: - token1, token2, token3 = block_token.tokenize(lines) + token1, token2, token3 = tokenize_main(lines) except ValueError as e: raise AssertionError("Token number mismatch.") from e - self.assertIsInstance(token1, block_token.Paragraph) - self.assertIsInstance(token2, block_token.CodeFence) - 
self.assertIsInstance(token3, block_token.Paragraph) + self.assertIsInstance(token1, block_tokens.Paragraph) + self.assertIsInstance(token2, block_tokens.CodeFence) + self.assertIsInstance(token3, block_tokens.Paragraph) class TestListItem(unittest.TestCase): @@ -144,78 +145,78 @@ def test_parse_marker(self): "123456789. item x\n", ] for line in lines: - self.assertTrue(block_token.ListItem.parse_marker(line)) + self.assertTrue(block_tokens.ListItem.parse_marker(line)) bad_lines = ["> foo\n", "1item 1\n", "2| item 2\n", "1234567890. item x\n"] for line in bad_lines: - self.assertFalse(block_token.ListItem.parse_marker(line)) + self.assertFalse(block_tokens.ListItem.parse_marker(line)) def test_tokenize(self): lines = [" - foo\n", " bar\n", "\n", " baz\n"] - token1, token2 = next(iter(block_token.tokenize(lines))).children[0].children - self.assertIsInstance(token1, block_token.Paragraph) + token1, token2 = next(iter(tokenize_main(lines))).children[0].children + self.assertIsInstance(token1, block_tokens.Paragraph) self.assertTrue("foo" in token1) - self.assertIsInstance(token2, block_token.BlockCode) + self.assertIsInstance(token2, block_tokens.BlockCode) def test_sublist(self): lines = ["- foo\n", " - bar\n"] - token1, token2 = block_token.tokenize(lines)[0].children[0].children - self.assertIsInstance(token1, block_token.Paragraph) - self.assertIsInstance(token2, block_token.List) + token1, token2 = tokenize_main(lines)[0].children[0].children + self.assertIsInstance(token1, block_tokens.Paragraph) + self.assertIsInstance(token2, block_tokens.List) def test_deep_list(self): lines = ["- foo\n", " - bar\n", " - baz\n"] FileWrapper(lines) - ptoken, ltoken = block_token.tokenize(lines)[0].children[0].children - self.assertIsInstance(ptoken, block_token.Paragraph) - self.assertIsInstance(ltoken, block_token.List) + ptoken, ltoken = tokenize_main(lines)[0].children[0].children + self.assertIsInstance(ptoken, block_tokens.Paragraph) + self.assertIsInstance(ltoken, 
block_tokens.List) self.assertTrue("foo" in ptoken) ptoken, ltoken = ltoken.children[0].children - self.assertIsInstance(ptoken, block_token.Paragraph) + self.assertIsInstance(ptoken, block_tokens.Paragraph) self.assertTrue("bar" in ptoken) - self.assertIsInstance(ltoken, block_token.List) + self.assertIsInstance(ltoken, block_tokens.List) self.assertTrue("baz" in ltoken) def test_loose_list(self): lines = ["- foo\n", " ~~~\n", " bar\n", " \n", " baz\n" " ~~~\n"] FileWrapper(lines) - list_item = block_token.tokenize(lines)[0].children[0] + list_item = tokenize_main(lines)[0].children[0] self.assertEqual(list_item.loose, False) def test_tight_list(self): lines = ["- foo\n", "\n", "# bar\n"] FileWrapper(lines) - list_item = block_token.tokenize(lines)[0].children[0] + list_item = tokenize_main(lines)[0].children[0] self.assertEqual(list_item.loose, False) class TestList(unittest.TestCase): def test_different_markers(self): lines = ["- foo\n", "* bar\n", "1. baz\n", "2) spam\n"] - l1, l2, l3, l4 = block_token.tokenize(lines) - self.assertIsInstance(l1, block_token.List) + l1, l2, l3, l4 = tokenize_main(lines) + self.assertIsInstance(l1, block_tokens.List) self.assertTrue("foo" in l1) - self.assertIsInstance(l2, block_token.List) + self.assertIsInstance(l2, block_tokens.List) self.assertTrue("bar" in l2) - self.assertIsInstance(l3, block_token.List) + self.assertIsInstance(l3, block_tokens.List) self.assertTrue("baz" in l3) - self.assertIsInstance(l4, block_token.List) + self.assertIsInstance(l4, block_tokens.List) self.assertTrue("spam" in l4) def test_sublist(self): lines = ["- foo\n", " + bar\n"] - (token,) = block_token.tokenize(lines) - self.assertIsInstance(token, block_token.List) + (token,) = tokenize_main(lines) + self.assertIsInstance(token, block_tokens.List) class TestTable(unittest.TestCase): def test_parse_align(self): - test_func = block_token.Table.parse_align + test_func = block_tokens.Table.parse_align self.assertEqual(test_func(":------"), None) 
self.assertEqual(test_func(":-----:"), 0) self.assertEqual(test_func("------:"), 1) def test_parse_delimiter(self): - test_func = block_token.Table.split_delimiter + test_func = block_tokens.Table.split_delimiter self.assertEqual( list(test_func("| :--- | :---: | ---:|\n")), [":---", ":---:", "---:"] ) @@ -227,92 +228,111 @@ def test_match(self): "| cell 1 | cell 2 | cell 3 |\n", "| more 1 | more 2 | more 3 |\n", ] - with patch("mistletoe.block_token.TableRow") as mock: - token = next(iter(block_token.tokenize(lines))) - self.assertIsInstance(token, block_token.Table) + with patch("mistletoe.block_tokens.TableRow") as mock: + token = next(iter(tokenize_main(lines))) + self.assertIsInstance(token, block_tokens.Table) self.assertTrue(hasattr(token, "header")) self.assertEqual(token.column_align, [None, None, None]) token.children - calls = [call(line, [None, None, None]) for line in lines[:1] + lines[2:]] + calls = [ + call.read(line, [None, None, None], lineno=l) + for line, l in zip(lines[:1] + lines[2:], [0, 2, 3]) + ] mock.assert_has_calls(calls) def test_easy_table(self): lines = ["header 1 | header 2\n", " ---: | :---\n", " cell 1 | cell 2\n"] - with patch("mistletoe.block_token.TableRow") as mock: - (token,) = block_token.tokenize(lines) - self.assertIsInstance(token, block_token.Table) + with patch("mistletoe.block_tokens.TableRow") as mock: + (token,) = tokenize_main(lines) + self.assertIsInstance(token, block_tokens.Table) self.assertTrue(hasattr(token, "header")) self.assertEqual(token.column_align, [1, None]) token.children - calls = [call(line, [1, None]) for line in lines[:1] + lines[2:]] + calls = [ + call.read(line, [1, None], lineno=l) + for line, l in zip(lines[:1] + lines[2:], [0, 2]) + ] mock.assert_has_calls(calls) def test_not_easy_table(self): lines = ["not header 1 | not header 2\n", "foo | bar\n"] - (token,) = block_token.tokenize(lines) - self.assertIsInstance(token, block_token.Paragraph) + (token,) = tokenize_main(lines) + 
self.assertIsInstance(token, block_tokens.Paragraph) class TestTableRow(unittest.TestCase): def test_match(self): - with patch("mistletoe.block_token.TableCell") as mock: + with patch("mistletoe.block_tokens.TableCell") as mock: line = "| cell 1 | cell 2 |\n" - token = block_token.TableRow(line) - self.assertEqual(token.row_align, [None]) - token.children - mock.assert_has_calls([call("cell 1", None), call("cell 2", None)]) + result = block_tokens.TableRow.read(line) + self.assertEqual(result.row_align, [None]) + self.assertEquals(len(result.children), 2) + mock.assert_has_calls( + [ + call.read("cell 1", None, lineno=0), + call.read("cell 2", None, lineno=0), + ] + ) def test_easy_table_row(self): - with patch("mistletoe.block_token.TableCell") as mock: + with patch("mistletoe.block_tokens.TableCell") as mock: line = "cell 1 | cell 2\n" - token = block_token.TableRow(line) - self.assertEqual(token.row_align, [None]) - token.children - mock.assert_has_calls([call("cell 1", None), call("cell 2", None)]) + result = block_tokens.TableRow.read(line) + self.assertEqual(result.row_align, [None]) + self.assertEquals(len(result.children), 2) + mock.assert_has_calls( + [ + call.read("cell 1", None, lineno=0), + call.read("cell 2", None, lineno=0), + ] + ) def test_short_row(self): - with patch("mistletoe.block_token.TableCell") as mock: + with patch("mistletoe.block_tokens.TableCell") as mock: line = "| cell 1 |\n" - token = block_token.TableRow(line, [None, None]) - self.assertEqual(token.row_align, [None, None]) - token.children - mock.assert_has_calls([call("cell 1", None), call("", None)]) + result = block_tokens.TableRow.read(line, [None, None]) + self.assertEqual(result.row_align, [None, None]) + self.assertEquals(len(result.children), 2) + mock.assert_has_calls( + [call.read("cell 1", None, lineno=0), call.read("", None, lineno=0)] + ) class TestTableCell(TestToken): def test_match(self): - token = block_token.TableCell("cell 2") + token = 
block_tokens.TableCell.read("cell 2", expand_spans=True) self._test_token(token, "cell 2", align=None) + assert isinstance(token, block_tokens.TableCell) -class TestFootnote(unittest.TestCase): +class TestLinkDefinition(unittest.TestCase): def test_store(self): lines = ["[key 1]: value1\n", "[key 2]: value2\n"] - token = block_token.Document(lines) + token = block_tokens.Document.read(lines) self.assertEqual( - token.footnotes, {"key 1": ("value1", ""), "key 2": ("value2", "")} + token.link_definitions, {"key 1": ("value1", ""), "key 2": ("value2", "")} ) class TestDocument(unittest.TestCase): - def test_store_footnote(self): + def test_store_link_definition(self): lines = ["[key 1]: value1\n", "[key 2]: value2\n"] - document = block_token.Document(lines) - self.assertEqual(document.footnotes["key 1"], ("value1", "")) - self.assertEqual(document.footnotes["key 2"], ("value2", "")) + document = block_tokens.Document.read(lines) + self.assertEqual(document.link_definitions["key 1"], ("value1", "")) + self.assertEqual(document.link_definitions["key 2"], ("value2", "")) def test_auto_splitlines(self): lines = "some\ncontinual\nlines\n" - document = block_token.Document(lines) - self.assertIsInstance(document.children[0], block_token.Paragraph) + document = block_tokens.Document.read(lines) + self.assertIsInstance(document.children[0], block_tokens.Paragraph) self.assertEqual(len(document.children), 1) class TestThematicBreak(unittest.TestCase): def test_match(self): def test_case(line): - token = next(iter(block_token.tokenize([line]))) - self.assertIsInstance(token, block_token.ThematicBreak) + token = next(iter(tokenize_main([line]))) + self.assertIsInstance(token, block_tokens.ThematicBreak) cases = ["---\n", "* * *\n", "_ _ _\n"] for case in cases: @@ -322,7 +342,7 @@ def test_case(line): class TestContains(unittest.TestCase): def test_contains(self): lines = ["# heading\n", "\n", "paragraph\n", "with\n", "`code`\n"] - token = block_token.Document(lines) + token = 
block_tokens.Document.read(lines) self.assertTrue("heading" in token) self.assertTrue("code" in token) self.assertFalse("foo" in token) diff --git a/test/test_ci.sh b/test/test_ci.sh deleted file mode 100755 index 29678d3..0000000 --- a/test/test_ci.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -set -e - -function main { - if [[ "$1" == "" ]]; then - echo "[Error] Specify how far you want to go back." - exit 1 - fi - - CURR_BRANCH="$(get_current_branch)" - - git checkout --quiet HEAD~$1 - render_to_file "out2.html" - OLD_SHA=$(get_sha "out2.html") - - git checkout --quiet "$CURR_BRANCH" - render_to_file "out.html" - NEW_SHA=$(get_sha "out2.html") - - if [[ "$OLD_SHA" == "$NEW_SHA" ]]; then - cleanup - else - get_diff - fi -} - -function get_current_branch { - git rev-parse --abbrev-ref HEAD -} - -function render_to_file { - python3 -m mistletoe "test/samples/syntax.md" > "$1" -} - -function get_sha { - md5 -q "$1" -} - -function cleanup { - echo "All good." - rm out2.html -} - -function get_diff { - echo "Diff exits; prompting for review..." 
- diff out.html out2.html | view - -} - -main $1 diff --git a/test/test_contrib/test_github_wiki.py b/test/test_contrib/test_github_wiki.py index 2b90a70..300872e 100644 --- a/test/test_contrib/test_github_wiki.py +++ b/test/test_contrib/test_github_wiki.py @@ -1,31 +1,29 @@ from unittest import TestCase, mock -from mistletoe import span_token, Document -from mistletoe.span_token import tokenize_inner, _token_types +from mistletoe.span_tokenizer import tokenize_span +from mistletoe.parse_context import get_parse_context from contrib.github_wiki import GithubWiki, GithubWikiRenderer class TestGithubWiki(TestCase): def setUp(self): - span_token._root_node = Document([]) self.renderer = GithubWikiRenderer() self.renderer.__enter__() self.addCleanup(self.renderer.__exit__, None, None, None) def test_parse(self): - MockRawText = mock.Mock(autospec="mistletoe.span_token.RawText") - RawText = _token_types.value.pop() - _token_types.value.append(MockRawText) + MockRawText = mock.Mock(autospec="mistletoe.span_tokens.RawText") + RawText = get_parse_context().span_tokens.pop() + get_parse_context().span_tokens.append(MockRawText) try: - tokens = tokenize_inner("text with [[wiki | target]]") + tokens = tokenize_span("text with [[wiki | target]]") token = tokens[1] self.assertIsInstance(token, GithubWiki) self.assertEqual(token.target, "target") - # TODO this assert is failing if part of a full pytest run only - # MockRawText.assert_has_calls([mock.call('text with '), mock.call('wiki')]) + MockRawText.assert_has_calls([mock.call("text with "), mock.call("wiki")]) finally: - _token_types.value[-1] = RawText + get_parse_context().span_tokens[-1] = RawText def test_render(self): - token = next(iter(tokenize_inner("[[wiki|target]]"))) + token = next(iter(tokenize_span("[[wiki|target]]"))) output = 'wiki' self.assertEqual(self.renderer.render(token), output) diff --git a/test/test_contrib/test_jira_renderer.py b/test/test_contrib/test_jira_renderer.py index 6d9a67d..132291a 100644 
--- a/test/test_contrib/test_jira_renderer.py +++ b/test/test_contrib/test_jira_renderer.py @@ -21,7 +21,7 @@ # SOFTWARE. from unittest import TestCase -from mistletoe.span_token import tokenize_inner +from mistletoe.span_tokenizer import tokenize_span from contrib.jira_renderer import JIRARenderer import random import string @@ -43,7 +43,7 @@ def genRandomString(self, n, hasWhitespace=False): def textFormatTest(self, inputTemplate, outputTemplate): input = self.genRandomString(80, False) - token = next(iter(tokenize_inner(inputTemplate.format(input)))) + token = next(iter(tokenize_span(inputTemplate.format(input)))) expected = outputTemplate.format(input) actual = self.renderer.render(token) self.assertEqual(expected, actual) @@ -61,13 +61,13 @@ def test_render_strikethrough(self): self.textFormatTest("-{}-", "-{}-") def test_render_image(self): - token = next(iter(tokenize_inner("![image](foo.jpg)"))) + token = next(iter(tokenize_span("![image](foo.jpg)"))) expected = "!foo.jpg!" actual = self.renderer.render(token) self.assertEqual(expected, actual) - def test_render_footnote_image(self): - # token = next(tokenize_inner('![image]\n\n[image]: foo.jpg')) + def test_render_link_definition_image(self): + # token = next(tokenize_span('![image]\n\n[image]: foo.jpg')) # expected = '!foo.jpg!' 
# actual = self.renderer.render(token) # self.assertEqual(expected, actual) @@ -78,19 +78,19 @@ def test_render_link(self): self.genRandomString(5), self.genRandomString(5), self.genRandomString(3) ) body = self.genRandomString(80, True) - token = next(iter(tokenize_inner("[{body}]({url})".format(url=url, body=body)))) + token = next(iter(tokenize_span("[{body}]({url})".format(url=url, body=body)))) expected = "[{body}|{url}]".format(url=url, body=body) actual = self.renderer.render(token) self.assertEqual(expected, actual) - def test_render_footnote_link(self): + def test_render_link_definition(self): pass def test_render_auto_link(self): url = "http://{0}.{1}.{2}".format( self.genRandomString(5), self.genRandomString(5), self.genRandomString(3) ) - token = next(iter(tokenize_inner("<{url}>".format(url=url)))) + token = next(iter(tokenize_span("<{url}>".format(url=url)))) expected = "[{url}]".format(url=url) actual = self.renderer.render(token) self.assertEqual(expected, actual) diff --git a/test/test_contrib/test_mathjax.py b/test/test_contrib/test_mathjax.py index 1b3b635..3ba58cf 100644 --- a/test/test_contrib/test_mathjax.py +++ b/test/test_contrib/test_mathjax.py @@ -11,7 +11,7 @@ class TestMathJaxRenderer(unittest.TestCase): def test_render_html(self): with MathJaxRenderer() as renderer: - token = Document(["# heading 1\n", "paragraph\n"]) + token = Document.read(["# heading 1\n", "paragraph\n"]) output = renderer.render(token) target = "

heading 1

\n

paragraph

\n" target += self.mathjax_src @@ -20,7 +20,7 @@ def test_render_html(self): def test_render_math(self): with MathJaxRenderer() as renderer: raw = ["# heading 1\n", "$$paragraph$$\n", "with $ math $\n"] - token = Document(raw) + token = Document.read(raw) output = renderer.render(token) target = "

heading 1

\n

$$paragraph$$\nwith $$ math $$

\n" target += self.mathjax_src diff --git a/test/test_contrib/test_toc_renderer.py b/test/test_contrib/test_toc_renderer.py index 5b5bd5f..217de24 100644 --- a/test/test_contrib/test_toc_renderer.py +++ b/test/test_contrib/test_toc_renderer.py @@ -1,5 +1,6 @@ from unittest import TestCase, mock -from mistletoe.block_token import Document, Heading +from mistletoe.block_tokens import Document, Heading +from mistletoe.block_tokenizer import FileWrapper from contrib.toc_renderer import TOCRenderer @@ -12,19 +13,19 @@ def test_parse_rendered_heading(self): def test_render_heading(self): renderer = TOCRenderer() Heading.start("### some *text*\n") - token = Heading(Heading.read(iter(["foo"]))) + token = Heading.read(FileWrapper(["foo"]), expand_spans=True) renderer.render_heading(token) self.assertEqual(renderer._headings[0], (3, "some text")) def test_depth(self): renderer = TOCRenderer(depth=3) - token = Document(["# title\n", "## heading\n", "#### heading\n"]) + token = Document.read(["# title\n", "## heading\n", "#### heading\n"]) renderer.render(token) self.assertEqual(renderer._headings, [(2, "heading")]) def test_omit_title(self): renderer = TOCRenderer(omit_title=True) - token = Document(["# title\n", "\n", "## heading\n"]) + token = Document.read(["# title\n", "\n", "## heading\n"]) renderer.render(token) self.assertEqual(renderer._headings, [(2, "heading")]) @@ -36,13 +37,13 @@ def test_filter_conditions(self): lambda x: re.match(r"foo", x), ] renderer = TOCRenderer(filter_conds=filter_conds) - token = Document( + token = Document.read( ["# title\n", "\n", "## heading\n", "\n", "#### not heading\n"] ) renderer.render(token) self.assertEqual(renderer._headings, [(4, "not heading")]) - @mock.patch("mistletoe.block_token.List") + @mock.patch("mistletoe.block_tokens.List") def test_get_toc(self, MockList): headings = [ (1, "heading 1"), diff --git a/test/test_latex_token.py b/test/test_latex_token.py index 17c5955..f1c0f21 100644 --- a/test/test_latex_token.py +++ 
b/test/test_latex_token.py @@ -1,7 +1,7 @@ import unittest -from mistletoe.span_token import tokenize_inner +from mistletoe.span_tokenizer import tokenize_span from mistletoe.latex_token import Math -from mistletoe.latex_renderer import LaTeXRenderer +from mistletoe.renderers.latex import LaTeXRenderer class TestLaTeXToken(unittest.TestCase): @@ -11,6 +11,6 @@ def setUp(self): self.addCleanup(self.renderer.__exit__, None, None, None) def test_span(self): - token = next(iter(tokenize_inner("$ 1 + 2 = 3 $"))) + token = next(iter(tokenize_span("$ 1 + 2 = 3 $"))) self.assertIsInstance(token, Math) self.assertEqual(token.content, "$ 1 + 2 = 3 $") diff --git a/test/test_core_tokens.py b/test/test_nested_tokenizer.py similarity index 97% rename from test/test_core_tokens.py rename to test/test_nested_tokenizer.py index a3eb336..2edc31e 100644 --- a/test/test_core_tokens.py +++ b/test/test_nested_tokenizer.py @@ -1,5 +1,5 @@ from unittest import TestCase -from mistletoe.core_tokens import ( +from mistletoe.nested_tokenizer import ( MatchObj, Delimiter, follows, @@ -11,7 +11,7 @@ ) -class TestCoreTokens(TestCase): +class TestNestedTokenizer(TestCase): def test_match_obj(self): match = MatchObj(0, 2, (0, 1, "a"), (1, 2, "b")) self.assertEqual(match.start(), 0) diff --git a/test/test_html_renderer.py b/test/test_renderers/test_html_renderer.py similarity index 74% rename from test/test_html_renderer.py rename to test/test_renderers/test_html_renderer.py index 945914b..f625b9d 100644 --- a/test/test_html_renderer.py +++ b/test/test_renderers/test_html_renderer.py @@ -1,5 +1,6 @@ from unittest import TestCase, mock -from mistletoe.html_renderer import HTMLRenderer +from mistletoe.renderers.html import HTMLRenderer +from mistletoe import Document class TestRenderer(TestCase): @@ -29,9 +30,9 @@ def test_emphasis(self): self._test_token("Emphasis", "inner") def test_inline_code(self): - from mistletoe.span_token import tokenize_inner + from mistletoe.span_tokenizer import 
tokenize_span - rendered = self.renderer.render(tokenize_inner("`foo`")[0]) + rendered = self.renderer.render(tokenize_span("`foo`")[0]) self.assertEqual(rendered, "foo") def test_strikethrough(self): @@ -74,29 +75,29 @@ def test_paragraph(self): self._test_token("Paragraph", "

inner

") def test_block_code(self): - from mistletoe.block_token import tokenize + from mistletoe.block_tokenizer import tokenize_main - rendered = self.renderer.render(tokenize(["```sh\n", "foo\n", "```\n"])[0]) + rendered = self.renderer.render(tokenize_main(["```sh\n", "foo\n", "```\n"])[0]) output = '
foo\n
' self.assertEqual(rendered, output) def test_block_code_no_language(self): - from mistletoe.block_token import tokenize + from mistletoe.block_tokenizer import tokenize_main - rendered = self.renderer.render(tokenize(["```\n", "foo\n", "```\n"])[0]) + rendered = self.renderer.render(tokenize_main(["```\n", "foo\n", "```\n"])[0]) output = "
foo\n
" self.assertEqual(rendered, output) def test_list(self): output = "
    \n\n
" - self._test_token("List", output, start=None) + self._test_token("List", output, start_at=None) def test_list_item(self): output = "
  • " self._test_token("ListItem", output) def test_table_with_header(self): - func_path = "mistletoe.html_renderer.HTMLRenderer.render_table_row" + func_path = "mistletoe.renderers.html.HTMLRenderer.render_table_row" with mock.patch(func_path, autospec=True) as mock_func: mock_func.return_value = "row" output = ( @@ -108,7 +109,7 @@ def test_table_with_header(self): self._test_token("Table", output) def test_table_without_header(self): - func_path = "mistletoe.html_renderer.HTMLRenderer.render_table_row" + func_path = "mistletoe.renderers.html.HTMLRenderer.render_table_row" with mock.patch(func_path, autospec=True) as mock_func: mock_func.return_value = "row" output = "\n\ninner\n
    " @@ -140,25 +141,35 @@ def test_line_break(self): self._test_token("LineBreak", "
    \n", children=False, soft=False) def test_document(self): - self._test_token("Document", "", footnotes={}) + self._test_token("Document", "", link_definitions={}) -class TestHTMLRendererFootnotes(TestCase): +class TestHTMLRendererLinkDefinitions(TestCase): def setUp(self): self.renderer = HTMLRenderer() self.renderer.__enter__() self.addCleanup(self.renderer.__exit__, None, None, None) - def test_footnote_image(self): + def test_link_definition_image(self): from mistletoe import Document - token = Document(["![alt][foo]\n", "\n", '[foo]: bar "title"\n']) + token = Document.read(["![alt][foo]\n", "\n", '[foo]: bar "title"\n']) output = '

    alt

    \n' self.assertEqual(self.renderer.render(token), output) - def test_footnote_link(self): - from mistletoe import Document + def test_link_definition(self): + token = Document.read(["[name][foo]\n", "\n", "[foo]: target\n"]) + output = '

    name

    \n' + self.assertEqual(self.renderer.render(token), output) - token = Document(["[name][foo]\n", "\n", "[foo]: target\n"]) + def test_link_definition_1st(self): + token = Document.read(["[foo]: target\n", "\n", "[name][foo]\n"]) output = '

    name

    \n' self.assertEqual(self.renderer.render(token), output) + + def test_link_definition_2reads(self): + """The link definitions should not persist between parses.""" + token = Document.read(["[name][foo]\n", "\n", "[foo]: target\n"]) + token = Document.read(["[name][foo]\n", "\n"]) + output = "

    [name][foo]

    \n" + self.assertEqual(self.renderer.render(token), output) diff --git a/test/test_renderers/test_json_renderer.py b/test/test_renderers/test_json_renderer.py new file mode 100644 index 0000000..d323dcf --- /dev/null +++ b/test/test_renderers/test_json_renderer.py @@ -0,0 +1,103 @@ +from textwrap import dedent + +from mistletoe import Document +from mistletoe.renderers.json import ast_to_json, JsonRenderer +from mistletoe.latex_token import Math + + +def test_basic(data_regression): + doc = Document.read( + dedent( + """\ + --- + a: 1 + --- + + Setext Header + ============== + + # Atx Header + + __*nested strong emphasis*__ + + + - unordered list + + 1. ordered list + + > quote *emphasis* + + [link][ref] + + [ref]: abc "xyz" + + ```python + code = 1 + ``` + + block + quote + + --- + + a | b + --- | ---: + 1 | 2 + + """ + ), + store_definitions=True, + front_matter=True, + ) + output = ast_to_json(doc) + data_regression.check(output) + + +def test_link_references(data_regression): + doc = Document.read(["[bar][baz]\n", "\n", "[baz]: spam\n"], store_definitions=True) + output = ast_to_json(doc) + data_regression.check(output) + + +def test_extra_tokens(): + """Extra tokens should persist between multiple calls of the same renderer, + but be reset if initiating a new renderer. 
+ """ + output = { + "type": "Document", + "front_matter": None, + "link_definitions": {}, + "children": [ + { + "type": "Paragraph", + "children": [{"type": "RawText", "content": "$b$"}], + "position": [1, 1], + } + ], + } + output_math = { + "type": "Document", + "front_matter": None, + "link_definitions": {}, + "children": [ + { + "type": "Paragraph", + "children": [{"type": "Math", "content": "$b$"}], + "position": [1, 1], + } + ], + } + + with JsonRenderer() as render: + output = render.render(Document.read(["$b$"]), as_string=False) + assert output == output + renderer = JsonRenderer(Math) + with renderer as render: + output = render.render(Document.read(["$b$"]), as_string=False) + assert output == output_math + with renderer as render: + output = render.render(Document.read(["$b$"]), as_string=False) + assert output == output_math + with JsonRenderer() as render: + output = render.render(Document.read(["$b$"]), as_string=False) + assert output == output diff --git a/test/test_renderers/test_json_renderer/test_basic.yml b/test/test_renderers/test_json_renderer/test_basic.yml new file mode 100644 index 0000000..ba9e711 --- /dev/null +++ b/test/test_renderers/test_json_renderer/test_basic.yml @@ -0,0 +1,207 @@ +children: +- children: + - content: Setext Header + type: RawText + level: 1 + position: + - 5 + - 6 + type: SetextHeading +- children: + - content: Atx Header + type: RawText + level: 1 + position: + - 8 + - 8 + type: Heading +- children: + - children: + - children: + - content: nested strong emphasis + type: RawText + type: Emphasis + type: Strong + - content: '' + soft: true + type: LineBreak + - content: + type: RawText + position: + - 10 + - 11 + type: Paragraph +- children: + - children: + - children: + - content: unordered list + type: RawText + position: + - 13 + - 13 + type: Paragraph + leader: '-' + loose: false + next_marker: + - 3 + - '1.' 
+ position: + - 12 + - 14 + prepend: 2 + type: ListItem + loose: false + position: + - 12 + - 14 + start_at: null + type: List +- children: + - children: + - children: + - content: ordered list + type: RawText + position: + - 15 + - 15 + type: Paragraph + leader: '1.' + loose: false + next_marker: null + position: + - 14 + - 15 + prepend: 3 + type: ListItem + loose: false + position: + - 14 + - 15 + start_at: 1 + type: List +- children: + - children: + - content: 'quote ' + type: RawText + - children: + - content: emphasis + type: RawText + type: Emphasis + position: + - 18 + - 18 + type: Paragraph + position: + - 17 + - 17 + type: Quote +- children: + - children: + - content: link + type: RawText + target: abc + title: xyz + type: Link + position: + - 19 + - 19 + type: Paragraph +- position: + - 21 + - 21 + type: LinkDefinition +- children: + - content: 'code = 1 + + ' + type: RawText + language: python + position: + - 23 + - 25 + type: CodeFence +- children: + - content: 'block + + quote + + ' + type: RawText + language: '' + position: + - 27 + - 28 + type: BlockCode +- position: + - 30 + - 30 + type: ThematicBreak +- children: + - children: + - align: null + children: + - content: '1' + type: RawText + position: + - 33 + - 33 + type: TableCell + - align: 1 + children: + - content: '2' + type: RawText + position: + - 33 + - 33 + type: TableCell + position: + - 33 + - 33 + row_align: + - null + - 1 + type: TableRow + column_align: + - null + - 1 + header: + children: + - align: null + children: + - content: a + type: RawText + position: + - 31 + - 31 + type: TableCell + - align: 1 + children: + - content: b + type: RawText + position: + - 31 + - 31 + type: TableCell + position: + - 31 + - 31 + row_align: + - null + - 1 + type: TableRow + position: + - 31 + - 34 + type: Table +front_matter: + content: 'a: 1 + + ' + position: + - 0 + - 3 +link_definitions: + ref: + - abc + - xyz +type: Document diff --git 
a/test/test_renderers/test_json_renderer/test_link_references.yml b/test/test_renderers/test_json_renderer/test_link_references.yml new file mode 100644 index 0000000..5dc8127 --- /dev/null +++ b/test/test_renderers/test_json_renderer/test_link_references.yml @@ -0,0 +1,22 @@ +children: +- children: + - children: + - content: bar + type: RawText + target: spam + title: '' + type: Link + position: + - 1 + - 1 + type: Paragraph +- position: + - 3 + - 3 + type: LinkDefinition +front_matter: null +link_definitions: + baz: + - spam + - '' +type: Document diff --git a/test/test_latex_renderer.py b/test/test_renderers/test_latex_renderer.py similarity index 87% rename from test/test_latex_renderer.py rename to test/test_renderers/test_latex_renderer.py index 488db2d..d8d4c4f 100644 --- a/test/test_latex_renderer.py +++ b/test/test_renderers/test_latex_renderer.py @@ -1,5 +1,5 @@ from unittest import TestCase, mock -from mistletoe.latex_renderer import LaTeXRenderer +from mistletoe.renderers.latex import LaTeXRenderer class TestLaTeXRenderer(TestCase): @@ -65,20 +65,20 @@ def test_paragraph(self): self._test_token("Paragraph", output) def test_block_code(self): - func_path = "mistletoe.latex_renderer.LaTeXRenderer.render_raw_text" + func_path = "mistletoe.renderers.latex.LaTeXRenderer.render_raw_text" with mock.patch(func_path, return_value="inner"): output = "\n\\begin{lstlisting}[language=sh]\ninner\\end{lstlisting}\n" self._test_token("BlockCode", output, language="sh") def test_list(self): output = "\\begin{itemize}\ninner\\end{itemize}\n" - self._test_token("List", output, start=None) + self._test_token("List", output, start_at=None) def test_list_item(self): self._test_token("ListItem", "\\item inner\n") def test_table_with_header(self): - func_path = "mistletoe.latex_renderer.LaTeXRenderer.render_table_row" + func_path = "mistletoe.renderers.latex.LaTeXRenderer.render_table_row" with mock.patch(func_path, autospec=True, return_value="row\n"): output = 
"\\begin{tabular}{l c r}\nrow\n\\hline\ninner\\end{tabular}\n" self._test_token("Table", output, column_align=[None, 0, 1]) @@ -106,16 +106,16 @@ def test_document(self): "inner" "\\end{document}\n" ) - self._test_token("Document", output, footnotes={}) + self._test_token("Document", output, link_definitions={}) -class TestLaTeXFootnotes(TestCase): +class TestLaTeXlink_definitions(TestCase): def setUp(self): self.renderer = LaTeXRenderer() self.renderer.__enter__() self.addCleanup(self.renderer.__exit__, None, None, None) - def test_footnote_image(self): + def test_link_definition_image(self): from mistletoe import Document raw = ["![alt][foo]\n", "\n", '[foo]: bar "title"\n'] @@ -128,9 +128,9 @@ def test_footnote_image(self): "\n" "\\end{document}\n" ) - self.assertEqual(self.renderer.render(Document(raw)), target) + self.assertEqual(self.renderer.render(Document.read(raw)), target) - def test_footnote_link(self): + def test_link_definition(self): from mistletoe import Document raw = ["[name][key]\n", "\n", "[key]: target\n"] @@ -143,4 +143,4 @@ def test_footnote_link(self): "\n" "\\end{document}\n" ) - self.assertEqual(self.renderer.render(Document(raw)), target) + self.assertEqual(self.renderer.render(Document.read(raw)), target) diff --git a/test/test_samples/__init__.py b/test/test_samples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/benchmark.py b/test/test_samples/benchmark.py similarity index 90% rename from test/benchmark.py rename to test/test_samples/benchmark.py index c166a94..fc0aabb 100644 --- a/test/benchmark.py +++ b/test/test_samples/benchmark.py @@ -6,7 +6,7 @@ from time import perf_counter -TEST_FILE = "test/samples/syntax.md" +TEST_FILE = "syntax.md" TIMES = 1000 @@ -16,7 +16,7 @@ def inner(): try: package = import_module(package_name) except ImportError: - return "not available." + return f"not available: '{package_name}'." 
start = perf_counter() for i in range(TIMES): @@ -33,7 +33,7 @@ def inner(): @benchmark("markdown") def run_markdown(package): with open(TEST_FILE, "r") as fin: - return package.markdown(fin.read(), ["extra"]) + return package.markdown(fin.read(), extensions=["extra"]) @benchmark("mistune") @@ -42,7 +42,7 @@ def run_mistune(package): return package.markdown(fin.read()) -@benchmark("CommonMark") +@benchmark("commonmark") def run_commonmark(package): with open(TEST_FILE, "r") as fin: return package.commonmark(fin.read()) diff --git a/test/samples/jquery.md b/test/test_samples/jquery.md similarity index 100% rename from test/samples/jquery.md rename to test/test_samples/jquery.md diff --git a/test/samples/syntax.md b/test/test_samples/syntax.md similarity index 99% rename from test/samples/syntax.md rename to test/test_samples/syntax.md index 0a5d63d..eedf163 100644 --- a/test/samples/syntax.md +++ b/test/test_samples/syntax.md @@ -254,7 +254,7 @@ wrap the text and put a `>` before every line: > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - > + > > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse > id sem consectetuer libero luctus adipiscing. @@ -281,12 +281,12 @@ Blockquotes can contain other Markdown elements, including headers, lists, and code blocks: > ## This is a header. - > + > > 1. This is the first list item. > 2. This is the second list item. - > + > > Here's some example code: - > + > > return shell_exec("echo $input | $markdown_script"); Any decent text editor should make email-style quoting easy. For @@ -567,7 +567,7 @@ Will produce: If you're referring to a local resource on the same server, you can use relative paths: - See my [About](/about/) page for details. + See my [About](/about/) page for details. 
Reference-style links use a second set of square brackets, inside which you place a label of your choosing to identify the link: @@ -641,7 +641,7 @@ multiple words in the link text: Visit [Daring Fireball][] for more information. And then define the link: - + [Daring Fireball]: http://daringfireball.net/ Link definitions can be placed anywhere in your Markdown document. I @@ -765,13 +765,13 @@ one after the opening, one before the closing. This allows you to place literal backtick characters at the beginning or end of a code span: A single backtick in a code span: `` ` `` - + A backtick-delimited string in a code span: `` `foo` `` will produce:

    A single backtick in a code span: `

    - +

    A backtick-delimited string in a code span: `foo`

    With a code span, ampersands and angle brackets are encoded as HTML @@ -842,7 +842,7 @@ use regular HTML `` tags. Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: - + Markdown will turn this into: http://example.com/ @@ -894,4 +894,3 @@ Markdown provides backslash escapes for the following characters: - minus sign (hyphen) . dot ! exclamation mark - diff --git a/test/test_samples/test_samples.py b/test/test_samples/test_samples.py new file mode 100644 index 0000000..c8015cb --- /dev/null +++ b/test/test_samples/test_samples.py @@ -0,0 +1,15 @@ +import os + +from mistletoe import markdown + +PATH = os.path.dirname(__file__) + + +def test_syntax(file_regression): + with open(os.path.join(PATH, "syntax.md")) as handle: + file_regression.check(markdown(handle.read()), extension=".html") + + +def test_jquery(file_regression): + with open(os.path.join(PATH, "jquery.md")) as handle: + file_regression.check(markdown(handle.read()), extension=".html") diff --git a/test/test_samples/test_samples/test_jquery.html b/test/test_samples/test_samples/test_jquery.html new file mode 100644 index 0000000..261963f --- /dev/null +++ b/test/test_samples/test_samples/test_jquery.html @@ -0,0 +1,228 @@ +

    jQuery — New Wave JavaScript

    +

    Contribution Guides

    +

    In the spirit of open source software development, jQuery always encourages community code contribution. To help you get started and before you jump into writing code, be sure to read these important contribution guidelines thoroughly:

    +
      +
    1. Getting Involved
    2. +
    3. Core Style Guide
    4. +
    5. Writing Code for jQuery Foundation Projects
    6. +
    +

    Environments in which to use jQuery

    +
      +
    • Browser support
    • +
    • jQuery also supports Node, browser extensions, and other non-browser environments.
    • +
    +

    What you need to build your own jQuery

    +

    In order to build jQuery, you need to have the latest Node.js/npm and git 1.7 or later. Earlier versions might work, but are not supported.

    +

    For Windows, you have to download and install git and Node.js.

    +

    OS X users should install Homebrew. Once Homebrew is installed, run brew install git to install git, +and brew install node to install Node.js.

    +

    Linux/BSD users should use their appropriate package managers to install git and Node.js, or build from source +if you swing that way. Easy-peasy.

    +

    How to build your own jQuery

    +

    Clone a copy of the main jQuery git repo by running:

    +
    git clone git://github.com/jquery/jquery.git
    +
    +

    Enter the jquery directory and run the build script:

    +
    cd jquery && npm run build
    +
    +

    The built version of jQuery will be put in the dist/ subdirectory, along with the minified copy and associated map file.

    +

    If you want to create custom build or help with jQuery development, it would be better to install grunt command line interface as a global package:

    +
    npm install -g grunt-cli
    +
    +

    Make sure you have grunt installed by testing:

    +
    grunt -V
    +
    +

    Now by running the grunt command, in the jquery directory, you can build a full version of jQuery, just like with an npm run build command:

    +
    grunt
    +
    +

    There are many other tasks available for jQuery Core:

    +
    grunt -help
    +
    +

    Modules

    +

    Special builds can be created that exclude subsets of jQuery functionality. +This allows for smaller custom builds when the builder is certain that those parts of jQuery are not being used. +For example, an app that only used JSONP for $.ajax() and did not need to calculate offsets or positions of elements could exclude the offset and ajax/xhr modules.

    +

    Any module may be excluded except for core, and selector. To exclude a module, pass its path relative to the src folder (without the .js extension).

    +

    Some example modules that can be excluded are:

    +
      +
    • ajax: All AJAX functionality: $.ajax(), $.get(), $.post(), $.ajaxSetup(), .load(), transports, and ajax event shorthands such as .ajaxStart().
    • +
    • ajax/xhr: The XMLHTTPRequest AJAX transport only.
    • +
    • ajax/script: The <script> AJAX transport only; used to retrieve scripts.
    • +
    • ajax/jsonp: The JSONP AJAX transport only; depends on the ajax/script transport.
    • +
    • css: The .css() method. Also removes all modules depending on css (including effects, dimensions, and offset).
    • +
    • css/showHide: Non-animated .show(), .hide() and .toggle(); can be excluded if you use classes or explicit .css() calls to set the display property. Also removes the effects module.
    • +
    • deprecated: Methods documented as deprecated but not yet removed.
    • +
    • dimensions: The .width() and .height() methods, including inner- and outer- variations.
    • +
    • effects: The .animate() method and its shorthands such as .slideUp() or .hide("slow").
    • +
    • event: The .on() and .off() methods and all event functionality. Also removes event/alias.
    • +
    • event/alias: All event attaching/triggering shorthands like .click() or .mouseover().
    • +
    • event/focusin: Cross-browser support for the focusin and focusout events.
    • +
    • event/trigger: The .trigger() and .triggerHandler() methods. Used by alias and focusin modules.
    • +
    • offset: The .offset(), .position(), .offsetParent(), .scrollLeft(), and .scrollTop() methods.
    • +
    • wrap: The .wrap(), .wrapAll(), .wrapInner(), and .unwrap() methods.
    • +
    • core/ready: Exclude the ready module if you place your scripts at the end of the body. Any ready callbacks bound with jQuery() will simply be called immediately. However, jQuery(document).ready() will not be a function and .on("ready", ...) or similar will not be triggered.
    • +
    • deferred: Exclude jQuery.Deferred. This also removes jQuery.Callbacks. Note that modules that depend on jQuery.Deferred(AJAX, effects, core/ready) will not be removed and will still expect jQuery.Deferred to be there. Include your own jQuery.Deferred implementation or exclude those modules as well (grunt custom:-deferred,-ajax,-effects,-core/ready).
    • +
    • exports/global: Exclude the attachment of global jQuery variables ($ and jQuery) to the window.
    • +
    • exports/amd: Exclude the AMD definition.
    • +
    +

    As a special case, you may also replace Sizzle by using a special flag grunt custom:-sizzle.

    +
      +
    • sizzle: The Sizzle selector engine. When this module is excluded, it is replaced by a rudimentary selector engine based on the browser's querySelectorAll method that does not support jQuery selector extensions or enhanced semantics. See the selector-native.js file for details.
    • +
    +

    Note: Excluding Sizzle will also exclude all jQuery selector extensions (such as effects/animatedSelector and css/hiddenVisibleSelectors).

    +

    The build process shows a message for each dependent module it excludes or includes.

    +
    AMD name
    +

    As an option, you can set the module name for jQuery's AMD definition. By default, it is set to "jquery", which plays nicely with plugins and third-party libraries, but there may be cases where you'd like to change this. Simply set the "amd" option:

    +
    grunt custom --amd="custom-name"
    +
    +

    Or, to define anonymously, set the name to an empty string.

    +
    grunt custom --amd=""
    +
    +

    Custom Build Examples

    +

    To create a custom build, first check out the version:

    +
    git pull; git checkout VERSION
    +
    +

    Where VERSION is the version you want to customize. Then, make sure all Node dependencies are installed:

    +
    npm install
    +
    +

    Create the custom build using the grunt custom option, listing the modules to be excluded.

    +

    Exclude all ajax functionality:

    +
    grunt custom:-ajax
    +
    +

    Excluding css removes modules depending on CSS: effects, offset, dimensions.

    +
    grunt custom:-css
    +
    +

    Exclude a bunch of modules:

    +
    grunt custom:-ajax,-css,-deprecated,-dimensions,-effects,-event/alias,-offset,-wrap
    +
    +

    For questions or requests regarding custom builds, please start a thread on the Developing jQuery Core section of the forum. Due to the combinatorics and custom nature of these builds, they are not regularly tested in jQuery's unit test process. The non-Sizzle selector engine currently does not pass unit tests because it is missing too much essential functionality.

    +

    Running the Unit Tests

    +

    Make sure you have the necessary dependencies:

    +
    npm install
    +
    +

    Start grunt watch or npm start to auto-build jQuery as you work:

    +
    grunt watch
    +
    +

    Run the unit tests with a local server that supports PHP. Ensure that you run the site from the root directory, not the "test" directory. No database is required. Pre-configured php local servers are available for Windows and Mac. Here are some options:

    + +

    Building to a different directory

    +

    To copy the built jQuery files from /dist to another directory:

    +
    grunt && grunt dist:/path/to/special/location/
    +
    +

    With this example, the output files would be:

    +
    /path/to/special/location/jquery.js
    +/path/to/special/location/jquery.min.js
    +
    +

    To add a permanent copy destination, create a file in dist/ called ".destination.json". Inside the file, paste and customize the following:

    +
    
    +{
    +  "/Absolute/path/to/other/destination": true
    +}
    +
    +

    Additionally, both methods can be combined.

    +

    Essential Git

    +

    As the source code is handled by the Git version control system, it's useful to know some features used.

    +

    Cleaning

    +

    If you want to purge your working directory back to the status of upstream, the following commands can be used (remember everything you've worked on is gone after these):

    +
    git reset --hard upstream/master
    +git clean -fdx
    +
    +

    Rebasing

    +

    For feature/topic branches, you should always use the --rebase flag to git pull, or if you are usually handling many temporary "to be in a github pull request" branches, run the following to automate this:

    +
    git config branch.autosetuprebase local
    +
    +

    (see man git-config for more information)

    +

    Handling merge conflicts

    +

    If you're getting merge conflicts when merging, instead of editing the conflicted files manually, you can use the feature +git mergetool. Even though the default tool xxdiff looks awful/old, it's rather useful.

    +

    The following are some commands that can be used there:

    +
      +
    • Ctrl + Alt + M - automerge as much as possible
    • +
    • b - jump to next merge conflict
    • +
    • s - change the order of the conflicted lines
    • +
    • u - undo a merge
    • +
    • left mouse button - mark a block to be the winner
    • +
    • middle mouse button - mark a line to be the winner
    • +
    • Ctrl + S - save
    • +
    • Ctrl + Q - quit
    • +
    +

    QUnit Reference

    +

    Test methods

    +
    expect( numAssertions );
    +stop();
    +start();
    +
    +

    Note: QUnit's eventual addition of an argument to stop/start is ignored in this test suite so that start and stop can be passed as callbacks without worrying about their parameters.

    +

    Test assertions

    +
    ok( value, [message] );
    +equal( actual, expected, [message] );
    +notEqual( actual, expected, [message] );
    +deepEqual( actual, expected, [message] );
    +notDeepEqual( actual, expected, [message] );
    +strictEqual( actual, expected, [message] );
    +notStrictEqual( actual, expected, [message] );
    +throws( block, [expected], [message] );
    +
    +

    Test Suite Convenience Methods Reference (See test/data/testinit.js)

    +

    Returns an array of elements with the given IDs

    +
    q( ... );
    +
    +

    Example:

    +
    q("main", "foo", "bar");
    +
    +=> [ div#main, span#foo, input#bar ]
    +
    +

    Asserts that a selection matches the given IDs

    +
    t( testName, selector, [ "array", "of", "ids" ] );
    +
    +

    Example:

    +
    t("Check for something", "//[a]", ["foo", "bar"]);
    +
    +

    Fires a native DOM event without going through jQuery

    +
    fireNative( node, eventType )
    +
    +

    Example:

    +
    fireNative( jQuery("#elem")[0], "click" );
    +
    +

    Add random number to url to stop caching

    +
    url( "some/url.php" );
    +
    +

    Example:

    +
    url("data/test.html");
    +
    +=> "data/test.html?10538358428943"
    +
    +
    +url("data/test.php?foo=bar");
    +
    +=> "data/test.php?foo=bar&10538358345554"
    +
    +

    Run tests in an iframe

    +

    Some tests may require a document other than the standard test fixture, and +these can be run in a separate iframe. The actual test code and assertions +remain in jQuery's main test files; only the minimal test fixture markup +and setup code should be placed in the iframe file.

    +
    testIframe( testName, fileName,
    +  function testCallback(
    +      assert, jQuery, window, document,
    +	  [ additional args ] ) {
    +	...
    +  } );
    +
    +

    This loads a page, constructing a url with fileName "./data/" + fileName. +The iframed page determines when the callback occurs in the test by +including the "/test/data/iframeTest.js" script and calling +startIframeTest( [ additional args ] ) when appropriate. Often this +will be after either document ready or window.onload fires.

    +

    The testCallback receives the QUnit assert object created by testIframe +for this test, followed by the global jQuery, window, and document from +the iframe. If the iframe code passes any arguments to startIframeTest, +they follow the document argument.

    +

    Questions?

    +

    If you have any questions, please feel free to ask on the +Developing jQuery Core forum or in #jquery on irc.freenode.net.

    diff --git a/test/test_samples/test_samples/test_syntax.html b/test/test_samples/test_samples/test_syntax.html new file mode 100644 index 0000000..65015ab --- /dev/null +++ b/test/test_samples/test_samples/test_syntax.html @@ -0,0 +1,717 @@ +

    Markdown: Syntax

    + + +

    Note: This document is itself written using Markdown; you +can see the source for it by adding '.text' to the URL.

    +
    +

    Overview

    +

    Philosophy

    +

    Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

    +

    Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including [Setext] 1, [atx] 2, [Textile] 3, [reStructuredText] 4, +[Grutatext] 5, and [EtText] 6 -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email.

    +

    To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like *emphasis*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email.

    +

    Inline HTML

    +

    Markdown's syntax is intended for one purpose: to be used as a +format for writing for the web.

    +

    Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is not to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a publishing format; Markdown is a writing +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text.

    +

    For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags.

    +

    The only restrictions are that block-level HTML elements -- e.g. <div>, +<table>, <pre>, <p>, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) <p> tags around HTML block-level tags.

    +

    For example, to add an HTML table to a Markdown article:

    +
    This is a regular paragraph.
    +
    +<table>
    +    <tr>
    +        <td>Foo</td>
    +    </tr>
    +</table>
    +
    +This is another regular paragraph.
    +
    +

    Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style *emphasis* inside an +HTML block.

    +

    Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML <a> or <img> tags instead of Markdown's +link or image syntax, go right ahead.

    +

    Unlike block-level HTML tags, Markdown syntax is processed within +span-level tags.

    +

    Automatic Escaping for Special Characters

    +

    In HTML, there are two characters that demand special treatment: < +and &. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. &lt;, and +&amp;.

    +

    Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write 'AT&amp;T'. You even need to +escape ampersands within URLs. Thus, if you want to link to:

    +
    http://images.google.com/images?num=30&q=larry+bird
    +
    +

    you need to encode the URL as:

    +
    http://images.google.com/images?num=30&amp;q=larry+bird
    +
    +

    in your anchor tag href attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites.

    +

    Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into &amp;.

    +

    So, if you want to include a copyright symbol in your article, you can write:

    +
    &copy;
    +
    +

    and Markdown will leave it alone. But if you write:

    +
    AT&T
    +
    +

    Markdown will translate it to:

    +
    AT&amp;T
    +
    +

    Similarly, because Markdown supports inline HTML, if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write:

    +
    4 < 5
    +
    +

    Markdown will translate it to:

    +
    4 &lt; 5
    +
    +

    However, inside Markdown code spans and blocks, angle brackets and +ampersands are always encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single < +and & in your example code needs to be escaped.)

    +
    +

    Block Elements

    +

    Paragraphs and Line Breaks

    +

    A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be indented with spaces or tabs.

    +

    The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a <br /> tag.

    +

    When you do want to insert a <br /> break tag using Markdown, you +end a line with two or more spaces, then type return.

    +

    Yes, this takes a tad more effort to create a <br />, but a simplistic +"every line break is a <br />" rule wouldn't work for Markdown. +Markdown's email-style blockquoting and multi-paragraph list items +work best -- and look better -- when you format them with hard breaks.

    + +

    Markdown supports two styles of headers, [Setext] 1 and [atx] 2.

    +

    Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example:

    +
    This is an H1
    +=============
    +
    +This is an H2
    +-------------
    +
    +

    Any number of underlining ='s or -'s will work.

    +

    Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example:

    +
    # This is an H1
    +
    +## This is an H2
    +
    +###### This is an H6
    +
    +

    Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) :

    +
    # This is an H1 #
    +
    +## This is an H2 ##
    +
    +### This is an H3 ######
    +
    +

    Blockquotes

    +

    Markdown uses email-style > characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a > before every line:

    +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +>
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +> id sem consectetuer libero luctus adipiscing.
    +
    +

    Markdown allows you to be lazy and only put the > before the first +line of a hard-wrapped paragraph:

    +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +id sem consectetuer libero luctus adipiscing.
    +
    +

    Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of >:

    +
    > This is the first level of quoting.
    +>
    +> > This is nested blockquote.
    +>
    +> Back to the first level.
    +
    +

    Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks:

    +
    > ## This is a header.
    +>
    +> 1.   This is the first list item.
    +> 2.   This is the second list item.
    +>
    +> Here's some example code:
    +>
    +>     return shell_exec("echo $input | $markdown_script");
    +
    +

    Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu.

    +

    Lists

    +

    Markdown supports ordered (numbered) and unordered (bulleted) lists.

    +

    Unordered lists use asterisks, pluses, and hyphens -- interchangeably +-- as list markers:

    +
    *   Red
    +*   Green
    +*   Blue
    +
    +

    is equivalent to:

    +
    +   Red
    ++   Green
    ++   Blue
    +
    +

    and:

    +
    -   Red
    +-   Green
    +-   Blue
    +
    +

    Ordered lists use numbers followed by periods:

    +
    1.  Bird
    +2.  McHale
    +3.  Parish
    +
    +

    It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is:

    +
    <ol>
    +<li>Bird</li>
    +<li>McHale</li>
    +<li>Parish</li>
    +</ol>
    +
    +

    If you instead wrote the list in Markdown like this:

    +
    1.  Bird
    +1.  McHale
    +1.  Parish
    +
    +

    or even:

    +
    3. Bird
    +1. McHale
    +8. Parish
    +
    +

    you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to.

    +

    If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number.

    +

    List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab.

    +

    To make lists look nice, you can wrap items with hanging indents:

    +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +    viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +    Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    +

    But if you want to be lazy, you don't have to:

    +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    +

    If list items are separated by blank lines, Markdown will wrap the +items in <p> tags in the HTML output. For example, this input:

    +
    *   Bird
    +*   Magic
    +
    +

    will turn into:

    +
    <ul>
    +<li>Bird</li>
    +<li>Magic</li>
    +</ul>
    +
    +

    But this:

    +
    *   Bird
    +
    +*   Magic
    +
    +

    will turn into:

    +
    <ul>
    +<li><p>Bird</p></li>
    +<li><p>Magic</p></li>
    +</ul>
    +
    +

    List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be indented by either 4 spaces +or one tab:

    +
    1.  This is a list item with two paragraphs. Lorem ipsum dolor
    +    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
    +    mi posuere lectus.
    +
    +    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
    +    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
    +    sit amet velit.
    +
    +2.  Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    +

    It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy:

    +
    *   This is a list item with two paragraphs.
    +
    +    This is the second paragraph in the list item. You're
    +only required to indent the first line. Lorem ipsum dolor
    +sit amet, consectetuer adipiscing elit.
    +
    +*   Another item in the same list.
    +
    +

    To put a blockquote within a list item, the blockquote's > +delimiters need to be indented:

    +
    *   A list item with a blockquote:
    +
    +    > This is a blockquote
    +    > inside a list item.
    +
    +

    To put a code block within a list item, the code block needs +to be indented twice -- 8 spaces or two tabs:

    +
    *   A list item with a code block:
    +
    +        <code goes here>
    +
    +

    It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this:

    +
    1986. What a great season.
    +
    +

    In other words, a number-period-space sequence at the beginning of a +line. To avoid this, you can backslash-escape the period:

    +
    1986\. What a great season.
    +
    +

    Code Blocks

    +

    Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both <pre> and <code> tags.

    +

    To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input:

    +
    This is a normal paragraph:
    +
    +    This is a code block.
    +
    +

    Markdown will generate:

    +
    <p>This is a normal paragraph:</p>
    +
    +<pre><code>This is a code block.
    +</code></pre>
    +
    +

    One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this:

    +
    Here is an example of AppleScript:
    +
    +    tell application "Foo"
    +        beep
    +    end tell
    +
    +

    will turn into:

    +
    <p>Here is an example of AppleScript:</p>
    +
    +<pre><code>tell application "Foo"
    +    beep
    +end tell
    +</code></pre>
    +
    +

    A code block continues until it reaches a line that is not indented +(or the end of the article).

    +

    Within a code block, ampersands (&) and angle brackets (< and >) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this:

    +
        <div class="footer">
    +        &copy; 2004 Foo Corporation
    +    </div>
    +
    +

    will turn into:

    +
    <pre><code>&lt;div class="footer"&gt;
    +    &amp;copy; 2004 Foo Corporation
    +&lt;/div&gt;
    +</code></pre>
    +
    +

    Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax.

    +

    Horizontal Rules

    +

    You can produce a horizontal rule tag (<hr />) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule:

    +
    * * *
    +
    +***
    +
    +*****
    +
    +- - -
    +
    +---------------------------------------
    +
    +
    +

    Span Elements

    + +

    Markdown supports two styles of links: inline and reference.

    +

    In both styles, the link text is delimited by [square brackets].

    +

    To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an optional +title for the link, surrounded in quotes. For example:

    +
    This is [an example](http://example.com/ "Title") inline link.
    +
    +[This link](http://example.net/) has no title attribute.
    +
    +

    Will produce:

    +
    <p>This is <a href="http://example.com/" title="Title">
    +an example</a> inline link.</p>
    +
    +<p><a href="http://example.net/">This link</a> has no
    +title attribute.</p>
    +
    +

    If you're referring to a local resource on the same server, you can +use relative paths:

    +
    See my [About](/about/) page for details.
    +
    +

    Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link:

    +
    This is [an example][id] reference-style link.
    +
    +

    You can optionally use a space to separate the sets of brackets:

    +
    This is [an example] [id] reference-style link.
    +
    +

    Then, anywhere in the document, you define your link label like this, +on a line by itself:

    +
    [id]: http://example.com/  "Optional Title Here"
    +
    +

    That is:

    +
      +
    • Square brackets containing the link identifier (optionally +indented from the left margin using up to three spaces);
    • +
    • followed by a colon;
    • +
    • followed by one or more spaces (or tabs);
    • +
    • followed by the URL for the link;
    • +
    • optionally followed by a title attribute for the link, enclosed +in double or single quotes, or enclosed in parentheses.
    • +
    +

    The following three link definitions are equivalent:

    +
    [foo]: http://example.com/  "Optional Title Here"
    +[foo]: http://example.com/  'Optional Title Here'
    +[foo]: http://example.com/  (Optional Title Here)
    +
    +

    Note: There is a known bug in Markdown.pl 1.0.1 which prevents +single quotes from being used to delimit link titles.

    +

    The link URL may, optionally, be surrounded by angle brackets:

    +
    [id]: <http://example.com/>  "Optional Title Here"
    +
    +

    You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs:

    +
    [id]: http://example.com/longish/path/to/resource/here
    +    "Optional Title Here"
    +
    +

    Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output.

    +

    Link definition names may consist of letters, numbers, spaces, and +punctuation -- but they are not case sensitive. E.g. these two +links:

    +
    [link text][a]
    +[link text][A]
    +
    +

    are equivalent.

    +

    The implicit link name shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write:

    +
    [Google][]
    +
    +

    And then define the link:

    +
    [Google]: http://google.com/
    +
    +

    Because link names may contain spaces, this shortcut even works for +multiple words in the link text:

    +
    Visit [Daring Fireball][] for more information.
    +
    +

    And then define the link:

    +
    [Daring Fireball]: http://daringfireball.net/
    +
    +

    Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes.

    +

    Here's an example of reference links in action:

    +
    I get 10 times more traffic from [Google] [1] than from
    +[Yahoo] [2] or [MSN] [3].
    +
    +  [1]: http://google.com/        "Google"
    +  [2]: http://search.yahoo.com/  "Yahoo Search"
    +  [3]: http://search.msn.com/    "MSN Search"
    +
    +

    Using the implicit link name shortcut, you could instead write:

    +
    I get 10 times more traffic from [Google][] than from
    +[Yahoo][] or [MSN][].
    +
    +  [google]: http://google.com/        "Google"
    +  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
    +  [msn]:    http://search.msn.com/    "MSN Search"
    +
    +

    Both of the above examples will produce the following HTML output:

    +
    <p>I get 10 times more traffic from <a href="http://google.com/"
    +title="Google">Google</a> than from
    +<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
    +or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
    +
    +

    For comparison, here is the same paragraph written using +Markdown's inline link style:

    +
    I get 10 times more traffic from [Google](http://google.com/ "Google")
    +than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
    +[MSN](http://search.msn.com/ "MSN Search").
    +
    +

    The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text.

    +

    With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose.

    +

    Emphasis

    +

    Markdown treats asterisks (*) and underscores (_) as indicators of +emphasis. Text wrapped with one * or _ will be wrapped with an +HTML <em> tag; double *'s or _'s will be wrapped with an HTML +<strong> tag. E.g., this input:

    +
    *single asterisks*
    +
    +_single underscores_
    +
    +**double asterisks**
    +
    +__double underscores__
    +
    +

    will produce:

    +
    <em>single asterisks</em>
    +
    +<em>single underscores</em>
    +
    +<strong>double asterisks</strong>
    +
    +<strong>double underscores</strong>
    +
    +

    You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span.

    +

    Emphasis can be used in the middle of a word:

    +
    un*frigging*believable
    +
    +

    But if you surround an * or _ with spaces, it'll be treated as a +literal asterisk or underscore.

    +

    To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it:

    +
    \*this text is surrounded by literal asterisks\*
    +
    +

    Code

    +

    To indicate a span of code, wrap it with backtick quotes (`). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example:

    +
    Use the `printf()` function.
    +
    +

    will produce:

    +
    <p>Use the <code>printf()</code> function.</p>
    +
    +

    To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters:

    +
    ``There is a literal backtick (`) here.``
    +
    +

    which will produce this:

    +
    <p><code>There is a literal backtick (`) here.</code></p>
    +
    +

    The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span:

    +
    A single backtick in a code span: `` ` ``
    +
    +A backtick-delimited string in a code span: `` `foo` ``
    +
    +

    will produce:

    +

    A single backtick in a code span: `

    +

    A backtick-delimited string in a code span: `foo`

    +

    With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this:

    +
    Please don't use any `<blink>` tags.
    +
    +

    into:

    +
    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
    +
    +

    You can write this:

    +
    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
    +
    +

    to produce:

    +
    <p><code>&amp;#8212;</code> is the decimal-encoded
    +equivalent of <code>&amp;mdash;</code>.</p>
    +
    +

    Images

    +

    Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format.

    +

    Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: inline and reference.

    +

    Inline image syntax looks like this:

    +
    ![Alt text](/path/to/img.jpg)
    +
    +![Alt text](/path/to/img.jpg "Optional title")
    +
    +

    That is:

    +
      +
    • An exclamation mark: !;
    • +
    • followed by a set of square brackets, containing the alt +attribute text for the image;
    • +
    • followed by a set of parentheses, containing the URL or path to +the image, and an optional title attribute enclosed in double +or single quotes.
    • +
    +

    Reference-style image syntax looks like this:

    +
    ![Alt text][id]
    +
    +

    Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references:

    +
    [id]: url/to/image  "Optional title attribute"
    +
    +

    As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML <img> tags.

    +
    +

    Miscellaneous

    + +

    Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

    +
    <http://example.com/>
    +
    +

    Markdown will turn this into:

    +
    <a href="http://example.com/">http://example.com/</a>
    +
    +

    Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this:

    +
    <address@example.com>
    +
    +

    into something like this:

    +
    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
    +&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
    +&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
    +&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
    +
    +

    which will render in a browser as a clickable link to "address@example.com".

    +

    (This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.)

    +

    Backslash Escapes

    +

    Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word +with literal asterisks (instead of an HTML <em> tag), you can use +backslashes before the asterisks, like this:

    +
    \*literal asterisks\*
    +
    +

    Markdown provides backslash escapes for the following characters:

    +
    \   backslash
    +`   backtick
    +*   asterisk
    +_   underscore
    +{}  curly braces
    +[]  square brackets
    +()  parentheses
    +#   hash mark
    ++	plus sign
    +-	minus sign (hyphen)
    +.   dot
    +!   exclamation mark
    +
    diff --git a/test/test_span_token.py b/test/test_span_token.py index 772071a..632a140 100644 --- a/test/test_span_token.py +++ b/test/test_span_token.py @@ -1,20 +1,22 @@ import unittest from unittest.mock import patch -from mistletoe import span_token +from mistletoe import span_tokens +from mistletoe.span_tokenizer import tokenize_span +from mistletoe.parse_context import get_parse_context class TestBranchToken(unittest.TestCase): def setUp(self): self.addCleanup( - lambda: span_token._token_types.value.__setitem__(-1, span_token.RawText) + lambda: get_parse_context().span_tokens.__setitem__(-1, span_tokens.RawText) ) - patcher = patch("mistletoe.span_token.RawText") + patcher = patch("mistletoe.span_tokens.RawText") self.mock = patcher.start() - span_token._token_types.value[-1] = self.mock + get_parse_context().span_tokens[-1] = self.mock self.addCleanup(patcher.stop) def _test_parse(self, token_cls, raw, arg, **kwargs): - token = next(iter(span_token.tokenize_inner(raw))) + token = next(iter(tokenize_span(raw))) self.assertIsInstance(token, token_cls) self._test_token(token, arg, **kwargs) @@ -27,40 +29,40 @@ def _test_token(self, token, arg, children=True, **kwargs): class TestStrong(TestBranchToken): def test_parse(self): - self._test_parse(span_token.Strong, "**some text**", "some text") - self._test_parse(span_token.Strong, "__some text__", "some text") + self._test_parse(span_tokens.Strong, "**some text**", "some text") + self._test_parse(span_tokens.Strong, "__some text__", "some text") class TestEmphasis(TestBranchToken): def test_parse(self): - self._test_parse(span_token.Emphasis, "*some text*", "some text") - self._test_parse(span_token.Emphasis, "_some text_", "some text") + self._test_parse(span_tokens.Emphasis, "*some text*", "some text") + self._test_parse(span_tokens.Emphasis, "_some text_", "some text") class TestInlineCode(TestBranchToken): def test_parse(self): - self._test_parse(span_token.InlineCode, "`some text`", "some text") + 
self._test_parse(span_tokens.InlineCode, "`some text`", "some text") class TestStrikethrough(TestBranchToken): def test_parse(self): - self._test_parse(span_token.Strikethrough, "~~some text~~", "some text") + self._test_parse(span_tokens.Strikethrough, "~~some text~~", "some text") class TestLink(TestBranchToken): def test_parse(self): self._test_parse( - span_token.Link, "[name 1](target1)", "name 1", target="target1", title="" + span_tokens.Link, "[name 1](target1)", "name 1", target="target1", title="" ) def test_parse_multi_links(self): - tokens = iter(span_token.tokenize_inner("[n1](t1) & [n2](t2)")) + tokens = iter(tokenize_span("[n1](t1) & [n2](t2)")) self._test_token(next(tokens), "n1", target="t1") self._test_token(next(tokens), " & ", children=False) self._test_token(next(tokens), "n2", target="t2") def test_parse_children(self): - token = next(iter(span_token.tokenize_inner("[![alt](src)](target)"))) + token = next(iter(tokenize_span("[![alt](src)](target)"))) child = next(iter(token.children)) self._test_token(child, "alt", src="src") @@ -68,7 +70,7 @@ def test_parse_children(self): class TestAutoLink(TestBranchToken): def test_parse(self): self._test_parse( - span_token.AutoLink, + span_tokens.AutoLink, "", "ftp://foo.com", target="ftp://foo.com", @@ -77,21 +79,21 @@ def test_parse(self): class TestImage(TestBranchToken): def test_parse(self): - self._test_parse(span_token.Image, "![alt](link)", "alt", src="link") + self._test_parse(span_tokens.Image, "![alt](link)", "alt", src="link") self._test_parse( - span_token.Image, '![alt](link "title")', "alt", src="link", title="title" + span_tokens.Image, '![alt](link "title")', "alt", src="link", title="title" ) def test_no_alternative_text(self): - self._test_parse(span_token.Image, "![](link)", "", children=False, src="link") + self._test_parse(span_tokens.Image, "![](link)", "", children=False, src="link") class TestEscapeSequence(TestBranchToken): def test_parse(self): - 
self._test_parse(span_token.EscapeSequence, "\\*", "*") + self._test_parse(span_tokens.EscapeSequence, "\\*", "*") def test_parse_in_text(self): - tokens = iter(span_token.tokenize_inner("some \\*text*")) + tokens = iter(tokenize_span("some \\*text*")) self._test_token(next(tokens), "some ", children=False) self._test_token(next(tokens), "*") self._test_token(next(tokens), "text*", children=False) @@ -99,24 +101,23 @@ def test_parse_in_text(self): class TestRawText(unittest.TestCase): def test_attribute(self): - token = span_token.RawText("some text") + token = span_tokens.RawText("some text") self.assertEqual(token.content, "some text") def test_no_children(self): - token = span_token.RawText("some text") - with self.assertRaises(AttributeError): - token.children + token = span_tokens.RawText("some text") + assert token.children is None class TestLineBreak(unittest.TestCase): def test_parse(self): - (token,) = span_token.tokenize_inner(" \n") - self.assertIsInstance(token, span_token.LineBreak) + (token,) = tokenize_span(" \n") + self.assertIsInstance(token, span_tokens.LineBreak) class TestContains(unittest.TestCase): def test_contains(self): - token = next(iter(span_token.tokenize_inner("**with some *emphasis* text**"))) + token = next(iter(tokenize_span("**with some *emphasis* text**"))) self.assertTrue("text" in token) self.assertTrue("emphasis" in token) self.assertFalse("foo" in token)