From 318e2c9e3d3e0642cecc8855653b12556d276b03 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 01:50:42 +1100 Subject: [PATCH 1/8] Fix warnings in docs and make circle-ci fail on warnings --- .circleci/config.yml | 2 +- docs/Makefile | 7 ++++ docs/examples/wealth_dynamics_md.md | 10 +++--- docs/index.md | 2 +- docs/using/syntax.md | 56 ++++++++++++++++------------- myst_parser/docutils_renderer.py | 41 +++++++++++++-------- 6 files changed, 73 insertions(+), 45 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7e951db0..d295c9c0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,7 +22,7 @@ jobs: name: Build docs to store command: | cd docs - make html + make html-strict - store_artifacts: path: docs/_build/html/ diff --git a/docs/Makefile b/docs/Makefile index d4bb2cbb..f92f0cd3 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -18,3 +18,10 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# raise warnings to errors +html-strict: + @$(SPHINXBUILD) -b html -nW "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) + +clean: + rm -r $(BUILDDIR) diff --git a/docs/examples/wealth_dynamics_md.md b/docs/examples/wealth_dynamics_md.md index dfad8f73..b25429cd 100644 --- a/docs/examples/wealth_dynamics_md.md +++ b/docs/examples/wealth_dynamics_md.md @@ -34,7 +34,7 @@ The wealth distribution in many countries exhibits a Pareto tail - See {doc}`this lecture ` for a definition. - For a review of the empirical evidence, see, for example, - {cite}`benhabib2018skewed`. + {cite}`md-benhabib2018skewed`. 
### A Note on Assumptions @@ -159,7 +159,7 @@ The model we will study is ```{math} --- -label: wealth_dynam_ah +label: md:wealth_dynam_ah --- w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1} ``` @@ -186,11 +186,11 @@ $$y_t = c_y \exp(z_t) + \exp(\mu_y + \sigma_y \zeta_t)$$ Here $\{ (\epsilon_t, \xi_t, \zeta_t) \}$ is IID and standard normal in $\mathbb R^3$. -(sav_ah)= +(md:sav_ah)= ```{math} --- -label: sav_ah +label: md:sav_ah --- s(w) = s_0 w \cdot \mathbb 1\{w \geq \hat w\} ``` @@ -475,4 +475,6 @@ We see that greater volatility has the effect of increasing inequality in this model. ```{bibliography} references.bib +:labelprefix: md +:keyprefix: md- ``` diff --git a/docs/index.md b/docs/index.md index b8829476..86256e43 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,7 +8,7 @@ This project provides a parser for this flavor of markdown, as well as a bridge MyST syntax and Sphinx. This allows for native markdown support for roles and directives. -```{warn} +```{warning} The MyST parser is in an alpha stage, and may have breaking syntax to its implementation and to the syntax that it supports. Use at your own risk. If you find any issues, please report them diff --git a/docs/using/syntax.md b/docs/using/syntax.md index 367cad2c..8840eb27 100644 --- a/docs/using/syntax.md +++ b/docs/using/syntax.md @@ -33,11 +33,12 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org in curly brackets `{}`. See {ref}`syntax/directives` for more details. - **Math**: Two `$` characters wrapping multi-line math, e.g. - ``` + ```latex $$ a=1 $$ ``` + - **LineComment**: `% this is a comment`. See {ref}`syntax/comments` for more information. - **BlockBreak**: `+++`. See {ref}`syntax/blockbreaks` for more information. 
@@ -50,11 +51,13 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org - **SetextHeading**: underlined header (using multiple `=` or `-`) - **Quote**: `> this is a quote` - **CodeFence**: enclosed in 3 or more backticks with an optional language name. E.g.: - ```` + + ````md ```python print('this is python') ``` ```` + - **ThematicBreak**: `---` - **List**: bullet points or enumerated. - **Table**: Standard markdown table styles. @@ -85,6 +88,7 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org - **RawText** (syntax/directives)= + ## Directives - a block-level extension point Directives syntax is defined with triple-backticks and curly-brackets. It @@ -98,7 +102,7 @@ header-rows: 1 --- * - MyST - reStructuredText -* - ````markdown +* - ````md ```{directivename} arguments --- key1: val1 @@ -120,7 +124,7 @@ header-rows: 1 For example, the following code: -```` +````md ```{admonition} This is my admonition This is my note ``` @@ -135,7 +139,7 @@ This is my note For directives that are meant to parse content for your site, you may use markdown as the markup language inside... -```` +````md ```{admonition} My markdown link Here is [markdown link syntax](https://jupyter.org) ``` @@ -148,7 +152,7 @@ Here is [markdown link syntax](https://jupyter.org) As a short-hand for directives that require no arguments, and when no paramter options are used (see below), you may start the content directly after the directive name. -```` +````md ```{note} Notes require **no** arguments, so content can start here. ``` ```` @@ -163,7 +167,7 @@ beginning your directive content with YAML frontmatter. This needs to be surrounded by `---` lines. Everything in between will be parsed by YAML and passed as keyword arguments to your directive. 
For example: -```` +````md ```{code-block} python --- lineno-start: 10 @@ -193,7 +197,7 @@ print(f'my {a}nd line') As a short-hand alternative, more closely resembling the reStructuredText syntax, options may also be denoted by an initial block, whereby all lines start with '`:`', for example: -```` +````md ```{code-block} python :lineno-start: 10 :emphasize-lines: 1, 3 @@ -210,7 +214,7 @@ You can nest directives by ensuring that the ticklines corresponding to the outermost directive are longer than the ticklines for the inner directives. For example, nest a warning inside a note block like so: -````` +`````md ````{note} The next info should be nested ```{warning} @@ -231,7 +235,7 @@ Here's my warning You can indent inner-code fences, so long as they aren't indented by more than 3 spaces. Otherwise, they will be rendered as "raw code" blocks: -````` +`````md ````{note} The warning block will be properly-parsed @@ -277,6 +281,7 @@ print('yep!') `````` (syntax/roles)= + ## Roles - an in-line extension point Roles are similar to directives - they allow you to define arbitrary new @@ -289,7 +294,7 @@ header-rows: 1 --- * - MyST - reStructuredText -* - ````markdown +* - ````md {role-name}`role content` ```` - ```rst @@ -310,7 +315,7 @@ Since Pythagoras, we know that {math}`a^2 + b^2 = c^2` You can use roles to do things like reference equations and other items in your book. For example: -```` +````md ```{math} e^{i\pi} + 1 = 0 --- label: euler @@ -351,12 +356,12 @@ header-rows: 1 - `$x^2$` - N/A * - Front matter - - ``` + - ```md --- key: val --- ``` - - ``` + - ```md :key: val ``` * - Comments @@ -373,14 +378,14 @@ Math can be called in-line with single `$` characters around your math. For example, `$x_{hey}=it+is^{math}$` renders as $x_{hey}=it+is^{math}$. This is equivalent to writing: -``` +```md {math}`x_{hey}=it+is^{math}` ``` Block-level math can be provided with `$$` signs that wrap the math block you'd like to parse. 
For example: -``` +```latex $$ \begin{eqnarray} y & = & ax^2 + bx + c \\ @@ -400,7 +405,7 @@ $$ This is equivalent to the following directive: -```` +````md ```{math} \begin{eqnarray} y & = & ax^2 + bx + c \\ @@ -410,6 +415,7 @@ This is equivalent to the following directive: ```` (syntax/frontmatter)= + ### Front Matter This is a YAML block at the start of the document, as used for example in @@ -421,7 +427,7 @@ A classic use-case is to specify 'orphan' documents, that are not specified in a toctrees. For example, inserting the following syntax at the top of a page will cause Sphinx to treat it as an orphan page: -```markdown +```md --- orphan: true --- @@ -438,7 +444,7 @@ prevent the line from being parsed into the output document. For example, this code: -``` +```md % my comment ``` @@ -458,7 +464,7 @@ but is stored in the internal document structure for use by developers. For example, this code: -``` +```md +++ some text ``` @@ -476,19 +482,19 @@ to them. Target headers are defined with this syntax: -``` +```md (header_target)= ``` They can then be referred to with the [ref inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-ref): -``` +```md {ref}`header_target` ``` By default, the reference will use the text of the target (such as the section title), but also you can directly specify the text: -``` +```md {ref}`my text ` ``` @@ -497,13 +503,13 @@ this page: {ref}`my text `. 
Alternatively using the markdown syntax: -```markdown +```md [my text](header_target) ``` is synonymous with using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any): -``` +```md {any}`my text ` ``` diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py index 0c0d9d74..fa0b60a3 100644 --- a/myst_parser/docutils_renderer.py +++ b/myst_parser/docutils_renderer.py @@ -73,6 +73,13 @@ def new_document(self, source_path="notset") -> nodes.document: settings = OptionParser(components=(RSTParser,)).get_default_values() return new_document(source_path, settings=settings) + def add_line_and_source_path(self, node, token): + try: + node.line = token.range[0] + 1 + except AttributeError: + pass + node.source = self.document["source"] + def nested_render_text(self, text: str, lineno: int): """Render unparsed text.""" token = myst_block_tokens.Document( @@ -134,7 +141,7 @@ def render_paragraph(self, token): # promote the target to block level return self.render_target(token.children[0]) para = nodes.paragraph("") - para.line = token.range[0] + self.add_line_and_source_path(para, token) with self.current_node_context(para, append=True): self.render_children(token) @@ -175,7 +182,7 @@ def render_emphasis(self, token): def render_quote(self, token): quote = nodes.block_quote() - quote.line = token.range[0] + self.add_line_and_source_path(quote, token) with self.current_node_context(quote, append=True): self.render_children(token) @@ -200,17 +207,22 @@ def render_math(self, token): node = nodes.math(content, content) self.current_node.append(node) + def render_block_code(self, token): + # this should never have a language, since it is just indented text, however, + # creating a literal_block with no language will raise a warning in sphinx + text = token.children[0].content + language = token.language or "none" + node = nodes.literal_block(text, text, language=language) + self.add_line_and_source_path(node, 
token) + self.current_node.append(node) + def render_code_fence(self, token): if token.language.startswith("{") and token.language.endswith("}"): return self.render_directive(token) - self.render_block_code(token, default_language=True) - def render_block_code(self, token, default_language=False): - # indented code blocks will always have no language, - # but for code fences, if not set, a default_language will be retrieved text = token.children[0].content language = token.language - if not language and default_language: + if not language: try: sphinx_env = self.document.settings.env language = sphinx_env.temp_data.get( @@ -218,9 +230,10 @@ def render_block_code(self, token, default_language=False): ) except AttributeError: pass - if not language and default_language: + if not language: language = self.config.get("highlight_language", "") node = nodes.literal_block(text, text, language=language) + self.add_line_and_source_path(node, token) self.current_node.append(node) def render_inline_code(self, token): @@ -255,10 +268,10 @@ def render_heading(self, token): self.current_node = self.current_node.parent title_node = nodes.title() - title_node.line = token.range[0] + self.add_line_and_source_path(title_node, token) new_section = nodes.section() - new_section.line = token.range[0] + self.add_line_and_source_path(new_section, token) new_section.append(title_node) self._add_section(new_section, token.level) @@ -496,7 +509,7 @@ def render_directive(self, token): # the absolute line number of the first line of the directive lineno=token.range[0], # the line offset of the first line of the content - content_offset=0, + content_offset=0, # TODO get content offset from `parse_directive_text` # a string containing the entire directive block_text="\n".join(body_lines), state=state, @@ -760,7 +773,7 @@ class Struct: def nested_parse( self, - block: List[str], + block: StringList, input_offset: int, node: nodes.Element, match_titles: bool = False, @@ -770,7 +783,7 @@ def 
nested_parse( current_match_titles = self.state_machine.match_titles self.state_machine.match_titles = match_titles with self._renderer.current_node_context(node): - self._renderer.nested_render_text(block, self._lineno) + self._renderer.nested_render_text(block, self._lineno + input_offset) self.state_machine.match_titles = current_match_titles def inline_text(self, text: str, lineno: int): @@ -1002,7 +1015,7 @@ def run(self): literal_block = nodes.literal_block( file_content, source=str(path), classes=self.options.get("class", []) ) - literal_block.line = 1 + literal_block.line = 1 # TODO don't think this should be 1? self.add_name(literal_block) if "number-lines" in self.options: try: From 5cbe12eda32bacb180756fe398644f768713ba04 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 01:55:21 +1100 Subject: [PATCH 2/8] check that circle-ci fails on a warning --- docs/Makefile | 4 ++-- docs/using/sphinx.md | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index f92f0cd3..22a24daa 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -19,9 +19,9 @@ help: %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -# raise warnings to errors +# raise warnings to errors, but don't stop on the first error html-strict: - @$(SPHINXBUILD) -b html -nW "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -b html -nW --keep-going "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) clean: rm -r $(BUILDDIR) diff --git a/docs/using/sphinx.md b/docs/using/sphinx.md index e8b8bd69..0185a011 100644 --- a/docs/using/sphinx.md +++ b/docs/using/sphinx.md @@ -5,3 +5,7 @@ Sphinx is a documentation generator for building a website or book from multiple To use the MyST parser in Sphinx, simply add: `extensions = ["myst_parser"]` to your `conf.py` and all documents with the `.md` extension will be parsed as MyST. Naturally this site is generated with Sphinx and MyST! 
+ +```a +b +``` From ffea359a728486efd0ce66671c3d4974ad71ac5d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 01:56:51 +1100 Subject: [PATCH 3/8] Revert "check that circle-ci fails on a warning" This reverts commit 5cbe12eda32bacb180756fe398644f768713ba04. --- docs/Makefile | 4 ++-- docs/using/sphinx.md | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 22a24daa..f92f0cd3 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -19,9 +19,9 @@ help: %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -# raise warnings to errors, but don't stop on the first error +# raise warnings to errors html-strict: - @$(SPHINXBUILD) -b html -nW --keep-going "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) + @$(SPHINXBUILD) -b html -nW "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) clean: rm -r $(BUILDDIR) diff --git a/docs/using/sphinx.md b/docs/using/sphinx.md index 0185a011..e8b8bd69 100644 --- a/docs/using/sphinx.md +++ b/docs/using/sphinx.md @@ -5,7 +5,3 @@ Sphinx is a documentation generator for building a website or book from multiple To use the MyST parser in Sphinx, simply add: `extensions = ["myst_parser"]` to your `conf.py` and all documents with the `.md` extension will be parsed as MyST. Naturally this site is generated with Sphinx and MyST! 
- -```a -b -``` From 0f05bc8a08efcb23b695b201083b6f1cdf24637d Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 01:58:32 +1100 Subject: [PATCH 4/8] Allow parsed lines to be stored in the document --- myst_parser/block_tokens.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py index 76e57140..33813553 100644 --- a/myst_parser/block_tokens.py +++ b/myst_parser/block_tokens.py @@ -79,7 +79,7 @@ def __repr__(self): class Document(block_token.BlockToken): """Document token.""" - def __init__(self, lines, start_line=0, inc_front_matter=True): + def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False): self.footnotes = {} block_token._root_node = self @@ -87,6 +87,11 @@ def __init__(self, lines, start_line=0, inc_front_matter=True): if isinstance(lines, str): lines = lines.splitlines(keepends=True) + + if store_lines: + self._lines = lines + self._start_line = start_line + lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] self.children = [] if lines and lines[0].startswith("---"): From 6e268b9657136f8c84d376a86613be2562b135d6 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 02:00:51 +1100 Subject: [PATCH 5/8] Always store start_line on Document --- myst_parser/block_tokens.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py index 33813553..6d19cc4c 100644 --- a/myst_parser/block_tokens.py +++ b/myst_parser/block_tokens.py @@ -82,6 +82,7 @@ class Document(block_token.BlockToken): def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False): self.footnotes = {} + self._start_line = start_line block_token._root_node = self span_token._root_node = self @@ -90,7 +91,6 @@ def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False if store_lines: self._lines = lines - self._start_line = 
start_line lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] self.children = [] From 39a71e98280f4f72545eba713f0fc94202901b19 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 02:11:12 +1100 Subject: [PATCH 6/8] Add guidance on how to run doc build tests --- docs/develop/contributing.md | 21 +++++++++++++++++++++ docs/develop/test_infrastructure.md | 2 ++ 2 files changed, 23 insertions(+) diff --git a/docs/develop/contributing.md b/docs/develop/contributing.md index d5d40378..ac1f0ef1 100644 --- a/docs/develop/contributing.md +++ b/docs/develop/contributing.md @@ -25,6 +25,27 @@ Optionally you can run `black` and `flake8` separately: Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard. +## Testing + +For code tests: + +```shell +>> cd MyST-Parser +>> pytest +``` + +For documentation build tests: + +```shell +>> cd MyST-Parser/docs +>> make clean +>> make html-strict +``` + +```{seealso} +{ref}`develop/testing` +``` + ## Pull Requests To contribute, make Pull Requests to the `develop` branch (this is the default branch). A PR can consist of one or multiple commits. Before you open a PR, make sure to clean up your commit history and create the commits that you think best divide up the total work as outlined above (use `git rebase` and `git commit --amend`). Ensure all commit messages clearly summarise the changes in the header and the problem that this commit is solving in the body. 
diff --git a/docs/develop/test_infrastructure.md b/docs/develop/test_infrastructure.md index 897b67d4..eff33849 100644 --- a/docs/develop/test_infrastructure.md +++ b/docs/develop/test_infrastructure.md @@ -1,3 +1,5 @@ +(develop/testing)= + # Testing Infrastructure Where possible, additions to the code should be carried out in a From 7632b9175c881561d82d71eda2e9d11764dc777f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 02:21:44 +1100 Subject: [PATCH 7/8] Ignore 'private' underscore attribute in AstRenderer --- myst_parser/ast_renderer.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/myst_parser/ast_renderer.py b/myst_parser/ast_renderer.py index 7eb9e83e..a3dd8e4a 100644 --- a/myst_parser/ast_renderer.py +++ b/myst_parser/ast_renderer.py @@ -33,3 +33,28 @@ def render(self, token, to_json=False): if to_json: return json.dumps(ast, indent=2) + "\n" return ast + + +def get_ast(token): + """ + Recursively unrolls token attributes into dictionaries (token.children + into lists). + + Returns: + a dictionary of token's attributes. + """ + node = {} + # Python 3.6 uses [ordered dicts] [1]. + # Put in 'type' entry first to make the final tree format somewhat + # similar to [MDAST] [2]. 
+ # + # [1]: https://docs.python.org/3/whatsnew/3.6.html + # [2]: https://github.com/syntax-tree/mdast + node["type"] = token.__class__.__name__ + # here we ignore 'private' underscore attributes + node.update({k: v for k, v in token.__dict__.items() if not k.startswith("_")}) + if "header" in node: + node["header"] = get_ast(node["header"]) + if "children" in node: + node["children"] = [get_ast(child) for child in node["children"]] + return node From 1237856a2ced0754e2a9d47dd460f86dcf67db15 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 28 Feb 2020 02:22:24 +1100 Subject: [PATCH 8/8] test fix --- myst_parser/ast_renderer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/myst_parser/ast_renderer.py b/myst_parser/ast_renderer.py index a3dd8e4a..b01d49e9 100644 --- a/myst_parser/ast_renderer.py +++ b/myst_parser/ast_renderer.py @@ -29,7 +29,7 @@ def render(self, token, to_json=False): Overrides super().render. Delegates the logic to get_ast. """ - ast = ast_renderer.get_ast(token) + ast = get_ast(token) if to_json: return json.dumps(ast, indent=2) + "\n" return ast