From 8777f5681e012f6d3b233bb400d4e1fbcb913dc4 Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Fri, 28 Feb 2020 02:28:07 +1100
Subject: [PATCH] Make CircleCI fail on sphinx warnings (#84)

Also:
* Fix warnings in docs
* Add guidance on how to run doc build tests
* Allow parsed lines to be stored in the document
* Ignore 'private' underscore attributes in `AstRenderer`
---
 .circleci/config.yml                |  2 +-
 docs/Makefile                       |  7 ++++
 docs/develop/contributing.md        | 21 +++++++++++
 docs/develop/test_infrastructure.md |  2 ++
 docs/examples/wealth_dynamics_md.md | 10 +++---
 docs/index.md                       |  2 +-
 docs/using/syntax.md                | 56 ++++++++++++++++-------------
 myst_parser/ast_renderer.py         | 27 +++++++++++++-
 myst_parser/block_tokens.py         |  7 +++-
 myst_parser/docutils_renderer.py    | 41 +++++++++++++--------
 10 files changed, 128 insertions(+), 47 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 7e951db0..d295c9c0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -22,7 +22,7 @@ jobs:
           name: Build docs to store
           command: |
             cd docs
-            make html
+            make html-strict
 
       - store_artifacts:
           path: docs/_build/html/
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb..f92f0cd3 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -18,3 +18,10 @@ help:
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# strict HTML build: nitpicky mode (-n), with warnings raised to errors (-W)
+html-strict:
+	@$(SPHINXBUILD) -b html -nW "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
+
+clean:  # -f: do not fail when the build directory does not exist yet
+	rm -rf $(BUILDDIR)
diff --git a/docs/develop/contributing.md b/docs/develop/contributing.md
index d5d40378..ac1f0ef1 100644
--- a/docs/develop/contributing.md
+++ b/docs/develop/contributing.md
@@ -25,6 +25,27 @@ Optionally you can run `black` and `flake8` separately:
 
 Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard.
 
+## Testing
+
+For code tests:
+
+```shell
+>> cd MyST-Parser
+>> pytest
+```
+
+For documentation build tests:
+
+```shell
+>> cd MyST-Parser/docs
+>> make clean
+>> make html-strict
+```
+
+```{seealso}
+{ref}`develop/testing`
+```
+
 ## Pull Requests
 
 To contribute, make Pull Requests to the `develop` branch (this is the default branch). A PR can consist of one or multiple commits. Before you open a PR, make sure to clean up your commit history and create the commits that you think best divide up the total work as outlined above (use `git rebase` and `git commit --amend`). Ensure all commit messages clearly summarise the changes in the header and the problem that this commit is solving in the body.
diff --git a/docs/develop/test_infrastructure.md b/docs/develop/test_infrastructure.md
index 897b67d4..eff33849 100644
--- a/docs/develop/test_infrastructure.md
+++ b/docs/develop/test_infrastructure.md
@@ -1,3 +1,5 @@
+(develop/testing)=
+
 # Testing Infrastructure
 
 Where possible, additions to the code should be carried out in a
diff --git a/docs/examples/wealth_dynamics_md.md b/docs/examples/wealth_dynamics_md.md
index dfad8f73..b25429cd 100644
--- a/docs/examples/wealth_dynamics_md.md
+++ b/docs/examples/wealth_dynamics_md.md
@@ -34,7 +34,7 @@ The wealth distribution in many countries exhibits a Pareto tail
 - See {doc}`this lecture <heavy_tails>` for a
     definition.
 - For a review of the empirical evidence, see, for example,
-    {cite}`benhabib2018skewed`.
+    {cite}`md-benhabib2018skewed`.
 
 ### A Note on Assumptions
 
@@ -159,7 +159,7 @@ The model we will study is
 
 ```{math}
 ---
-label: wealth_dynam_ah
+label: md:wealth_dynam_ah
 ---
 w_{t+1} = (1 + r_{t+1}) s(w_t) + y_{t+1}
 ```
@@ -186,11 +186,11 @@ $$y_t = c_y \exp(z_t) + \exp(\mu_y + \sigma_y \zeta_t)$$
 Here $\{ (\epsilon_t, \xi_t, \zeta_t) \}$ is IID and standard normal in
 $\mathbb R^3$.
 
-(sav_ah)=
+(md:sav_ah)=
 
 ```{math}
 ---
-label: sav_ah
+label: md:sav_ah
 ---
 s(w) = s_0 w \cdot \mathbb 1\{w \geq \hat w\}
 ```
@@ -475,4 +475,6 @@ We see that greater volatility has the effect of increasing inequality
 in this model.
 
 ```{bibliography} references.bib
+:labelprefix: md
+:keyprefix: md-
 ```
diff --git a/docs/index.md b/docs/index.md
index b8829476..86256e43 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,7 +8,7 @@ This project provides a parser for this flavor of markdown, as well as a bridge
 MyST syntax and Sphinx. This allows for native markdown support for roles and
 directives.
 
-```{warn}
+```{warning}
 The MyST parser is in an alpha stage, and may have breaking syntax to its implementation
 and to the syntax that it supports. Use at your own risk. If you find any issues,
 please report them
diff --git a/docs/using/syntax.md b/docs/using/syntax.md
index 367cad2c..8840eb27 100644
--- a/docs/using/syntax.md
+++ b/docs/using/syntax.md
@@ -33,11 +33,12 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org
   in curly brackets `{}`. See {ref}`syntax/directives` for more details.
 - **Math**: Two `$` characters wrapping multi-line math, e.g.
 
-  ```
+  ```latex
   $$
   a=1
   $$
   ```
+
 - **LineComment**: `% this is a comment`. See {ref}`syntax/comments` for more
   information.
 - **BlockBreak**: `+++`. See {ref}`syntax/blockbreaks` for more information.
@@ -50,11 +51,13 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org
 - **SetextHeading**: underlined header (using multiple `=` or `-`)
 - **Quote**: `> this is a quote`
 - **CodeFence**: enclosed in 3 or more backticks with an optional language name. E.g.:
-  ````
+
+  ````md
   ```python
   print('this is python')
   ```
   ````
+
 - **ThematicBreak**: `---`
 - **List**: bullet points or enumerated.
 - **Table**: Standard markdown table styles.
@@ -85,6 +88,7 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org
 - **RawText**
 
 (syntax/directives)=
+
 ## Directives - a block-level extension point
 
 Directives syntax is defined with triple-backticks and curly-brackets. It
@@ -98,7 +102,7 @@ header-rows: 1
 ---
 * - MyST
   - reStructuredText
-* - ````markdown
+* - ````md
     ```{directivename} arguments
     ---
     key1: val1
@@ -120,7 +124,7 @@ header-rows: 1
 
 For example, the following code:
 
-````
+````md
 ```{admonition} This is my admonition
 This is my note
 ```
@@ -135,7 +139,7 @@ This is my note
 For directives that are meant to parse content for your site, you may use
 markdown as the markup language inside...
 
-````
+````md
 ```{admonition} My markdown link
 Here is [markdown link syntax](https://jupyter.org)
 ```
@@ -148,7 +152,7 @@ Here is [markdown link syntax](https://jupyter.org)
 As a short-hand for directives that require no arguments, and when no paramter options are used (see below),
 you may start the content directly after the directive name.
 
-````
+````md
 ```{note} Notes require **no** arguments, so content can start here.
 ```
 ````
@@ -163,7 +167,7 @@ beginning your directive content with YAML frontmatter. This needs to be
 surrounded by `---` lines. Everything in between will be parsed by YAML and
 passed as keyword arguments to your directive. For example:
 
-````
+````md
 ```{code-block} python
 ---
 lineno-start: 10
@@ -193,7 +197,7 @@ print(f'my {a}nd line')
 
 As a short-hand alternative, more closely resembling the reStructuredText syntax, options may also be denoted by an initial block, whereby all lines start with '`:`', for example:
 
-````
+````md
 ```{code-block} python
 :lineno-start: 10
 :emphasize-lines: 1, 3
@@ -210,7 +214,7 @@ You can nest directives by ensuring that the ticklines corresponding to the
 outermost directive are longer than the ticklines for the inner directives.
 For example, nest a warning inside a note block like so:
 
-`````
+`````md
 ````{note}
 The next info should be nested
 ```{warning}
@@ -231,7 +235,7 @@ Here's my warning
 You can indent inner-code fences, so long as they aren't indented by more than 3 spaces.
 Otherwise, they will be rendered as "raw code" blocks:
 
-`````
+`````md
 ````{note}
 The warning block will be properly-parsed
 
@@ -277,6 +281,7 @@ print('yep!')
 ``````
 
 (syntax/roles)=
+
 ## Roles - an in-line extension point
 
 Roles are similar to directives - they allow you to define arbitrary new
@@ -289,7 +294,7 @@ header-rows: 1
 ---
 * - MyST
   - reStructuredText
-* - ````markdown
+* - ````md
     {role-name}`role content`
     ````
   - ```rst
@@ -310,7 +315,7 @@ Since Pythagoras, we know that {math}`a^2 + b^2 = c^2`
 You can use roles to do things like reference equations and other items in
 your book. For example:
 
-````
+````md
 ```{math} e^{i\pi} + 1 = 0
 ---
 label: euler
@@ -351,12 +356,12 @@ header-rows: 1
   - `$x^2$`
   - N/A
 * - Front matter
-  - ```
+  - ```md
     ---
     key: val
     ---
     ```
-  - ```
+  - ```md
     :key: val
     ```
 * - Comments
@@ -373,14 +378,14 @@ Math can be called in-line with single `$` characters around your math.
 For example, `$x_{hey}=it+is^{math}$` renders as $x_{hey}=it+is^{math}$.
 This is equivalent to writing:
 
-```
+```md
 {math}`x_{hey}=it+is^{math}`
 ```
 
 Block-level math can be provided with `$$` signs that wrap the math block you'd like
 to parse. For example:
 
-```
+```latex
 $$
    \begin{eqnarray}
       y    & = & ax^2 + bx + c \\
@@ -400,7 +405,7 @@ $$
 
 This is equivalent to the following directive:
 
-````
+````md
 ```{math}
    \begin{eqnarray}
       y    & = & ax^2 + bx + c \\
@@ -410,6 +415,7 @@ This is equivalent to the following directive:
 ````
 
 (syntax/frontmatter)=
+
 ### Front Matter
 
 This is a YAML block at the start of the document, as used for example in
@@ -421,7 +427,7 @@ A classic use-case is to specify 'orphan' documents, that are not specified in a
 toctrees. For example, inserting the following syntax at the top of a page will cause
 Sphinx to treat it as an orphan page:
 
-```markdown
+```md
 ---
 orphan: true
 ---
@@ -438,7 +444,7 @@ prevent the line from being parsed into the output document.
 
 For example, this code:
 
-```
+```md
 % my comment
 ```
 
@@ -458,7 +464,7 @@ but is stored in the internal document structure for use by developers.
 
 For example, this code:
 
-```
+```md
 +++ some text
 ```
 
@@ -476,19 +482,19 @@ to them.
 
 Target headers are defined with this syntax:
 
-```
+```md
 (header_target)=
 ```
 
 They can then be referred to with the [ref inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-ref):
 
-```
+```md
 {ref}`header_target`
 ```
 
 By default, the reference will use the text of the target (such as the section title), but also you can directly specify the text:
 
-```
+```md
 {ref}`my text <header_target>`
 ```
 
@@ -497,13 +503,13 @@ this page: {ref}`my text <example_syntax>`.
 
 Alternatively using the markdown syntax:
 
-```markdown
+```md
 [my text](header_target)
 ```
 
 is synonymous with using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any):
 
-```
+```md
 {any}`my text <header_target>`
 ```
 
diff --git a/myst_parser/ast_renderer.py b/myst_parser/ast_renderer.py
index 7eb9e83e..b01d49e9 100644
--- a/myst_parser/ast_renderer.py
+++ b/myst_parser/ast_renderer.py
@@ -29,7 +29,32 @@ def render(self, token, to_json=False):
 
         Overrides super().render. Delegates the logic to get_ast.
         """
-        ast = ast_renderer.get_ast(token)
+        ast = get_ast(token)
         if to_json:
             return json.dumps(ast, indent=2) + "\n"
         return ast
+
+
+def get_ast(token):
+    """Recursively unroll a token's attributes into a dictionary.
+
+    Attributes starting with an underscore ('private') are skipped; "header"
+    is unrolled into a dictionary and "children" into a list of dictionaries.
+
+    Returns: a dictionary holding the token's class name and attributes.
+    """
+    node = {}
+    # Python 3.6 uses [ordered dicts] [1].
+    # Put in 'type' entry first to make the final tree format somewhat
+    # similar to [MDAST] [2].
+    #
+    #   [1]: https://docs.python.org/3/whatsnew/3.6.html
+    #   [2]: https://github.com/syntax-tree/mdast
+    node["type"] = token.__class__.__name__
+    # skip 'private' attributes, i.e. those whose name starts with an underscore
+    node.update({k: v for k, v in token.__dict__.items() if not k.startswith("_")})
+    if "header" in node:
+        node["header"] = get_ast(node["header"])
+    if "children" in node:
+        node["children"] = [get_ast(child) for child in node["children"]]
+    return node
diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py
index 76e57140..6d19cc4c 100644
--- a/myst_parser/block_tokens.py
+++ b/myst_parser/block_tokens.py
@@ -79,14 +79,19 @@ def __repr__(self):
 class Document(block_token.BlockToken):
     """Document token."""
 
-    def __init__(self, lines, start_line=0, inc_front_matter=True):
+    def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False):
 
         self.footnotes = {}
+        self._start_line = start_line
         block_token._root_node = self
         span_token._root_node = self
 
         if isinstance(lines, str):
             lines = lines.splitlines(keepends=True)
+
+        if store_lines:
+            self._lines = lines
+
         lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines]
         self.children = []
         if lines and lines[0].startswith("---"):
diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py
index 0c0d9d74..fa0b60a3 100644
--- a/myst_parser/docutils_renderer.py
+++ b/myst_parser/docutils_renderer.py
@@ -73,6 +73,13 @@ def new_document(self, source_path="notset") -> nodes.document:
         settings = OptionParser(components=(RSTParser,)).get_default_values()
         return new_document(source_path, settings=settings)
 
+    def add_line_and_source_path(self, node, token):
+        try:
+            node.line = token.range[0] + 1  # assumes 0-based range -- TODO confirm
+        except AttributeError:
+            pass  # e.g. the token has no `range`; node.line is then not set here
+        node.source = self.document["source"]  # taken from the current document
+
     def nested_render_text(self, text: str, lineno: int):
         """Render unparsed text."""
         token = myst_block_tokens.Document(
@@ -134,7 +141,7 @@ def render_paragraph(self, token):
             # promote the target to block level
             return self.render_target(token.children[0])
         para = nodes.paragraph("")
-        para.line = token.range[0]
+        self.add_line_and_source_path(para, token)
         with self.current_node_context(para, append=True):
             self.render_children(token)
 
@@ -175,7 +182,7 @@ def render_emphasis(self, token):
 
     def render_quote(self, token):
         quote = nodes.block_quote()
-        quote.line = token.range[0]
+        self.add_line_and_source_path(quote, token)
         with self.current_node_context(quote, append=True):
             self.render_children(token)
 
@@ -200,17 +207,22 @@ def render_math(self, token):
             node = nodes.math(content, content)
         self.current_node.append(node)
 
+    def render_block_code(self, token):
+        # An indented code block should never carry a language. Fall back to
+        # "none", since a literal_block without a language makes sphinx warn.
+        text = token.children[0].content
+        language = token.language or "none"
+        node = nodes.literal_block(text, text, language=language)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
     def render_code_fence(self, token):
         if token.language.startswith("{") and token.language.endswith("}"):
             return self.render_directive(token)
-        self.render_block_code(token, default_language=True)
 
-    def render_block_code(self, token, default_language=False):
-        # indented code blocks will always have no language,
-        # but for code fences, if not set, a default_language will be retrieved
         text = token.children[0].content
         language = token.language
-        if not language and default_language:
+        if not language:
             try:
                 sphinx_env = self.document.settings.env
                 language = sphinx_env.temp_data.get(
@@ -218,9 +230,10 @@ def render_block_code(self, token, default_language=False):
                 )
             except AttributeError:
                 pass
-        if not language and default_language:
+        if not language:
             language = self.config.get("highlight_language", "")
         node = nodes.literal_block(text, text, language=language)
+        self.add_line_and_source_path(node, token)
         self.current_node.append(node)
 
     def render_inline_code(self, token):
@@ -255,10 +268,10 @@ def render_heading(self, token):
                 self.current_node = self.current_node.parent
 
         title_node = nodes.title()
-        title_node.line = token.range[0]
+        self.add_line_and_source_path(title_node, token)
 
         new_section = nodes.section()
-        new_section.line = token.range[0]
+        self.add_line_and_source_path(new_section, token)
         new_section.append(title_node)
 
         self._add_section(new_section, token.level)
@@ -496,7 +509,7 @@ def render_directive(self, token):
                 # the absolute line number of the first line of the directive
                 lineno=token.range[0],
                 # the line offset of the first line of the content
-                content_offset=0,
+                content_offset=0,  # TODO get content offset from `parse_directive_text`
                 # a string containing the entire directive
                 block_text="\n".join(body_lines),
                 state=state,
@@ -760,7 +773,7 @@ class Struct:
 
     def nested_parse(
         self,
-        block: List[str],
+        block: StringList,
         input_offset: int,
         node: nodes.Element,
         match_titles: bool = False,
@@ -770,7 +783,7 @@ def nested_parse(
         current_match_titles = self.state_machine.match_titles
         self.state_machine.match_titles = match_titles
         with self._renderer.current_node_context(node):
-            self._renderer.nested_render_text(block, self._lineno)
+            self._renderer.nested_render_text(block, self._lineno + input_offset)
         self.state_machine.match_titles = current_match_titles
 
     def inline_text(self, text: str, lineno: int):
@@ -1002,7 +1015,7 @@ def run(self):
             literal_block = nodes.literal_block(
                 file_content, source=str(path), classes=self.options.get("class", [])
             )
-            literal_block.line = 1
+            literal_block.line = 1  # TODO don't think this should be 1?
             self.add_name(literal_block)
             if "number-lines" in self.options:
                 try: