executablebooks · chrisjsewell · Mar 3, 2021 · Mar 3, 2021 · Mar 3, 2021 · Mar 3, 2021
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,6 +7,7 @@ exclude: >
       test.*\.md|
       test.*\.txt|
       test.*\.html|
+      test.*\.xml|
       .*commonmark\.json|
       benchmark/.*\.md|
       .*/spec\.md
@@ -31,6 +32,7 @@ repos:
     rev: 3.8.4
     hooks:
     - id: flake8
+      additional_dependencies: [flake8-bugbear==21.3.1]
 
   - repo: https://github.com/psf/black
     rev: 20.8b1

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -6,6 +6,7 @@ python:
       - method: pip
         path: .
         extra_requirements:
+          - linkify
           - rtd
 
 sphinx:

diff --git a/docs/conf.py b/docs/conf.py
@@ -45,7 +45,13 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
-nitpick_ignore = [("py:class", "Match"), ("py:class", "x in the interval [0, 1).")]
+nitpick_ignore = [
+    ("py:class", "Match"),
+    ("py:class", "x in the interval [0, 1)."),
+    ("py:class", "markdown_it.helpers.parse_link_destination._Result"),
+    ("py:class", "markdown_it.helpers.parse_link_title._Result"),
+    ("py:class", "MarkdownIt"),
+]
 
 
 # -- Options for HTML output -------------------------------------------------

diff --git a/docs/using.md b/docs/using.md
@@ -28,6 +28,7 @@ then these are converted to other formats using 'renderers'.
 The simplest way to understand how text will be parsed is using:
 
 ```{code-cell}
+from pprint import pprint
 from markdown_it import MarkdownIt
 ```
 
@@ -48,8 +49,15 @@ for token in md.parse("some *text*"):
 
 The `MarkdownIt` class is instantiated with parsing configuration options,
 dictating the syntax rules and additional options for the parser and renderer.
-You can define this configuration *via* a preset name (`'zero'`, `'commonmark'` or `'default'`),
-or by directly supplying a dictionary.
+You can define this configuration *via* directly supplying a dictionary or a preset name:
+
+- `zero`: This configures the minimum components to parse text (i.e. just paragraphs and text)
+- `commonmark` (default): This configures the parser to strictly comply with the [CommonMark specification](http://spec.commonmark.org/).
+- `js-default`: This is the default in the JavaScript version.
+  Compared to `commonmark`, it disables HTML parsing and enables the table and strikethrough components.
+- `gfm-like`: This configures the parser to approximately comply with the [GitHub Flavored Markdown specification](https://github.github.com/gfm/).
+  Compared to `commonmark`, it enables the table, strikethrough and linkify components.
+  **Important**, to use this configuration you must have `linkify-it-py` installed.
 
 ```{code-cell}
 from markdown_it.presets import zero
@@ -61,18 +69,26 @@ md = MarkdownIt("zero")
 md.options
 ```
 
+You can also override specific options:
+
 ```{code-cell}
-print(md.get_active_rules())
+md = MarkdownIt("zero", {"maxNesting": 99})
+md.options
 ```
 
 ```{code-cell}
-print(md.get_all_rules())
+pprint(md.get_active_rules())
 ```
 
 You can find all the parsing rules in the source code:
 `parser_core.py`, `parser_block.py`,
 `parser_inline.py`.
-Any of the parsing rules can be enabled/disabled, and these methods are chainable:
+
+```{code-cell}
+pprint(md.get_all_rules())
+```
+
+Any of the parsing rules can be enabled/disabled, and these methods are "chainable":
 
 ```{code-cell}
 md.render("- __*emphasise this*__")
@@ -97,6 +113,50 @@ Additionally `renderInline` runs the parser with all block syntax rules disabled
 md.renderInline("__*emphasise this*__")
 ```
 
+### Typographic components
+
+The `smartquotes` and `replacements` components are intended to improve typography:
+
+`smartquotes` will convert basic quote marks to their opening and closing variants:
+
+- 'single quotes' → ‘single quotes’
+- "double quotes" → “double quotes”
+
+`replacements` will replace particular text constructs:
+
+- ``(c)``, ``(C)`` → ©
+- ``(tm)``, ``(TM)`` → ™
+- ``(r)``, ``(R)`` → ®
+- ``(p)``, ``(P)`` → §
+- ``+-`` → ±
+- ``...`` → …
+- ``?....`` → ?..
+- ``!....`` → !..
+- ``????????`` → ???
+- ``!!!!!`` → !!!
+- ``,,,`` → ,
+- ``--`` → &ndash;
+- ``---`` → &mdash;
+
+Both of these components require the `typographer` option to be turned on, as well as the components themselves to be enabled:
+
+```{code-cell}
+md = MarkdownIt("commonmark", {"typographer": True})
+md.enable(["replacements", "smartquotes"])
+md.render("'single quotes' (c)")
+```
+
+### Linkify
+
+The `linkify` component requires that [linkify-it-py](https://github.com/tsutsu3/linkify-it-py) be installed (e.g. *via* `pip install markdown-it-py[linkify]`).
+This allows URI autolinks to be identified, without the need for enclosing in `<>` brackets:
+
+```{code-cell}
+md = MarkdownIt("commonmark", {"linkify": True})
+md.enable(["linkify"])
+md.render("github.com")
+```
+
 ### Plugins load
 
 Plugins load collections of additional syntax rules and render methods into the parser
@@ -130,7 +190,6 @@ md.render(text)
 
 ## The Token Stream
 
-
 +++
 
 Before rendering, the text is parsed to a flat token stream of block level syntax elements, with nesting defined by opening (1) and closing (-1) attributes:
@@ -183,20 +242,42 @@ This dictionary can also be deserialized:
 Token.from_dict(tokens[1].as_dict())
 ```
 
-In some use cases `nest_tokens` may be useful, to collapse the opening/closing tokens into single tokens:
+### Creating a syntax tree
+
+```{versionchanged} 0.7.0
+`nest_tokens` and `NestedTokens` are deprecated and replaced by `SyntaxTreeNode`.
+```
+
+In some use cases it may be useful to convert the token stream into a syntax tree,
+with opening/closing tokens collapsed into a single token that contains children.
 
 ```{code-cell}
-from markdown_it.token import nest_tokens
-nested_tokens = nest_tokens(tokens)
-[t.type for t in nested_tokens]
+from markdown_it.tree import SyntaxTreeNode
+
+md = MarkdownIt("commonmark")
+tokens = md.parse("""
+# Header
+
+Here's some text and an image ![title](image.png)
+
+1. a **list**
+
+> a *quote*
+""")
+
+node = SyntaxTreeNode.from_tokens(tokens)
+print(node.pretty(indent=2, show_text=True))
 ```
 
-This introduces a single additional class `NestedTokens`,
-containing an `opening`, `closing` and `children`, which can be a list of mixed
-`Token` and `NestedTokens`.
+You can then use methods to traverse the tree:
+
+```{code-cell}
+node.children
+```
 
 ```{code-cell}
-nested_tokens[0]
+print(node[0])
+node[0].next_sibling
 ```
 
 ## Renderers

diff --git a/markdown_it/main.py b/markdown_it/main.py
@@ -27,36 +27,50 @@
     linkify_it = None
 
 
-_PRESETS = AttrDict(
-    {
-        "default": presets.default.make(),
-        "zero": presets.zero.make(),
-        "commonmark": presets.commonmark.make(),
-    }
-)
+_PRESETS = {
+    "default": presets.default.make(),
+    "js-default": presets.js_default.make(),
+    "zero": presets.zero.make(),
+    "commonmark": presets.commonmark.make(),
+    "gfm-like": presets.gfm_like.make(),
+}
 
 
 class MarkdownIt:
     def __init__(
-        self, config: Union[str, Mapping] = "commonmark", renderer_cls=RendererHTML
+        self,
+        config: Union[str, Mapping] = "commonmark",
+        options_update: Optional[Mapping] = None,
+        *,
+        renderer_cls=RendererHTML,
     ):
         """Main parser class
 
         :param config: name of configuration to load or a pre-defined dictionary
+        :param options_update: dictionary that will be merged into ``config["options"]``
         :param renderer_cls: the class to load as the renderer:
             ``self.renderer = renderer_cls(self)
         """
+        # add modules
+        self.utils = utils
+        self.helpers: Any = helpers
+
+        # initialise classes
         self.inline = ParserInline()
         self.block = ParserBlock()
         self.core = ParserCore()
         self.renderer = renderer_cls(self)
+        self.linkify = linkify_it.LinkifyIt() if linkify_it else None
 
-        self.utils = utils
-        self.helpers: Any = helpers
+        # set the configuration
+        if options_update and not isinstance(options_update, Mapping):
+            # catch signature change where renderer_cls was not used as a key-word
+            raise TypeError(
+                f"options_update should be a mapping: {options_update}"
+                "\n(Perhaps you intended this to be the renderer_cls?)"
+            )
         self.options = AttrDict()
-        self.configure(config)
-
-        self.linkify = linkify_it.LinkifyIt() if linkify_it else None
+        self.configure(config, options_update=options_update)
 
     def __repr__(self) -> str:
         return f"{self.__class__.__module__}.{self.__class__.__name__}()"
@@ -79,7 +93,9 @@ def set(self, options: AttrDict) -> None:
         """
         self.options = options
 
-    def configure(self, presets: Union[str, Mapping]) -> "MarkdownIt":
+    def configure(
+        self, presets: Union[str, Mapping], options_update: Optional[Mapping] = None
+    ) -> "MarkdownIt":
         """Batch load of all options and component settings.
         This is an internal method, and you probably will not need it.
         But if you will - see available presets and data structure
@@ -89,21 +105,24 @@ def configure(self, presets: Union[str, Mapping]) -> "MarkdownIt":
         That will give better compatibility with next versions.
         """
         if isinstance(presets, str):
-            presetName = presets
-            presets = _PRESETS.get(presetName, None)
-            if not presets:
-                raise KeyError(
-                    'Wrong `markdown-it` preset "' + presetName + '", check name'
-                )
-        if not presets:
-            raise ValueError("Wrong `markdown-it` preset, can't be empty")
-        config = AttrDict(presets)
-
-        if "options" in config:
-            self.set(config.options)
+            if presets not in _PRESETS:
+                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
+            config = _PRESETS[presets]
+        else:
+            config = presets
+
+        if not config:
+            raise ValueError("Wrong `markdown-it` config, can't be empty")
+
+        options = config.get("options", {}) or {}
+        if options_update:
+            options = {**options, **options_update}
+
+        if options:
+            self.set(AttrDict(options))
 
         if "components" in config:
-            for name, component in config.components.items():
+            for name, component in config["components"].items():
                 rules = component.get("rules", None)
                 if rules:
                     self[name].ruler.enableOnly(rules)

diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
@@ -23,6 +23,11 @@
       In markdown_it/rules_block/reference.py,
       record line range in state.env["references"] and add state.env["duplicate_refs"]
       This is to allow renderers to report on issues regarding references
+    - |
+      The `MarkdownIt.__init__` signature is slightly different for updating options,
+      since you must always specify the config first, e.g.
+      use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
+    - The default configuration preset for `MarkdownIt` is "commonmark" not "default"
     - Allow custom renderer to be passed to `MarkdownIt`
     - |
       change render method signatures

diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py
@@ -1 +1,25 @@
 from . import commonmark, default, zero  # noqa: F401
+
+js_default = default
+
+
+class gfm_like:
+    """GitHub Flavoured Markdown (GFM) like.
+
+    This adds the linkify, table and strikethrough components to CommonMark.
+
+    Note, it lacks task-list items and raw HTML filtering,
+    to meet the full GFM specification
+    (see https://github.github.com/gfm/#autolinks-extension-).
+    """
+
+    @staticmethod
+    def make():
+        config = commonmark.make()
+        config["components"]["core"]["rules"].append("linkify")
+        config["components"]["block"]["rules"].append("table")
+        config["components"]["inline"]["rules"].append("strikethrough")
+        config["components"]["inline"]["rules2"].append("strikethrough")
+        config["options"]["linkify"] = True
+        config["options"]["html"] = True
+        return config
diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py
@@ -1,4 +1,11 @@
-"""Commonmark default options."""
+"""Commonmark default options.
+
+This differs from presets.default,
+primarily in that it allows HTML and does not enable components:
+
+- block: table
+- inline: strikethrough
+"""
 
 
 def make():

diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py
@@ -34,7 +34,7 @@ def strong_open(self, tokens, idx, options, env):
             def strong_close(self, tokens, idx, options, env):
                 return '</b>'
 
-        md = MarkdownIt(renderer=CustomRenderer)
+        md = MarkdownIt(renderer_cls=CustomRenderer)
 
         result = md.render(...)
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ python: @@
           - method: pip
             path: .
             extra_requirements:
+              - linkify
               - rtd
     sphinx:
@@ Expand Down @@