From cf3352c8a224af219cb062cfe467d7da9289284a Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Fri, 28 Feb 2020 18:01:24 +1100
Subject: [PATCH] Add performance benchmark (#88)

Add CLI and documentation, benchmarking `myst-parser` against other Python based markdown parsers.
---
 .vscode/settings.json                 |    7 +-
 docs/conf.py                          |    1 +
 docs/using/benchmark.md               | 1000 +++++++++++++++++++++++++
 docs/using/index.md                   |    1 +
 docs/using/use_api.md                 |   18 +
 docstring.fmt.mustache                |   20 +
 myst_parser/__init__.py               |   23 +
 myst_parser/block_tokens.py           |   27 +-
 myst_parser/cli/__init__.py           |    0
 myst_parser/cli/benchmark.py          |  113 +++
 myst_parser/docutils_renderer.py      |   15 +-
 setup.py                              |    4 +-
 tests/test_cli.py                     |   11 +
 tests/test_renderers/test_docutils.py |   13 +-
 14 files changed, 1236 insertions(+), 17 deletions(-)
 create mode 100644 docs/using/benchmark.md
 create mode 100644 docstring.fmt.mustache
 create mode 100644 myst_parser/cli/__init__.py
 create mode 100644 myst_parser/cli/benchmark.py
 create mode 100644 tests/test_cli.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 2ee89ab6..db5a5166 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -8,9 +8,12 @@
         "**/__pycache__": true,
         "**/.pytest_cache": true
     },
-    "editor.rulers": [88],
+    "editor.rulers": [
+        88
+    ],
     "python.formatting.provider": "black",
     "python.linting.pylintEnabled": false,
     "python.linting.flake8Enabled": true,
     "python.linting.enabled": true,
-}
+    "autoDocstring.customTemplatePath": "docstring.fmt.mustache"
+}
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index f58a5ea5..3fdc5f7f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -117,6 +117,7 @@ def run_apidoc(app):
     ("py:class", "mistletoe.block_token.Table"),
     ("py:class", "mistletoe.block_token.Footnote"),
     ("py:class", "mistletoe.block_token.Paragraph"),
+    ("py:class", "mistletoe.block_token.ThematicBreak"),
     ("py:class", "mistletoe.base_renderer.BaseRenderer"),
     ("py:class", "mistletoe.html_renderer.HTMLRenderer"),
     ("py:class", "mistletoe.span_token.SpanToken"),
diff --git a/docs/using/benchmark.md b/docs/using/benchmark.md
new file mode 100644
index 00000000..9cfb8f7c
--- /dev/null
+++ b/docs/using/benchmark.md
@@ -0,0 +1,1000 @@
+Parsing Performance Benchmark
+=============================
+
+The following document describes Markdown syntax, [as written by John Gruber][src].
+It is used to benchmark the parsing speed of the MyST-Parser against
+some existing Markdown parsers written in Python:
+
+  [src]: https://daringfireball.net/projects/markdown/syntax
+
+    $ myst-benchmark docs/using/benchmark.md
+    Test document: benchmark.md
+    Test iterations: 1000
+    Running tests ...
+    =================
+    python-markdown:extra (3.2): 53.78 s
+    mistune (0.8.4): 15.22 s
+    commonmark.py (0.9.1): 61.92 s
+    mistletoe (0.8.0): 43.51 s
+    myst_parser:html (0.2.0): 52.47 s
+    myst_parser:docutils (0.2.0): 65.27 s
+    myst_parser:sphinx (0.2.0): 73.54 s
+
+As already noted by [mistletoe](https://github.com/miyuchina/mistletoe#performance)
+(which this package is built on), although Mistune is the fastest,
+this is because it does not strictly follow the CommonMark spec,
+which outlines a highly context-sensitive grammar for Markdown.
+The simpler approach taken by Mistune means that it cannot handle more
+complex parsing cases, such as precedence of different types of tokens, escaping rules, etc.
+
+The MyST parser is slightly slower than the base mistletoe parser, due to the additional
+syntax which it parses. Then the conversion to docutils AST takes some more time,
+but is still comparably performant to the core CommonMark.py parser. The sphinx
+parse takes some extra time, due to loading the full sphinx environment,
+including its roles and directives.
+
+## Contents
+
+*   [Overview](#overview)
+    *   [Philosophy](#philosophy)
+    *   [Inline HTML](#html)
+    *   [Automatic Escaping for Special Characters](#autoescape)
+*   [Block Elements](#block)
+    *   [Paragraphs and Line Breaks](#p)
+    *   [Headers](#header)
+    *   [Blockquotes](#blockquote)
+    *   [Lists](#list)
+    *   [Code Blocks](#precode)
+    *   [Horizontal Rules](#hr)
+*   [Span Elements](#span)
+    *   [Links](#link)
+    *   [Emphasis](#em)
+    *   [Code](#code)
+    *   [Images](#img)
+*   [Miscellaneous](#misc)
+    *   [Backslash Escapes](#backslash)
+    *   [Automatic Links](#autolink)
+
+* * *
+
+<h2 id="overview">Overview</h2>
+
+<h3 id="philosophy">Philosophy</h3>
+
+Markdown is intended to be as easy-to-read and easy-to-write as is feasible.
+
+Readability, however, is emphasized above all else. A Markdown-formatted
+document should be publishable as-is, as plain text, without looking
+like it's been marked up with tags or formatting instructions. While
+Markdown's syntax has been influenced by several existing text-to-HTML
+filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4],
+[Grutatext] [5], and [EtText] [6] -- the single biggest source of
+inspiration for Markdown's syntax is the format of plain text email.
+
+  [1]: http://docutils.sourceforge.net/mirror/setext.html
+  [2]: http://www.aaronsw.com/2002/atx/
+  [3]: http://textism.com/tools/textile/
+  [4]: http://docutils.sourceforge.net/rst.html
+  [5]: http://www.triptico.com/software/grutatxt.html
+  [6]: http://ettext.taint.org/doc/
+
+To this end, Markdown's syntax is comprised entirely of punctuation
+characters, which punctuation characters have been carefully chosen so
+as to look like what they mean. E.g., asterisks around a word actually
+look like \*emphasis\*. Markdown lists look like, well, lists. Even
+blockquotes look like quoted passages of text, assuming you've ever
+used email.
+
+<h3 id="html">Inline HTML</h3>
+
+Markdown's syntax is intended for one purpose: to be used as a
+format for *writing* for the web.
+
+Markdown is not a replacement for HTML, or even close to it. Its
+syntax is very small, corresponding only to a very small subset of
+HTML tags. The idea is *not* to create a syntax that makes it easier
+to insert HTML tags. In my opinion, HTML tags are already easy to
+insert. The idea for Markdown is to make it easy to read, write, and
+edit prose. HTML is a *publishing* format; Markdown is a *writing*
+format. Thus, Markdown's formatting syntax only addresses issues that
+can be conveyed in plain text.
+
+For any markup that is not covered by Markdown's syntax, you simply
+use HTML itself. There's no need to preface it or delimit it to
+indicate that you're switching from Markdown to HTML; you just use
+the tags.
+
+The only restrictions are that block-level HTML elements -- e.g. `<div>`,
+`<table>`, `<pre>`, `<p>`, etc. -- must be separated from surrounding
+content by blank lines, and the start and end tags of the block should
+not be indented with tabs or spaces. Markdown is smart enough not
+to add extra (unwanted) `<p>` tags around HTML block-level tags.
+
+For example, to add an HTML table to a Markdown article:
+
+    This is a regular paragraph.
+
+    <table>
+        <tr>
+            <td>Foo</td>
+        </tr>
+    </table>
+
+    This is another regular paragraph.
+
+Note that Markdown formatting syntax is not processed within block-level
+HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an
+HTML block.
+
+Span-level HTML tags -- e.g. `<span>`, `<cite>`, or `<del>` -- can be
+used anywhere in a Markdown paragraph, list item, or header. If you
+want, you can even use HTML tags instead of Markdown formatting; e.g. if
+you'd prefer to use HTML `<a>` or `<img>` tags instead of Markdown's
+link or image syntax, go right ahead.
+
+Unlike block-level HTML tags, Markdown syntax *is* processed within
+span-level tags.
+
+
+<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
+
+In HTML, there are two characters that demand special treatment: `<`
+and `&`. Left angle brackets are used to start tags; ampersands are
+used to denote HTML entities. If you want to use them as literal
+characters, you must escape them as entities, e.g. `&lt;`, and
+`&amp;`.
+
+Ampersands in particular are bedeviling for web writers. If you want to
+write about 'AT&T', you need to write '`AT&amp;T`'. You even need to
+escape ampersands within URLs. Thus, if you want to link to:
+
+    http://images.google.com/images?num=30&q=larry+bird
+
+you need to encode the URL as:
+
+    http://images.google.com/images?num=30&amp;q=larry+bird
+
+in your anchor tag `href` attribute. Needless to say, this is easy to
+forget, and is probably the single most common source of HTML validation
+errors in otherwise well-marked-up web sites.
+
+Markdown allows you to use these characters naturally, taking care of
+all the necessary escaping for you. If you use an ampersand as part of
+an HTML entity, it remains unchanged; otherwise it will be translated
+into `&amp;`.
+
+So, if you want to include a copyright symbol in your article, you can write:
+
+    &copy;
+
+and Markdown will leave it alone. But if you write:
+
+    AT&T
+
+Markdown will translate it to:
+
+    AT&amp;T
+
+Similarly, because Markdown supports [inline HTML](#html), if you use
+angle brackets as delimiters for HTML tags, Markdown will treat them as
+such. But if you write:
+
+    4 < 5
+
+Markdown will translate it to:
+
+    4 &lt; 5
+
+However, inside Markdown code spans and blocks, angle brackets and
+ampersands are *always* encoded automatically. This makes it easy to use
+Markdown to write about HTML code. (As opposed to raw HTML, which is a
+terrible format for writing about HTML syntax, because every single `<`
+and `&` in your example code needs to be escaped.)
+
+
+* * *
+
+
+<h2 id="block">Block Elements</h2>
+
+
+<h3 id="p">Paragraphs and Line Breaks</h3>
+
+A paragraph is simply one or more consecutive lines of text, separated
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing but spaces or tabs is considered
+blank.) Normal paragraphs should not be indented with spaces or tabs.
+
+The implication of the "one or more consecutive lines of text" rule is
+that Markdown supports "hard-wrapped" text paragraphs. This differs
+significantly from most other text-to-HTML formatters (including Movable
+Type's "Convert Line Breaks" option) which translate every line break
+character in a paragraph into a `<br />` tag.
+
+When you *do* want to insert a `<br />` break tag using Markdown, you
+end a line with two or more spaces, then type return.
+
+Yes, this takes a tad more effort to create a `<br />`, but a simplistic
+"every line break is a `<br />`" rule wouldn't work for Markdown.
+Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l]
+work best -- and look better -- when you format them with hard breaks.
+
+  [bq]: #blockquote
+  [l]:  #list
+
+
+
+<h3 id="header">Headers</h3>
+
+Markdown supports two styles of headers, [Setext] [1] and [atx] [2].
+
+Setext-style headers are "underlined" using equal signs (for first-level
+headers) and dashes (for second-level headers). For example:
+
+    This is an H1
+    =============
+
+    This is an H2
+    -------------
+
+This is an H1
+=============
+
+This is an H2
+-------------
+
+Any number of underlining `=`'s or `-`'s will work.
+
+Atx-style headers use 1-6 hash characters at the start of the line,
+corresponding to header levels 1-6. For example:
+
+    # This is an H1
+
+    ## This is an H2
+
+    ###### This is an H6
+
+Optionally, you may "close" atx-style headers. This is purely
+cosmetic -- you can use this if you think it looks better. The
+closing hashes don't even need to match the number of hashes
+used to open the header. (The number of opening hashes
+determines the header level.) :
+
+    # This is an H1 #
+
+    ## This is an H2 ##
+
+    ### This is an H3 ######
+
+
+<h3 id="blockquote">Blockquotes</h3>
+
+Markdown uses email-style `>` characters for blockquoting. If you're
+familiar with quoting passages of text in an email message, then you
+know how to create a blockquote in Markdown. It looks best if you hard
+wrap the text and put a `>` before every line:
+
+    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+    > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+    > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+    >
+    > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+    > id sem consectetuer libero luctus adipiscing.
+
+* * *
+
+> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+>
+> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+> id sem consectetuer libero luctus adipiscing.
+
+* * *
+
+Markdown allows you to be lazy and only put the `>` before the first
+line of a hard-wrapped paragraph:
+
+    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+    consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+    Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+
+    > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+    id sem consectetuer libero luctus adipiscing.
+
+* * *
+
+> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+
+> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+id sem consectetuer libero luctus adipiscing.
+
+* * *
+
+Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
+adding additional levels of `>`:
+
+    > This is the first level of quoting.
+    >
+    > > This is nested blockquote.
+    >
+    > Back to the first level.
+
+* * *
+
+> This is the first level of quoting.
+>
+> > This is nested blockquote.
+>
+> Back to the first level.
+
+* * *
+
+Blockquotes can contain other Markdown elements, including headers, lists,
+and code blocks:
+
+	> ## This is a header.
+	>
+	> 1.   This is the first list item.
+	> 2.   This is the second list item.
+	>
+	> Here's some example code:
+	>
+	>     return shell_exec("echo $input | $markdown_script");
+
+Any decent text editor should make email-style quoting easy. For
+example, with BBEdit, you can make a selection and choose Increase
+Quote Level from the Text menu.
+
+
+<h3 id="list">Lists</h3>
+
+Markdown supports ordered (numbered) and unordered (bulleted) lists.
+
+Unordered lists use asterisks, pluses, and hyphens -- interchangeably
+-- as list markers:
+
+    *   Red
+    *   Green
+    *   Blue
+
+* * *
+
+*   Red
+*   Green
+*   Blue
+
+* * *
+
+is equivalent to:
+
+    +   Red
+    +   Green
+    +   Blue
+
+* * *
+
++   Red
++   Green
++   Blue
+
+* * *
+
+and:
+
+    -   Red
+    -   Green
+    -   Blue
+
+* * *
+
+-   Red
+-   Green
+-   Blue
+
+* * *
+
+Ordered lists use numbers followed by periods:
+
+    1.  Bird
+    2.  McHale
+    3.  Parish
+
+* * *
+
+1.  Bird
+2.  McHale
+3.  Parish
+
+* * *
+
+It's important to note that the actual numbers you use to mark the
+list have no effect on the HTML output Markdown produces. The HTML
+Markdown produces from the above list is:
+
+    <ol>
+    <li>Bird</li>
+    <li>McHale</li>
+    <li>Parish</li>
+    </ol>
+
+If you instead wrote the list in Markdown like this:
+
+    1.  Bird
+    1.  McHale
+    1.  Parish
+
+or even:
+
+    3. Bird
+    1. McHale
+    8. Parish
+
+* * *
+
+1. Bird
+2. McHale
+3. Parish
+
+* * *
+
+you'd get the exact same HTML output. The point is, if you want to,
+you can use ordinal numbers in your ordered Markdown lists, so that
+the numbers in your source match the numbers in your published HTML.
+But if you want to be lazy, you don't have to.
+
+If you do use lazy list numbering, however, you should still start the
+list with the number 1. At some point in the future, Markdown may support
+starting ordered lists at an arbitrary number.
+
+List markers typically start at the left margin, but may be indented by
+up to three spaces. List markers must be followed by one or more spaces
+or a tab.
+
+To make lists look nice, you can wrap items with hanging indents:
+
+    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+        Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+        viverra nec, fringilla in, laoreet vitae, risus.
+    *   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+        Suspendisse id sem consectetuer libero luctus adipiscing.
+
+But if you want to be lazy, you don't have to:
+
+    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+    viverra nec, fringilla in, laoreet vitae, risus.
+    *   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+    Suspendisse id sem consectetuer libero luctus adipiscing.
+
+If list items are separated by blank lines, Markdown will wrap the
+items in `<p>` tags in the HTML output. For example, this input:
+
+    *   Bird
+    *   Magic
+
+will turn into:
+
+    <ul>
+    <li>Bird</li>
+    <li>Magic</li>
+    </ul>
+
+But this:
+
+    *   Bird
+
+    *   Magic
+
+will turn into:
+
+    <ul>
+    <li><p>Bird</p></li>
+    <li><p>Magic</p></li>
+    </ul>
+
+List items may consist of multiple paragraphs. Each subsequent
+paragraph in a list item must be indented by either 4 spaces
+or one tab:
+
+    1.  This is a list item with two paragraphs. Lorem ipsum dolor
+        sit amet, consectetuer adipiscing elit. Aliquam hendrerit
+        mi posuere lectus.
+
+        Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+        vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
+        sit amet velit.
+
+    2.  Suspendisse id sem consectetuer libero luctus adipiscing.
+
+It looks nice if you indent every line of the subsequent
+paragraphs, but here again, Markdown will allow you to be
+lazy:
+
+    *   This is a list item with two paragraphs.
+
+        This is the second paragraph in the list item. You're
+    only required to indent the first line. Lorem ipsum dolor
+    sit amet, consectetuer adipiscing elit.
+
+    *   Another item in the same list.
+
+To put a blockquote within a list item, the blockquote's `>`
+delimiters need to be indented:
+
+    *   A list item with a blockquote:
+
+        > This is a blockquote
+        > inside a list item.
+
+* * *
+
+*   A list item with a blockquote:
+
+    > This is a blockquote
+    > inside a list item.
+
+* * *
+
+To put a code block within a list item, the code block needs
+to be indented *twice* -- 8 spaces or two tabs:
+
+    *   A list item with a code block:
+
+            <code goes here>
+
+
+It's worth noting that it's possible to trigger an ordered list by
+accident, by writing something like this:
+
+    1986. What a great season.
+
+In other words, a *number-period-space* sequence at the beginning of a
+line. To avoid this, you can backslash-escape the period:
+
+    1986\. What a great season.
+
+
+
+<h3 id="precode">Code Blocks</h3>
+
+Pre-formatted code blocks are used for writing about programming or
+markup source code. Rather than forming normal paragraphs, the lines
+of a code block are interpreted literally. Markdown wraps a code block
+in both `<pre>` and `<code>` tags.
+
+To produce a code block in Markdown, simply indent every line of the
+block by at least 4 spaces or 1 tab. For example, given this input:
+
+    This is a normal paragraph:
+
+        This is a code block.
+
+Markdown will generate:
+
+    <p>This is a normal paragraph:</p>
+
+    <pre><code>This is a code block.
+    </code></pre>
+
+One level of indentation -- 4 spaces or 1 tab -- is removed from each
+line of the code block. For example, this:
+
+    Here is an example of AppleScript:
+
+        tell application "Foo"
+            beep
+        end tell
+
+will turn into:
+
+    <p>Here is an example of AppleScript:</p>
+
+    <pre><code>tell application "Foo"
+        beep
+    end tell
+    </code></pre>
+
+A code block continues until it reaches a line that is not indented
+(or the end of the article).
+
+Within a code block, ampersands (`&`) and angle brackets (`<` and `>`)
+are automatically converted into HTML entities. This makes it very
+easy to include example HTML source code using Markdown -- just paste
+it and indent it, and Markdown will handle the hassle of encoding the
+ampersands and angle brackets. For example, this:
+
+        <div class="footer">
+            &copy; 2004 Foo Corporation
+        </div>
+
+will turn into:
+
+    <pre><code>&lt;div class="footer"&gt;
+        &amp;copy; 2004 Foo Corporation
+    &lt;/div&gt;
+    </code></pre>
+
+Regular Markdown syntax is not processed within code blocks. E.g.,
+asterisks are just literal asterisks within a code block. This means
+it's also easy to use Markdown to write about Markdown's own syntax.
+
+
+
+<h3 id="hr">Horizontal Rules</h3>
+
+You can produce a horizontal rule tag (`<hr />`) by placing three or
+more hyphens, asterisks, or underscores on a line by themselves. If you
+wish, you may use spaces between the hyphens or asterisks. Each of the
+following lines will produce a horizontal rule:
+
+    * * *
+
+    ***
+
+    *****
+
+    - - -
+
+    ---------------------------------------
+
+
+* * *
+
+<h2 id="span">Span Elements</h2>
+
+<h3 id="link">Links</h3>
+
+Markdown supports two styles of links: *inline* and *reference*.
+
+In both styles, the link text is delimited by [square brackets].
+
+To create an inline link, use a set of regular parentheses immediately
+after the link text's closing square bracket. Inside the parentheses,
+put the URL where you want the link to point, along with an *optional*
+title for the link, surrounded in quotes. For example:
+
+    This is [an example](http://example.com/ "Title") inline link.
+
+    [This link](http://example.net/) has no title attribute.
+
+Will produce:
+
+    <p>This is <a href="http://example.com/" title="Title">
+    an example</a> inline link.</p>
+
+    <p><a href="http://example.net/">This link</a> has no
+    title attribute.</p>
+
+If you're referring to a local resource on the same server, you can
+use relative paths:
+
+    See my [About](/about/) page for details.
+
+Reference-style links use a second set of square brackets, inside
+which you place a label of your choosing to identify the link:
+
+    This is [an example][id] reference-style link.
+
+You can optionally use a space to separate the sets of brackets:
+
+    This is [an example] [id] reference-style link.
+
+Then, anywhere in the document, you define your link label like this,
+on a line by itself:
+
+    [id]: http://example.com/  "Optional Title Here"
+
+That is:
+
+*   Square brackets containing the link identifier (optionally
+    indented from the left margin using up to three spaces);
+*   followed by a colon;
+*   followed by one or more spaces (or tabs);
+*   followed by the URL for the link;
+*   optionally followed by a title attribute for the link, enclosed
+    in double or single quotes, or enclosed in parentheses.
+
+The following three link definitions are equivalent:
+
+	[foo]: http://example.com/  "Optional Title Here"
+	[foo]: http://example.com/  'Optional Title Here'
+	[foo]: http://example.com/  (Optional Title Here)
+
+**Note:** There is a known bug in Markdown.pl 1.0.1 which prevents
+single quotes from being used to delimit link titles.
+
+The link URL may, optionally, be surrounded by angle brackets:
+
+    [id]: <http://example.com/>  "Optional Title Here"
+
+You can put the title attribute on the next line and use extra spaces
+or tabs for padding, which tends to look better with longer URLs:
+
+    [id]: http://example.com/longish/path/to/resource/here
+        "Optional Title Here"
+
+Link definitions are only used for creating links during Markdown
+processing, and are stripped from your document in the HTML output.
+
+Link definition names may consist of letters, numbers, spaces, and
+punctuation -- but they are *not* case sensitive. E.g. these two
+links:
+
+	[link text][a]
+	[link text][A]
+
+are equivalent.
+
+The *implicit link name* shortcut allows you to omit the name of the
+link, in which case the link text itself is used as the name.
+Just use an empty set of square brackets -- e.g., to link the word
+"Google" to the google.com web site, you could simply write:
+
+	[Google][]
+
+And then define the link:
+
+	[Google]: http://google.com/
+
+Because link names may contain spaces, this shortcut even works for
+multiple words in the link text:
+
+	Visit [Daring Fireball][] for more information.
+
+And then define the link:
+
+	[Daring Fireball]: http://daringfireball.net/
+
+Link definitions can be placed anywhere in your Markdown document. I
+tend to put them immediately after each paragraph in which they're
+used, but if you want, you can put them all at the end of your
+document, sort of like footnotes.
+
+Here's an example of reference links in action:
+
+    I get 10 times more traffic from [Google] [1] than from
+    [Yahoo] [2] or [MSN] [3].
+
+      [1]: http://google.com/        "Google"
+      [2]: http://search.yahoo.com/  "Yahoo Search"
+      [3]: http://search.msn.com/    "MSN Search"
+
+Using the implicit link name shortcut, you could instead write:
+
+    I get 10 times more traffic from [Google][] than from
+    [Yahoo][] or [MSN][].
+
+      [google]: http://google.com/        "Google"
+      [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
+      [msn]:    http://search.msn.com/    "MSN Search"
+
+Both of the above examples will produce the following HTML output:
+
+    <p>I get 10 times more traffic from <a href="http://google.com/"
+    title="Google">Google</a> than from
+    <a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
+    or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
+
+For comparison, here is the same paragraph written using
+Markdown's inline link style:
+
+    I get 10 times more traffic from [Google](http://google.com/ "Google")
+    than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
+    [MSN](http://search.msn.com/ "MSN Search").
+
+The point of reference-style links is not that they're easier to
+write. The point is that with reference-style links, your document
+source is vastly more readable. Compare the above examples: using
+reference-style links, the paragraph itself is only 81 characters
+long; with inline-style links, it's 176 characters; and as raw HTML,
+it's 234 characters. In the raw HTML, there's more markup than there
+is text.
+
+With Markdown's reference-style links, a source document much more
+closely resembles the final output, as rendered in a browser. By
+allowing you to move the markup-related metadata out of the paragraph,
+you can add links without interrupting the narrative flow of your
+prose.
+
+
+<h3 id="em">Emphasis</h3>
+
+Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
+emphasis. Text wrapped with one `*` or `_` will be wrapped with an
+HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML
+`<strong>` tag. E.g., this input:
+
+    *single asterisks*
+
+    _single underscores_
+
+    **double asterisks**
+
+    __double underscores__
+
+will produce:
+
+    <em>single asterisks</em>
+
+    <em>single underscores</em>
+
+    <strong>double asterisks</strong>
+
+    <strong>double underscores</strong>
+
+You can use whichever style you prefer; the lone restriction is that
+the same character must be used to open and close an emphasis span.
+
+Emphasis can be used in the middle of a word:
+
+    un*frigging*believable
+
+But if you surround an `*` or `_` with spaces, it'll be treated as a
+literal asterisk or underscore.
+
+To produce a literal asterisk or underscore at a position where it
+would otherwise be used as an emphasis delimiter, you can backslash
+escape it:
+
+    \*this text is surrounded by literal asterisks\*
+
+
+
+<h3 id="code">Code</h3>
+
+To indicate a span of code, wrap it with backtick quotes (`` ` ``).
+Unlike a pre-formatted code block, a code span indicates code within a
+normal paragraph. For example:
+
+    Use the `printf()` function.
+
+will produce:
+
+    <p>Use the <code>printf()</code> function.</p>
+
+To include a literal backtick character within a code span, you can use
+multiple backticks as the opening and closing delimiters:
+
+    ``There is a literal backtick (`) here.``
+
+which will produce this:
+
+    <p><code>There is a literal backtick (`) here.</code></p>
+
+The backtick delimiters surrounding a code span may include spaces --
+one after the opening, one before the closing. This allows you to place
+literal backtick characters at the beginning or end of a code span:
+
+	A single backtick in a code span: `` ` ``
+
+	A backtick-delimited string in a code span: `` `foo` ``
+
+will produce:
+
+	<p>A single backtick in a code span: <code>`</code></p>
+
+	<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
+
+With a code span, ampersands and angle brackets are encoded as HTML
+entities automatically, which makes it easy to include example HTML
+tags. Markdown will turn this:
+
+    Please don't use any `<blink>` tags.
+
+into:
+
+    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
+
+You can write this:
+
+    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
+
+to produce:
+
+    <p><code>&amp;#8212;</code> is the decimal-encoded
+    equivalent of <code>&amp;mdash;</code>.</p>
+
+
+
+<h3 id="img">Images</h3>
+
+Admittedly, it's fairly difficult to devise a "natural" syntax for
+placing images into a plain text document format.
+
+Markdown uses an image syntax that is intended to resemble the syntax
+for links, allowing for two styles: *inline* and *reference*.
+
+Inline image syntax looks like this:
+
+    ![Alt text](/path/to/img.jpg)
+
+    ![Alt text](/path/to/img.jpg "Optional title")
+
+That is:
+
+*   An exclamation mark: `!`;
+*   followed by a set of square brackets, containing the `alt`
+    attribute text for the image;
+*   followed by a set of parentheses, containing the URL or path to
+    the image, and an optional `title` attribute enclosed in double
+    or single quotes.
+
+Reference-style image syntax looks like this:
+
+    ![Alt text][id]
+
+Where "id" is the name of a defined image reference. Image references
+are defined using syntax identical to link references:
+
+    [id]: url/to/image  "Optional title attribute"
+
+As of this writing, Markdown has no syntax for specifying the
+dimensions of an image; if this is important to you, you can simply
+use regular HTML `<img>` tags.
+
+
+* * *
+
+
+<h2 id="misc">Miscellaneous</h2>
+
+<h3 id="autolink">Automatic Links</h3>
+
+Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:
+
+    <http://example.com/>
+
+Markdown will turn this into:
+
+    <a href="http://example.com/">http://example.com/</a>
+
+Automatic links for email addresses work similarly, except that
+Markdown will also perform a bit of randomized decimal and hex
+entity-encoding to help obscure your address from address-harvesting
+spambots. For example, Markdown will turn this:
+
+    <address@example.com>
+
+into something like this:
+
+    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
+    &#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
+    &#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
+    &#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
+
+which will render in a browser as a clickable link to "address@example.com".
+
+(This sort of entity-encoding trick will indeed fool many, if not
+most, address-harvesting bots, but it definitely won't fool all of
+them. It's better than nothing, but an address published in this way
+will probably eventually start receiving spam.)
+
+
+
+<h3 id="backslash">Backslash Escapes</h3>
+
+Markdown allows you to use backslash escapes to generate literal
+characters which would otherwise have special meaning in Markdown's
+formatting syntax. For example, if you wanted to surround a word
+with literal asterisks (instead of an HTML `<em>` tag), you can use
+backslashes before the asterisks, like this:
+
+    \*literal asterisks\*
+
+Markdown provides backslash escapes for the following characters:
+
+    \   backslash
+    `   backtick
+    *   asterisk
+    _   underscore
+    {}  curly braces
+    []  square brackets
+    ()  parentheses
+    #   hash mark
+	+	plus sign
+	-	minus sign (hyphen)
+    .   dot
+    !   exclamation mark
diff --git a/docs/using/index.md b/docs/using/index.md
index e393b02b..d8ce3964 100644
--- a/docs/using/index.md
+++ b/docs/using/index.md
@@ -7,5 +7,6 @@ MyST documents.
 install.md
 sphinx.md
 syntax.md
+benchmark.md
 use_api.md
 ```
diff --git a/docs/using/use_api.md b/docs/using/use_api.md
index c05488b3..1c87cb87 100644
--- a/docs/using/use_api.md
+++ b/docs/using/use_api.md
@@ -6,6 +6,24 @@ MyST-Parser may be used as an API *via* the `myst_parser` package.
 The raw text is first parsed to syntax 'tokens',
 then these are converted to other formats using 'renderers'.
 
+The simplest way to parse text is using:
+
+```python
+from myst_parser import parse_text
+parse_text("some *text*", "html")
+```
+
+```html
+'<p>some <em>text</em></p>\n'
+```
+
+The output type can be one of:
+
+- `dict` (a.k.a. AST)
+- `html`
+- `docutils`
+- `sphinx`
+
 ## Convert Text to Tokens
 
 To convert some text to tokens:
diff --git a/docstring.fmt.mustache b/docstring.fmt.mustache
new file mode 100644
index 00000000..717a4572
--- /dev/null
+++ b/docstring.fmt.mustache
@@ -0,0 +1,20 @@
+{{! Sphinx Docstring Template }}
+{{summaryPlaceholder}}
+
+{{extendedSummaryPlaceholder}}
+
+{{#args}}
+:param {{var}}: {{descriptionPlaceholder}}
+{{/args}}
+{{#kwargs}}
+:param {{var}}: {{descriptionPlaceholder}}
+{{/kwargs}}
+{{#exceptions}}
+:raises {{type}}: {{descriptionPlaceholder}}
+{{/exceptions}}
+{{#returns}}
+:return: {{descriptionPlaceholder}}
+{{/returns}}
+{{#yields}}
+:yield: {{descriptionPlaceholder}}
+{{/yields}}
diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py
index e26a853d..d5c142b6 100644
--- a/myst_parser/__init__.py
+++ b/myst_parser/__init__.py
@@ -17,6 +17,29 @@ def render_tokens(root_token, renderer, **kwargs):
         return renderer.render(root_token)
 
 
+def parse_text(text: str, output_type: str, **kwargs):
+    """Convert MyST text to another format.
+
+    :param text: the text to convert
+    :param output_type: one of 'dict', 'html', 'docutils', 'sphinx'
+    :param kwargs: passed to the renderer initialisation
+    """
+    if output_type == "dict":
+        from myst_parser.ast_renderer import AstRenderer as renderer_cls
+    elif output_type == "html":
+        from myst_parser.html_renderer import HTMLRenderer as renderer_cls
+    elif output_type == "docutils":
+        from myst_parser.docutils_renderer import DocutilsRenderer as renderer_cls
+    elif output_type == "sphinx":
+        from myst_parser.docutils_renderer import SphinxRenderer as renderer_cls
+    else:
+        raise ValueError("output_type not recognised: {}".format(output_type))
+    from myst_parser.block_tokens import Document
+
+    with renderer_cls(**kwargs) as renderer:
+        return renderer.render(Document(text))
+
+
 def setup(app):
     """Initialize Sphinx extension."""
     from myst_parser.sphinx_parser import MystParser
diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py
index c417b05b..357b3c36 100644
--- a/myst_parser/block_tokens.py
+++ b/myst_parser/block_tokens.py
@@ -3,13 +3,7 @@
 from mistletoe import block_token, span_token
 import mistletoe.block_tokenizer as tokenizer
 
-from mistletoe.block_token import (  # noqa: F401
-    tokenize,
-    HTMLBlock,
-    ThematicBreak,
-    Footnote,
-    TableRow,
-)
+from mistletoe.block_token import tokenize, HTMLBlock, Footnote, TableRow  # noqa: F401
 
 """
 Tokens to be included in the parsing process, in the order specified.
@@ -147,6 +141,25 @@ def __repr__(self):
         return "MyST.{}(range={})".format(self.__class__.__name__, self.range)
 
 
+class ThematicBreak(block_token.ThematicBreak):
+    """
+    Thematic break token (a.k.a. horizontal rule).
+    """
+
+    def __init__(self, result):
+        line, lineno = result
+        self.raw = line.splitlines()[0]
+        self.range = (lineno, lineno)
+
+    @classmethod
+    def read(cls, lines):
+        line = next(lines)
+        return line, lines.lineno
+
+    def __repr__(self):
+        return "MyST.{}(range={})".format(self.__class__.__name__, self.range)
+
+
 class BlockBreak(block_token.BlockToken):
     """Block break token ``+++``.
 
diff --git a/myst_parser/cli/__init__.py b/myst_parser/cli/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/myst_parser/cli/benchmark.py b/myst_parser/cli/benchmark.py
new file mode 100644
index 00000000..9603166b
--- /dev/null
+++ b/myst_parser/cli/benchmark.py
@@ -0,0 +1,113 @@
+import argparse
+from importlib import import_module
+import os
+import re
+from time import perf_counter
+
+ALL_PACKAGES = (
+    "python-markdown:extra",
+    "mistune",
+    "commonmark.py",
+    "mistletoe",
+    "myst_parser:html",
+    "myst_parser:docutils",
+    "myst_parser:sphinx",
+)
+
+
+def benchmark(package_name, version=None):
+    def decorator(func):
+        def inner(text, num_parses):
+            try:
+                package = import_module(package_name)
+                print("(" + (version or package.__version__) + ")", end=": ")
+            except ImportError:
+                return "not available."
+            start = perf_counter()
+            for i in range(num_parses):
+                func(package, text)
+            end = perf_counter()
+
+            return end - start
+
+        return inner
+
+    return decorator
+
+
+@benchmark("markdown")
+def run_python_markdown_extra(package, text):
+    return package.markdown(text, extensions=["extra"])
+
+
+@benchmark("mistune")
+def run_mistune(package, text):
+    return package.markdown(text)
+
+
+@benchmark("commonmark", "0.9.1")
+def run_commonmark_py(package, text):
+    return package.commonmark(text)
+
+
+@benchmark("mistletoe")
+def run_mistletoe(package, text):
+    return package.markdown(text)
+
+
+@benchmark("myst_parser")
+def run_myst_parser_html(package, text):
+    package.parse_text(text, "html")
+
+
+@benchmark("myst_parser")
+def run_myst_parser_docutils(package, text):
+    package.parse_text(text, "docutils", config={"ignore_missing_refs": True})
+
+
+@benchmark("myst_parser")
+def run_myst_parser_sphinx(package, text):
+    package.parse_text(text, "sphinx", load_sphinx_env=True)
+
+
+def run_all(package_names, text, num_parses):
+    prompt = "Running {} test(s) ...".format(len(package_names))
+    print(prompt)
+    print("=" * len(prompt))
+    for package_name in package_names:
+        print(package_name, end=" ")
+        func_name = re.sub(r"[\.\-\:]", "_", package_name.lower())
+        print(
+            "{:.2f} s".format(globals()["run_{}".format(func_name)](text, num_parses))
+        )
+    return True
+
+
+def main(args=None):
+    parser = argparse.ArgumentParser(description="Run benchmark test.")
+    parser.add_argument("path", type=str, help="the path to the file to parse")
+    parser.add_argument(
+        "-n",
+        "--num-parses",
+        metavar="NPARSES",
+        default=1000,
+        type=int,
+        help="The number of parse iterations (default: 1000)",
+    )
+    parser.add_argument(
+        "-p",
+        "--package",
+        action="append",
+        default=[],
+        help="The package(s) to run (use -p multiple times).",
+        choices=ALL_PACKAGES,
+        metavar="PACKAGE_NAME",
+    )
+    args = parser.parse_args(args)
+
+    assert os.path.exists(args.path), "path does not exist"
+    print("Test document: {}".format(os.path.basename(args.path)))
+    print("Test iterations: {}".format(args.num_parses))
+    with open(args.path, "r") as handle:
+        text = handle.read()
+    return run_all(args.package or ALL_PACKAGES, text, args.num_parses)
diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py
index c015a237..190e1bc8 100644
--- a/myst_parser/docutils_renderer.py
+++ b/myst_parser/docutils_renderer.py
@@ -186,7 +186,9 @@ def render_strikethrough(self, token):
         raise NotImplementedError
 
     def render_thematic_break(self, token):
-        self.current_node.append(nodes.transition())
+        node = nodes.transition()
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
 
     def render_block_break(self, token):
         block_break = nodes.comment(token.content, token.content)
@@ -284,11 +286,14 @@ def render_heading(self, token):
         self.document.note_implicit_target(section, section)
         self.current_node = section
 
-    def handle_cross_reference(self, token, destination, ref_node):
+    def handle_cross_reference(self, token, destination):
         # TODO use the docutils error reporting mechanisms, rather than raising
-        raise NotImplementedError(
-            "reference not found in current document: {}".format(destination)
-        )
+        if not self.config.get("ignore_missing_refs", False):
+            raise NotImplementedError(
+                "reference not found in current document: {}\n{}".format(
+                    destination, token
+                )
+            )
 
     def render_link(self, token):
         ref_node = nodes.reference()
diff --git a/setup.py b/setup.py
index 8e527bb4..4e412994 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,9 @@
     author_email="chrisj_sewell@hotmail.com",
     license="MIT",
     packages=find_packages(),
-    entry_points={"console_scripts": []},
+    entry_points={
+        "console_scripts": ["myst-benchmark = myst_parser.cli.benchmark:main"]
+    },
     classifiers=[
         "Development Status :: 3 - Alpha",
         "Intended Audience :: Developers",
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 00000000..4b069ca9
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,11 @@
+import pathlib
+import tempfile
+
+from myst_parser.cli import benchmark
+
+
+def test_benchmark():
+    with tempfile.TemporaryDirectory() as tempdir:
+        path = pathlib.Path(tempdir).joinpath("test.md")
+        path.write_text("a b c")
+        assert benchmark.main(["-n", "1", "-p", "myst_parser:html", str(path)])
diff --git a/tests/test_renderers/test_docutils.py b/tests/test_renderers/test_docutils.py
index 31963b00..60698363 100644
--- a/tests/test_renderers/test_docutils.py
+++ b/tests/test_renderers/test_docutils.py
@@ -4,7 +4,7 @@
 from mistletoe.block_token import tokenize
 from mistletoe.span_token import tokenize_inner
 
-from myst_parser import text_to_tokens, render_tokens
+from myst_parser import text_to_tokens, render_tokens, parse_text
 from myst_parser.block_tokens import Document
 from myst_parser.docutils_renderer import SphinxRenderer
 
@@ -21,7 +21,7 @@ def render_token(
     render_func(mock_token)
 
 
-def test_render_tokens():
+def test_text_to_tokens():
     root = text_to_tokens("abc")
     document = render_tokens(
         root,
@@ -34,6 +34,15 @@ def test_render_tokens():
     )
 
 
+def test_parse_text():
+    document = parse_text(
+        "abc", "sphinx", load_sphinx_env=True, sphinx_conf={"project": "MyST Parser"}
+    )
+    assert document.pformat() == (
+        '<document source="notset">\n    <paragraph>\n        abc\n'
+    )
+
+
 def test_strong(renderer_mock):
     render_token(renderer_mock, "Strong")
     assert renderer_mock.document.pformat() == dedent(