From 6bdc10322fa358cdcf5eb1bb701b0b138c2b4736 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 11 Mar 2020 22:08:42 +0000 Subject: [PATCH 1/8] add TODOs --- mistletoe/__init__.py | 2 +- mistletoe/block_tokenizer.py | 4 ++++ mistletoe/block_tokens.py | 2 ++ mistletoe/parse_context.py | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mistletoe/__init__.py b/mistletoe/__init__.py index 031fd5e..ddc4010 100644 --- a/mistletoe/__init__.py +++ b/mistletoe/__init__.py @@ -2,7 +2,7 @@ Make mistletoe easier to import. """ -__version__ = "0.10.0a2" +__version__ = "0.10.0a3" __all__ = [ "renderers", "base_elements", diff --git a/mistletoe/block_tokenizer.py b/mistletoe/block_tokenizer.py index d304b7e..5e5253b 100644 --- a/mistletoe/block_tokenizer.py +++ b/mistletoe/block_tokenizer.py @@ -5,6 +5,10 @@ from mistletoe.parse_context import get_parse_context +# TODO we should parse tokenize_main/tokenize_block SourceLines +# instances directly which would also + + def tokenize_main( iterable, token_types=None, diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index 05c6df3..733a377 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -140,6 +140,8 @@ def read( """ if isinstance(lines, str): lines = lines.splitlines(keepends=True) + + # TODO what if windows style `\r` delimited? lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] if reset_definitions: get_parse_context().reset_definitions() diff --git a/mistletoe/parse_context.py b/mistletoe/parse_context.py index 8fa5e84..3d480a0 100644 --- a/mistletoe/parse_context.py +++ b/mistletoe/parse_context.py @@ -71,6 +71,10 @@ class ParseContext: :param nesting_matches: a dict of matches recorded from `find_nested_tokenizer` """ + # TODO allow the initialisaion of a global logger + # where we can report any errors/warnings such as duplicate + # link/footnote defintions + def __init__( self, find_blocks=None, From caef1e4f189bf8b86a1478002675fc3fa69f2d23 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 11 Mar 2020 23:17:53 +0000 Subject: [PATCH 2/8] `tokenize_main` now takes `SourceLines` Also rewrite FrontMatter parser to use ``SourceLines`` --- docs/using/intro.md | 9 ++-- mistletoe/base_elements.py | 29 +++++++++-- mistletoe/block_tokenizer.py | 22 +++----- mistletoe/block_tokens.py | 50 +++++++++---------- test/test_block_token.py | 3 ++ .../test_doc_read_with_front_matter_basic.yml | 2 +- .../test_json_renderer/test_basic.yml | 2 +- 7 files changed, 67 insertions(+), 50 deletions(-) diff --git a/docs/using/intro.md b/docs/using/intro.md index c82bbf7..6a09c37 100644 --- a/docs/using/intro.md +++ b/docs/using/intro.md @@ -244,8 +244,9 @@ This process is illustrated in the following example, using the lower level pars {py:func}`~mistletoe.block_tokenizer.tokenize_main`: ```python ->> from mistletoe.block_tokenizer import tokenize_main ->> paragraph = tokenize_main(["a [text][key]\n", "\n", '[key]: link "target"\n'], expand_spans=False)[0] +>> from mistletoe.block_tokenizer import tokenize_main, SourceLines +>> lines = SourceLines('a [text][key]\n\n[key]: link "target"', standardize_ends=True) +>> paragraph = tokenize_main(lines, expand_spans=False)[0] >> paragraph.children SpanContainer('a [text][key]') ``` @@ -265,8 +266,8 @@ ParseContext(block_cls=11,span_cls=9,link_defs=1,footnotes=0) ````{important} If directly using {py:func}`~mistletoe.block_tokenizer.tokenize_main`, -you should (a) ensure all lines are terminated with `\n`, and -(b) ensure that the 
global context is reset (if you don't want to use previously read defintions):
+you should ensure that the global context is reset,
+if you don't want to use previously read definitions:
 
 ```python
 >> get_parse_context(reset=True)
diff --git a/mistletoe/base_elements.py b/mistletoe/base_elements.py
index 1429e3f..9f5285a 100644
--- a/mistletoe/base_elements.py
+++ b/mistletoe/base_elements.py
@@ -1,6 +1,7 @@
 from collections import namedtuple, OrderedDict
 import json
-from typing import List, Optional, Pattern, Tuple
+import re
+from typing import List, Optional, Pattern, Tuple, Union
 
 import attr
@@ -150,14 +151,34 @@ class SourceLines:
     """A class for storing source lines and tracking current line index.
 
     :param lines: the source lines
-    :param start_line: the position of the lines with the full source text.
+    :param start_line: the position of the initial line within the full source text.
+    :param standardize_ends: standardize all lines to end with ``\\n``
+    :param metadata: any metadata associated with the lines
     """
 
-    def __init__(self, lines: List[str], start_line=0):
-        self.lines = lines if isinstance(lines, list) else list(lines)
+    line_end_pattern = re.compile(".*(\n|\r)$")
+
+    def __init__(
+        self,
+        lines: Union[str, List[str]],
+        start_line: int = 0,
+        standardize_ends: bool = False,
+        metadata: Optional[dict] = None,
+    ):
+
+        if isinstance(lines, str):
+            lines = lines.splitlines(keepends=True)
+        if standardize_ends:
+            lines = [
+                "{}\n".format(l[:-1] if self.line_end_pattern.match(l) else l)
+                for l in lines
+            ]
+
+        self.lines = lines
         self._index = -1
         self._anchor = 0
         self.start_line = start_line
+        self.metadata = metadata or {}
 
     @property
     def lineno(self):
diff --git a/mistletoe/block_tokenizer.py b/mistletoe/block_tokenizer.py
index 5e5253b..78da3d2 100644
--- a/mistletoe/block_tokenizer.py
+++ b/mistletoe/block_tokenizer.py
@@ -5,20 +5,15 @@
 from mistletoe.parse_context import get_parse_context
 
 
-# TODO we should parse tokenize_main/tokenize_block SourceLines
-# instances directly which would also
-
-
 def tokenize_main(
-    iterable,
+    lines: SourceLines,
     token_types=None,
-    start_line: int = 0,
     expand_spans: bool = True,
     skip_tokens: list = ("LinkDefinition", "Footnote"),
 ):
     """Searches for token_types in an iterable.
 
-    :param iterable: list of strings (each line must end with a newline `\\n`!).
+    :param lines: the source lines
     :param token_types: override block-level tokens set in global context
     :param start_line: the source line number corresponding to `iterable[0]`
     :param expand_spans: After the initial parse the span text is not yet tokenized,
@@ -30,14 +25,11 @@
     :returns: list of block-level token instances. 
""" + if not isinstance(lines, SourceLines): + lines = SourceLines(lines) if token_types is None: token_types = get_parse_context().block_tokens - tokens = tokenize_block( - iterable, - token_types=token_types, - start_line=start_line, - skip_tokens=skip_tokens, - ) + tokens = tokenize_block(lines, token_types=token_types, skip_tokens=skip_tokens) if expand_spans: for token in tokens + list(get_parse_context().foot_definitions.values()): for result in list(token.walk(include_self=True)): @@ -47,12 +39,12 @@ def tokenize_main( def tokenize_block( - iterable, token_types=None, start_line=0, skip_tokens=("LinkDefinition", "Footnote") + lines: SourceLines, token_types=None, skip_tokens=("LinkDefinition", "Footnote") ): """Returns a list of parsed tokens.""" + assert isinstance(lines, SourceLines), "lines must be `SourceLines` instance" if token_types is None: token_types = get_parse_context().block_tokens - lines = SourceLines(iterable, start_line) parsed_tokens = ParseBuffer() line = lines.peek() while line is not None: diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index 733a377..fd7f344 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -17,7 +17,7 @@ normalize_label, ) from mistletoe.parse_context import get_parse_context -from mistletoe.base_elements import Token, BlockToken, SpanContainer +from mistletoe.base_elements import Token, BlockToken, SpanContainer, SourceLines from mistletoe.attr_doc import autodoc @@ -67,21 +67,24 @@ class FrontMatter(BlockToken): @classmethod def start(cls, line: str) -> bool: + # handled by Document return False @classmethod def read(cls, lines): - assert lines and lines[0].startswith("---") - end_line = None - for i, line in enumerate(lines[1:]): - if line.startswith("---"): - end_line = i + 2 - break - # TODO raise/report error if closing block not found - if end_line is None: - end_line = len(lines) + start_line = lines.lineno + 1 + + next(lines) # skip first ``---`` + line_buffer = [] + next_line = lines.peek() + while not (next_line is None or next_line.startswith("---")): + line_buffer.append(next(lines)) + next_line = lines.peek() + if next_line is not None: + next(lines) # move pasr closing ``---`` + # TODO raise/report error if closing block not found? - return cls(content="".join(lines[1 : end_line - 1]), position=(0, end_line)) + return cls(content="".join(line_buffer), position=(start_line, lines.lineno)) @autodoc @@ -138,26 +141,19 @@ def read( :param front_matter: search for an initial YAML block front matter block (note this is not strictly CommonMark compliant) """ - if isinstance(lines, str): - lines = lines.splitlines(keepends=True) - - # TODO what if windows style `\r` delimited? - lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines] if reset_definitions: get_parse_context().reset_definitions() + lines = SourceLines(lines, start_line=start_line, standardize_ends=True) + # TODO can we do this in a way where we are checking # FrontMatter in get_parse_context().block_tokens? 
# then it would be easier to add/remove it in the renderers front_matter_token = None - if front_matter and lines and lines[0].startswith("---"): + if front_matter and lines.peek() and lines.peek().startswith("---"): front_matter_token = FrontMatter.read(lines) - start_line += front_matter_token.position[1] - lines = lines[front_matter_token.position[1] :] - children = tokenizer.tokenize_main( - lines, start_line=start_line, skip_tokens=skip_tokens - ) + children = tokenizer.tokenize_main(lines=lines, skip_tokens=skip_tokens) foot_defs = get_parse_context().foot_definitions return cls( children=children, @@ -307,7 +303,9 @@ def read(cls, lines): # in quotes can be recognized before span-level tokenizing. Paragraph.parse_setext = False try: - child_tokens = tokenizer.tokenize_block(line_buffer, start_line=start_line) + child_tokens = tokenizer.tokenize_block( + SourceLines(line_buffer, start_line=start_line) + ) finally: Paragraph.parse_setext = True return cls(children=child_tokens, position=(start_line, lines.lineno)) @@ -690,7 +688,7 @@ def read(cls, lines, prev_marker=None): next_line = lines.peek() if empty_first_line and next_line is not None and next_line.strip() == "": child_tokens = tokenizer.tokenize_block( - [next(lines)], start_line=lines.lineno + SourceLines([next(lines)], start_line=lines.lineno) ) next_line = lines.peek() if next_line is not None: @@ -745,7 +743,9 @@ def read(cls, lines, prev_marker=None): newline = newline + 1 if next_line.strip() == "" else 0 next_line = lines.peek() - child_tokens = tokenizer.tokenize_block(line_buffer, start_line=start_line) + child_tokens = tokenizer.tokenize_block( + SourceLines(line_buffer, start_line=start_line) + ) return cls( children=child_tokens, diff --git a/test/test_block_token.py b/test/test_block_token.py index 9e885e3..2e24df2 100644 --- a/test/test_block_token.py +++ b/test/test_block_token.py @@ -73,6 +73,9 @@ def test_html_block(name, source, data_regression): ], ) def test_fenced_code(name, source, data_regression): + from mistletoe.base_elements import SourceLines + + print(SourceLines(source).lines) data_regression.check( serialize_tokens(tokenize_main(source), as_dict=True), basename=f"test_fenced_code_{name}", diff --git a/test/test_block_token/test_doc_read_with_front_matter_basic.yml b/test/test_block_token/test_doc_read_with_front_matter_basic.yml index 396f0fd..6155673 100644 --- a/test/test_block_token/test_doc_read_with_front_matter_basic.yml +++ b/test/test_block_token/test_doc_read_with_front_matter_basic.yml @@ -8,6 +8,6 @@ Document: ' position: - - 0 + - 1 - 3 link_definitions: {} diff --git a/test/test_renderers/test_json_renderer/test_basic.yml b/test/test_renderers/test_json_renderer/test_basic.yml index 9e84942..bf1f518 100644 --- a/test/test_renderers/test_json_renderer/test_basic.yml +++ b/test/test_renderers/test_json_renderer/test_basic.yml @@ -222,7 +222,7 @@ front_matter: ' position: - - 0 + - 1 - 3 type: FrontMatter link_definitions: From eff60fa1536b98fcbe79ef71440282ba44df586c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 11 Mar 2020 23:23:38 +0000 Subject: [PATCH 3/8] allow `Document.read` to take `SourceLines` --- mistletoe/block_tokens.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index fd7f344..27abe57 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -2,7 +2,7 @@ Built-in block-level token classes. 
""" import re -from typing import Dict, Optional, Tuple +from typing import Dict, Optional, Tuple, Union from typing import List as ListType import attr @@ -71,7 +71,7 @@ def start(cls, line: str) -> bool: return False @classmethod - def read(cls, lines): + def read(cls, lines: SourceLines): start_line = lines.lineno + 1 next(lines) # skip first ``---`` @@ -124,16 +124,14 @@ class Document(BlockToken): @classmethod def read( cls, - lines, - start_line: int = 0, + lines: Union[str, ListType[str], SourceLines], reset_definitions: bool = True, skip_tokens: list = ("LinkDefinition", "Footnote"), front_matter: bool = False, ): """Read a document - :param lines: Lines or string to parse - :param start_line: The initial line (used for nested parsing) + :param lines: Lines to parse :param reset_definitions: remove any previously stored definitions in the global context (see ``ParseContext.reset_definitions()``). :param skip_tokens: do not store these ``token.name`` in the syntax tree. @@ -144,7 +142,8 @@ def read( if reset_definitions: get_parse_context().reset_definitions() - lines = SourceLines(lines, start_line=start_line, standardize_ends=True) + if not isinstance(lines, SourceLines): + lines = SourceLines(lines, standardize_ends=True) # TODO can we do this in a way where we are checking # FrontMatter in get_parse_context().block_tokens? From f60ac93b1736d9ede03a681ff2b5d84f791ce51c Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 11 Mar 2020 23:33:15 +0000 Subject: [PATCH 4/8] move expand spans logic to `Token` class --- mistletoe/base_elements.py | 6 ++++++ mistletoe/block_tokenizer.py | 6 ++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mistletoe/base_elements.py b/mistletoe/base_elements.py index 9f5285a..cd8b99c 100644 --- a/mistletoe/base_elements.py +++ b/mistletoe/base_elements.py @@ -91,6 +91,12 @@ def _get_children(_parent): next_children = new_children + def expand_spans(self): + """Walk through children and process any ``SpanContainer``.""" + for result in self.walk(include_self=True): + if isinstance(result.node.children, SpanContainer): + result.node.children = result.node.children.expand() + class TokenEncoder(json.JSONEncoder): """A JSON encoder for mistletoe tokens.""" diff --git a/mistletoe/block_tokenizer.py b/mistletoe/block_tokenizer.py index 78da3d2..f102baa 100644 --- a/mistletoe/block_tokenizer.py +++ b/mistletoe/block_tokenizer.py @@ -1,7 +1,7 @@ """ Block-level tokenizer for mistletoe. 
""" -from mistletoe.base_elements import SpanContainer, SourceLines +from mistletoe.base_elements import SourceLines from mistletoe.parse_context import get_parse_context @@ -32,9 +32,7 @@ def tokenize_main( tokens = tokenize_block(lines, token_types=token_types, skip_tokens=skip_tokens) if expand_spans: for token in tokens + list(get_parse_context().foot_definitions.values()): - for result in list(token.walk(include_self=True)): - if isinstance(result.node.children, SpanContainer): - result.node.children = result.node.children.expand() + token.expand_spans() return tokens From b5c36d3028d4637df5a82b556e40e69a70fccf9f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Wed, 11 Mar 2020 23:59:44 +0000 Subject: [PATCH 5/8] Add a logger to the global context Also log warnings if duplicate link/footnote definitions --- docs/conf.py | 1 - mistletoe/block_tokens.py | 18 ++++++++++-------- mistletoe/block_tokens_ext.py | 7 ++++++- mistletoe/parse_context.py | 21 +++++++++++++++++---- test/test_block_token.py | 6 ++++++ test/test_footnotes.py | 9 +++++++++ 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c94c695..f0f3b0d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -116,5 +116,4 @@ def run_apidoc(app): def setup(app): """Add functions to the Sphinx setup.""" - # TODO run apidoc # app.connect("builder-inited", run_apidoc) diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index 27abe57..5e93fef 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -795,12 +795,9 @@ def read(cls, lines): break offset, match = match_info matches.append(match) - cls.append_link_definitions(matches) - return ( - cls(position=(start_line, lines.lineno), definitions=matches) - if matches - else None - ) + position = (start_line, lines.lineno) + cls.append_link_definitions(matches, position) + return cls(position=position, definitions=matches) if matches else None @classmethod def match_reference(cls, lines, string, offset): @@ -923,15 +920,20 @@ def match_link_title(cls, string, offset): return None @staticmethod - def append_link_definitions(matches): + def append_link_definitions(matches, position): for key, dest, title in matches: key = normalize_label(key) dest = span_tokens.EscapeSequence.strip(dest.strip()) title = span_tokens.EscapeSequence.strip(title) link_definitions = get_parse_context().link_definitions if key not in link_definitions: - # TODO store/emit warning if duplicate link_definitions[key] = dest, title + else: + get_parse_context().logger.warning( + "ignoring duplicate link definition '{}' at: {}".format( + key, position + ) + ) @staticmethod def backtrack(lines, string, offset): diff --git a/mistletoe/block_tokens_ext.py b/mistletoe/block_tokens_ext.py index 6a9a7f5..0ca77f0 100644 --- a/mistletoe/block_tokens_ext.py +++ b/mistletoe/block_tokens_ext.py @@ -67,8 +67,13 @@ def read(cls, lines: SourceLines): position=(start_line, lines.lineno), ) if target not in get_parse_context().foot_definitions: - # TODO store/emit warning if duplicate get_parse_context().foot_definitions[target] = token + else: + get_parse_context().logger.warning( + "ignoring duplicate footnote definition '{}' at: {}".format( + target, token.position + ) + ) return token diff --git a/mistletoe/parse_context.py b/mistletoe/parse_context.py index 3d480a0..4a03389 100644 --- a/mistletoe/parse_context.py +++ b/mistletoe/parse_context.py @@ -7,10 +7,14 @@ from collections.abc import MutableSet from copy import deepcopy from importlib import 
import_module +import logging from threading import local +from typing import Optional THREAD = local() +LOGGER = logging.getLogger(__name__) + class OrderedSet(MutableSet): """An ordered set, optimized for `a in set` tests""" @@ -71,16 +75,13 @@ class ParseContext: :param nesting_matches: a dict of matches recorded from `find_nested_tokenizer` """ - # TODO allow the initialisaion of a global logger - # where we can report any errors/warnings such as duplicate - # link/footnote defintions - def __init__( self, find_blocks=None, find_spans=None, link_definitions=None, foot_definitions=None, + logger: Optional[logging.Logger] = None, ): # tokens used for matching if find_blocks is not None: @@ -109,6 +110,10 @@ def __init__( self.nesting_matches = {} self._foot_references = OrderedSet() + if logger is None: + logger = LOGGER + self._logger = logger + def __repr__(self): return "{0}(block_cls={1},span_cls={2},link_defs={3},footnotes={4})".format( self.__class__.__name__, @@ -130,6 +135,14 @@ def foot_definitions(self) -> dict: def foot_references(self) -> OrderedSet: return self._foot_references + @property + def logger(self) -> logging.Logger: + return self._logger + + @logger.setter + def logger(self, logger: logging.Logger): + self._logger = logger + def reset_definitions(self): self._link_definitions = {} self._foot_definitions = {} diff --git a/test/test_block_token.py b/test/test_block_token.py index 2e24df2..1485923 100644 --- a/test/test_block_token.py +++ b/test/test_block_token.py @@ -216,6 +216,12 @@ def test_doc_read_store_link_defs(name, source, data_regression): ) +def test_repeated_link_defs(caplog): + tokenize_main(["[a]: value1\n", "[a]: value2\n"]) + assert "ignoring duplicate link definition" in caplog.text + assert len(get_parse_context().link_definitions) == 1 + + def test_table_parse_align(): assert block_tokens_ext.Table.parse_align(":------") is None assert block_tokens_ext.Table.parse_align(":-----:") == 0 diff --git a/test/test_footnotes.py b/test/test_footnotes.py index a3e405a..53bd018 100644 --- a/test/test_footnotes.py +++ b/test/test_footnotes.py @@ -55,6 +55,15 @@ def test_foot_definition(name, source, data_regression): ) +def test_repeated_footnote(caplog): + get_parse_context().block_tokens.insert_before( + block_tokens_ext.Footnote, block_tokens.LinkDefinition + ) + tokenize_main(["[^1]: value1\n", "[^1]: value2\n"]) + assert "ignoring duplicate footnote definition" in caplog.text + assert len(get_parse_context().foot_definitions) == 1 + + @pytest.mark.parametrize( "name,source", [("basic", ["[^1]\n", "\n", "[^1]: a *footnote*\n", "\n", "[^1]\n"])] ) From 13d09112aab62237ae8e2792c90193ccb4f622c1 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 01:07:33 +0000 Subject: [PATCH 6/8] Allow FrontMatter content to be dict --- mistletoe/block_tokens.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index 5e93fef..73bffa9 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -58,13 +58,21 @@ class FrontMatter(BlockToken): if `front_matter=True`, and stored on `Document.front_matter` in the syntax tree. 
""" - content: str = attr.ib( + content: Union[str, dict] = attr.ib( repr=False, metadata={"doc": "Source text (should be valid YAML)"} ) position: Tuple[int, int] = attr.ib( metadata={"doc": "Line position in source text (start, end)"} ) + def get_data(self) -> dict: + """Return the de-serialized front matter data (requires pyyaml).""" + if isinstance(self.content, str): + import yaml + + return yaml.safe_load(self.content) or {} + return self.content + @classmethod def start(cls, line: str) -> bool: # handled by Document From 76e8b4a83360e3e19ff824561fbe432ec0d80d42 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 01:09:34 +0000 Subject: [PATCH 7/8] bump nyst_parser version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 545b44e..7cb6d79 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ extras_require={ "code_style": ["flake8<3.8.0,>=3.7.0", "black==19.10b0", "pre-commit==1.17.0"], "testing": ["coverage", "pytest>=3.6,<4", "pytest-cov", "pytest-regressions"], - "rtd": ["sphinx>=2,<3", "myst-parser~=0.6.0a1", "pyyaml"], + "rtd": ["sphinx>=2,<3", "myst-parser~=0.6.0a2", "pyyaml"], "benchmark": [ "commonmark~=0.9.1", "markdown~=3.2", From 353c5783304cbd230c2301de74355f378a83342a Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 12 Mar 2020 01:43:05 +0000 Subject: [PATCH 8/8] Add warning for unclosed frontmatter block --- mistletoe/block_tokens.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py index 73bffa9..4945f5a 100644 --- a/mistletoe/block_tokens.py +++ b/mistletoe/block_tokens.py @@ -89,8 +89,11 @@ def read(cls, lines: SourceLines): line_buffer.append(next(lines)) next_line = lines.peek() if next_line is not None: - next(lines) # move pasr closing ``---`` - # TODO raise/report error if closing block not found? + next(lines) # move past closing ``---`` + else: + get_parse_context().logger.warning( + "No closing --- was found for initial metadata block." + ) return cls(content="".join(line_buffer), position=(start_line, lines.lineno))