From c37bdab7e4f6bec72a21c4679e269e5a840c43ff Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Aug 2022 15:02:13 -0500 Subject: [PATCH] Add support for footnotes (#48) * Add plugin for supporting footnotes * Use n2y.plugins.footnotes as key instead of footntoes * Raise UseNextClass when not footnote in paragraph * Raise UseNextClass when not footnote in richtext * Pre-fetch page blocks to ensure constructor called before to_pandoc * Add warning when footnote id is overloaded * Add warning when footnote is missing * Add block reference to plugin * Add plugin_data dict to Page class * Use page plugin_data instead of client plugin_data * Revert pre-fetching of blocks inside pages * Always return None because UseNextClass handles non-footnotes * Clarify footnote stripping logic * Add warning for empty footnote * Preserve prefix and suffix of footnote containing token * Add footnotes test to end-to-end tests * Add notion url to warning for missing footnote * Add notion url to warning for empty footnote * Warning re-word * Add documentation about n2y.plugins.footnotes * Fix _footnote_empty() and rename --- README.md | 5 ++ n2y/page.py | 2 + n2y/plugins/footnotes.py | 107 +++++++++++++++++++++++++++++++++++++++ tests/test_end_to_end.py | 6 +++ 4 files changed, 120 insertions(+) create mode 100644 n2y/plugins/footnotes.py diff --git a/README.md b/README.md index 5a52b191..9b011898 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,10 @@ This plugin assumes that the `mmdc` mermaid commandline tool is available, and w If there are errors with the mermaid syntax, it is treated as a normal codeblock and the warning is logged. +### Footnotes + +Adds support for Pandoc-style footnotes. Any `text` rich texts that contain footnote references in the format `[^NUMBER]` (e.g. `...some claim [^2].`) will be linked to the corresponding footnote paragraph block starting with `[NUMBER]:` (e.g. `[2]: This is a footnote.`). 
+ ## Architecture N2y's architecture is divided into four main steps: @@ -180,6 +184,7 @@ Here are some features we're planning to add in the future: - Add support for dumping the notion urls using `--url-property`. - Add support for all types of rollups (including arrays of other values) - Add a property to rich text arrays, rich text, and mention instances back to the block they're contained in IF they happen to be contained in a block (some rich text arrays, etc. are from property values). This is useful when developing plugins. +- Add `n2y.plugins.footnotes` plugin ### v0.4.2 diff --git a/n2y/page.py b/n2y/page.py index adf1bcec..f32acd01 100644 --- a/n2y/page.py +++ b/n2y/page.py @@ -33,6 +33,8 @@ def __init__(self, client, notion_data): self._block = None self._children = None + self.plugin_data = {} + @property def title(self): for property_value in self.properties.values(): diff --git a/n2y/plugins/footnotes.py b/n2y/plugins/footnotes.py new file mode 100644 index 00000000..ccaa4f8f --- /dev/null +++ b/n2y/plugins/footnotes.py @@ -0,0 +1,107 @@ +import re +import logging + +from pandoc.types import Note, Str, Para + +from n2y.rich_text import TextRichText +from n2y.blocks import ParagraphBlock +from n2y.errors import UseNextClass + + +plugin_data_key = "n2y.plugins.footnotes" + +logger = logging.getLogger(__name__) + + +class ParagraphWithFootnoteBlock(ParagraphBlock): + def __init__(self, client, notion_data, page, get_children=True): + super().__init__(client, notion_data, page, get_children) + if self._is_footnote(): + self._attach_footnote_data() + else: + raise UseNextClass() + + def to_pandoc(self): + return None + + def _attach_footnote_data(self): + if plugin_data_key not in self.page.plugin_data: + self.page.plugin_data[plugin_data_key] = {} + if self._footnote() not in self.page.plugin_data[plugin_data_key]: + self.page.plugin_data[plugin_data_key][self._footnote()] = self._footnote_ast() + if self._footnote_empty(): + msg = 'Empty footnote "[%s]" 
(%s)' + logger.warning(msg, self._footnote(), self.notion_url) + else: + msg = 'Multiple footnotes for "[%s]", skipping latest (%s)' + logger.warning(msg, self._footnote(), self.notion_url) + + def _is_footnote(self): + return self._footnote() is not None + + def _footnote(self): + first_str = self.rich_text.to_plain_text().split(" ")[0] + footnotes = re.findall(r"\[(\d+)\]:", first_str) + if len(footnotes) != 1: + return None + return footnotes[0] + + def _footnote_ast(self): + ast = super().to_pandoc() + if isinstance(ast, list): + first_paragraph_footnote_stripped = Para(ast[0][0][2:]) + remaining_paragraphs = ast[1:] + return [first_paragraph_footnote_stripped] + remaining_paragraphs + else: + paragraph_footnote_stripped = Para(ast[0][2:]) + return paragraph_footnote_stripped + + def _footnote_empty(self): + return len(self.rich_text.to_plain_text()) == 0 + + +class TextRichTextWithFootnoteRef(TextRichText): + def __init__(self, client, notion_data, block=None): + super().__init__(client, notion_data, block) + if not self._is_footnote(): + raise UseNextClass() + + def to_pandoc(self): + pandoc_ast = [] + for token in super().to_pandoc(): + ref = self._footnote_from_token(token) + if ref is None: + pandoc_ast.append(token) + continue + if ref not in self.block.page.plugin_data[plugin_data_key]: + pandoc_ast.append(token) + msg = 'Missing footnote "[%s]". 
Rendering as plain text (%s)' + logger.warning(msg, ref, self.block.notion_url) + continue + self._append_footnote_to_ast(pandoc_ast, token, ref) + return pandoc_ast + + def _append_footnote_to_ast(self, pandoc_ast, token, ref): + block = self.block.page.plugin_data[plugin_data_key][ref] + footnote = Note(block) if isinstance(block, list) else Note([block]) + prefix, suffix = token[0].split(f"[^{ref}]") + pandoc_ast.append(Str(prefix)) + pandoc_ast.append(footnote) + pandoc_ast.append(Str(suffix)) + + def _is_footnote(self): + return any(self._footnote_from_token(t) is not None for t in super().to_pandoc()) + + def _footnote_from_token(self, token): + if not isinstance(token, Str): + return None + refs = re.findall(r"\[\^(\d+)\]", token[0]) + if len(refs) != 1: + return None + return refs[0] + + +notion_classes = { + "blocks": {"paragraph": ParagraphWithFootnoteBlock}, + "rich_texts": {"text": TextRichTextWithFootnoteRef}, +} diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index ce6456a8..5bf9f70a 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -263,6 +263,7 @@ def test_builtin_plugins(tmp_path): '--plugin', 'n2y.plugins.removecallouts', '--plugin', 'n2y.plugins.rawcodeblocks', '--plugin', 'n2y.plugins.mermaid', + '--plugin', 'n2y.plugins.footnotes', '--media-root', str(tmp_path), ]) assert status == 0 @@ -281,6 +282,11 @@ def test_builtin_plugins(tmp_path): assert 'Raw markdown should show up' in lines assert 'Raw html should not show up' not in lines + assert "# Header with Footnotes[^1]" in lines + assert "Paragraph with footnote.[^2]" in lines + assert "[^1]: First **footnote**." in lines + assert "[^2]: Second footnote" in lines + def test_missing_object_exception(): invalid_page_id = "11111111111111111111111111111111"