Merge pull request #2 from michalpokusa/removing-8-x-x-re-module-bug-workarounds

Removing 8.x.x `re` module bug workarounds, `trim_blocks` and `lstrip_blocks` parameters
adafruit · Jan 8, 2024 · 95f75b3 · 95f75b3
2 parents 0014d2a + ec4cf3a
commit 95f75b3
Show file tree

Hide file tree

Showing 10 changed files with 146 additions and 77 deletions.
diff --git a/README.rst b/README.rst
@@ -30,7 +30,7 @@ but it does not implement all of their features and takes a different approach t
 
 Main diffrences from Jinja2 and Django Templates:
 
-- filter are not supported, and there is no plan to support them
+- filters are not supported, and there is no plan to support them
 - all variables passed inside context must be accessed using the ``context`` object
 - you can call methods inside templates just like in Python
 - no support for nested blocks, although inheritance is supported

diff --git a/adafruit_templateengine.py b/adafruit_templateengine.py
@@ -29,6 +29,18 @@
 import os
 import re
 
+try:
+    from sys import implementation
+
+    if implementation.name == "circuitpython" and implementation.version < (9, 0, 0):
+        print(
+            "Warning: adafruit_templateengine requires CircuitPython 9.0.0, as previous versions"
+            " will have limited functionality when using block comments and non-ASCII characters."
+        )
+finally:
+    # Unimport sys to prevent accidental use
+    del implementation
+
 
 class Language:  # pylint: disable=too-few-public-methods
     """
@@ -59,12 +71,12 @@ def safe_html(value: Any) -> str:
         # 1e&minus;10
     """
 
-    def replace_amp_or_semi(match: re.Match):
+    def _replace_amp_or_semi(match: re.Match):
         return "&amp;" if match.group(0) == "&" else "&semi;"
 
     return (
         # Replace initial & and ; together
-        re.sub(r"&|;", replace_amp_or_semi, str(value))
+        re.sub(r"&|;", _replace_amp_or_semi, str(value))
         # Replace other characters
         .replace('"', "&quot;")
         .replace("_", "&lowbar;")
@@ -152,47 +164,48 @@ def safe_markdown(value: Any) -> str:
     )
 
 
-_PRECOMPILED_EXTENDS_PATTERN = re.compile(r"{% extends '.+?' %}|{% extends \".+?\" %}")
-_PRECOMPILED_BLOCK_PATTERN = re.compile(r"{% block \w+? %}")
-_PRECOMPILED_INCLUDE_PATTERN = re.compile(r"{% include '.+?' %}|{% include \".+?\" %}")
-_PRECOMPILED_HASH_COMMENT_PATTERN = re.compile(r"{# .+? #}")
-_PRECOMPILED_BLOCK_COMMENT_PATTERN = re.compile(
+_EXTENDS_PATTERN = re.compile(r"{% extends '.+?' %}|{% extends \".+?\" %}")
+_BLOCK_PATTERN = re.compile(r"{% block \w+? %}")
+_INCLUDE_PATTERN = re.compile(r"{% include '.+?' %}|{% include \".+?\" %}")
+_HASH_COMMENT_PATTERN = re.compile(r"{# .+? #}")
+_BLOCK_COMMENT_PATTERN = re.compile(
     r"{% comment ('.*?' |\".*?\" )?%}[\s\S]*?{% endcomment %}"
 )
-_PRECOMPILED_TOKEN_PATTERN = re.compile(r"{{ .+? }}|{% .+? %}")
+_TOKEN_PATTERN = re.compile(r"{{ .+? }}|{% .+? %}")
+_LSTRIP_BLOCK_PATTERN = re.compile(r"\n( )+$")
 
 
-def _find_next_extends(template: str):
-    return _PRECOMPILED_EXTENDS_PATTERN.search(template)
+def _find_extends(template: str):
+    return _EXTENDS_PATTERN.search(template)
 
 
-def _find_next_block(template: str):
-    return _PRECOMPILED_BLOCK_PATTERN.search(template)
+def _find_block(template: str):
+    return _BLOCK_PATTERN.search(template)
 
 
-def _find_next_include(template: str):
-    return _PRECOMPILED_INCLUDE_PATTERN.search(template)
+def _find_include(template: str):
+    return _INCLUDE_PATTERN.search(template)
 
 
 def _find_named_endblock(template: str, name: str):
     return re.search(r"{% endblock " + name + r" %}", template)
 
 
-def _exists_and_is_file(path: str):
+def _exists_and_is_file(path: str) -> bool:
     try:
         return (os.stat(path)[0] & 0b_11110000_00000000) == 0b_10000000_00000000
     except OSError:
         return False
 
 
 def _resolve_includes(template: str):
-    while (include_match := _find_next_include(template)) is not None:
+    while (include_match := _find_include(template)) is not None:
         template_path = include_match.group(0)[12:-4]
 
         # TODO: Restrict include to specific directory
 
         if not _exists_and_is_file(template_path):
-            raise FileNotFoundError(f"Include template not found: {template_path}")
+            raise OSError(f"Include template not found: {template_path}")
 
         # Replace the include with the template content
         with open(template_path, "rt", encoding="utf-8") as template_file:
@@ -205,15 +218,15 @@ def _resolve_includes(template: str):
 
 
 def _check_for_unsupported_nested_blocks(template: str):
-    if _find_next_block(template) is not None:
+    if _find_block(template) is not None:
         raise ValueError("Nested blocks are not supported")
 
 
 def _resolve_includes_blocks_and_extends(template: str):
     block_replacements: "dict[str, str]" = {}
 
     # Processing nested child templates
-    while (extends_match := _find_next_extends(template)) is not None:
+    while (extends_match := _find_extends(template)) is not None:
         extended_template_name = extends_match.group(0)[12:-4]
 
         # Load extended template
@@ -229,20 +242,15 @@ def _resolve_includes_blocks_and_extends(template: str):
         template = _resolve_includes(template)
 
         # Save block replacements
-        while (block_match := _find_next_block(template)) is not None:
+        while (block_match := _find_block(template)) is not None:
             block_name = block_match.group(0)[9:-3]
 
             endblock_match = _find_named_endblock(template, block_name)
 
             if endblock_match is None:
                 raise ValueError(r"Missing {% endblock %} for block: " + block_name)
 
-            # Workaround for bug in re module https://github.com/adafruit/circuitpython/issues/6860
-            block_content = template.encode("utf-8")[
-                block_match.end() : endblock_match.start()
-            ].decode("utf-8")
-            # TODO: Uncomment when bug is fixed
-            # block_content = template[block_match.end() : endblock_match.start()]
+            block_content = template[block_match.end() : endblock_match.start()]
 
             _check_for_unsupported_nested_blocks(block_content)
 
@@ -267,7 +275,7 @@ def _resolve_includes_blocks_and_extends(template: str):
 
 def _replace_blocks_with_replacements(template: str, replacements: "dict[str, str]"):
     # Replace blocks in top-level template
-    while (block_match := _find_next_block(template)) is not None:
+    while (block_match := _find_block(template)) is not None:
         block_name = block_match.group(0)[9:-3]
 
         # Self-closing block tag without default content
@@ -309,34 +317,61 @@ def _replace_blocks_with_replacements(template: str, replacements: "dict[str, st
     return template
 
 
-def _find_next_hash_comment(template: str):
-    return _PRECOMPILED_HASH_COMMENT_PATTERN.search(template)
+def _find_hash_comment(template: str):
+    return _HASH_COMMENT_PATTERN.search(template)
+
+
+def _find_block_comment(template: str):
+    return _BLOCK_COMMENT_PATTERN.search(template)
+
+
+def _remove_comments(
+    template: str,
+    *,
+    trim_blocks: bool = True,
+    lstrip_blocks: bool = True,
+):
+    def _remove_matched_comment(template: str, comment_match: re.Match):
+        text_before_comment = template[: comment_match.start()]
+        text_after_comment = template[comment_match.end() :]
 
+        if text_before_comment:
+            if lstrip_blocks:
+                if _token_is_on_own_line(text_before_comment):
+                    text_before_comment = text_before_comment.rstrip(" ")
 
-def _find_next_block_comment(template: str):
-    return _PRECOMPILED_BLOCK_COMMENT_PATTERN.search(template)
+        if text_after_comment:
+            if trim_blocks:
+                if text_after_comment.startswith("\n"):
+                    text_after_comment = text_after_comment[1:]
 
+        return text_before_comment + text_after_comment
 
-def _remove_comments(template: str):
     # Remove hash comments: {# ... #}
-    while (comment_match := _find_next_hash_comment(template)) is not None:
-        template = template[: comment_match.start()] + template[comment_match.end() :]
+    while (comment_match := _find_hash_comment(template)) is not None:
+        template = _remove_matched_comment(template, comment_match)
 
     # Remove block comments: {% comment %} ... {% endcomment %}
-    while (comment_match := _find_next_block_comment(template)) is not None:
-        template = template[: comment_match.start()] + template[comment_match.end() :]
+    while (comment_match := _find_block_comment(template)) is not None:
+        template = _remove_matched_comment(template, comment_match)
 
     return template
 
 
-def _find_next_token(template: str):
-    return _PRECOMPILED_TOKEN_PATTERN.search(template)
+def _find_token(template: str):
+    return _TOKEN_PATTERN.search(template)
+
+
+def _token_is_on_own_line(text_before_token: str) -> bool:
+    return _LSTRIP_BLOCK_PATTERN.search(text_before_token) is not None
 
 
 def _create_template_function(  # pylint: disable=,too-many-locals,too-many-branches,too-many-statements
     template: str,
     language: str = Language.HTML,
     *,
+    trim_blocks: bool = True,
+    lstrip_blocks: bool = True,
     function_name: str = "_",
     context_name: str = "context",
     dry_run: bool = False,
@@ -351,22 +386,34 @@ def _create_template_function(  # pylint: disable=,too-many-locals,too-many-bran
     function_string = f"def {function_name}({context_name}):\n"
     indent, indentation_level = "    ", 1
 
-    # Keep track of the tempalte state
+    # Keep track of the template state
     forloop_iterables: "list[str]" = []
     autoescape_modes: "list[bool]" = ["default_on"]
+    last_token_was_block = False
 
     # Resolve tokens
-    while (token_match := _find_next_token(template)) is not None:
+    while (token_match := _find_token(template)) is not None:
         token = token_match.group(0)
 
         # Add the text before the token
         if text_before_token := template[: token_match.start()]:
-            function_string += (
-                indent * indentation_level + f"yield {repr(text_before_token)}\n"
-            )
+            if lstrip_blocks and token.startswith(r"{% "):
+                if _token_is_on_own_line(text_before_token):
+                    text_before_token = text_before_token.rstrip(" ")
+
+            if trim_blocks:
+                if last_token_was_block and text_before_token.startswith("\n"):
+                    text_before_token = text_before_token[1:]
+
+            if text_before_token:
+                function_string += (
+                    indent * indentation_level + f"yield {repr(text_before_token)}\n"
+                )
 
         # Token is an expression
         if token.startswith(r"{{ "):
+            last_token_was_block = False
+
             autoescape = autoescape_modes[-1] in ("on", "default_on")
 
             # Expression should be escaped with language-specific function
@@ -383,6 +430,8 @@ def _create_template_function(  # pylint: disable=,too-many-locals,too-many-bran
 
         # Token is a statement
         elif token.startswith(r"{% "):
+            last_token_was_block = True
+
             # Token is a some sort of if statement
             if token.startswith(r"{% if "):
                 function_string += indent * indentation_level + f"{token[3:-3]}:\n"
@@ -449,9 +498,16 @@ def _create_template_function(  # pylint: disable=,too-many-locals,too-many-bran
         # Continue with the rest of the template
         template = template[token_match.end() :]
 
-    # Add the text after the last token (if any) and return
-    if template:
-        function_string += indent * indentation_level + f"yield {repr(template)}\n"
+    # Add the text after the last token (if any)
+    text_after_last_token = template
+
+    if text_after_last_token:
+        if trim_blocks and text_after_last_token.startswith("\n"):
+            text_after_last_token = text_after_last_token[1:]
+
+        function_string += (
+            indent * indentation_level + f"yield {repr(text_after_last_token)}\n"
+        )
 
     # If dry run, return the template function string
     if dry_run:

diff --git a/docs/examples.rst b/docs/examples.rst
@@ -27,7 +27,7 @@ It is up to the user to decide which method is more suitable for a given use cas
 **Generally, the first method will be sufficient for most use cases.**
 
 It is also worth noting that compiling all used templates using the second method might not be possible,
-depending one the project and board used, due to the limited amount of RAM.
+depending on the project and board used, due to the limited amount of RAM.
 
 .. literalinclude:: ../examples/templateengine_reusing.py
     :caption: examples/templateengine_reusing.py
@@ -51,7 +51,7 @@ Every expression that would be valid in an f-string is also valid in the templat
 This includes, but is not limited to:
 
 - mathemathical operations e.g. ``{{ 5 + 2 ** 3 }}`` will be replaced with ``"13"``
-- string operations e.g. ``{{ 'hello'.title() }}`` will be replaced with ``"Hello"``
+- string operations e.g. ``{{ 'hello'.upper() }}`` will be replaced with ``"HELLO"``
 - logical operations e.g. ``{{ 1 == 2 }}`` will be replaced with ``"False"``
 - ternary operator e.g. ``{{ 'ON' if True else 'OFF' }}`` will be replaced with ``"ON"``
 - built-in functions e.g. ``{{ len('Adafruit Industries') }}`` will be replaced with ``"19"``
@@ -140,13 +140,13 @@ and then include it in multiple pages.
 
 .. literalinclude:: ../examples/footer.html
     :caption: examples/footer.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :linenos:
 
 .. literalinclude:: ../examples/base_without_footer.html
     :caption: examples/base_without_footer.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :emphasize-lines: 12
     :linenos:
@@ -173,13 +173,13 @@ This allows sharing whole layout, not only single parts.
 
 .. literalinclude:: ../examples/child.html
     :caption: examples/child.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :linenos:
 
 .. literalinclude:: ../examples/parent_layout.html
     :caption: examples/parent_layout.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :linenos:
 
@@ -196,7 +196,7 @@ Executing Python code in templates
 ----------------------------------
 
 It is also possible to execute Python code in templates.
-This an be used for e.g. defining variables, modifying context, or breaking from loops.
+This can be used for e.g. defining variables, modifying context, or breaking from loops.
 
 
 .. literalinclude:: ../examples/templateengine_exec.py
@@ -221,7 +221,7 @@ Supported comment syntaxes:
 
 .. literalinclude:: ../examples/comments.html
     :caption: examples/comments.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :linenos:
 
@@ -247,12 +247,13 @@ and in all ``Template`` constructors.
 
 .. literalinclude:: ../examples/autoescape.html
     :caption: examples/autoescape.html
-    :lines: 5-
+    :lines: 7-
     :language: html
     :linenos:
 
 .. literalinclude:: ../examples/autoescape.md
     :caption: examples/autoescape.md
+    :lines: 5-
     :language: markdown
     :linenos:
 

diff --git a/examples/autoescape.html b/examples/autoescape.html
@@ -1,6 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 Michał Pokusa
-#
-# SPDX-License-Identifier: Unlicense
+<!--
+SPDX-FileCopyrightText: Copyright (c) 2023 Michał Pokusa
+
+SPDX-License-Identifier: Unlicense
+-->
 
 <!DOCTYPE html>
 <html>