Minor fixes and optimizations

- Fix up and add pre-commit hooks - Fix some cmark code
frnmst · Nov 20, 2023 · 9edbafc · 9edbafc
1 parent 696d334
commit 9edbafc
Show file tree

Hide file tree

Showing 13 changed files with 63 additions and 93 deletions.
diff --git a/.allowed_licenses.yml b/.allowed_licenses.yml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -25,6 +25,10 @@ repos:
     - id: check-added-large-files
       args: ['--maxkb=4096']
     - id: destroyed-symlinks
+    - id: double-quote-string-fixer
+    - id: mixed-line-ending
+      args: ['--fix=lf']
+    - id: requirements-txt-fixer
 
 # YAPF and flake8 need to remain pinned.
 - repo: https://github.com/pre-commit/mirrors-yapf
@@ -45,12 +49,6 @@ repos:
     - id: bandit
       args: ['--skip', 'B404,B506,B410,B603,B324', '--level', 'LOW']
 
-#   -   repo: https://github.com/pre-commit/mirrors-mypy
-#       rev: 'v0.991'
-#       hooks:
-#       -      id: mypy
-#              args: ['--no-strict-optional', '--ignore-missing-imports', '--show-error-context', '--show-error-codes', '--follow-imports', 'silent']
-
 - repo: https://github.com/pycqa/isort
   rev: '5.12.0'
   hooks:
@@ -61,14 +59,8 @@ repos:
   hooks:
     - id: pyupgrade
 
-#    -   repo: https://codeberg.org/frnmst/licheck
-#        rev: '1.0.0'
-#        hooks:
-#        -   id: licheck
-#            args: ['--configuration-file', '.allowed_licenses.yml']
-
 - repo: https://codeberg.org/frnmst/md-toc
-  rev: '8.2.0'  # or a specific git tag from md-toc
+  rev: '8.2.2'  # or a specific git tag from md-toc
   hooks:
     - id: md-toc
       args: [-p, 'cmark', '-l6']  # CLI options
@@ -78,12 +70,6 @@ repos:
   hooks:
     - id: check-manifest
 
-      #- repo: https://github.com/rstcheck/rstcheck
-      #  rev: 'v6.2.0'
-      #  hooks:
-      #    - id: rstcheck
-      #      additional_dependencies: ["tomli"]
-
 - repo: local
   hooks:
     - id: unit-tests

diff --git a/Makefile b/Makefile
@@ -122,7 +122,7 @@ clean:
 	find -regex ".*\.[mM][dD]" \
 		! -name 'README.md' \
 		! -name 'CONTRIBUTING.md' \
-		| -name 'SECURITY.md' \
+		! -name 'SECURITY.md' \
 		-type f -exec rm -f {} +
 	$(VENV_CMD) \
 		&& $(MAKE) -C docs clean \

diff --git a/README.md b/README.md
@@ -298,6 +298,7 @@ Changelogs, instructions, sources and keys can be found at
 ## Support this project
 
 - [Buy Me a Coffee](https://www.buymeacoffee.com/frnmst)
+- [Liberapay](https://liberapay.com/frnmst)
 - Bitcoin: `bc1qnkflazapw3hjupawj0lm39dh9xt88s7zal5mwu`
 - Monero: `84KHWDTd9hbPyGwikk33Qp5GW7o7zRwPb8kJ6u93zs4sNMpDSnM5ZTWVnUp2cudRYNT6rNqctnMQ9NbUewbj7MzCBUcrQEY`
 - Dogecoin: `DMB5h2GhHiTNW7EcmDnqkYpKs6Da2wK3zP`

diff --git a/docs/conf.py b/docs/conf.py
@@ -194,8 +194,8 @@
 epub_theme = 'epub'
 epub_author = 'Franco Masotti'
 epub_theme_options = {
-    "relbar1": False,
-    "footer": False,
+    'relbar1': False,
+    'footer': False,
 }
 epub_css_style = [
     'css/epub.css',

diff --git a/md_toc/cli.py b/md_toc/cli.py
@@ -23,13 +23,7 @@
 import argparse
 import sys
 import textwrap
-
-# See
-# https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
-if sys.version_info >= (3, 8):
-    from importlib import metadata
-else:
-    import importlib_metadata as metadata
+from importlib import metadata
 
 from . import generic
 from .api import (
@@ -42,18 +36,16 @@
 
 PROGRAM_DESCRIPTION = 'Markdown Table Of Contents: Automatically generate a compliant table\nof contents for a markdown file to improve document readability.'
 VERSION_NAME = 'md_toc'
+VERSION_COPYRIGHT = 'Copyright (C) 2017-2023 Franco Masotti, frnmst'
+VERSION_LICENSE = 'License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.'
+RETURN_VALUES = 'Return values: 0 ok, 1 error, 2 invalid command, 128 TOC differs from the one in the file (see --diff option)'
 
 try:
-    dist = metadata.distribution('md_toc')
-    VERSION_NUMBER = dist.version
+    VERSION_NUMBER = metadata.distribution('md_toc').version
 except metadata.PackageNotFoundError:
-    VERSION_NUMBER = 'vDevel'
+    VERSION_NUMBER = 'development version (venv)'
 
-VERSION_COPYRIGHT = 'Copyright (C) 2017-2023 Franco Masotti, frnmst'
-VERSION_LICENSE = 'License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.'
-RETURN_VALUES = 'Return values: 0 ok, 1 error, 2 invalid command, 128 TOC differs from the one in the file (see --diff option)'
-ADVICE = 'Please read the documentation to understand how each parser works'
-PROGRAM_EPILOG = ADVICE + '\n\n' + RETURN_VALUES + '\n\n' + VERSION_COPYRIGHT + '\n' + VERSION_LICENSE
+PROGRAM_EPILOG = RETURN_VALUES + '\n\n' + VERSION_COPYRIGHT + '\n' + VERSION_LICENSE
 
 
 class CliToApi():

diff --git a/md_toc/cmark/buffer_c.py b/md_toc/cmark/buffer_c.py
@@ -20,7 +20,6 @@
 #
 r"""A cmark implementation file."""
 
-import copy
 import sys
 
 from ..constants import parser as md_parser
@@ -85,8 +84,8 @@ def _cmark_cmark_strbuf_grow(buf: _cmarkCmarkStrbuf, target_size: int):
     target_size &= INT32_MAX
 
     if target_size > int(INT32_MAX / 2):
-        print("[cmark] _cmark_cmark_strbuf_grow requests buffer with size > " +
-              str(INT32_MAX / 2) + ", aborting")
+        print('[cmark] _cmark_cmark_strbuf_grow requests buffer with size > ' +
+              str(INT32_MAX / 2) + ', aborting')
         sys.exit(1)
 
     # Oversize the buffer by 50% to guarantee amortized linear time
@@ -127,7 +126,7 @@ def _cmark_cmark_strbuf_set(buf: _cmarkCmarkStrbuf, data: str, length: int):
 
             # alternative to
             #     memmove(buf->ptr, data, len)
-            buf.ptr = copy.deepcopy(data[0:length - 0])
+            buf.ptr = ''.join([data[:length], buf.ptr[length:]])
         buf.size = length
 
         # No need to set termination character
@@ -138,7 +137,9 @@ def _cmark_cmark_strbuf_set(buf: _cmarkCmarkStrbuf, data: str, length: int):
 # Add a single character to a buffer.
 def _cmark_cmark_strbuf_putc(buf: _cmarkCmarkStrbuf, c: int):
     _cmark_S_strbuf_grow_by(buf, 1)
-    buf.ptr = buf.ptr[:buf.size - 1] + chr(c & 0xFF) + buf.ptr[:buf.size + 1:]
+
+    # buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
+    buf.ptr = ''.join([buf.ptr, chr(c & 0xFF)])
     buf.size += 1
 
     # No need for the terminator character.
@@ -159,24 +160,21 @@ def _cmark_cmark_strbuf_put(
     _cmark_S_strbuf_grow_by(buf, length)
 
     # Alternative to
-    #     memmove(buf.ptr + buf.size, data, len)
+    #     memmove(buf->ptr + buf->size, data, len)
     if isinstance(data, list):
         # See
         # https://stackoverflow.com/a/5661889
         dt = bytearray(data).decode('UTF-8')
     else:
         dt = data
-
     # buf.ptr =
     #   buf.ptr[0] -> buf.ptr[buf.size - 1]
     #   +
     #   dt[0] -> dt[length - 1]
     #   +
     #   buf.ptr[buf.size + 1 + length] ->  buf.ptr[-1]
-    buf.ptr = ''.join([
-        buf.ptr[:buf.size],
-        copy.deepcopy(dt[:length]), buf.ptr[buf.size + 1 + length:]
-    ])
+    buf.ptr = ''.join(
+        [buf.ptr[:buf.size], dt[:length], buf.ptr[buf.size + 1 + length:]])
     buf.size += length
     # No need for line terminator.
     #     buf.ptr[buf.size] = '\0';
@@ -208,16 +206,21 @@ def _cmark_cmark_strbuf_strchr(buf: _cmarkCmarkStrbuf, c: int,
     if pos < 0:
         pos = 0
 
+    # `p` is the memory address (so absolute) where the character `c` lies.
+    # Here we use relative indices.
     # const unsigned char *p =
     #  (unsigned char *)memchr(buf.ptr + pos, c, buf.size - pos);
-    p = buf.ptr[pos:buf.size - pos + 1].find(chr(c))
+    p: int = buf.ptr[pos:buf.size - pos + 1].find(chr(c))
 
+    # `find` returns -1 if nothing is found
     if p == -1:
         return -1
 
+    # Pointer arithmetic: return the index of the buf->ptr string where
+    # the character `c` lies.
+    # Add the offset (`pos`) to start counting from the start of the string.
     # return (bufsize_t)(p - (const unsigned char *)buf->ptr);
-    # return int(ss[p:] - buf.ptr)
-    return 0
+    return p + pos
 
 
 # 0.29, 0.30
@@ -241,7 +244,8 @@ def _cmark_cmark_strbuf_drop(buf: _cmarkCmarkStrbuf, n: int):
         if buf.size:
             # Alternative to
             #     memmove(buf->ptr, buf->ptr + n, buf->size);
-            buf.ptr = copy.deepcopy(buf.ptr[n:buf.size - n])
+            buf.ptr = ''.join(
+                [buf.ptr[:n], buf.ptr[n:buf.size + n], buf.ptr[n + buf.size:]])
 
     # No need for the terminator character.
     # buf->ptr[buf->size] = '\0';
@@ -310,7 +314,7 @@ def _cmark_cmark_strbuf_unescape(buf: _cmarkCmarkStrbuf):
     #
     #     len(bytes('㤀', 'utf-8')) == 3
     #     bptr = ['foo%20', '㤀', '']
-    #     len(bptr) == 7
+    #     sum([len(bptr[i]) for i in range(0, len(bptr))]) == 7
     #     buf.size == 10
     #
     # So instead of
@@ -320,15 +324,20 @@ def _cmark_cmark_strbuf_unescape(buf: _cmarkCmarkStrbuf):
     # we have to put
     #
     #     while r < min(buf.size, len(buf.ptr))
-    while r < min(buf.size, len(buf.ptr)):
+    #
+    # or simply
+    #
+    #     while r < len(buf.ptr)
+    #
+    while r < len(buf.ptr):
         if buf.ptr[r] == '\\' and _cmark_cmark_ispunct(ord(buf.ptr[r + 1])):
             r += 1
 
-        #     buf->ptr[w] = buf->ptr[r];
-        bptr = [buf.ptr[:w], buf.ptr[r], buf.ptr[w + 1:]]
-        buf.ptr = ''.join(bptr)
-
+        #     buf->ptr[w++] = buf->ptr[r];
+        buf.ptr = ''.join([buf.ptr[:w], buf.ptr[r], buf.ptr[w + 1:]])
         w += 1
+
+        # ++r
         r += 1
 
     _cmark_cmark_strbuf_truncate(buf, w)

diff --git a/md_toc/cmark/inlines_c.py b/md_toc/cmark/inlines_c.py
@@ -161,7 +161,7 @@ def __str__(self):
             else:
                 next = hex(id(self.next))
 
-            el = '== element ' + hex(id(self)) + " =="
+            el = '== element ' + hex(id(self)) + ' =='
             it = 'inl_text = ' + str(self.inl_text)
             de = 'delim_char = ' + self.delim_char
             le = 'length = ' + str(self.length)
@@ -360,7 +360,7 @@ def _cmark_cmark_clean_autolink(mem: _cmarkCmarkMem, url: _cmarkCmarkChunk,
     _cmark_cmark_chunk_trim(url)
 
     if is_email:
-        _cmark_cmark_strbuf_puts(buf, "mailto:")
+        _cmark_cmark_strbuf_puts(buf, 'mailto:')
 
     _cmark_houdini_unescape_html_f(buf, url.data, url.length)
     return _cmark_cmark_strbuf_detach(buf)
@@ -1165,7 +1165,7 @@ def _cmark_handle_pointy_brace(subj: _cmarkSubject,
 
     # if nothing matches, just return the opening <:
     return _cmark_make_str(subj, subj.pos - 1, subj.pos - 1,
-                           _cmark_cmark_chunk_literal("<"))
+                           _cmark_cmark_chunk_literal('<'))
 
 
 # Parse backslash-escape or just a backslash, returning an inline.
@@ -1453,7 +1453,7 @@ def _cmark_handle_close_bracket(subj: _cmarkSubject,
     if not match_0:
         # Next, look for a following [link label] that matches in refmap.
         # skip spaces
-        raw_label = _cmark_cmark_chunk_literal("")
+        raw_label = _cmark_cmark_chunk_literal('')
         found_label = _cmark_link_label(subj, raw_label)
         if not found_label:
             # If we have a shortcut reference link, back up

diff --git a/md_toc/constants.py b/md_toc/constants.py
@@ -924,7 +924,7 @@
 __cmark_declaration = '[A-Z]+' + __cmark_spacechar + '+' + '[^>\x00]*'
 # Excludes tag opening.
 __cmark_cdata = r'CDATA\[([^\]\x00]+|\][^\]\x00]|\]\][^>\x00])*'
-__cmark_htmlcomment = "(--->|(-([-]?[^\x00>-])([-]?[^\x00-])*-->))"
+__cmark_htmlcomment = '(--->|(-([-]?[^\x00>-])([-]?[^\x00-])*-->))'
 __cmark_processinginstruction = '([^?>\x00]+|[?][^>\x00]|[>])+'
 
 parser['cmark']['re'] = {

diff --git a/md_toc/tests/tests.py b/md_toc/tests/tests.py
@@ -486,7 +486,7 @@ def setUp(self):
         r"""Fake filesystem."""
         self.setUpPyfakefs()
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_tocs_equal(self):
         r"""Test if two TOCs are equal."""
 
@@ -823,19 +823,19 @@ def test_write_string_on_file_between_markers(self):
             lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER +
             '\n' + MARKER)
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_write_strings_on_files_between_markers(self):
         r"""Test that the TOC is written correctly on the files."""
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_build_toc(self):
         r"""Test that the TOC is built correctly.
 
         TODO: tests will be needed eventually because the complexity of
         this function is growing.
         """
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_build_multiple_tocs(self):
         r"""Test that the TOC is built correctly for multiple files."""
 
@@ -866,7 +866,7 @@ def test_increase_index_ordered_list(self):
         with self.assertRaises(exceptions.GithubOverflowOrderedListMarker):
             api.increase_index_ordered_list(ht, 1, 1)
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_build_list_marker_log(self):
         r"""Test that the list_marker_log data structure is built correctly.
 
@@ -1302,7 +1302,7 @@ def test_build_toc_line_without_indentation(self):
             ')',
         )
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_build_toc_line(self):
         r"""Test that the TOC line is built correctly.
 
@@ -4424,7 +4424,7 @@ def test_is_closing_code_fence(self):
             api.is_closing_code_fence(TILDE3 + T1, TILDE3, 'cmark'), )
         self.assertFalse(api.is_closing_code_fence(TILDE3 + T4 + S4, TILDE3), )
 
-    @unittest.skip("empty test")
+    @unittest.skip('empty test')
     def test_init_indentation_status_list(self):
         r"""Test building of the indentation data structure.
 

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -18,6 +18,11 @@
 # along with md-toc.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+
+# Demo.
+asciinema>=2,<3
+build>=1.0,<1.1
+pre-commit>=3,<4
 # Documentation.
 sphinx-book-theme>=1.0,<1.1
 sphinx-copybutton>=0.5,<0.6
@@ -27,8 +32,3 @@ sphinx-copybutton>=0.5,<0.6
 # Tools.
 tox>=4,<5
 twine>=4,<5
-build>=1.0,<1.1
-pre-commit>=3,<4
-
-# Demo.
-asciinema>=2,<3