Pre-commit stuff (#627)

Tried out a bunch of ideas while I was on the plane, this is mostly a result of that. ### Summary - Add a script `markdown_conventions.py` that enforces some markdown conventions. This script - Prohibits completely empty sections (I.e. when a header is immediately succeeded by a header of the same or less deep level). Often this is a mistake, but if a section is empty because it is left for future work, I suggest noting this with a sentence like "This remains to be shown." instead of leaving the section empty. - Prohibits top-level headers after the first line - Fix current violators of these conventions. - Try out an `indentation_conventions.py` script that currently only detects unevenly indented code. I.e. when code is indented with an odd number of spaces. There are about 1000 violations at the moment so the script does not run by default, but it is something we can consider enabling in the future. - Rename some pre-commit scripts - Define infix binary operator `_-ℤ_` for the difference of integers, and use it where possible. --------- Co-authored-by: VictorBlanchi <76261859+VictorBlanchi@users.noreply.github.com> Co-authored-by: Egbert Rijke <e.m.rijke@gmail.com> Co-authored-by: Jonathan Cubides <jonathan.cubides@uib.no>
UniMath · May 22, 2023 · 7c322df · 7c322df
1 parent 92c52ba
commit 7c322df
Show file tree

Hide file tree

Showing 99 changed files with 979 additions and 621 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,13 +17,22 @@ repos:
 
   - repo: local
     hooks:
-      - id: remove-extra-blank-lines
-        name: Remove extra blank lines in Agda files
-        entry: scripts/remove_extra_blank_lines.py
+      - id: markdown-conventions
+        name: Markdown conventions
+        entry: scripts/markdown_conventions.py
+        language: python
+        always_run: true
+        files: ^src\/.+\.md$
+        types_or: [markdown, text]
+
+      - id: blank-line-conventions
+        name: Blank line conventions in literate Agda Markdown files
+        entry: scripts/blank_line_conventions.py
         language: python
         always_run: true
         files: ^src\/.+\.lagda\.md$
         types_or: [markdown, text]
+        #depends_on: markdown-conventions #TODO: find the correct way to express this
 
       - id: fix-imports
         name: Format Agda imports
@@ -33,25 +42,34 @@ repos:
         files: ^src\/.+\.lagda\.md$
         types_or: [markdown, text]
 
-      - id: spaces-convention-simple
+      - id: spaces-conventions-simple
         name: Simple Agda space conventions
-        entry: scripts/spaces_convention_simple.py
+        entry: scripts/spaces_conventions_simple.py
         language: python
         always_run: true
         files: ^src\/.+\.lagda\.md$
         types_or: [markdown, text]
 
-      - id: easily-fixable-long-lines
-        name: Easily fixable long lines of Agda code
-        entry: scripts/easily_fixable_long_lines.py
+      # - id: indentation-conventions
+      #   name: Agda indentation conventions
+      #   entry: scripts/indentation_conventions.py
+      #   language: python
+      #   always_run: true
+      #   files: ^src\/.+\.lagda\.md$
+      #   types_or: [markdown, text]
+      #   require_serial: true
+
+      - id: simply-wrappable-long-lines
+        name: Fix simply wrappable long lines of Agda code
+        entry: scripts/simply_wrappable_long_lines.py
         language: python
         always_run: true
         files: ^src\/.+\.lagda\.md$
         types_or: [markdown, text]
 
-      - id: max-line-length
-        name: Check the maximum line length is respected
-        entry: scripts/max_line_length.py
+      - id: max-line-length-conventions
+        name: Check the maximum line length conventions
+        entry: scripts/max_line_length_conventions.py
         language: python
         files: ^src\/[^/]+\/.+\.lagda\.md$ # Don't match top level modules
         types_or: [markdown, text]

diff --git a/scripts/blank_line_conventions.py b/scripts/blank_line_conventions.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+# Run this script:
+# $ python3 scripts/blank_line_conventions.py fileName.lagda.md
+
+import sys
+import utils
+import re
+
+open_tag_pattern = re.compile(r'^```\S+\n', flags=re.MULTILINE)
+close_tag_pattern = re.compile(r'\n```$', flags=re.MULTILINE)
+
+
+if __name__ == '__main__':
+
+    status = 0
+
+    for fpath in utils.get_agda_files(sys.argv[1:]):
+        with open(fpath, 'r') as f:
+            inputText = f.read()
+
+        output = re.sub(r'[ \t]+$', '', inputText, flags=re.MULTILINE)
+        output = re.sub(r'\n(\s*\n){2,}', '\n\n', output)
+
+        # # Add blank line after `module ... where`
+        # output = re.sub(r'(^([ \t]*)module[\s({][^\n]*\n(\2\s[^\n]*\n)*\2\s([^\n]*[\s)}])?where)\s*\n(?=\s*[^\n\s])', r'\1\n\n',
+        #                 output, flags=re.MULTILINE)
+        # # TODO: must match the first `where` after `module`
+
+        # Remove blank lines after a code block starts, and blank lines before a code block ends
+        # This only work properly in the absence of unspecified code blocks
+        output = re.sub(
+            r'\n{2,}```$', r'\n```', output, flags=re.MULTILINE)
+        output = re.sub(
+            r'^```(\S+)\n{2,}', r'```\1\n', output, flags=re.MULTILINE)
+
+        # Empty blocks should have an empty line
+        output = re.sub(r'^```(\S+)\n```$',
+                        r'```\1\n\n```', output, flags=re.MULTILINE)
+
+        if output != inputText:
+            with open(fpath, 'w') as f:
+                f.write(output)
+
+    sys.exit(status)
diff --git a/scripts/indentation_conventions.py b/scripts/indentation_conventions.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# Run this script:
+# python3 scripts/indentation_conventions.py fileName.lagda.md
+# Some simply enforcable space conventions
+
+import sys
+import utils
+import re
+import collections
+
+AGDA_INDENT = '  '
+
+even_indentation_pattern = re.compile(fr'({AGDA_INDENT})*(\S|$)')
+
+
+def is_even_indentation(line):
+    return even_indentation_pattern.match(line)
+
+
+if __name__ == '__main__':
+
+    STATUS_UNEVEN_INDENTATION = 1
+
+    status = 0
+
+    offender_files: collections.Counter = collections.Counter()
+
+    for fpath in utils.get_agda_files(sys.argv[1:]):
+
+        with open(fpath, 'r') as f:
+            contents = f.read()
+
+        lines = contents.split('\n')
+        is_in_agda_block: bool = False
+        block_comment_level: int = 0
+
+        for i, line in enumerate(lines):
+            is_tag, is_opening = utils.is_agda_opening_or_closing_tag(line)
+            if is_tag:
+                is_in_agda_block = is_opening
+            elif is_in_agda_block:
+                line, comment, block_comment_delta_pos, block_comment_delta_neg = utils.split_agda_line_comment_and_get_block_comment_delta(
+                    line)
+
+                block_comment_level += block_comment_delta_pos
+
+                if block_comment_level == 0:
+                    # line = space_after_opening_parenthesis_on_new_line(line)
+
+                    # Check even indentation
+                    if not is_even_indentation(line):
+                        if (status == 0):
+                            print('Error! Uneven indentation found')
+
+                        print(f'{fpath}:line {i}')
+
+                        offender_files[fpath] += 1
+                        status |= STATUS_UNEVEN_INDENTATION
+
+                block_comment_level -= block_comment_delta_neg
+
+                lines[i] = line + comment
+
+        new_contents = '\n'.join(lines)
+        if new_contents != contents:
+            with open(fpath, 'w') as f:
+                f.write(new_contents)
+
+    if status & STATUS_UNEVEN_INDENTATION != 0:  # There were offending lines
+
+        print('\nTop offending files:')
+        print(*map(lambda kv: f'  {kv[0]}: {kv[1]} lines',
+                   sorted(offender_files.items(), key=lambda kv: kv[1])), sep='\n')
+        print(
+            f'\nTotal number of lines in library unevenly indented: {sum(offender_files.values())}.')
+
+    sys.exit(status)
diff --git a/scripts/markdown_conventions.py b/scripts/markdown_conventions.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Run this script:
+# $ python3 scripts/markdown_conventions.py fileName.md
+
+import sys
+import utils
+import re
+
+open_tag_pattern = re.compile(r'^```\S+\n', flags=re.MULTILINE)
+close_tag_pattern = re.compile(r'\n```$', flags=re.MULTILINE)
+
+
+def has_well_formed_blocks(mdcode, pos=0):
+    """
+    Checks if in a markdown file, every opening block tag is paired with a closing
+    block tag before a new one is opened.
+
+    Note: This also disallows unspecified code blocks.
+    """
+
+    if pos >= len(mdcode):
+        return True
+
+    open_match = open_tag_pattern.search(mdcode, pos)
+    close_match = close_tag_pattern.search(mdcode, pos)
+
+    if (open_match is None) != (close_match is None):
+        # Open or closing tag but not both
+        return False
+    elif open_match is None:
+        # No more blocks
+        return True
+
+    if close_match.start() < open_match.start():
+        # A block is closed before it is opened
+        return False
+
+    # Check if multiple open tags
+    second_open_match = open_tag_pattern.search(mdcode, open_match.end())
+    if second_open_match is None:
+        # Check for extra close tag
+        return open_tag_pattern.search(mdcode, close_match.end()) is None
+    elif second_open_match.start() < close_match.start():
+        return False
+
+    # Recurse
+    return has_well_formed_blocks(mdcode, close_match.end())
+
+
+top_level_header_after_first_line = re.compile(r'\n#\s', flags=re.MULTILINE)
+
+empty_section_nonincreasing_level = re.compile(
+    r'^((#{2}\s([^\n]*)\n(\s*\n)*#{1,2})|(#{3}\s([^\n]*)\n(\s*\n)*#{1,3})|(#{4}\s([^\n]*)\n(\s*\n)*#{1,4})|(#{5}\s([^\n]*)\n(\s*\n)*#{1,5}))(?!#)', flags=re.MULTILINE)
+
+if __name__ == '__main__':
+
+    STATUS_UNSPECIFIED_OR_ILL_FORMED_BLOCK = 1
+    STATUS_TOP_LEVEL_HEADER_AFTER_FIRST_LINE = 2
+    STATUS_EMPTY_SECTION_NONINCREASING_LEVEL = 4
+
+    status = 0
+
+    for fpath in utils.get_agda_files(sys.argv[1:]):
+        with open(fpath, 'r') as f:
+            inputText = f.read()
+
+        output = inputText
+
+        if not has_well_formed_blocks(output):
+            print(
+                f"Error! File '{fpath}' has an unspecified or ill-formed code block. Please note that unspecified code blocks are disallowed in this project. Otherwise, please check if there is an opening or closing code block tag (```) without a corresponding closing/opening tag.", file=sys.stderr)
+
+            status |= STATUS_UNSPECIFIED_OR_ILL_FORMED_BLOCK
+
+        if top_level_header_after_first_line.search(output):
+            print(
+                f"Error! File '{fpath}' has a top level header after the first line. Please increase it.")
+
+        # TODO: print line numbers
+        if empty_section_nonincreasing_level.search(output):
+            print(
+                f"Error! File '{fpath}' has an empty section. This may be caused by having a header with the wrong header level. If this was not by mistake, consider including a sentence explaining why the section is empty. For instance, depending on the context, you may write 'This remains to be shown.'")
+
+            status |= STATUS_EMPTY_SECTION_NONINCREASING_LEVEL
+
+        if output != inputText:
+            with open(fpath, 'w') as f:
+                f.write(output)
+
+    sys.exit(status)
diff --git a/scripts/max_line_length.py → scripts/max_line_length_conventions.py b/scripts/max_line_length.py → scripts/max_line_length_conventions.py
diff --git a/scripts/remove_extra_blank_lines.py b/scripts/remove_extra_blank_lines.py
diff --git a/scripts/easily_fixable_long_lines.py → scripts/simply_wrappable_long_lines.py b/scripts/easily_fixable_long_lines.py → scripts/simply_wrappable_long_lines.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 # Run this script:
-# python3 scripts/easily_fixable_long_lines.py fileName.lagda.md
+# python3 scripts/simply_wrappable_long_lines.py fileName.lagda.md
 # Fix some easily fixable long lines
 
 import sys
 import utils
 import re
 import os
-import max_line_length
+import max_line_length_conventions
 
 INDENT = '  '
 
@@ -102,7 +102,7 @@ def check_wrap_line_definition_parameters(line):
 
                 if block_comment_level == 0 and \
                         len(line) > MAX_LINE_LENGTH and\
-                        not max_line_length.can_forgive_line(line):
+                        not max_line_length_conventions.can_forgive_line(line):
 
                     line = check_wrap_line_type_signature(line)
                     line = check_wrap_line_definition(line)