Skip to content

Commit

Permalink
Clean up
Browse files (browse the repository at this point in the history)
- Clean up constants file: use the unicodedata module instead
  • Loading branch information
frnmst committed Apr 5, 2024
1 parent ab64da0 commit 3dcbc6d
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 832 deletions.
19 changes: 9 additions & 10 deletions md_toc/cmark/cmark_ctype_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#
r"""The cmark implementation file."""

import string
import unicodedata

from ..constants import parser as md_parser

# License C applies to this file except for non derivative code:
Expand All @@ -30,21 +33,17 @@
# Return True if c is a "whitespace" character as defined by the spec.
# 0.30
def _cmark_cmark_isspace(char: int) -> bool:
value = False
if chr(char) in md_parser['cmark']['pseudo_re']['UWC']:
value = True

return value
# A Unicode whitespace character is any code point in the Unicode Zs
# general category, or a tab (U+0009), line feed (U+000A), form feed
# (U+000C), or carriage return (U+000D).
return (unicodedata.category(chr(char)) == 'Zs'
or chr(char) in ['\u0009', '\u000A', '\u000C', '\u000D'])


# Return True if c is an ascii punctuation character.
# 0.29, 0.30
def _cmark_cmark_ispunct(char: int) -> bool:
value = False
if chr(char) in md_parser['cmark']['pseudo_re']['APC']:
value = True

return value
return chr(char) in string.punctuation


if __name__ == '__main__':
Expand Down
15 changes: 4 additions & 11 deletions md_toc/cmark/utf8_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,22 +168,15 @@ def _cmark_cmark_utf8proc_case_fold(
# 0.29, 0.30
def _cmark_cmark_utf8proc_is_space(char: int) -> bool:
r"""Match anything in the Zs class, plus LF, CR, TAB, FF."""
value: bool = False
if chr(char) in md_parser['cmark']['pseudo_re']['UWC']:
value = True

return value
return (unicodedata.category(chr(char)) == 'Zs'
or chr(char) in ['\u0009', '\u000A', '\u000C', '\u000D'])


# 0.29, 0.30
def _cmark_cmark_utf8proc_is_punctuation(char: int) -> bool:
r"""Match anything in the P[cdefios] classes."""
value: bool = False
if ((char < 128 and _cmark_cmark_ispunct(char))
or chr(char) in md_parser['cmark']['pseudo_re']['UPC']):
value = True

return value
return ((char < 128 and _cmark_cmark_ispunct(char))
or unicodedata.category(chr(char)).startswith('P'))


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 3dcbc6d

Please sign in to comment.