executablebooks · chrisjsewell · Aug 17, 2020 · Aug 14, 2020 · Aug 14, 2020 · Aug 15, 2020
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ exclude: >
 
 repos:
 
-  - repo: git://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v2.2.3
     hooks:
     - id: check-json

diff --git a/markdown_it/extensions/container/index.py b/markdown_it/extensions/container/index.py
@@ -36,7 +36,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
 
         # Check out the first character quickly,
         # this should filter out most of non-containers
-        if marker_char != charCodeAt(state.src, start):
+        if marker_char != state.srcCharCode[start]:
             return False
 
         # Check out the rest of the marker string
@@ -79,7 +79,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
                 #  test
                 break
 
-            if marker_char != charCodeAt(state.src, start):
+            if marker_char != state.srcCharCode[start]:
                 continue
 
             if state.sCount[nextLine] - state.blkIndent >= 4:

diff --git a/markdown_it/extensions/deflist/index.py b/markdown_it/extensions/deflist/index.py
@@ -1,6 +1,5 @@
 """Process definition lists."""
 from markdown_it import MarkdownIt
-from markdown_it.common.utils import charCodeAt
 from markdown_it.rules_block import StateBlock
 
 
@@ -16,7 +15,7 @@ def skipMarker(state: StateBlock, line: int):
             return -1
 
         # Check bullet
-        marker = charCodeAt(state.src, start)
+        marker = state.srcCharCode[start]
         start += 1
         if marker != 0x7E and marker != 0x3A:  # ~ :
             return -1
@@ -119,7 +118,7 @@ def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool):
                 )
 
                 while pos < maximum:
-                    ch = charCodeAt(state.src, pos)
+                    ch = state.srcCharCode[pos]
 
                     if isSpace(ch):
                         if ch == 0x09:

diff --git a/markdown_it/extensions/footnote/index.py b/markdown_it/extensions/footnote/index.py
@@ -6,7 +6,7 @@
 from markdown_it.rules_inline import StateInline
 from markdown_it.rules_block import StateBlock
 from markdown_it.helpers import parseLinkLabel
-from markdown_it.common.utils import isSpace, charCodeAt
+from markdown_it.common.utils import isSpace
 
 
 def footnote_plugin(md: MarkdownIt):
@@ -43,23 +43,23 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if start + 4 > maximum:
         return False
 
-    if charCodeAt(state.src, start) != 0x5B:  # /* [ */
+    if state.srcCharCode[start] != 0x5B:  # /* [ */
         return False
-    if charCodeAt(state.src, start + 1) != 0x5E:  # /* ^ */
+    if state.srcCharCode[start + 1] != 0x5E:  # /* ^ */
         return False
 
     pos = start + 2
     while pos < maximum:
-        if charCodeAt(state.src, pos) == 0x20:
+        if state.srcCharCode[pos] == 0x20:
             return False
-        if charCodeAt(state.src, pos) == 0x5D:  # /* ] */
+        if state.srcCharCode[pos] == 0x5D:  # /* ] */
             break
         pos += 1
 
     if pos == start + 2:  # no empty footnote labels
         return False
     pos += 1
-    if pos + 1 >= maximum or charCodeAt(state.src, pos) != 0x3A:  # /* : */
+    if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A:  # /* : */
         return False
     if silent:
         return True
@@ -87,7 +87,7 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
     )
 
     while pos < maximum:
-        ch = charCodeAt(state.src, pos)
+        ch = state.srcCharCode[pos]
 
         if isSpace(ch):
             if ch == 0x09:
@@ -136,9 +136,9 @@ def footnote_inline(state: StateInline, silent: bool):
 
     if start + 2 >= maximum:
         return False
-    if charCodeAt(state.src, start) != 0x5E:  # /* ^ */
+    if state.srcCharCode[start] != 0x5E:  # /* ^ */
         return False
-    if charCodeAt(state.src, start + 1) != 0x5B:  # /* [ */
+    if state.srcCharCode[start + 1] != 0x5B:  # /* [ */
         return False
 
     labelStart = start + 2
@@ -182,18 +182,18 @@ def footnote_ref(state: StateInline, silent: bool):
 
     if "footnotes" not in state.env or "refs" not in state.env["footnotes"]:
         return False
-    if charCodeAt(state.src, start) != 0x5B:  # /* [ */
+    if state.srcCharCode[start] != 0x5B:  # /* [ */
         return False
-    if charCodeAt(state.src, start + 1) != 0x5E:  # /* ^ */
+    if state.srcCharCode[start + 1] != 0x5E:  # /* ^ */
         return False
 
     pos = start + 2
     while pos < maximum:
-        if charCodeAt(state.src, pos) == 0x20:
+        if state.srcCharCode[pos] == 0x20:
             return False
-        if charCodeAt(state.src, pos) == 0x0A:
+        if state.srcCharCode[pos] == 0x0A:
             return False
-        if charCodeAt(state.src, pos) == 0x5D:  # /* ] */
+        if state.srcCharCode[pos] == 0x5D:  # /* ] */
             break
         pos += 1
 

diff --git a/markdown_it/extensions/front_matter/index.py b/markdown_it/extensions/front_matter/index.py
@@ -29,7 +29,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
 
         # Check out the first character of the first line quickly,
         # this should filter out non-front matter
-        if startLine != 0 or marker_char != charCodeAt(state.src, 0):
+        if startLine != 0 or marker_char != state.srcCharCode[0]:
             return False
 
         # Check out the rest of the marker string
@@ -73,7 +73,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
                 #  test
                 break
 
-            if marker_char != charCodeAt(state.src, start):
+            if marker_char != state.srcCharCode[start]:
                 continue
 
             if state.sCount[nextLine] - state.blkIndent >= 4:

diff --git a/markdown_it/extensions/myst_blocks/index.py b/markdown_it/extensions/myst_blocks/index.py
@@ -2,7 +2,7 @@
 
 from markdown_it import MarkdownIt
 from markdown_it.rules_block import StateBlock
-from markdown_it.common.utils import charCodeAt, isSpace, escapeHtml
+from markdown_it.common.utils import isSpace, escapeHtml
 
 
 TARGET_PATTERN = re.compile(r"^\(([a-zA-Z0-9\|\@\<\>\*\.\/\_\-\+\:]{1,100})\)\=\s*$")
@@ -40,7 +40,7 @@ def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if state.sCount[startLine] - state.blkIndent >= 4:
         return False
 
-    marker = charCodeAt(state.src, pos)
+    marker = state.srcCharCode[pos]
     pos += 1
 
     # Check block marker /* % */
@@ -70,7 +70,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if state.sCount[startLine] - state.blkIndent >= 4:
         return False
 
-    marker = charCodeAt(state.src, pos)
+    marker = state.srcCharCode[pos]
     pos += 1
 
     # Check block marker /* + */
@@ -81,7 +81,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
 
     cnt = 1
     while pos < maximum:
-        ch = charCodeAt(state.src, pos)
+        ch = state.srcCharCode[pos]
         if ch != marker and not isSpace(ch):
             break
         if ch == marker:

diff --git a/markdown_it/extensions/myst_role/index.py b/markdown_it/extensions/myst_role/index.py
@@ -2,7 +2,7 @@
 
 from markdown_it import MarkdownIt
 from markdown_it.rules_inline import StateInline
-from markdown_it.common.utils import charCodeAt, escapeHtml
+from markdown_it.common.utils import escapeHtml
 
 
 PATTERN = re.compile(r"^\{([a-zA-Z0-9\_\-\+\:]{1,36})\}(`+)(?!`)(.+?)(?<!`)\2(?!`)")
@@ -15,7 +15,7 @@ def myst_role_plugin(md: MarkdownIt):
 
 def myst_role(state: StateInline, silent: bool):
     try:
-        if charCodeAt(state.src, state.pos - 1) == 0x5C:  # /* \ */
+        if state.srcCharCode[state.pos - 1] == 0x5C:  # /* \ */
             # escaped (this could be improved in the case of edge case '\\{')
             return False
     except IndexError:

diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py
@@ -5,7 +5,6 @@
 returns the end of the label
 
 """
-from ..common.utils import charCodeAt
 
 
 def parseLinkLabel(state, start, disableNested=False):
@@ -18,7 +17,7 @@ def parseLinkLabel(state, start, disableNested=False):
     level = 1
 
     while state.pos < state.posMax:
-        marker = charCodeAt(state.src, state.pos)
+        marker = state.srcCharCode[state.pos]
         if marker == 0x5D:  # /* ] */)
             level -= 1
             if level == 0:

diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py
@@ -92,10 +92,10 @@ def tokenize(
                 line += 1
                 state.line = line
 
-    def parse(self, src: str, md, env, outTokens: List[Token]):
+    def parse(self, src: str, md, env, outTokens: List[Token], ords: List[int] = None):
         """Process input string and push block tokens into `outTokens`."""
         if not src:
             return
-        state = StateBlock(src, md, env, outTokens)
+        state = StateBlock(src, md, env, outTokens, ords)
         self.tokenize(state, state.line, state.lineMax)
         return state.tokens
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
@@ -11,6 +11,10 @@
       this is generally the main difference between the codes,
       because in python you can't do e.g. `for {i=1;i<x;i++} {}`
     - Use python version of `charCodeAt`
+    - |
+      Reduce use of `charCodeAt()` by storing char codes in a `srcCharCode` attribute for state
+      objects, and sharing those whenever possible.
+      This provides a significant performance boost.
     - |
       Use python's built-in `html.escape` and `urlparse.quote` methods, as a replacement for
       the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl)

diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
@@ -2,7 +2,7 @@
 import logging
 
 from .state_block import StateBlock
-from ..common.utils import isSpace, charCodeAt
+from ..common.utils import isSpace
 
 LOGGER = logging.getLogger(__name__)
 
@@ -22,7 +22,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
         return False
 
     # check the block quote marker
-    if charCodeAt(state.src, pos) != 0x3E:  # /* > */
+    if state.srcCharCode[pos] != 0x3E:  # /* > */
         pos += 1
         return False
     pos += 1
@@ -40,15 +40,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
     )
 
     # skip one optional space after '>'
-    if charCodeAt(state.src, pos) == 0x20:  # /* space */
+    if state.srcCharCode[pos] == 0x20:  # /* space */
         # ' >   test '
         #     ^ -- position start of line here:
         pos += 1
         initial += 1
         offset += 1
         adjustTab = False
         spaceAfterMarker = True
-    elif charCodeAt(state.src, pos) == 0x09:  # /* tab */
+    elif state.srcCharCode[pos] == 0x09:  # /* tab */
         spaceAfterMarker = True
 
         if (state.bsCount[startLine] + offset) % 4 == 3:
@@ -71,7 +71,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
     state.bMarks[startLine] = pos
 
     while pos < max:
-        ch = charCodeAt(state.src, pos)
+        ch = state.srcCharCode[pos]
 
         if isSpace(ch):
             if ch == 0x09:  # / tab /
@@ -147,9 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             # Case 1: line is not inside the blockquote, and this line is empty.
             break
 
-        evaluatesTrue = (
-            charCodeAt(state.src, pos) == 0x3E and not wasOutdented
-        )  # /* > */
+        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented  # /* > */
         pos += 1
         if evaluatesTrue:
             # This line is inside the blockquote.
@@ -162,15 +160,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             )
 
             # skip one optional space after '>'
-            if charCodeAt(state.src, pos) == 0x20:  # /* space */
+            if state.srcCharCode[pos] == 0x20:  # /* space */
                 # ' >   test '
                 #     ^ -- position start of line here:
                 pos += 1
                 initial += 1
                 offset += 1
                 adjustTab = False
                 spaceAfterMarker = True
-            elif charCodeAt(state.src, pos) == 0x09:  # /* tab */
+            elif state.srcCharCode[pos] == 0x09:  # /* tab */
                 spaceAfterMarker = True
 
                 if (state.bsCount[nextLine] + offset) % 4 == 3:
@@ -193,7 +191,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             state.bMarks[nextLine] = pos
 
             while pos < max:
-                ch = charCodeAt(state.src, pos)
+                ch = state.srcCharCode[pos]
 
                 if isSpace(ch):
                     if ch == 0x09:

diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
@@ -1,7 +1,7 @@
 # fences (``` lang, ~~~ lang)
 import logging
 
-from ..common.utils import charCodeAt, stripEscape
+from ..common.utils import stripEscape
 from .state_block import StateBlock
 
 LOGGER = logging.getLogger(__name__)
@@ -22,7 +22,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if pos + 3 > maximum:
         return False
 
-    marker = charCodeAt(state.src, pos)
+    marker = state.srcCharCode[pos]
 
     # /* ~ */  /* ` */
     if marker != 0x7E and marker != 0x60:
@@ -68,7 +68,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
             #  test
             break
 
-        if charCodeAt(state.src, pos) != marker:
+        if state.srcCharCode[pos] != marker:
             continue
 
         if state.sCount[nextLine] - state.blkIndent >= 4: