From 4f9ef502431c5446e869fd8d1be0611aa79ed36a Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Fri, 14 Aug 2020 16:34:39 +0200 Subject: [PATCH 1/6] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Performance=20imp?= =?UTF-8?q?rovment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit aims to reduce the number of redundant calls to charCodeAt to improve performance. Ord codes are computed once and stored in an `ords` attribute on StateBlock, StateCore and StateInline. We then index into this attribute rather than calling the function. Ord codes are passed from StateCore to StateBlock whenever possible so we don't recompute them. --- .pre-commit-config.yaml | 2 +- markdown_it/extensions/container/index.py | 4 +-- markdown_it/extensions/deflist/index.py | 5 ++-- markdown_it/extensions/footnote/index.py | 28 ++++++++++---------- markdown_it/extensions/front_matter/index.py | 4 +-- markdown_it/extensions/myst_blocks/index.py | 8 +++--- markdown_it/extensions/myst_role/index.py | 4 +-- markdown_it/helpers/parse_link_label.py | 3 +-- markdown_it/parser_block.py | 4 +-- markdown_it/rules_block/blockquote.py | 20 +++++++------- markdown_it/rules_block/fence.py | 6 ++--- markdown_it/rules_block/heading.py | 12 ++++----- markdown_it/rules_block/hr.py | 6 ++--- markdown_it/rules_block/html_block.py | 3 +-- markdown_it/rules_block/lheading.py | 3 +-- markdown_it/rules_block/list.py | 18 ++++++------- markdown_it/rules_block/reference.py | 9 +++---- markdown_it/rules_block/state_block.py | 22 ++++++++------- markdown_it/rules_block/table.py | 4 +-- markdown_it/rules_core/block.py | 2 +- markdown_it/rules_core/state_core.py | 1 + markdown_it/rules_inline/autolink.py | 3 +-- markdown_it/rules_inline/backticks.py | 7 +++-- markdown_it/rules_inline/emphasis.py | 3 +-- markdown_it/rules_inline/entity.py | 6 ++--- markdown_it/rules_inline/escape.py | 8 +++--- markdown_it/rules_inline/html_inline.py | 5 ++-- markdown_it/rules_inline/image.py | 18 
++++++------- markdown_it/rules_inline/link.py | 16 +++++------ markdown_it/rules_inline/newline.py | 4 +-- markdown_it/rules_inline/state_inline.py | 11 ++++---- markdown_it/rules_inline/strikethrough.py | 3 +-- markdown_it/rules_inline/text.py | 5 ++-- 33 files changed, 123 insertions(+), 134 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 20911e5a..33025e26 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ exclude: > repos: - - repo: git://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.2.3 hooks: - id: check-json diff --git a/markdown_it/extensions/container/index.py b/markdown_it/extensions/container/index.py index 2a6b8eb4..a0ccea1b 100644 --- a/markdown_it/extensions/container/index.py +++ b/markdown_it/extensions/container/index.py @@ -36,7 +36,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool # Check out the first character quickly, # this should filter out most of non-containers - if marker_char != charCodeAt(state.src, start): + if marker_char != state.ords[start]: return False # Check out the rest of the marker string @@ -79,7 +79,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool # test break - if marker_char != charCodeAt(state.src, start): + if marker_char != state.ords[start]: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/extensions/deflist/index.py b/markdown_it/extensions/deflist/index.py index 0c6246c7..0200b64e 100644 --- a/markdown_it/extensions/deflist/index.py +++ b/markdown_it/extensions/deflist/index.py @@ -1,6 +1,5 @@ """Process definition lists.""" from markdown_it import MarkdownIt -from markdown_it.common.utils import charCodeAt from markdown_it.rules_block import StateBlock @@ -16,7 +15,7 @@ def skipMarker(state: StateBlock, line: int): return -1 # Check bullet - marker = charCodeAt(state.src, start) + 
marker = state.ords[start] start += 1 if marker != 0x7E and marker != 0x3A: # ~ : return -1 @@ -119,7 +118,7 @@ def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if isSpace(ch): if ch == 0x09: diff --git a/markdown_it/extensions/footnote/index.py b/markdown_it/extensions/footnote/index.py index 02f465a1..1525a436 100644 --- a/markdown_it/extensions/footnote/index.py +++ b/markdown_it/extensions/footnote/index.py @@ -6,7 +6,7 @@ from markdown_it.rules_inline import StateInline from markdown_it.rules_block import StateBlock from markdown_it.helpers import parseLinkLabel -from markdown_it.common.utils import isSpace, charCodeAt +from markdown_it.common.utils import isSpace def footnote_plugin(md: MarkdownIt): @@ -43,23 +43,23 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool): if start + 4 > maximum: return False - if charCodeAt(state.src, start) != 0x5B: # /* [ */ + if state.ords[start] != 0x5B: # /* [ */ return False - if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */ + if state.ords[start + 1] != 0x5E: # /* ^ */ return False pos = start + 2 while pos < maximum: - if charCodeAt(state.src, pos) == 0x20: + if state.ords[pos] == 0x20: return False - if charCodeAt(state.src, pos) == 0x5D: # /* ] */ + if state.ords[pos] == 0x5D: # /* ] */ break pos += 1 if pos == start + 2: # no empty footnote labels return False pos += 1 - if pos + 1 >= maximum or charCodeAt(state.src, pos) != 0x3A: # /* : */ + if pos + 1 >= maximum or state.ords[pos] != 0x3A: # /* : */ return False if silent: return True @@ -87,7 +87,7 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if isSpace(ch): if ch == 0x09: @@ -136,9 +136,9 @@ def footnote_inline(state: StateInline, silent: bool): if start + 2 >= maximum: return False - if 
charCodeAt(state.src, start) != 0x5E: # /* ^ */ + if state.ords[start] != 0x5E: # /* ^ */ return False - if charCodeAt(state.src, start + 1) != 0x5B: # /* [ */ + if state.ords[start + 1] != 0x5B: # /* [ */ return False labelStart = start + 2 @@ -182,18 +182,18 @@ def footnote_ref(state: StateInline, silent: bool): if "footnotes" not in state.env or "refs" not in state.env["footnotes"]: return False - if charCodeAt(state.src, start) != 0x5B: # /* [ */ + if state.ords[start] != 0x5B: # /* [ */ return False - if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */ + if state.ords[start + 1] != 0x5E: # /* ^ */ return False pos = start + 2 while pos < maximum: - if charCodeAt(state.src, pos) == 0x20: + if state.ords[pos] == 0x20: return False - if charCodeAt(state.src, pos) == 0x0A: + if state.ords[pos] == 0x0A: return False - if charCodeAt(state.src, pos) == 0x5D: # /* ] */ + if state.ords[pos] == 0x5D: # /* ] */ break pos += 1 diff --git a/markdown_it/extensions/front_matter/index.py b/markdown_it/extensions/front_matter/index.py index f624684b..b4876c20 100644 --- a/markdown_it/extensions/front_matter/index.py +++ b/markdown_it/extensions/front_matter/index.py @@ -29,7 +29,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool): # Check out the first character of the first line quickly, # this should filter out non-front matter - if startLine != 0 or marker_char != charCodeAt(state.src, 0): + if startLine != 0 or marker_char != state.ords[0]: return False # Check out the rest of the marker string @@ -73,7 +73,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if marker_char != charCodeAt(state.src, start): + if marker_char != state.ords[start]: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/extensions/myst_blocks/index.py b/markdown_it/extensions/myst_blocks/index.py index ee0ca6b2..9913ffa4 100644 --- a/markdown_it/extensions/myst_blocks/index.py +++ 
b/markdown_it/extensions/myst_blocks/index.py @@ -2,7 +2,7 @@ from markdown_it import MarkdownIt from markdown_it.rules_block import StateBlock -from markdown_it.common.utils import charCodeAt, isSpace, escapeHtml +from markdown_it.common.utils import isSpace, escapeHtml TARGET_PATTERN = re.compile(r"^\(([a-zA-Z0-9\|\@\<\>\*\.\/\_\-\+\:]{1,100})\)\=\s*$") @@ -40,7 +40,7 @@ def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] pos += 1 # Check block marker /* % */ @@ -70,7 +70,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] pos += 1 # Check block marker /* + */ @@ -81,7 +81,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool): cnt = 1 while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if ch != marker and not isSpace(ch): break if ch == marker: diff --git a/markdown_it/extensions/myst_role/index.py b/markdown_it/extensions/myst_role/index.py index f2ac45c0..f9992ee2 100644 --- a/markdown_it/extensions/myst_role/index.py +++ b/markdown_it/extensions/myst_role/index.py @@ -2,7 +2,7 @@ from markdown_it import MarkdownIt from markdown_it.rules_inline import StateInline -from markdown_it.common.utils import charCodeAt, escapeHtml +from markdown_it.common.utils import escapeHtml PATTERN = re.compile(r"^\{([a-zA-Z\_\-\+\:]{1,36})\}(`+)(?!`)(.+?)(? 
*/ + if state.ords[pos] != 0x3E: # /* > */ pos += 1 return False pos += 1 @@ -40,7 +40,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): ) # skip one optional space after '>' - if charCodeAt(state.src, pos) == 0x20: # /* space */ + if state.ords[pos] == 0x20: # /* space */ # ' > test ' # ^ -- position start of line here: pos += 1 @@ -48,7 +48,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): offset += 1 adjustTab = False spaceAfterMarker = True - elif charCodeAt(state.src, pos) == 0x09: # /* tab */ + elif state.ords[pos] == 0x09: # /* tab */ spaceAfterMarker = True if (state.bsCount[startLine] + offset) % 4 == 3: @@ -71,7 +71,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.bMarks[startLine] = pos while pos < max: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if isSpace(ch): if ch == 0x09: # / tab / @@ -147,9 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): # Case 1: line is not inside the blockquote, and this line is empty. break - evaluatesTrue = ( - charCodeAt(state.src, pos) == 0x3E and not wasOutdented - ) # /* > */ + evaluatesTrue = state.ords[pos] == 0x3E and not wasOutdented # /* > */ pos += 1 if evaluatesTrue: # This line is inside the blockquote. 
@@ -162,7 +160,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): ) # skip one optional space after '>' - if charCodeAt(state.src, pos) == 0x20: # /* space */ + if state.ords[pos] == 0x20: # /* space */ # ' > test ' # ^ -- position start of line here: pos += 1 @@ -170,7 +168,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): offset += 1 adjustTab = False spaceAfterMarker = True - elif charCodeAt(state.src, pos) == 0x09: # /* tab */ + elif state.ords[pos] == 0x09: # /* tab */ spaceAfterMarker = True if (state.bsCount[nextLine] + offset) % 4 == 3: @@ -193,7 +191,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.bMarks[nextLine] = pos while pos < max: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if isSpace(ch): if ch == 0x09: diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index dc8efa32..aa6ef2ea 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -1,7 +1,7 @@ # fences (``` lang, ~~~ lang) import logging -from ..common.utils import charCodeAt, stripEscape +from ..common.utils import stripEscape from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -22,7 +22,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): if pos + 3 > maximum: return False - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] # /* ~ */ /* ` */ if marker != 0x7E and marker != 0x60: @@ -68,7 +68,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if charCodeAt(state.src, pos) != marker: + if state.ords[pos] != marker: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index a9c85a36..bcb99795 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -2,7 +2,7 @@ import logging from 
.state_block import StateBlock -from ..common.utils import isSpace, charCodeAt +from ..common.utils import isSpace LOGGER = logging.getLogger(__name__) @@ -18,7 +18,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] # /* # */ if ch != 0x23 or pos >= maximum: @@ -27,12 +27,12 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): # count heading level level = 1 pos += 1 - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] # /* # */ while ch == 0x23 and pos < maximum and level <= 6: level += 1 pos += 1 - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if level > 6 or (pos < maximum and not isSpace(ch)): return False @@ -44,13 +44,13 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): # maximum = state.skipSpacesBack(maximum, pos) # tmp = state.skipCharsBack(maximum, 0x23, pos) # # - # if tmp > pos and isSpace(charCodeAt(state.src, tmp - 1)): + # if tmp > pos and isSpace(state.ords[tmp - 1]): # maximum = tmp # TODO the code above doesn't seem to work, but this does # we should check why the code above doesn't work though _max = len(state.src[:maximum].rstrip().rstrip(chr(0x23))) try: - if isSpace(charCodeAt(state.src, _max - 1)): + if isSpace(state.ords[_max - 1]): maximum = _max except IndexError: pass diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 6790f297..c3a301b9 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -4,7 +4,7 @@ """ import logging -from ..common.utils import charCodeAt, isSpace +from ..common.utils import isSpace from .state_block import StateBlock @@ -22,7 +22,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] pos += 1 # 
Check hr marker: /* * */ /* - */ /* _ */ @@ -33,7 +33,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): cnt = 1 while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] pos += 1 if ch != marker and not isSpace(ch): return False diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index cd882dd4..202d93a3 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -3,7 +3,6 @@ import re from .state_block import StateBlock -from ..common.utils import charCodeAt from ..common.html_blocks import block_names from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR @@ -45,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): if not state.md.options.get("html", None): return False - if charCodeAt(state.src, pos) != 0x3C: # /* < */ + if state.ords[pos] != 0x3C: # /* < */ return False lineText = state.src[pos:maximum] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index f4ad8135..90b40aad 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -1,7 +1,6 @@ # lheading (---, ==) import logging -from ..common.utils import charCodeAt from ..ruler import Ruler from .state_block import StateBlock @@ -38,7 +37,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): maximum = state.eMarks[nextLine] if pos < maximum: - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] # /* - */ /* = */ if marker == 0x2D or marker == 0x3D: diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index dfe48991..46ed46e7 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -2,7 +2,7 @@ import logging from .state_block import StateBlock -from ..common.utils import charCodeAt, isSpace +from ..common.utils import isSpace LOGGER = logging.getLogger(__name__) @@ -14,14 +14,14 @@ def 
skipBulletListMarker(state: StateBlock, startLine: int): pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - marker = charCodeAt(state.src, pos) + marker = state.ords[pos] pos += 1 # Check bullet /* * */ /* - */ /* + */ if marker != 0x2A and marker != 0x2D and marker != 0x2B: return -1 if pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if not isSpace(ch): # " -test " - is not a list item @@ -42,7 +42,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): if pos + 1 >= maximum: return -1 - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] pos += 1 # /* 0 */ /* 9 */ @@ -54,7 +54,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): if pos >= maximum: return -1 - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] pos += 1 # /* 0 */ /* 9 */ @@ -74,7 +74,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): return -1 if pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if not isSpace(ch): # " 1.test " - is not a list item @@ -156,7 +156,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): return False # We should terminate list on style change. Remember first one to compare. 
- markerCharCode = charCodeAt(state.src, posAfterMarker - 1) + markerCharCode = state.ords[posAfterMarker - 1] # For validation mode we can terminate immediately if silent: @@ -198,7 +198,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if ch == 0x09: offset += 4 - (offset + state.bsCount[nextLine]) % 4 @@ -317,7 +317,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): if posAfterMarker < 0: break - if markerCharCode != charCodeAt(state.src, posAfterMarker - 1): + if markerCharCode != state.ords[posAfterMarker - 1]: break # Finalize list diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 42e64361..38872eaa 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -24,20 +24,17 @@ def reference(state: StateBlock, startLine, _endLine, silent): if state.sCount[startLine] - state.blkIndent >= 4: return False - if charCodeAt(state.src, pos) != 0x5B: # /* [ */ + if state.ords[pos] != 0x5B: # /* [ */ return False # Simple check to quickly interrupt scan on [link](url) at the start of line. 
# Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 while pos < maximum: # /* ] */ /* \ */ /* : */ - if ( - charCodeAt(state.src, pos) == 0x5D - and charCodeAt(state.src, pos - 1) != 0x5C - ): + if state.ords[pos] == 0x5D and state.ords[pos - 1] != 0x5C: if pos + 1 == maximum: return False - if charCodeAt(state.src, pos + 1) != 0x3A: + if state.ords[pos + 1] != 0x3A: return False break pos += 1 diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index b37d871c..5fa86a57 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -2,13 +2,17 @@ from ..token import Token from ..ruler import StateBase -from ..common.utils import isSpace, charCodeAt +from ..common.utils import isSpace class StateBlock(StateBase): - def __init__(self, src: str, md, env, tokens: List[Token]): + def __init__(self, src: str, md, env, tokens: List[Token], ords: List[int] = None): self.src = src + if ords is not None: + self.ords = ords + else: + self.ords = [ord(c) for c in src] # link to parser instance self.md = md @@ -64,9 +68,7 @@ def __init__(self, src: str, md, env, tokens: List[Token]): start = pos = indent = offset = 0 length = len(self.src) - for pos, character in enumerate(self.src): - character = ord(character) - + for pos, character in enumerate(self.ords): if not indent_found: if isSpace(character): indent += 1 @@ -135,7 +137,7 @@ def skipEmptyLines(self, from_pos): def skipSpaces(self, pos: int): """Skip spaces from given position.""" while pos < len(self.src): - if not isSpace(charCodeAt(self.src, pos)): + if not isSpace(self.ords[pos]): break pos += 1 return pos @@ -145,7 +147,7 @@ def skipSpacesBack(self, pos: int, minimum: int): if pos <= minimum: return pos while pos > minimum: - if not isSpace(charCodeAt(self.src, pos)): + if not isSpace(self.ords[pos]): return pos + 1 pos -= 1 return pos @@ -153,7 +155,7 @@ def skipSpacesBack(self, pos: int, minimum: int): def 
skipChars(self, pos: int, code: int): """Skip char codes from given position.""" while pos < len(self.src): - if charCodeAt(self.src, pos) != code: + if self.ords[pos] != code: break pos += 1 return pos @@ -163,7 +165,7 @@ def skipCharsBack(self, pos, code, minimum): if pos <= minimum: return pos while pos > minimum: - if code != charCodeAt(self.src, pos): + if code != self.ords[pos]: return pos + 1 pos -= 1 return pos @@ -186,7 +188,7 @@ def getLines(self, begin: int, end: int, indent, keepLastLF): last = self.eMarks[line] while (first < last) and (lineIndent < indent): - ch = charCodeAt(self.src, first) + ch = self.ords[first] if isSpace(ch): if ch == 0x09: lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 7fbd6b25..d931a3bd 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -85,14 +85,14 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): if pos >= state.eMarks[nextLine]: return False - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] pos += 1 # /* | */ /* - */ /* : */ if ch != 0x7C and ch != 0x2D and ch != 0x3A: return False while pos < state.eMarks[nextLine]: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] # /* | */ /* - */ /* : */ if ch != 0x7C and ch != 0x2D and ch != 0x3A and not isSpace(ch): diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index 0fb490b0..0a37ea72 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -11,4 +11,4 @@ def block(state: StateCore): token.children = [] state.tokens.append(token) else: - state.md.block.parse(state.src, state.md, state.env, state.tokens) + state.md.block.parse(state.src, state.md, state.env, state.tokens, state.ords) diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 6422bbd9..78f6eb04 100644 --- a/markdown_it/rules_core/state_core.py +++ 
b/markdown_it/rules_core/state_core.py @@ -7,6 +7,7 @@ class StateCore(StateBase): def __init__(self, src: str, md, env, tokens=None): self.src = src + self.ords = [ord(c) for c in src] self.md = md # link to parser instance self.env = env self.tokens: List[Token] = tokens or [] diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index c191dfef..d1da5ff0 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -1,7 +1,6 @@ # Process autolinks '' import re from .state_inline import StateBase -from ..common.utils import charCodeAt from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink EMAIL_RE = re.compile( @@ -14,7 +13,7 @@ def autolink(state: StateBase, silent: bool): pos = state.pos - if charCodeAt(state.src, pos) != 0x3C: # /* < */ + if state.ords[pos] != 0x3C: # /* < */ return False tail = state.src[pos:] diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 960cb17f..0b7d6590 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -2,7 +2,6 @@ import re from .state_inline import StateInline -from ..common.utils import charCodeAt regex = re.compile("^ (.+) $") @@ -10,7 +9,7 @@ def backtick(state: StateInline, silent: bool): pos = state.pos - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] # /* ` */ if ch != 0x60: @@ -21,7 +20,7 @@ def backtick(state: StateInline, silent: bool): maximum = state.posMax # /* ` */ - while pos < maximum and (charCodeAt(state.src, pos) == 0x60): + while pos < maximum and (state.ords[pos] == 0x60): pos += 1 marker = state.src[start:pos] @@ -35,7 +34,7 @@ def backtick(state: StateInline, silent: bool): break matchEnd = matchStart + 1 # /* ` */ - while matchEnd < maximum and (charCodeAt(state.src, matchEnd) == 0x60): + while matchEnd < maximum and (state.ords[matchEnd] == 0x60): matchEnd += 1 if matchEnd - matchStart == len(marker): diff --git 
a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index 9b0f560c..bbae1345 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -2,13 +2,12 @@ # from .state_inline import StateInline, Delimiter -from ..common.utils import charCodeAt def tokenize(state: StateInline, silent: bool): """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = charCodeAt(state.src, start) + marker = state.ords[start] if silent: return False diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 74213adb..a6ebd7a4 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -2,7 +2,7 @@ import re from ..common.entities import entities -from ..common.utils import has, isValidEntityCode, fromCodePoint, charCodeAt +from ..common.utils import has, isValidEntityCode, fromCodePoint from .state_inline import StateInline DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE) @@ -14,11 +14,11 @@ def entity(state: StateInline, silent: bool): pos = state.pos maximum = state.posMax - if charCodeAt(state.src, pos) != 0x26: # /* & */ + if state.ords[pos] != 0x26: # /* & */ return False if (pos + 1) < maximum: - ch = charCodeAt(state.src, pos + 1) + ch = state.ords[pos + 1] if ch == 0x23: # /* # */ match = DIGITAL_RE.search(state.src[pos:]) diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index b8e1fc55..3bc0e3b5 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -2,7 +2,7 @@ Process escaped chars and hardbreaks """ from .state_inline import StateInline -from ..common.utils import isSpace, charCodeAt +from ..common.utils import isSpace ESCAPED = [0 for _ in range(256)] @@ -15,13 +15,13 @@ def escape(state: StateInline, silent: bool): maximum = state.posMax # /* \ */ - if charCodeAt(state.src, pos) != 0x5C: + if 
state.ords[pos] != 0x5C: return False pos += 1 if pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if ch < 256 and ESCAPED[ch] != 0: if not silent: @@ -36,7 +36,7 @@ def escape(state: StateInline, silent: bool): pos += 1 # skip leading whitespaces from next line while pos < maximum: - ch = charCodeAt(state.src, pos) + ch = state.ords[pos] if not isSpace(ch): break pos += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 04ebf86a..9791f1bf 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -1,7 +1,6 @@ # Process html tags from .state_inline import StateInline from ..common.html_re import HTML_TAG_RE -from ..common.utils import charCodeAt def isLetter(ch: int): @@ -19,11 +18,11 @@ def html_inline(state: StateInline, silent: bool): # Check start maximum = state.posMax - if charCodeAt(state.src, pos) != 0x3C or pos + 2 >= maximum: # /* < */ + if state.ords[pos] != 0x3C or pos + 2 >= maximum: # /* < */ return False # Quick fail on second char - ch = charCodeAt(state.src, pos + 1) + ch = state.ords[pos + 1] if ( ch != 0x21 and ch != 0x3F # /* ! */ diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index a3b65842..ebad242a 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,7 +1,7 @@ # Process ![image]( "title") from .state_inline import StateInline -from ..common.utils import isSpace, charCodeAt, normalizeReference +from ..common.utils import isSpace, normalizeReference from ..common.normalize_url import normalizeLink, validateLink @@ -14,10 +14,10 @@ def image(state: StateInline, silent: bool): max = state.posMax # /* ! 
*/ - if charCodeAt(state.src, state.pos) != 0x21: + if state.ords[state.pos] != 0x21: return False # /* [ */ - if charCodeAt(state.src, state.pos + 1) != 0x5B: + if state.ords[state.pos + 1] != 0x5B: return False labelStart = state.pos + 2 @@ -29,7 +29,7 @@ def image(state: StateInline, silent: bool): pos = labelEnd + 1 # /* ( */ - if pos < max and charCodeAt(state.src, pos) == 0x28: + if pos < max and state.ords[pos] == 0x28: # # Inline link # @@ -38,7 +38,7 @@ def image(state: StateInline, silent: bool): # ^^ skipping these spaces pos += 1 while pos < max: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -61,7 +61,7 @@ def image(state: StateInline, silent: bool): # ^^ skipping these spaces start = pos while pos < max: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -76,7 +76,7 @@ def image(state: StateInline, silent: bool): # [link]( "title" ) # ^^ skipping these spaces while pos < max: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -84,7 +84,7 @@ def image(state: StateInline, silent: bool): title = "" # /* ) */ - if pos >= max or charCodeAt(state.src, pos) != 0x29: + if pos >= max or state.ords[pos] != 0x29: state.pos = oldPos return False @@ -98,7 +98,7 @@ def image(state: StateInline, silent: bool): return False # /* [ */ - if pos < max and charCodeAt(state.src, pos) == 0x5B: + if pos < max and state.ords[pos] == 0x5B: start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 44d5b16b..7bf56f9d 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import normalizeReference, isSpace, charCodeAt +from ..common.utils import normalizeReference, isSpace from 
..common.normalize_url import normalizeLink, validateLink from .state_inline import StateInline @@ -14,7 +14,7 @@ def link(state: StateInline, silent: bool): start = state.pos parseReference = True - if charCodeAt(state.src, state.pos) != 0x5B: # /* [ */ + if state.ords[state.pos] != 0x5B: # /* [ */ return False labelStart = state.pos + 1 @@ -26,7 +26,7 @@ def link(state: StateInline, silent: bool): pos = labelEnd + 1 - if pos < maximum and charCodeAt(state.src, pos) == 0x28: # /* ( */ + if pos < maximum and state.ords[pos] == 0x28: # /* ( */ # # Inline link # @@ -38,7 +38,7 @@ def link(state: StateInline, silent: bool): # ^^ skipping these spaces pos += 1 while pos < maximum: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -61,7 +61,7 @@ def link(state: StateInline, silent: bool): # ^^ skipping these spaces start = pos while pos < maximum: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -76,14 +76,14 @@ def link(state: StateInline, silent: bool): # [link]( "title" ) # ^^ skipping these spaces while pos < maximum: - code = charCodeAt(state.src, pos) + code = state.ords[pos] if not isSpace(code) and code != 0x0A: break pos += 1 else: title = "" - if pos >= maximum or charCodeAt(state.src, pos) != 0x29: # /* ) */ + if pos >= maximum or state.ords[pos] != 0x29: # /* ) */ # parsing a valid shortcut link failed, fallback to reference parseReference = True @@ -96,7 +96,7 @@ def link(state: StateInline, silent: bool): if "references" not in state.env: return False - if pos < maximum and charCodeAt(state.src, pos) == 0x5B: # /* [ */ + if pos < maximum and state.ords[pos] == 0x5B: # /* [ */ start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index c68b48d7..f0894351 100644 --- a/markdown_it/rules_inline/newline.py +++ 
b/markdown_it/rules_inline/newline.py @@ -11,7 +11,7 @@ def newline(state: StateInline, silent: bool): pos = state.pos # /* \n */ - if charCodeAt(state.src, pos) != 0x0A: + if state.ords[pos] != 0x0A: return False pmax = len(state.pending) - 1 @@ -36,7 +36,7 @@ def newline(state: StateInline, silent: bool): pos += 1 # skip heading spaces for next line - while pos < maximum and isSpace(charCodeAt(state.src, pos)): + while pos < maximum and isSpace(state.ords[pos]): pos += 1 state.pos = pos diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 6383ce61..11678d16 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -5,7 +5,7 @@ from ..token import Token from ..ruler import StateBase -from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct, charCodeAt +from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct @attr.s(slots=True) @@ -45,6 +45,7 @@ class Delimiter: class StateInline(StateBase): def __init__(self, src: str, md, env, outTokens: List[Token]): self.src = src + self.ords = [ord(c) for c in src] self.env = env self.md = md self.tokens = outTokens @@ -122,18 +123,18 @@ def scanDelims(self, start, canSplitWord): left_flanking = True right_flanking = True maximum = self.posMax - marker = charCodeAt(self.src, start) + marker = self.ords[start] # treat beginning of the line as a whitespace - lastChar = charCodeAt(self.src, start - 1) if start > 0 else 0x20 + lastChar = self.ords[start - 1] if start > 0 else 0x20 - while pos < maximum and charCodeAt(self.src, pos) == marker: + while pos < maximum and self.ords[pos] == marker: pos += 1 count = pos - start # treat end of the line as a whitespace - nextChar = charCodeAt(self.src, pos) if pos < maximum else 0x20 + nextChar = self.ords[pos] if pos < maximum else 0x20 isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) isNextPunctChar = isMdAsciiPunct(nextChar) or 
isPunctChar(chr(nextChar)) diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 97f036f1..f14edb86 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -1,13 +1,12 @@ # ~~strike through~~ from typing import List from .state_inline import StateInline, Delimiter -from ..common.utils import charCodeAt def tokenize(state: StateInline, silent: bool): """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = charCodeAt(state.src, start) + marker = state.ords[start] if silent: return False diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index 21ea53ba..1b2bdbb5 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -2,7 +2,6 @@ # and increment current pos from .state_inline import StateInline -from ..common.utils import charCodeAt # Rule to skip pure text @@ -42,8 +41,8 @@ def isTerminatorChar(ch): def text(state: StateInline, silent: bool, **args): pos = state.pos - - while (pos < state.posMax) and not isTerminatorChar(charCodeAt(state.src, pos)): + posMax = state.posMax + while (pos < posMax) and not isTerminatorChar(state.ords[pos]): pos += 1 if pos == state.pos: From a90945fbbaed0473b13664b9ad08458cf57dc691 Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Fri, 14 Aug 2020 23:46:23 +0200 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20Fix=20a=20bug=20where?= =?UTF-8?q?=20we=20look=20at=20the=20next=20character=20while=20it=20may?= =?UTF-8?q?=20not=20exist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checks that the next character exists before trying to access it. 
--- markdown_it/rules_inline/image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index ebad242a..ddf9b7cc 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -17,7 +17,7 @@ def image(state: StateInline, silent: bool): if state.ords[state.pos] != 0x21: return False # /* [ */ - if state.ords[state.pos + 1] != 0x5B: + if state.pos + 1 < state.posMax and state.ords[state.pos + 1] != 0x5B: return False labelStart = state.pos + 2 From e7ea3ef50dc133f748a34f1341ba72bca57945c7 Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Sat, 15 Aug 2020 10:20:31 +0200 Subject: [PATCH 3/6] Account for possible None as src --- markdown_it/rules_block/state_block.py | 2 +- markdown_it/rules_core/state_core.py | 2 +- markdown_it/rules_inline/state_inline.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index 5fa86a57..e96c3423 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -12,7 +12,7 @@ def __init__(self, src: str, md, env, tokens: List[Token], ords: List[int] = Non if ords is not None: self.ords = ords else: - self.ords = [ord(c) for c in src] + self.ords = [ord(c) for c in src] if src is not None else [] # link to parser instance self.md = md diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 78f6eb04..059c2cbf 100644 --- a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -7,7 +7,7 @@ class StateCore(StateBase): def __init__(self, src: str, md, env, tokens=None): self.src = src - self.ords = [ord(c) for c in src] + self.ords = [ord(c) for c in src] if src is not None else [] self.md = md # link to parser instance self.env = env self.tokens: List[Token] = tokens or [] diff --git a/markdown_it/rules_inline/state_inline.py 
b/markdown_it/rules_inline/state_inline.py index 11678d16..a96772ea 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -45,7 +45,7 @@ class Delimiter: class StateInline(StateBase): def __init__(self, src: str, md, env, outTokens: List[Token]): self.src = src - self.ords = [ord(c) for c in src] + self.ords = [ord(c) for c in src] if src is not None else [] self.env = env self.md = md self.tokens = outTokens From 3f7fd44ba0fdad0de1cea5dc0e3d7a1f40ab2e7c Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Sat, 15 Aug 2020 10:57:58 +0200 Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=A7=AA=20TEST:=20Added=20two=20tests?= =?UTF-8?q?=20for=20empty=20inputs=20and=20States=20without=20src=20conten?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_api/test_main.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 3a90325b..575a246e 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -1,5 +1,6 @@ from markdown_it import MarkdownIt from markdown_it.token import Token +from markdown_it.rules_core import StateCore def test_get_rules(): @@ -215,3 +216,37 @@ def test_renderInline(): md = MarkdownIt("zero") tokens = md.renderInline("abc\n\n*xyz*") assert tokens == "abc\n\n*xyz*" + + +def test_emptyStr(): + md = MarkdownIt() + tokens = md.parseInline("") + assert tokens == [ + Token( + type="inline", + tag="", + nesting=0, + attrs=None, + map=[0, 1], + level=0, + children=[], + content="", + markup="", + info="", + meta={}, + block=False, + hidden=False, + ) + ] + + +def test_noneState(): + md = MarkdownIt() + state = StateCore(None, md, {}, []) + + # Remove normalizing rule + rules = md.core.ruler.get_active_rules() + md.core.ruler.enableOnly(rules[rules.index("inline") :]) + + # Check that we can process None str with empty env and block_tokens + 
md.core.process(state) From b92d4a5e64aaca0277caffab55ee81a3b51ee541 Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Mon, 17 Aug 2020 18:51:26 +0200 Subject: [PATCH 5/6] Rename ords to srcCharCode --- markdown_it/extensions/container/index.py | 4 +-- markdown_it/extensions/deflist/index.py | 4 +-- markdown_it/extensions/footnote/index.py | 26 ++++++++++---------- markdown_it/extensions/front_matter/index.py | 4 +-- markdown_it/extensions/myst_blocks/index.py | 6 ++--- markdown_it/extensions/myst_role/index.py | 2 +- markdown_it/helpers/parse_link_label.py | 2 +- markdown_it/rules_block/blockquote.py | 16 ++++++------ markdown_it/rules_block/fence.py | 4 +-- markdown_it/rules_block/heading.py | 10 ++++---- markdown_it/rules_block/hr.py | 4 +-- markdown_it/rules_block/html_block.py | 2 +- markdown_it/rules_block/lheading.py | 2 +- markdown_it/rules_block/list.py | 16 ++++++------ markdown_it/rules_block/reference.py | 6 ++--- markdown_it/rules_block/state_block.py | 22 +++++++++-------- markdown_it/rules_block/table.py | 4 +-- markdown_it/rules_core/block.py | 4 ++- markdown_it/rules_core/state_core.py | 2 +- markdown_it/rules_inline/autolink.py | 2 +- markdown_it/rules_inline/backticks.py | 6 ++--- markdown_it/rules_inline/emphasis.py | 2 +- markdown_it/rules_inline/entity.py | 4 +-- markdown_it/rules_inline/escape.py | 6 ++--- markdown_it/rules_inline/html_inline.py | 4 +-- markdown_it/rules_inline/image.py | 16 ++++++------ markdown_it/rules_inline/link.py | 14 +++++------ markdown_it/rules_inline/newline.py | 4 +-- markdown_it/rules_inline/state_inline.py | 10 ++++---- markdown_it/rules_inline/strikethrough.py | 2 +- markdown_it/rules_inline/text.py | 2 +- 31 files changed, 108 insertions(+), 104 deletions(-) diff --git a/markdown_it/extensions/container/index.py b/markdown_it/extensions/container/index.py index a0ccea1b..7c91b89a 100644 --- a/markdown_it/extensions/container/index.py +++ b/markdown_it/extensions/container/index.py @@ -36,7 +36,7 @@ def 
container_func(state: StateBlock, startLine: int, endLine: int, silent: bool # Check out the first character quickly, # this should filter out most of non-containers - if marker_char != state.ords[start]: + if marker_char != state.srcCharCode[start]: return False # Check out the rest of the marker string @@ -79,7 +79,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool # test break - if marker_char != state.ords[start]: + if marker_char != state.srcCharCode[start]: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/extensions/deflist/index.py b/markdown_it/extensions/deflist/index.py index 0200b64e..054906d0 100644 --- a/markdown_it/extensions/deflist/index.py +++ b/markdown_it/extensions/deflist/index.py @@ -15,7 +15,7 @@ def skipMarker(state: StateBlock, line: int): return -1 # Check bullet - marker = state.ords[start] + marker = state.srcCharCode[start] start += 1 if marker != 0x7E and marker != 0x3A: # ~ : return -1 @@ -118,7 +118,7 @@ def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if isSpace(ch): if ch == 0x09: diff --git a/markdown_it/extensions/footnote/index.py b/markdown_it/extensions/footnote/index.py index 1525a436..82ef5c26 100644 --- a/markdown_it/extensions/footnote/index.py +++ b/markdown_it/extensions/footnote/index.py @@ -43,23 +43,23 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool): if start + 4 > maximum: return False - if state.ords[start] != 0x5B: # /* [ */ + if state.srcCharCode[start] != 0x5B: # /* [ */ return False - if state.ords[start + 1] != 0x5E: # /* ^ */ + if state.srcCharCode[start + 1] != 0x5E: # /* ^ */ return False pos = start + 2 while pos < maximum: - if state.ords[pos] == 0x20: + if state.srcCharCode[pos] == 0x20: return False - if state.ords[pos] == 0x5D: # /* ] */ + if state.srcCharCode[pos] == 0x5D: # /* ] */ break pos += 1 
if pos == start + 2: # no empty footnote labels return False pos += 1 - if pos + 1 >= maximum or state.ords[pos] != 0x3A: # /* : */ + if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A: # /* : */ return False if silent: return True @@ -87,7 +87,7 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if isSpace(ch): if ch == 0x09: @@ -136,9 +136,9 @@ def footnote_inline(state: StateInline, silent: bool): if start + 2 >= maximum: return False - if state.ords[start] != 0x5E: # /* ^ */ + if state.srcCharCode[start] != 0x5E: # /* ^ */ return False - if state.ords[start + 1] != 0x5B: # /* [ */ + if state.srcCharCode[start + 1] != 0x5B: # /* [ */ return False labelStart = start + 2 @@ -182,18 +182,18 @@ def footnote_ref(state: StateInline, silent: bool): if "footnotes" not in state.env or "refs" not in state.env["footnotes"]: return False - if state.ords[start] != 0x5B: # /* [ */ + if state.srcCharCode[start] != 0x5B: # /* [ */ return False - if state.ords[start + 1] != 0x5E: # /* ^ */ + if state.srcCharCode[start + 1] != 0x5E: # /* ^ */ return False pos = start + 2 while pos < maximum: - if state.ords[pos] == 0x20: + if state.srcCharCode[pos] == 0x20: return False - if state.ords[pos] == 0x0A: + if state.srcCharCode[pos] == 0x0A: return False - if state.ords[pos] == 0x5D: # /* ] */ + if state.srcCharCode[pos] == 0x5D: # /* ] */ break pos += 1 diff --git a/markdown_it/extensions/front_matter/index.py b/markdown_it/extensions/front_matter/index.py index b4876c20..a337f07d 100644 --- a/markdown_it/extensions/front_matter/index.py +++ b/markdown_it/extensions/front_matter/index.py @@ -29,7 +29,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool): # Check out the first character of the first line quickly, # this should filter out non-front matter - if startLine != 0 or marker_char != state.ords[0]: + if startLine != 0 or marker_char != 
state.srcCharCode[0]: return False # Check out the rest of the marker string @@ -73,7 +73,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if marker_char != state.ords[start]: + if marker_char != state.srcCharCode[start]: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/extensions/myst_blocks/index.py b/markdown_it/extensions/myst_blocks/index.py index 9913ffa4..625d5f1b 100644 --- a/markdown_it/extensions/myst_blocks/index.py +++ b/markdown_it/extensions/myst_blocks/index.py @@ -40,7 +40,7 @@ def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = state.ords[pos] + marker = state.srcCharCode[pos] pos += 1 # Check block marker /* % */ @@ -70,7 +70,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = state.ords[pos] + marker = state.srcCharCode[pos] pos += 1 # Check block marker /* + */ @@ -81,7 +81,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool): cnt = 1 while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if ch != marker and not isSpace(ch): break if ch == marker: diff --git a/markdown_it/extensions/myst_role/index.py b/markdown_it/extensions/myst_role/index.py index e295c53b..88a38047 100644 --- a/markdown_it/extensions/myst_role/index.py +++ b/markdown_it/extensions/myst_role/index.py @@ -15,7 +15,7 @@ def myst_role_plugin(md: MarkdownIt): def myst_role(state: StateInline, silent: bool): try: - if state.ords[state.pos - 1] == 0x5C: # /* \ */ + if state.srcCharCode[state.pos - 1] == 0x5C: # /* \ */ # escaped (this could be improved in the case of edge case '\\{') return False except IndexError: diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 2a4acd11..b0e4f0f1 100644 
--- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -17,7 +17,7 @@ def parseLinkLabel(state, start, disableNested=False): level = 1 while state.pos < state.posMax: - marker = state.ords[state.pos] + marker = state.srcCharCode[state.pos] if marker == 0x5D: # /* ] */) level -= 1 if level == 0: diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index 18260c4d..88be0211 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -22,7 +22,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): return False # check the block quote marker - if state.ords[pos] != 0x3E: # /* > */ + if state.srcCharCode[pos] != 0x3E: # /* > */ pos += 1 return False pos += 1 @@ -40,7 +40,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): ) # skip one optional space after '>' - if state.ords[pos] == 0x20: # /* space */ + if state.srcCharCode[pos] == 0x20: # /* space */ # ' > test ' # ^ -- position start of line here: pos += 1 @@ -48,7 +48,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): offset += 1 adjustTab = False spaceAfterMarker = True - elif state.ords[pos] == 0x09: # /* tab */ + elif state.srcCharCode[pos] == 0x09: # /* tab */ spaceAfterMarker = True if (state.bsCount[startLine] + offset) % 4 == 3: @@ -71,7 +71,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.bMarks[startLine] = pos while pos < max: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if isSpace(ch): if ch == 0x09: # / tab / @@ -147,7 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): # Case 1: line is not inside the blockquote, and this line is empty. 
break - evaluatesTrue = state.ords[pos] == 0x3E and not wasOutdented # /* > */ + evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented # /* > */ pos += 1 if evaluatesTrue: # This line is inside the blockquote. @@ -160,7 +160,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): ) # skip one optional space after '>' - if state.ords[pos] == 0x20: # /* space */ + if state.srcCharCode[pos] == 0x20: # /* space */ # ' > test ' # ^ -- position start of line here: pos += 1 @@ -168,7 +168,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): offset += 1 adjustTab = False spaceAfterMarker = True - elif state.ords[pos] == 0x09: # /* tab */ + elif state.srcCharCode[pos] == 0x09: # /* tab */ spaceAfterMarker = True if (state.bsCount[nextLine] + offset) % 4 == 3: @@ -191,7 +191,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): state.bMarks[nextLine] = pos while pos < max: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if isSpace(ch): if ch == 0x09: diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index aa6ef2ea..bacf54a2 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -22,7 +22,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): if pos + 3 > maximum: return False - marker = state.ords[pos] + marker = state.srcCharCode[pos] # /* ~ */ /* ` */ if marker != 0x7E and marker != 0x60: @@ -68,7 +68,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): # test break - if state.ords[pos] != marker: + if state.srcCharCode[pos] != marker: continue if state.sCount[nextLine] - state.blkIndent >= 4: diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index bcb99795..b4779fa8 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -18,7 +18,7 @@ def heading(state: StateBlock, startLine: int, 
endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - ch = state.ords[pos] + ch = state.srcCharCode[pos] # /* # */ if ch != 0x23 or pos >= maximum: @@ -27,12 +27,12 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): # count heading level level = 1 pos += 1 - ch = state.ords[pos] + ch = state.srcCharCode[pos] # /* # */ while ch == 0x23 and pos < maximum and level <= 6: level += 1 pos += 1 - ch = state.ords[pos] + ch = state.srcCharCode[pos] if level > 6 or (pos < maximum and not isSpace(ch)): return False @@ -44,13 +44,13 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): # maximum = state.skipSpacesBack(maximum, pos) # tmp = state.skipCharsBack(maximum, 0x23, pos) # # - # if tmp > pos and isSpace(state.ords[tmp - 1]): + # if tmp > pos and isSpace(state.srcCharCode[tmp - 1]): # maximum = tmp # TODO the code above doesn't seem to work, but this does # we should check why the code above doesn't work though _max = len(state.src[:maximum].rstrip().rstrip(chr(0x23))) try: - if isSpace(state.ords[_max - 1]): + if isSpace(state.srcCharCode[_max - 1]): maximum = _max except IndexError: pass diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index c3a301b9..01c68552 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -22,7 +22,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): if state.sCount[startLine] - state.blkIndent >= 4: return False - marker = state.ords[pos] + marker = state.srcCharCode[pos] pos += 1 # Check hr marker: /* * */ /* - */ /* _ */ @@ -33,7 +33,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): cnt = 1 while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] pos += 1 if ch != marker and not isSpace(ch): return False diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index 202d93a3..3cac032d 100644 --- 
a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -44,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): if not state.md.options.get("html", None): return False - if state.ords[pos] != 0x3C: # /* < */ + if state.srcCharCode[pos] != 0x3C: # /* < */ return False lineText = state.src[pos:maximum] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index 90b40aad..f26e2af0 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -37,7 +37,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): maximum = state.eMarks[nextLine] if pos < maximum: - marker = state.ords[pos] + marker = state.srcCharCode[pos] # /* - */ /* = */ if marker == 0x2D or marker == 0x3D: diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index 46ed46e7..9e6d3717 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -14,14 +14,14 @@ def skipBulletListMarker(state: StateBlock, startLine: int): pos = state.bMarks[startLine] + state.tShift[startLine] maximum = state.eMarks[startLine] - marker = state.ords[pos] + marker = state.srcCharCode[pos] pos += 1 # Check bullet /* * */ /* - */ /* + */ if marker != 0x2A and marker != 0x2D and marker != 0x2B: return -1 if pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if not isSpace(ch): # " -test " - is not a list item @@ -42,7 +42,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): if pos + 1 >= maximum: return -1 - ch = state.ords[pos] + ch = state.srcCharCode[pos] pos += 1 # /* 0 */ /* 9 */ @@ -54,7 +54,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): if pos >= maximum: return -1 - ch = state.ords[pos] + ch = state.srcCharCode[pos] pos += 1 # /* 0 */ /* 9 */ @@ -74,7 +74,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int): return -1 if pos < maximum: - ch = 
state.ords[pos] + ch = state.srcCharCode[pos] if not isSpace(ch): # " 1.test " - is not a list item @@ -156,7 +156,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): return False # We should terminate list on style change. Remember first one to compare. - markerCharCode = state.ords[posAfterMarker - 1] + markerCharCode = state.srcCharCode[posAfterMarker - 1] # For validation mode we can terminate immediately if silent: @@ -198,7 +198,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): ) while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if ch == 0x09: offset += 4 - (offset + state.bsCount[nextLine]) % 4 @@ -317,7 +317,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): if posAfterMarker < 0: break - if markerCharCode != state.ords[posAfterMarker - 1]: + if markerCharCode != state.srcCharCode[posAfterMarker - 1]: break # Finalize list diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 38872eaa..e44ca0be 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -24,17 +24,17 @@ def reference(state: StateBlock, startLine, _endLine, silent): if state.sCount[startLine] - state.blkIndent >= 4: return False - if state.ords[pos] != 0x5B: # /* [ */ + if state.srcCharCode[pos] != 0x5B: # /* [ */ return False # Simple check to quickly interrupt scan on [link](url) at the start of line. 
# Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 while pos < maximum: # /* ] */ /* \ */ /* : */ - if state.ords[pos] == 0x5D and state.ords[pos - 1] != 0x5C: + if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C: if pos + 1 == maximum: return False - if state.ords[pos + 1] != 0x3A: + if state.srcCharCode[pos + 1] != 0x3A: return False break pos += 1 diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index e96c3423..adf6245c 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -6,13 +6,15 @@ class StateBlock(StateBase): - def __init__(self, src: str, md, env, tokens: List[Token], ords: List[int] = None): + def __init__( + self, src: str, md, env, tokens: List[Token], srcCharCode: List[int] = None + ): self.src = src - if ords is not None: - self.ords = ords + if srcCharCode is not None: + self.srcCharCode = srcCharCode else: - self.ords = [ord(c) for c in src] if src is not None else [] + self.srcCharCode = [ord(c) for c in src] if src is not None else [] # link to parser instance self.md = md @@ -68,7 +70,7 @@ def __init__(self, src: str, md, env, tokens: List[Token], ords: List[int] = Non start = pos = indent = offset = 0 length = len(self.src) - for pos, character in enumerate(self.ords): + for pos, character in enumerate(self.srcCharCode): if not indent_found: if isSpace(character): indent += 1 @@ -137,7 +139,7 @@ def skipEmptyLines(self, from_pos): def skipSpaces(self, pos: int): """Skip spaces from given position.""" while pos < len(self.src): - if not isSpace(self.ords[pos]): + if not isSpace(self.srcCharCode[pos]): break pos += 1 return pos @@ -147,7 +149,7 @@ def skipSpacesBack(self, pos: int, minimum: int): if pos <= minimum: return pos while pos > minimum: - if not isSpace(self.ords[pos]): + if not isSpace(self.srcCharCode[pos]): return pos + 1 pos -= 1 return pos @@ -155,7 +157,7 @@ def skipSpacesBack(self, pos: 
int, minimum: int): def skipChars(self, pos: int, code: int): """Skip char codes from given position.""" while pos < len(self.src): - if self.ords[pos] != code: + if self.srcCharCode[pos] != code: break pos += 1 return pos @@ -165,7 +167,7 @@ def skipCharsBack(self, pos, code, minimum): if pos <= minimum: return pos while pos > minimum: - if code != self.ords[pos]: + if code != self.srcCharCode[pos]: return pos + 1 pos -= 1 return pos @@ -188,7 +190,7 @@ def getLines(self, begin: int, end: int, indent, keepLastLF): last = self.eMarks[line] while (first < last) and (lineIndent < indent): - ch = self.ords[first] + ch = self.srcCharCode[first] if isSpace(ch): if ch == 0x09: lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index d931a3bd..32e7db7b 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -85,14 +85,14 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool): if pos >= state.eMarks[nextLine]: return False - ch = state.ords[pos] + ch = state.srcCharCode[pos] pos += 1 # /* | */ /* - */ /* : */ if ch != 0x7C and ch != 0x2D and ch != 0x3A: return False while pos < state.eMarks[nextLine]: - ch = state.ords[pos] + ch = state.srcCharCode[pos] # /* | */ /* - */ /* : */ if ch != 0x7C and ch != 0x2D and ch != 0x3A and not isSpace(ch): diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index 0a37ea72..b47a5d8f 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -11,4 +11,6 @@ def block(state: StateCore): token.children = [] state.tokens.append(token) else: - state.md.block.parse(state.src, state.md, state.env, state.tokens, state.ords) + state.md.block.parse( + state.src, state.md, state.env, state.tokens, state.srcCharCode + ) diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py index 059c2cbf..1a93bd86 100644 --- 
a/markdown_it/rules_core/state_core.py +++ b/markdown_it/rules_core/state_core.py @@ -7,7 +7,7 @@ class StateCore(StateBase): def __init__(self, src: str, md, env, tokens=None): self.src = src - self.ords = [ord(c) for c in src] if src is not None else [] + self.srcCharCode = [ord(c) for c in src] if src is not None else [] self.md = md # link to parser instance self.env = env self.tokens: List[Token] = tokens or [] diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index d1da5ff0..c1f8d0f1 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -13,7 +13,7 @@ def autolink(state: StateBase, silent: bool): pos = state.pos - if state.ords[pos] != 0x3C: # /* < */ + if state.srcCharCode[pos] != 0x3C: # /* < */ return False tail = state.src[pos:] diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 0b7d6590..b843c94e 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -9,7 +9,7 @@ def backtick(state: StateInline, silent: bool): pos = state.pos - ch = state.ords[pos] + ch = state.srcCharCode[pos] # /* ` */ if ch != 0x60: @@ -20,7 +20,7 @@ def backtick(state: StateInline, silent: bool): maximum = state.posMax # /* ` */ - while pos < maximum and (state.ords[pos] == 0x60): + while pos < maximum and (state.srcCharCode[pos] == 0x60): pos += 1 marker = state.src[start:pos] @@ -34,7 +34,7 @@ def backtick(state: StateInline, silent: bool): break matchEnd = matchStart + 1 # /* ` */ - while matchEnd < maximum and (state.ords[matchEnd] == 0x60): + while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): matchEnd += 1 if matchEnd - matchStart == len(marker): diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index bbae1345..ef32c8d9 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -7,7 +7,7 @@ def tokenize(state: 
StateInline, silent: bool): """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.ords[start] + marker = state.srcCharCode[start] if silent: return False diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index a6ebd7a4..753baf27 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -14,11 +14,11 @@ def entity(state: StateInline, silent: bool): pos = state.pos maximum = state.posMax - if state.ords[pos] != 0x26: # /* & */ + if state.srcCharCode[pos] != 0x26: # /* & */ return False if (pos + 1) < maximum: - ch = state.ords[pos + 1] + ch = state.srcCharCode[pos + 1] if ch == 0x23: # /* # */ match = DIGITAL_RE.search(state.src[pos:]) diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 3bc0e3b5..64d9a678 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -15,13 +15,13 @@ def escape(state: StateInline, silent: bool): maximum = state.posMax # /* \ */ - if state.ords[pos] != 0x5C: + if state.srcCharCode[pos] != 0x5C: return False pos += 1 if pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if ch < 256 and ESCAPED[ch] != 0: if not silent: @@ -36,7 +36,7 @@ def escape(state: StateInline, silent: bool): pos += 1 # skip leading whitespaces from next line while pos < maximum: - ch = state.ords[pos] + ch = state.srcCharCode[pos] if not isSpace(ch): break pos += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 9791f1bf..7333e370 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -18,11 +18,11 @@ def html_inline(state: StateInline, silent: bool): # Check start maximum = state.posMax - if state.ords[pos] != 0x3C or pos + 2 >= maximum: # /* < */ + if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum: # /* < */ return False # Quick fail on second 
char - ch = state.ords[pos + 1] + ch = state.srcCharCode[pos + 1] if ( ch != 0x21 and ch != 0x3F # /* ! */ diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index ddf9b7cc..df0aa4e5 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -14,10 +14,10 @@ def image(state: StateInline, silent: bool): max = state.posMax # /* ! */ - if state.ords[state.pos] != 0x21: + if state.srcCharCode[state.pos] != 0x21: return False # /* [ */ - if state.pos + 1 < state.posMax and state.ords[state.pos + 1] != 0x5B: + if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B: return False labelStart = state.pos + 2 @@ -29,7 +29,7 @@ def image(state: StateInline, silent: bool): pos = labelEnd + 1 # /* ( */ - if pos < max and state.ords[pos] == 0x28: + if pos < max and state.srcCharCode[pos] == 0x28: # # Inline link # @@ -38,7 +38,7 @@ def image(state: StateInline, silent: bool): # ^^ skipping these spaces pos += 1 while pos < max: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -61,7 +61,7 @@ def image(state: StateInline, silent: bool): # ^^ skipping these spaces start = pos while pos < max: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -76,7 +76,7 @@ def image(state: StateInline, silent: bool): # [link]( "title" ) # ^^ skipping these spaces while pos < max: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -84,7 +84,7 @@ def image(state: StateInline, silent: bool): title = "" # /* ) */ - if pos >= max or state.ords[pos] != 0x29: + if pos >= max or state.srcCharCode[pos] != 0x29: state.pos = oldPos return False @@ -98,7 +98,7 @@ def image(state: StateInline, silent: bool): return False # /* [ */ - if pos < max and state.ords[pos] == 0x5B: + if pos < max and state.srcCharCode[pos] == 0x5B: start = pos + 1 
pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 7bf56f9d..cdacfe5c 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -14,7 +14,7 @@ def link(state: StateInline, silent: bool): start = state.pos parseReference = True - if state.ords[state.pos] != 0x5B: # /* [ */ + if state.srcCharCode[state.pos] != 0x5B: # /* [ */ return False labelStart = state.pos + 1 @@ -26,7 +26,7 @@ def link(state: StateInline, silent: bool): pos = labelEnd + 1 - if pos < maximum and state.ords[pos] == 0x28: # /* ( */ + if pos < maximum and state.srcCharCode[pos] == 0x28: # /* ( */ # # Inline link # @@ -38,7 +38,7 @@ def link(state: StateInline, silent: bool): # ^^ skipping these spaces pos += 1 while pos < maximum: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -61,7 +61,7 @@ def link(state: StateInline, silent: bool): # ^^ skipping these spaces start = pos while pos < maximum: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 @@ -76,14 +76,14 @@ def link(state: StateInline, silent: bool): # [link]( "title" ) # ^^ skipping these spaces while pos < maximum: - code = state.ords[pos] + code = state.srcCharCode[pos] if not isSpace(code) and code != 0x0A: break pos += 1 else: title = "" - if pos >= maximum or state.ords[pos] != 0x29: # /* ) */ + if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */ # parsing a valid shortcut link failed, fallback to reference parseReference = True @@ -96,7 +96,7 @@ def link(state: StateInline, silent: bool): if "references" not in state.env: return False - if pos < maximum and state.ords[pos] == 0x5B: # /* [ */ + if pos < maximum and state.srcCharCode[pos] == 0x5B: # /* [ */ start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git 
a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index f0894351..b4b8a67f 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -11,7 +11,7 @@ def newline(state: StateInline, silent: bool): pos = state.pos # /* \n */ - if state.ords[pos] != 0x0A: + if state.srcCharCode[pos] != 0x0A: return False pmax = len(state.pending) - 1 @@ -36,7 +36,7 @@ def newline(state: StateInline, silent: bool): pos += 1 # skip heading spaces for next line - while pos < maximum and isSpace(state.ords[pos]): + while pos < maximum and isSpace(state.srcCharCode[pos]): pos += 1 state.pos = pos diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index a96772ea..ff43d0ce 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -45,7 +45,7 @@ class Delimiter: class StateInline(StateBase): def __init__(self, src: str, md, env, outTokens: List[Token]): self.src = src - self.ords = [ord(c) for c in src] if src is not None else [] + self.srcCharCode = [ord(c) for c in src] if src is not None else [] self.env = env self.md = md self.tokens = outTokens @@ -123,18 +123,18 @@ def scanDelims(self, start, canSplitWord): left_flanking = True right_flanking = True maximum = self.posMax - marker = self.ords[start] + marker = self.srcCharCode[start] # treat beginning of the line as a whitespace - lastChar = self.ords[start - 1] if start > 0 else 0x20 + lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20 - while pos < maximum and self.ords[pos] == marker: + while pos < maximum and self.srcCharCode[pos] == marker: pos += 1 count = pos - start # treat end of the line as a whitespace - nextChar = self.ords[pos] if pos < maximum else 0x20 + nextChar = self.srcCharCode[pos] if pos < maximum else 0x20 isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) diff 
--git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index f14edb86..ccb68877 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -6,7 +6,7 @@ def tokenize(state: StateInline, silent: bool): """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.ords[start] + marker = state.srcCharCode[start] if silent: return False diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index 1b2bdbb5..f36f069a 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -42,7 +42,7 @@ def isTerminatorChar(ch): def text(state: StateInline, silent: bool, **args): pos = state.pos posMax = state.posMax - while (pos < posMax) and not isTerminatorChar(state.ords[pos]): + while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): pos += 1 if pos == state.pos: From c2364d99439d59b6bc7b81f8cb22dca9c05c072f Mon Sep 17 00:00:00 2001 From: Remi Bois Date: Mon, 17 Aug 2020 19:02:51 +0200 Subject: [PATCH 6/6] Add some explanation for the use of srcCharCode attribute in the port.yaml file --- markdown_it/port.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 6a9e2927..84ec2f87 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -11,6 +11,10 @@ this is generally the main difference between the codes, because in python you can't do e.g. `for {i=1;i