diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py index 7f141bb0..d150ca36 100644 --- a/markdown_it/common/normalize_url.py +++ b/markdown_it/common/normalize_url.py @@ -67,7 +67,7 @@ def unescape_normalize_uri(x: str) -> str: return normalize_uri(unescape_string(x)) -def normalizeLink(url): +def normalizeLink(url: str) -> str: """Normalize destination URLs in links :: @@ -117,7 +117,7 @@ def unescape_unquote(x: str) -> str: return unquote(unescape_string(x)) -def normalizeLinkText(link): +def normalizeLinkText(link: str) -> str: """Normalize autolink content :: @@ -172,9 +172,6 @@ def validateLink(url: str, validator: Optional[Callable] = None) -> bool: This validator can prohibit more than really needed to prevent XSS. It's a tradeoff to keep code simple and to be secure by default. - If you need different setup - override validator method as you wish. - Or replace it with dummy function and use external sanitizer. - Note: url should be normalized at this point, and existing entities decoded. """ if validator is not None: diff --git a/markdown_it/main.py b/markdown_it/main.py index 58403aa4..b96d52b1 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -12,7 +12,7 @@ ) from . import helpers, presets # noqa F401 -from .common import utils # noqa F401 +from .common import normalize_url, utils # noqa F401 from .token import Token from .parser_core import ParserCore # noqa F401 from .parser_block import ParserBlock # noqa F401 @@ -288,3 +288,35 @@ def renderInline(self, src: str, env: Optional[AttrDict] = None) -> Any: """ env = AttrDict() if env is None else env return self.renderer.render(self.parseInline(src, env), self.options, env) + + # link methods + + def validateLink(self, url: str) -> bool: + """Validate if the URL link is allowed in output. + + This validator can prohibit more than really needed to prevent XSS. + It's a tradeoff to keep code simple and to be secure by default. 
+ + Note: the url should be normalized at this point, and existing entities decoded. + """ + return normalize_url.validateLink(url) + + def normalizeLink(self, url: str) -> str: + """Normalize destination URLs in links + + :: + + [label]: destination 'title' + ^^^^^^^^^^^ + """ + return normalize_url.normalizeLink(url) + + def normalizeLinkText(self, link: str) -> str: + """Normalize autolink content + + :: + + <destination> + ~~~~~~~~~~~ + """ + return normalize_url.normalizeLinkText(link) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index 57338aea..5cfb82a7 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -19,16 +19,6 @@ Use python's built-in `html.escape` and `urlparse.quote` methods, as a replacement for the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl) and [punycode](https://www.npmjs.com/package/punycode) - - | - Remove indirect references within `MarkdownIt`; - - self.validateLink = validateLink - self.normalizeLink = normalizeLink - self.normalizeLinkText = normalizeLinkText - - in favour of using them directly through: - - from markdown_it.common.normalize_url import normalizeLinkText - | In markdown_it/rules_block/reference.py, record line range in state.env["references"] and add state.env["duplicate_refs"] diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index d976c1e3..882118a1 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -1,7 +1,6 @@ import logging from ..common.utils import isSpace, normalizeReference, charCodeAt -from ..common.normalize_url import normalizeLink, validateLink from ..utils import AttrDict from .state_block import StateBlock @@ -115,8 +114,8 @@ def reference(state: StateBlock, startLine, _endLine, silent): if not res.ok: return False - href = normalizeLink(res.str) - if not validateLink(href): + href = state.md.normalizeLink(res.str) + if not state.md.validateLink(href): return False pos = res.pos diff --git 
a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index b8bf2e1a..69ad0c4d 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -1,15 +1,18 @@ -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, TYPE_CHECKING from ..token import Token from ..ruler import StateBase from ..common.utils import isSpace +if TYPE_CHECKING: + from markdown_it.main import MarkdownIt + class StateBlock(StateBase): def __init__( self, src: str, - md, + md: "MarkdownIt", env, tokens: List[Token], srcCharCode: Optional[Tuple[int, ...]] = None, diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py index eac7aa65..47e726df 100644 --- a/markdown_it/rules_core/linkify.py +++ b/markdown_it/rules_core/linkify.py @@ -1,7 +1,6 @@ import re from ..common.utils import arrayReplaceAt -from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink from .state_core import StateCore from ..token import Token @@ -82,8 +81,8 @@ def linkify(state: StateCore) -> None: for ln in range(len(links)): url = links[ln].url - fullUrl = normalizeLink(url) - if not validateLink(fullUrl): + fullUrl = state.md.normalizeLink(url) + if not state.md.validateLink(fullUrl): continue urlText = links[ln].text @@ -93,16 +92,16 @@ def linkify(state: StateCore) -> None: # and remove it afterwards. 
if not links[ln].schema: urlText = HTTP_RE.sub( - "", normalizeLinkText("http://" + urlText) + "", state.md.normalizeLinkText("http://" + urlText) ) elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search( urlText ): urlText = MAILTO_RE.sub( - "", normalizeLinkText("mailto:" + urlText) + "", state.md.normalizeLinkText("mailto:" + urlText) ) else: - urlText = normalizeLinkText(urlText) + urlText = state.md.normalizeLinkText(urlText) pos = links[ln].index diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index b07396e2..087c50ce 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -1,7 +1,6 @@ # Process autolinks '' import re from .state_inline import StateInline -from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink EMAIL_RE = re.compile( r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501 @@ -25,8 +24,8 @@ def autolink(state: StateInline, silent: bool) -> bool: if linkMatch is not None: url = linkMatch.group(0)[1:-1] - fullUrl = normalizeLink(url) - if not validateLink(fullUrl): + fullUrl = state.md.normalizeLink(url) + if not state.md.validateLink(fullUrl): return False if not silent: @@ -36,7 +35,7 @@ def autolink(state: StateInline, silent: bool) -> bool: token.info = "auto" token = state.push("text", "", 0) - token.content = normalizeLinkText(url) + token.content = state.md.normalizeLinkText(url) token = state.push("link_close", "a", -1) token.markup = "autolink" @@ -49,8 +48,8 @@ def autolink(state: StateInline, silent: bool) -> bool: if emailMatch is not None: url = emailMatch.group(0)[1:-1] - fullUrl = normalizeLink("mailto:" + url) - if not validateLink(fullUrl): + fullUrl = state.md.normalizeLink("mailto:" + url) + if not state.md.validateLink(fullUrl): return False if not silent: @@ -60,7 +59,7 @@ def autolink(state: StateInline, silent: 
bool) -> bool: token.info = "auto" token = state.push("text", "", 0) - token.content = normalizeLinkText(url) + token.content = state.md.normalizeLinkText(url) token = state.push("link_close", "a", -1) token.markup = "autolink" diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index fc23a69d..d33d8cb1 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -5,7 +5,6 @@ from .state_inline import StateInline from ..token import Token from ..common.utils import isSpace, normalizeReference -from ..common.normalize_url import normalizeLink, validateLink def image(state: StateInline, silent: bool): @@ -53,8 +52,8 @@ def image(state: StateInline, silent: bool): start = pos res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax) if res.ok: - href = normalizeLink(res.str) - if validateLink(href): + href = state.md.normalizeLink(res.str) + if state.md.validateLink(href): pos = res.pos else: href = "" diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index 4c0e8575..8dbe0bd0 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,7 +1,6 @@ # Process [link]( "stuff") from ..common.utils import normalizeReference, isSpace -from ..common.normalize_url import normalizeLink, validateLink from .state_inline import StateInline @@ -51,8 +50,8 @@ def link(state: StateInline, silent: bool): start = pos res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax) if res.ok: - href = normalizeLink(res.str) - if validateLink(href): + href = state.md.normalizeLink(res.str) + if state.md.validateLink(href): pos = res.pos else: href = ""