Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions markdown_it/common/normalize_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def unescape_normalize_uri(x: str) -> str:
return normalize_uri(unescape_string(x))


def normalizeLink(url):
def normalizeLink(url: str) -> str:
"""Normalize destination URLs in links

::
Expand Down Expand Up @@ -117,7 +117,7 @@ def unescape_unquote(x: str) -> str:
return unquote(unescape_string(x))


def normalizeLinkText(link):
def normalizeLinkText(link: str) -> str:
"""Normalize autolink content

::
Expand Down Expand Up @@ -172,9 +172,6 @@ def validateLink(url: str, validator: Optional[Callable] = None) -> bool:
This validator can prohibit more than really needed to prevent XSS.
It's a tradeoff to keep code simple and to be secure by default.

If you need different setup - override validator method as you wish.
Or replace it with dummy function and use external sanitizer.

Note: url should be normalized at this point, and existing entities decoded.
"""
if validator is not None:
Expand Down
34 changes: 33 additions & 1 deletion markdown_it/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)

from . import helpers, presets # noqa F401
from .common import utils # noqa F401
from .common import normalize_url, utils # noqa F401
from .token import Token
from .parser_core import ParserCore # noqa F401
from .parser_block import ParserBlock # noqa F401
Expand Down Expand Up @@ -288,3 +288,35 @@ def renderInline(self, src: str, env: Optional[AttrDict] = None) -> Any:
"""
env = AttrDict() if env is None else env
return self.renderer.render(self.parseInline(src, env), self.options, env)

# link methods

def validateLink(self, url: str) -> bool:
    """Decide whether a URL is permitted as a link target in the output.

    The decision is delegated to ``normalize_url.validateLink``.

    The check is intentionally conservative: it may reject more URLs than
    is strictly required to prevent XSS. That is a deliberate tradeoff which
    keeps the code simple and makes the parser secure by default.

    Note: ``url`` is expected to be normalized already, with any existing
    entities decoded, before it is passed in.
    """
    is_allowed = normalize_url.validateLink(url)
    return is_allowed

def normalizeLink(self, url: str) -> str:
    """Normalize the destination URL of a link.

    The work is delegated to ``normalize_url.normalizeLink``. The part of a
    link definition that is affected is the destination, marked below:

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^
    """
    normalized = normalize_url.normalizeLink(url)
    return normalized

def normalizeLinkText(self, link: str) -> str:
    """Normalize the content of an autolink.

    The work is delegated to ``normalize_url.normalizeLinkText``. The part
    of an autolink that is affected is marked below:

    ::

        <destination>
         ~~~~~~~~~~~
    """
    normalized_text = normalize_url.normalizeLinkText(link)
    return normalized_text
10 changes: 0 additions & 10 deletions markdown_it/port.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@
Use Python's built-in `html.escape` and `urllib.parse.quote` functions, as a replacement for
the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl)
and [punycode](https://www.npmjs.com/package/punycode)
- |
Remove indirect references within `MarkdownIt`;

self.validateLink = validateLink
self.normalizeLink = normalizeLink
self.normalizeLinkText = normalizeLinkText

in favour of using them directly through:

from markdown_it.common.normalize_url import normalizeLinkText
- |
In markdown_it/rules_block/reference.py,
record line range in state.env["references"] and add state.env["duplicate_refs"]
Expand Down
5 changes: 2 additions & 3 deletions markdown_it/rules_block/reference.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging

from ..common.utils import isSpace, normalizeReference, charCodeAt
from ..common.normalize_url import normalizeLink, validateLink
from ..utils import AttrDict
from .state_block import StateBlock

Expand Down Expand Up @@ -115,8 +114,8 @@ def reference(state: StateBlock, startLine, _endLine, silent):
if not res.ok:
return False

href = normalizeLink(res.str)
if not validateLink(href):
href = state.md.normalizeLink(res.str)
if not state.md.validateLink(href):
return False

pos = res.pos
Expand Down
7 changes: 5 additions & 2 deletions markdown_it/rules_block/state_block.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, TYPE_CHECKING

from ..token import Token
from ..ruler import StateBase
from ..common.utils import isSpace

if TYPE_CHECKING:
from markdown_it.main import MarkdownIt


class StateBlock(StateBase):
def __init__(
self,
src: str,
md,
md: "MarkdownIt",
env,
tokens: List[Token],
srcCharCode: Optional[Tuple[int, ...]] = None,
Expand Down
11 changes: 5 additions & 6 deletions markdown_it/rules_core/linkify.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re

from ..common.utils import arrayReplaceAt
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink
from .state_core import StateCore
from ..token import Token

Expand Down Expand Up @@ -82,8 +81,8 @@ def linkify(state: StateCore) -> None:

for ln in range(len(links)):
url = links[ln].url
fullUrl = normalizeLink(url)
if not validateLink(fullUrl):
fullUrl = state.md.normalizeLink(url)
if not state.md.validateLink(fullUrl):
continue

urlText = links[ln].text
Expand All @@ -93,16 +92,16 @@ def linkify(state: StateCore) -> None:
# and remove it afterwards.
if not links[ln].schema:
urlText = HTTP_RE.sub(
"", normalizeLinkText("http://" + urlText)
"", state.md.normalizeLinkText("http://" + urlText)
)
elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search(
urlText
):
urlText = MAILTO_RE.sub(
"", normalizeLinkText("mailto:" + urlText)
"", state.md.normalizeLinkText("mailto:" + urlText)
)
else:
urlText = normalizeLinkText(urlText)
urlText = state.md.normalizeLinkText(urlText)

pos = links[ln].index

Expand Down
13 changes: 6 additions & 7 deletions markdown_it/rules_inline/autolink.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Process autolinks '<protocol:...>'
import re
from .state_inline import StateInline
from ..common.normalize_url import normalizeLinkText, normalizeLink, validateLink

EMAIL_RE = re.compile(
r"^<([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>" # noqa: E501
Expand All @@ -25,8 +24,8 @@ def autolink(state: StateInline, silent: bool) -> bool:
if linkMatch is not None:

url = linkMatch.group(0)[1:-1]
fullUrl = normalizeLink(url)
if not validateLink(fullUrl):
fullUrl = state.md.normalizeLink(url)
if not state.md.validateLink(fullUrl):
return False

if not silent:
Expand All @@ -36,7 +35,7 @@ def autolink(state: StateInline, silent: bool) -> bool:
token.info = "auto"

token = state.push("text", "", 0)
token.content = normalizeLinkText(url)
token.content = state.md.normalizeLinkText(url)

token = state.push("link_close", "a", -1)
token.markup = "autolink"
Expand All @@ -49,8 +48,8 @@ def autolink(state: StateInline, silent: bool) -> bool:
if emailMatch is not None:

url = emailMatch.group(0)[1:-1]
fullUrl = normalizeLink("mailto:" + url)
if not validateLink(fullUrl):
fullUrl = state.md.normalizeLink("mailto:" + url)
if not state.md.validateLink(fullUrl):
return False

if not silent:
Expand All @@ -60,7 +59,7 @@ def autolink(state: StateInline, silent: bool) -> bool:
token.info = "auto"

token = state.push("text", "", 0)
token.content = normalizeLinkText(url)
token.content = state.md.normalizeLinkText(url)

token = state.push("link_close", "a", -1)
token.markup = "autolink"
Expand Down
5 changes: 2 additions & 3 deletions markdown_it/rules_inline/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from .state_inline import StateInline
from ..token import Token
from ..common.utils import isSpace, normalizeReference
from ..common.normalize_url import normalizeLink, validateLink


def image(state: StateInline, silent: bool):
Expand Down Expand Up @@ -53,8 +52,8 @@ def image(state: StateInline, silent: bool):
start = pos
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
if res.ok:
href = normalizeLink(res.str)
if validateLink(href):
href = state.md.normalizeLink(res.str)
if state.md.validateLink(href):
pos = res.pos
else:
href = ""
Expand Down
5 changes: 2 additions & 3 deletions markdown_it/rules_inline/link.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Process [link](<to> "stuff")

from ..common.utils import normalizeReference, isSpace
from ..common.normalize_url import normalizeLink, validateLink
from .state_inline import StateInline


Expand Down Expand Up @@ -51,8 +50,8 @@ def link(state: StateInline, silent: bool):
start = pos
res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
if res.ok:
href = normalizeLink(res.str)
if validateLink(href):
href = state.md.normalizeLink(res.str)
if state.md.validateLink(href):
pos = res.pos
else:
href = ""
Expand Down