diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py
index 6bf9a36f..0d11e3e3 100644
--- a/markdown_it/common/utils.py
+++ b/markdown_it/common/utils.py
@@ -304,3 +304,15 @@ def normalizeReference(string: str) -> str:
# most notably, `__proto__`)
#
return string.lower().upper()
+
+
+LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
+LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)
+
+
+def isLinkOpen(string: str) -> bool:
+ return bool(LINK_OPEN_RE.search(string))
+
+
+def isLinkClose(string: str) -> bool:
+ return bool(LINK_CLOSE_RE.search(string))
diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
index 88140d3d..febe4e6e 100644
--- a/markdown_it/parser_inline.py
+++ b/markdown_it/parser_inline.py
@@ -16,6 +16,7 @@
# Parser rules
_rules: list[tuple[str, RuleFunc]] = [
("text", rules_inline.text),
+ ("linkify", rules_inline.linkify),
("newline", rules_inline.newline),
("escape", rules_inline.escape),
("backticks", rules_inline.backtick),
diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py
index f1cb0507..1e6796a2 100644
--- a/markdown_it/presets/__init__.py
+++ b/markdown_it/presets/__init__.py
@@ -21,7 +21,7 @@ def make() -> PresetType:
config = commonmark.make()
config["components"]["core"]["rules"].append("linkify")
config["components"]["block"]["rules"].append("table")
- config["components"]["inline"]["rules"].append("strikethrough")
+ config["components"]["inline"]["rules"].extend(["strikethrough", "linkify"])
config["components"]["inline"]["rules2"].append("strikethrough")
config["options"]["linkify"] = True
config["options"]["html"] = True
diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py
index 49bb4ef3..efbc9d4c 100644
--- a/markdown_it/rules_core/linkify.py
+++ b/markdown_it/rules_core/linkify.py
@@ -1,41 +1,32 @@
+from __future__ import annotations
+
import re
+from typing import Protocol
-from ..common.utils import arrayReplaceAt
+from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
from ..token import Token
from .state_core import StateCore
-LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
-LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)
-
HTTP_RE = re.compile(r"^http://")
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
-def isLinkOpen(string: str) -> bool:
- return bool(LINK_OPEN_RE.search(string))
-
-
-def isLinkClose(string: str) -> bool:
- return bool(LINK_CLOSE_RE.search(string))
-
-
def linkify(state: StateCore) -> None:
- blockTokens = state.tokens
-
+ """Rule for identifying plain-text links."""
if not state.md.options.linkify:
return
if not state.md.linkify:
raise ModuleNotFoundError("Linkify enabled but not installed.")
- for j in range(len(blockTokens)):
- if blockTokens[j].type != "inline" or not state.md.linkify.pretest(
- blockTokens[j].content
+ for inline_token in state.tokens:
+ if inline_token.type != "inline" or not state.md.linkify.pretest(
+ inline_token.content
):
continue
- tokens = blockTokens[j].children
+ tokens = inline_token.children
htmlLinkLevel = 0
@@ -71,38 +62,47 @@ def linkify(state: StateCore) -> None:
currentToken.content
):
text = currentToken.content
- links = state.md.linkify.match(text)
+ links: list[_LinkType] = state.md.linkify.match(text) or []
# Now split string to nodes
nodes = []
level = currentToken.level
lastPos = 0
- for ln in range(len(links)):
- url = links[ln].url
+ # forbid escape sequence at the start of the string,
+ # this avoids http\://example.com/ from being linkified as
+ # http://example.com/
+ if (
+ links
+ and links[0].index == 0
+ and i > 0
+ and tokens[i - 1].type == "text_special"
+ ):
+ links = links[1:]
+
+ for link in links:
+ url = link.url
fullUrl = state.md.normalizeLink(url)
if not state.md.validateLink(fullUrl):
continue
- urlText = links[ln].text
+ urlText = link.text
# Linkifier might send raw hostnames like "example.com", where url
# starts with domain name. So we prepend http:// in those cases,
# and remove it afterwards.
- if not links[ln].schema:
+ if not link.schema:
urlText = HTTP_RE.sub(
"", state.md.normalizeLinkText("http://" + urlText)
)
- elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search(
- urlText
- ):
+ elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
urlText = MAILTO_RE.sub(
"", state.md.normalizeLinkText("mailto:" + urlText)
)
else:
urlText = state.md.normalizeLinkText(urlText)
- pos = links[ln].index
+ pos = link.index
if pos > lastPos:
token = Token("text", "", 0)
@@ -130,7 +130,7 @@ def linkify(state: StateCore) -> None:
token.info = "auto"
nodes.append(token)
- lastPos = links[ln].last_index
+ lastPos = link.last_index
if lastPos < len(text):
token = Token("text", "", 0)
@@ -138,4 +138,12 @@ def linkify(state: StateCore) -> None:
token.level = level
nodes.append(token)
- blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes)
+ inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)
+
+
+class _LinkType(Protocol):
+ url: str
+ text: str
+ index: int
+ last_index: int
+ schema: str | None
diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py
index dde97d34..3a8026ec 100644
--- a/markdown_it/rules_inline/__init__.py
+++ b/markdown_it/rules_inline/__init__.py
@@ -3,6 +3,7 @@
"text",
"fragments_join",
"link_pairs",
+ "linkify",
"escape",
"newline",
"backtick",
@@ -24,6 +25,7 @@
from .html_inline import html_inline
from .image import image
from .link import link
+from .linkify import linkify
from .newline import newline
from .state_inline import StateInline
from .text import text
diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py
index 3c8b5331..9065e1d0 100644
--- a/markdown_it/rules_inline/html_inline.py
+++ b/markdown_it/rules_inline/html_inline.py
@@ -1,5 +1,6 @@
# Process html tags
from ..common.html_re import HTML_TAG_RE
+from ..common.utils import isLinkClose, isLinkOpen
from .state_inline import StateInline
@@ -33,5 +34,10 @@ def html_inline(state: StateInline, silent: bool) -> bool:
token = state.push("html_inline", "", 0)
token.content = state.src[pos : pos + len(match.group(0))]
+ if isLinkOpen(token.content):
+ state.linkLevel += 1
+ if isLinkClose(token.content):
+ state.linkLevel -= 1
+
state.pos += len(match.group(0))
return True
diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py
index 18c0736c..78cf9122 100644
--- a/markdown_it/rules_inline/link.py
+++ b/markdown_it/rules_inline/link.py
@@ -140,7 +140,9 @@ def link(state: StateInline, silent: bool) -> bool:
if label and state.md.options.get("store_labels", False):
token.meta["label"] = label
+ state.linkLevel += 1
state.md.inline.tokenize(state)
+ state.linkLevel -= 1
token = state.push("link_close", "a", -1)
diff --git a/markdown_it/rules_inline/linkify.py b/markdown_it/rules_inline/linkify.py
new file mode 100644
index 00000000..a8a18153
--- /dev/null
+++ b/markdown_it/rules_inline/linkify.py
@@ -0,0 +1,61 @@
+"""Process links like https://example.org/"""
+import re
+
+from .state_inline import StateInline
+
+# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE)
+
+
+def linkify(state: StateInline, silent: bool) -> bool:
+ """Rule for identifying plain-text links."""
+ if not state.md.options.linkify:
+ return False
+ if state.linkLevel > 0:
+ return False
+ if not state.md.linkify:
+ raise ModuleNotFoundError("Linkify enabled but not installed.")
+
+ pos = state.pos
+ maximum = state.posMax
+
+ if (
+ (pos + 3) > maximum
+ or state.src[pos] != ":"
+ or state.src[pos + 1] != "/"
+ or state.src[pos + 2] != "/"
+ ):
+ return False
+
+ if not (match := SCHEME_RE.match(state.pending)):
+ return False
+
+ proto = match.group(1)
+ if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])):
+ return False
+ url: str = link.url
+
+ # disallow '*' at the end of the link (conflicts with emphasis)
+ url = url.rstrip("*")
+
+ full_url = state.md.normalizeLink(url)
+ if not state.md.validateLink(full_url):
+ return False
+
+ if not silent:
+ state.pending = state.pending[: -len(proto)]
+
+ token = state.push("link_open", "a", 1)
+ token.attrs = {"href": full_url}
+ token.markup = "linkify"
+ token.info = "auto"
+
+ token = state.push("text", "", 0)
+ token.content = state.md.normalizeLinkText(url)
+
+ token = state.push("link_close", "a", -1)
+ token.markup = "linkify"
+ token.info = "auto"
+
+ state.pos += len(url) - len(proto)
+ return True
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
index 143ab33e..c0c491c4 100644
--- a/markdown_it/rules_inline/state_inline.py
+++ b/markdown_it/rules_inline/state_inline.py
@@ -70,6 +70,10 @@ def __init__(
self.backticks: dict[int, int] = {}
self.backticksScanned = False
+ # Counter used to disable inline linkify-it execution
+ # inside <a> and markdown links
+ self.linkLevel = 0
+
def __repr__(self) -> str:
return (
f"{self.__class__.__name__}"
diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py
index 64a2bbe8..178d717e 100644
--- a/tests/test_api/test_main.py
+++ b/tests/test_api/test_main.py
@@ -30,6 +30,7 @@ def test_get_rules():
],
"inline": [
"text",
+ "linkify",
"newline",
"escape",
"backticks",
diff --git a/tests/test_port/fixtures/linkify.md b/tests/test_port/fixtures/linkify.md
index c9755c03..f51bb6b9 100644
--- a/tests/test_port/fixtures/linkify.md
+++ b/tests/test_port/fixtures/linkify.md
@@ -29,6 +29,84 @@ don't touch text in html tags
.
+entities inside raw links
+.
+https://example.com/foo&amp;bar
+.
+<p><a href="https://example.com/foo&amp;bar">https://example.com/foo&amp;bar</a></p>
+.