|
3 | 3 | // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
|
5 | 5 | import '../ast.dart'; |
| 6 | +import '../charcode.dart'; |
6 | 7 | import '../inline_parser.dart'; |
7 | 8 | import '../util.dart'; |
8 | 9 | import 'inline_syntax.dart'; |
9 | 10 |
|
10 | | -/// Matches autolinks like `http://foo.com`. |
| 11 | +/// Matches autolinks like `http://foo.com` and `foo@bar.com`. |
11 | 12 | class AutolinkExtensionSyntax extends InlineSyntax { |
12 | | - /// Broken up parts of the autolink regex for reusability and readability |
13 | | -
|
14 | | - // Autolinks can only come at the beginning of a line, after whitespace, or |
15 | | - // any of the delimiting characters *, _, ~, and (. |
16 | | - static const start = r'(?:^|[\s*_~(>])'; |
17 | | - |
18 | | - // An extended url autolink will be recognized when one of the schemes |
19 | | - // http://, https://, or ftp://, followed by a valid domain |
20 | | - static const scheme = r'(?:(?:https?|ftp):\/\/|www\.)'; |
21 | | - |
22 | | - // A valid domain consists of alphanumeric characters, underscores (_), |
23 | | - // hyphens (-) and periods (.). There must be at least one period, and no |
24 | | - // underscores may be present in the last two segments of the domain. |
25 | | - static const domainPart = r'\w\-'; |
26 | | - static const domain = '[$domainPart][$domainPart.]+'; |
27 | | - |
28 | | - // A valid domain consists of alphanumeric characters, underscores (_), |
29 | | - // hyphens (-) and periods (.). |
30 | | - static const path = r'[^\s<]*'; |
31 | | - |
32 | | - // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not |
33 | | - // be considered part of the autolink |
34 | | - static const truncatingPunctuationPositive = '[?!.,:*_~]'; |
35 | | - |
36 | | - static final regExpTrailingPunc = RegExp('$truncatingPunctuationPositive*\$'); |
37 | | - static final regExpEndsWithColon = RegExp(r'\&[a-zA-Z0-9]+;$'); |
38 | | - static final regExpWhiteSpace = RegExp(r'\s'); |
39 | | - |
40 | | - AutolinkExtensionSyntax() : super('$start(($scheme)($domain)($path))'); |
| 13 | + static const _linkPattern = |
| 14 | + // Autolinks can only come at the beginning of a line, after whitespace, |
| 15 | + // or any of the delimiting characters *, _, ~, (, and >. |
| 16 | + r'(?<=^|[\s*_~(>])' |
| 17 | + |
| 18 | + // An extended url autolink will be recognised when one of the schemes |
| 19 | + // http://, https://, or ftp://, or the prefix www., is followed by a |
| 20 | + // valid domain. See https://github.github.com/gfm/#extended-url-autolink. |
| 21 | + r'(?:(?:https?|ftp):\/\/|www\.)' |
| 22 | + |
| 23 | + // A valid domain consists of segments of alphanumeric characters, |
| 24 | + // underscores (_) and hyphens (-) separated by periods (.). There must |
| 25 | + // be at least one period, and no underscores may be present in the last |
| 26 | + // two segments of the domain. See |
| 27 | + // https://github.github.com/gfm/#valid-domain. |
| 28 | + r'(?:[-_a-z0-9]+\.)*(?:[-a-z0-9]+\.[-a-z0-9]+)' |
| 29 | + |
| 30 | + // After a valid domain, zero or more non-space non-< characters may |
| 31 | + // follow. |
| 32 | + r'[^\s<]*' |
| 33 | + |
| 34 | + // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will |
| 35 | + // not be considered part of the autolink, though they may be included in |
| 36 | + // the interior of the link. See |
| 37 | + // https://github.github.com/gfm/#extended-autolink-path-validation. |
| 38 | + '(?<![?!.,:*_~])'; |
| 39 | + |
| 40 | + // An extended email autolink, see |
| 41 | + // https://github.github.com/gfm/#extended-email-autolink. |
| 42 | + static const _emailPattern = |
| 43 | + r'[-_.+a-z0-9]+@(?:[-_a-z0-9]+\.)+[-_a-z0-9]*[a-z0-9](?![-_])'; |
| 44 | + |
| 45 | + AutolinkExtensionSyntax() |
| 46 | + : super( |
| 47 | + '($_linkPattern)|($_emailPattern)', |
| 48 | + caseSensitive: false, |
| 49 | + ); |
41 | 50 |
|
42 | 51 | @override |
43 | 52 | bool tryMatch(InlineParser parser, [int? startMatchPos]) { |
44 | | - return super.tryMatch(parser, parser.pos > 0 ? parser.pos - 1 : 0); |
| 53 | + startMatchPos ??= parser.pos; |
| 54 | + final startMatch = pattern.matchAsPrefix(parser.source, startMatchPos); |
| 55 | + if (startMatch == null) { |
| 56 | + return false; |
| 57 | + } |
| 58 | + parser.writeText(); |
| 59 | + return onMatch(parser, startMatch); |
45 | 60 | } |
46 | 61 |
|
47 | 62 | @override |
48 | 63 | bool onMatch(InlineParser parser, Match match) { |
49 | | - var url = match[1]!; |
50 | | - var href = url; |
51 | | - var matchLength = url.length; |
52 | | - |
53 | | - if (url[0] == '>' || url.startsWith(regExpWhiteSpace)) { |
54 | | - url = url.substring(1, url.length - 1); |
55 | | - href = href.substring(1, href.length - 1); |
56 | | - parser.pos++; |
57 | | - matchLength--; |
58 | | - } |
| 64 | + int consumeLength; |
59 | 65 |
|
60 | | - // Prevent accidental standard autolink matches |
61 | | - if (url.endsWith('>') && parser.source[parser.pos - 1] == '<') { |
62 | | - return false; |
| 66 | + final isEmailLink = match[2] != null; |
| 67 | + if (isEmailLink) { |
| 68 | + consumeLength = match.match.length; |
| 69 | + } else { |
| 70 | + consumeLength = _getConsumeLength(match.match); |
63 | 71 | } |
64 | 72 |
|
65 | | - // When an autolink ends in ), we scan the entire autolink for the total |
66 | | - // number of parentheses. If there is a greater number of closing |
67 | | - // parentheses than opening ones, we don’t consider the last character |
68 | | - // part of the autolink, in order to facilitate including an autolink |
69 | | - // inside a parenthesis: |
70 | | - // https://github.github.com/gfm/#example-600 |
71 | | - if (url.endsWith(')')) { |
72 | | - final opening = _countChars(url, '('); |
73 | | - final closing = _countChars(url, ')'); |
74 | | - |
75 | | - if (closing > opening) { |
76 | | - url = url.substring(0, url.length - 1); |
77 | | - href = href.substring(0, href.length - 1); |
78 | | - matchLength--; |
79 | | - } |
80 | | - } |
| 73 | + var text = match.match.substring(0, consumeLength); |
| 74 | + text = parser.encodeHtml ? escapeHtml(text) : text; |
81 | 75 |
|
82 | | - // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will |
83 | | - // not be considered part of the autolink, though they may be included |
84 | | - // in the interior of the link: |
85 | | - // https://github.github.com/gfm/#example-599 |
86 | | - final trailingPunc = regExpTrailingPunc.firstMatch(url); |
87 | | - if (trailingPunc != null) { |
88 | | - final trailingLength = trailingPunc.match.length; |
89 | | - url = url.substring(0, url.length - trailingLength); |
90 | | - href = href.substring(0, href.length - trailingLength); |
91 | | - matchLength -= trailingLength; |
92 | | - } |
93 | | - |
94 | | - // If an autolink ends in a semicolon (;), we check to see if it appears |
95 | | - // to resemble an |
96 | | - // [entity reference](https://github.github.com/gfm/#entity-references); |
97 | | - // if the preceding text is & followed by one or more alphanumeric |
98 | | - // characters. If so, it is excluded from the autolink: |
99 | | - // https://github.github.com/gfm/#example-602 |
100 | | - if (url.endsWith(';')) { |
101 | | - final entityRef = regExpEndsWithColon.firstMatch(url); |
102 | | - if (entityRef != null) { |
103 | | - // Strip out HTML entity reference |
104 | | - final entityRefLength = entityRef.match.length; |
105 | | - url = url.substring(0, url.length - entityRefLength); |
106 | | - href = href.substring(0, href.length - entityRefLength); |
107 | | - matchLength -= entityRefLength; |
108 | | - } |
| 76 | + var destination = text; |
| 77 | + if (isEmailLink) { |
| 78 | + destination = 'mailto:$destination'; |
| 79 | + } else if (destination[0] == 'w') { |
| 80 | + // When there is no scheme specified, insert the scheme `http`. |
| 81 | + destination = 'http://$destination'; |
109 | 82 | } |
110 | 83 |
|
111 | | - // The scheme http will be inserted automatically |
112 | | - if (!href.startsWith('http://') && |
113 | | - !href.startsWith('https://') && |
114 | | - !href.startsWith('ftp://')) { |
115 | | - href = 'http://$href'; |
116 | | - } |
| 84 | + final anchor = Element.text('a', text) |
| 85 | + ..attributes['href'] = Uri.encodeFull(destination); |
117 | 86 |
|
118 | | - final text = parser.encodeHtml ? escapeHtml(url) : url; |
119 | | - final anchor = Element.text('a', text); |
120 | | - anchor.attributes['href'] = Uri.encodeFull(href); |
121 | | - parser.addNode(anchor); |
| 87 | + parser |
| 88 | + ..addNode(anchor) |
| 89 | + ..consume(consumeLength); |
122 | 90 |
|
123 | | - parser.consume(matchLength); |
124 | | - return false; |
| 91 | + return true; |
125 | 92 | } |
126 | 93 |
|
127 | | - int _countChars(String input, String char) { |
128 | | - var count = 0; |
129 | | - |
130 | | - for (var i = 0; i < input.length; i++) { |
131 | | - if (input[i] == char) count++; |
| 94 | + int _getConsumeLength(String text) { |
| 95 | + var excludedLength = 0; |
| 96 | + |
| 97 | + // When an autolink ends in `)`, exclude unbalanced closing parentheses, |
| 98 | + // see https://github.github.com/gfm/#example-625. |
| 99 | + if (text.endsWith(')')) { |
| 100 | + final match = RegExp(r'(\(.*)?(\)+)$').firstMatch(text)!; |
| 101 | + |
| 102 | + if (match[1] == null) { |
| 103 | + excludedLength = match[2]!.length; |
| 104 | + } else { |
| 105 | + var parenCount = 0; |
| 106 | + for (var i = 0; i < text.length; i++) { |
| 107 | + final char = text.codeUnitAt(i); |
| 108 | + if (char == $lparen) { |
| 109 | + parenCount++; |
| 110 | + } else if (char == $rparen) { |
| 111 | + parenCount--; |
| 112 | + } |
| 113 | + } |
| 114 | + if (parenCount < 0) { |
| 115 | + excludedLength = parenCount.abs(); |
| 116 | + } |
| 117 | + } |
| 118 | + } |
| 119 | + // If an autolink ends in a semicolon `;`, exclude a trailing entity |
| 120 | + // reference such as `&amp;`, see https://github.github.com/gfm/#example-627 |
| 121 | + else if (text.endsWith(';')) { |
| 122 | + final match = RegExp(r'&[0-9a-z]+;$').firstMatch(text); |
| 123 | + if (match != null) { |
| 124 | + excludedLength = match.match.length; |
| 125 | + } |
132 | 126 | } |
133 | 127 |
|
134 | | - return count; |
| 128 | + return text.length - excludedLength; |
135 | 129 | } |
136 | 130 | } |
0 commit comments