Skip to content
This repository was archived by the owner on Feb 24, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 90 additions & 50 deletions lib/src/block_syntaxes/fenced_code_block_syntax.dart
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import '../ast.dart';
import '../block_parser.dart';
import '../charcode.dart';
import '../patterns.dart';
import '../util.dart';
import 'block_syntax.dart';
Expand All @@ -19,77 +18,118 @@ class FencedCodeBlockSyntax extends BlockSyntax {
const FencedCodeBlockSyntax();

@override
bool canParse(BlockParser parser) {
final match = pattern.firstMatch(parser.current);
if (match == null) return false;
final codeFence = match.group(1)!;
final infoString = match.group(2);
// From the CommonMark spec:
//
// > If the info string comes after a backtick fence, it may not contain
// > any backtick characters.
return codeFence.codeUnitAt(0) != $backquote ||
!infoString!.codeUnits.contains($backquote);
/// Parses the fenced code block that starts at the parser's current line and
/// returns it as a `pre` element wrapping a single `code` element.
///
/// The info string's first word, when present, becomes a `language-*` class
/// on the `code` element. The current line is expected to match [pattern];
/// the null assertion below throws otherwise.
Node parse(BlockParser parser) {
  // Backslash escapes are resolved before matching, so the info string (and
  // the language derived from it) carries no escape backslashes.
  final fenceLine = escapePunctuation(parser.current);
  final opening = _FenceMatch.fromMatch(pattern.firstMatch(fenceLine)!);

  final lines = parseChildLines(parser, opening.marker, opening.indent);
  final encodeHtml = parser.document.encodeHtml;

  var content = lines.join('\n');
  if (encodeHtml) content = escapeHtml(content);
  // Non-empty content is terminated by a newline; an empty block stays empty.
  if (content.isNotEmpty) content = '$content\n';

  final codeElement = Element.text('code', content);
  if (opening.hasLanguage) {
    final decoded = decodeHtmlCharacters(opening.language);
    final language = encodeHtml ? escapeHtmlAttribute(decoded) : decoded;
    codeElement.attributes['class'] = 'language-$language';
  }

  return Element('pre', [codeElement]);
}

@override
List<String> parseChildLines(BlockParser parser, [String? endBlock]) {
endBlock ??= '';
/// Strips at most [length] leading whitespace characters from [content].
///
/// Content with less leading whitespace than [length] loses all of it;
/// anything after the stripped prefix is returned untouched.
String _removeIndentation(String content, int length) {
  // The pattern is anchored at the start and accepts zero characters, so the
  // match end is exactly the number of characters to drop.
  final leading = RegExp('^\\s{0,$length}').firstMatch(content);
  return content.substring(leading?.end ?? 0);
}

/// Collects the content lines of a fenced code block.
///
/// Skips the opening fence at the parser's current line, then gathers every
/// following line until a closing fence or the end of input. A line closes
/// the block when it matches [pattern], its marker starts with
/// [openingMarker] (same fence character, at least as long) and it carries no
/// info string. The closing fence itself is consumed but not returned.
///
/// Up to [indent] leading whitespace characters are stripped from each
/// collected line. When the input ends without a closing fence, a trailing
/// blank line is dropped.
@override
List<String> parseChildLines(
  BlockParser parser, [
  String openingMarker = '',
  int indent = 0,
]) {
  final childLines = <String>[];

  // Step over the opening fence.
  parser.advance();

  _FenceMatch? closingFence;
  while (!parser.isDone) {
    final match = pattern.firstMatch(parser.current);
    closingFence = match == null ? null : _FenceMatch.fromMatch(match);

    // Closing code fences cannot have info strings:
    // https://spec.commonmark.org/0.30/#example-147
    if (closingFence == null ||
        !closingFence.marker.startsWith(openingMarker) ||
        closingFence.hasInfo) {
      childLines.add(_removeIndentation(parser.current, indent));
      parser.advance();
    } else {
      parser.advance();
      break;
    }
  }

  // https://spec.commonmark.org/0.30/#example-127
  // https://spec.commonmark.org/0.30/#example-128
  //
  // The list is empty when the opening fence is the last line of input;
  // `last` would throw a StateError in that case, so check for content first.
  if (closingFence == null &&
      childLines.isNotEmpty &&
      childLines.last.trim().isEmpty) {
    childLines.removeLast();
  }

  return childLines;
}
}

@override
Node parse(BlockParser parser) {
// Get the syntax identifier, if there is one.
final match = pattern.firstMatch(parser.current)!;
final endBlock = match.group(1);
var infoString = match.group(2)!;

final childLines = parseChildLines(parser, endBlock);
class _FenceMatch {
_FenceMatch._({
required this.indent,
required this.marker,
required this.info,
});

// The Markdown tests expect a trailing newline.
childLines.add('');
factory _FenceMatch.fromMatch(RegExpMatch match) {
String marker;
String info;

var text = childLines.join('\n');
if (parser.document.encodeHtml) {
text = escapeHtml(text);
if (match.namedGroup('backtick') != null) {
marker = match.namedGroup('backtick')!;
info = match.namedGroup('backtickInfo')!;
} else {
marker = match.namedGroup('tilde')!;
info = match.namedGroup('tildeInfo')!;
}
final code = Element.text('code', text);

// the info-string should be trimmed
// http://spec.commonmark.org/0.22/#example-100
infoString = infoString.trim();
if (infoString.isNotEmpty) {
// only use the first word in the syntax
// http://spec.commonmark.org/0.22/#example-100
final firstSpace = infoString.indexOf(' ');
if (firstSpace >= 0) {
infoString = infoString.substring(0, firstSpace);
}
infoString = decodeHtmlCharacters(infoString);
if (parser.document.encodeHtml) {
infoString = escapeHtmlAttribute(infoString);
}
code.attributes['class'] = 'language-$infoString';
}
return _FenceMatch._(
indent: match[1]!.length,
marker: marker,
info: info.trim(),
);
}

final element = Element('pre', [code]);
final int indent;
final String marker;

return element;
}
// The info-string should be trimmed,
// https://spec.commonmark.org/0.30/#info-string.
final String info;

// The first word of the info string is typically used to specify the language
// of the code sample,
// https://spec.commonmark.org/0.30/#example-143.
String get language => info.split(' ').first;

bool get hasInfo => info.isNotEmpty;

bool get hasLanguage => language.isNotEmpty;
}
8 changes: 7 additions & 1 deletion lib/src/patterns.dart
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ final blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
final indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$');

/// Fenced code block.
final codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
// Group 1 captures the fence's indentation (zero to three spaces). The fence
// marker and the text after it are captured by the named groups
// `backtick`/`backtickInfo` or `tilde`/`tildeInfo`, depending on the fence
// character. The text after a backtick fence may not contain backticks; the
// text after a tilde fence is unrestricted.
final codeFencePattern = RegExp(
r'^([ ]{0,3})(?:(?<backtick>`{3,})(?<backtickInfo>[^`]*)|(?<tilde>~{3,})(?<tildeInfo>.*))$',
);

/// Fenced blockquotes.
final blockquoteFencePattern = RegExp(r'^>{3}\s*$');
Expand Down Expand Up @@ -199,6 +201,10 @@ final htmlBlockPattern = RegExp(
'(?<condition_7>(?:$namedTagDefinition)\\s*\$))',
caseSensitive: false);

/// ASCII punctuation characters.
// See https://spec.commonmark.org/0.30/#ascii-punctuation-character.
const asciiPunctuationCharacters = r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~''';

/// A pattern to match HTML entity references and numeric character references.
// https://spec.commonmark.org/0.30/#entity-and-numeric-character-references
final htmlCharactersPattern = RegExp(
Expand Down
17 changes: 17 additions & 0 deletions lib/src/util.dart
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,20 @@ extension MatchExtensions on Match {
/// Returns the whole match String
String get match => this[0]!;
}

/// Resolves backslash escapes in [input].
///
/// A backslash that is immediately followed by an ASCII punctuation character
/// is removed and the punctuation character is kept. Every other character,
/// including a backslash not followed by ASCII punctuation, is copied as is.
String escapePunctuation(String input) {
  final result = StringBuffer();
  var index = 0;

  while (index < input.length) {
    final isEscape = input.codeUnitAt(index) == $backslash &&
        index + 1 < input.length &&
        asciiPunctuationCharacters.contains(input[index + 1]);
    // Step over the backslash so that only the escaped character is written.
    if (isEscape) index++;

    result.write(input[index]);
    index++;
  }

  return result.toString();
}
2 changes: 1 addition & 1 deletion test/common_mark/backslash_escapes.unit
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ bar</p>
foo
```
<<<
<pre><code class="language-foo\+bar">foo
<pre><code class="language-foo+bar">foo
</code></pre>
3 changes: 1 addition & 2 deletions test/common_mark/block_quotes.unit
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ foo
<pre><code>foo
</code></pre>
</blockquote>
<pre><code>
</code></pre>
<pre><code></code></pre>
>>> Block quotes - 238
> foo
- bar
Expand Down
19 changes: 7 additions & 12 deletions test/common_mark/fenced_code_blocks.unit
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ aaa
>>> Fenced code blocks - 126
```
<<<
<pre><code>
</code></pre>
<pre><code></code></pre>
>>> Fenced code blocks - 127
`````

Expand All @@ -72,7 +71,6 @@ aaa
<pre><code>
```
aaa

</code></pre>
>>> Fenced code blocks - 128
> ```
Expand Down Expand Up @@ -105,7 +103,7 @@ bbb
aaa
```
<<<
<pre><code> aaa
<pre><code>aaa
aaa
</code></pre>
>>> Fenced code blocks - 132
Expand All @@ -116,7 +114,7 @@ aaa
```
<<<
<pre><code>aaa
aaa
aaa
aaa
</code></pre>
>>> Fenced code blocks - 133
Expand All @@ -126,9 +124,9 @@ aaa
aaa
```
<<<
<pre><code> aaa
aaa
aaa
<pre><code>aaa
aaa
aaa
</code></pre>
>>> Fenced code blocks - 134
```
Expand Down Expand Up @@ -160,7 +158,6 @@ aaa
<<<
<pre><code>aaa
```

</code></pre>
>>> Fenced code blocks - 138
``` ```
Expand All @@ -175,7 +172,6 @@ aaa
<<<
<pre><code>aaa
~~~ ~~

</code></pre>
>>> Fenced code blocks - 140
foo
Expand Down Expand Up @@ -245,6 +241,5 @@ foo
``` aaa
```
<<<
<pre><code></code></pre>
<pre><code>
<pre><code>``` aaa
</code></pre>
1 change: 0 additions & 1 deletion test/common_mark/lists.unit
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ The number of windows in my house is
</li>
</ul>
<pre><code>- c

</code></pre>
>>> Lists - 319
- a
Expand Down
2 changes: 1 addition & 1 deletion test/gfm/backslash_escapes.unit
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ bar</p>
foo
```
<<<
<pre><code class="language-foo\+bar">foo
<pre><code class="language-foo+bar">foo
</code></pre>
3 changes: 1 addition & 2 deletions test/gfm/block_quotes.unit
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ foo
<pre><code>foo
</code></pre>
</blockquote>
<pre><code>
</code></pre>
<pre><code></code></pre>
>>> Block quotes - 216
> foo
- bar
Expand Down
Loading