Skip to content

Commit 55b995d

Browse files
leofeyer, m-vo and ausi
authored
Merge pull request from GHSA-j55w-hjpj-825g
* harden BBCode parsing
* Apply suggestions from code review
  Co-authored-by: Martin Auswöger <martin@auswoeger.com>
* CS
* Update comments-bundle/tests/Util/BbCodeTest.php
  Co-authored-by: Martin Auswöger <martin@auswoeger.com>
* Update comments-bundle/src/Util/BbCode.php
  Co-authored-by: Martin Auswöger <martin@auswoeger.com>
* Update comments-bundle/tests/Util/BbCodeTest.php
  Co-authored-by: Martin Auswöger <martin@auswoeger.com>
* Update comments-bundle/tests/Util/BbCodeTest.php
* CS
* Run the tools

---------

Co-authored-by: M. Vondano <m-vo@users.noreply.github.com>
Co-authored-by: Martin Auswöger <martin@auswoeger.com>
1 parent 110d32f commit 55b995d

File tree

5 files changed

+483
-58
lines changed

5 files changed

+483
-58
lines changed

Diff for: comments-bundle/contao/classes/Comments.php

+2-58
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
namespace Contao;
1212

13+
use Contao\CommentsBundle\Util\BbCode;
1314
use Contao\CoreBundle\EventListener\Widget\HttpUrlListener;
1415
use Contao\CoreBundle\Exception\PageNotFoundException;
1516
use Contao\CoreBundle\Util\UrlUtil;
@@ -408,70 +409,13 @@ protected function renderCommentForm(FrontendTemplate $objTemplate, \stdClass $o
408409
/**
409410
* Replace bbcode and return the HTML string
410411
*
411-
* Supports the following tags:
412-
*
413-
* * [b][/b] bold
414-
* * [i][/i] italic
415-
* * [u][/u] underline
416-
* * [img][/img]
417-
* * [code][/code]
418-
* * [color=#ff0000][/color]
419-
* * [quote][/quote]
420-
* * [quote=tim][/quote]
421-
* * [url][/url]
422-
* * [url=http://][/url]
423-
* * [email][/email]
424-
* * [email=name@example.com][/email]
425-
*
426412
* @param string $strComment
427413
*
428414
* @return string
429415
*/
430416
public function parseBbCode($strComment)
431417
{
432-
$arrSearch = array
433-
(
434-
'@\[b\](.*)\[/b\]@Uis',
435-
'@\[i\](.*)\[/i\]@Uis',
436-
'@\[u\](.*)\[/u\]@Uis',
437-
'@\s*\[code\](.*)\[/code\]\s*@Uis',
438-
'@\[color=([^\]" ]+)\](.*)\[/color\]@Uis',
439-
'@\s*\[quote\](.*)\[/quote\]\s*@Uis',
440-
'@\s*\[quote=([^\]]+)\](.*)\[/quote\]\s*@Uis',
441-
'@\[img\]\s*([^\[" ]+\.(jpe?g|png|gif|bmp|tiff?|ico))\s*\[/img\]@i',
442-
'@\[url\]\s*([^\[" ]+)\s*\[/url\]@i',
443-
'@\[url=([^\]" ]+)\](.*)\[/url\]@Uis',
444-
'@\[email\]\s*([^\[" ]+)\s*\[/email\]@i',
445-
'@\[email=([^\]" ]+)\](.*)\[/email\]@Uis',
446-
'@href="(([a-z0-9]+\.)*[a-z0-9]+\.([a-z]{2}|asia|biz|com|info|name|net|org|tel)(/|"))@i'
447-
);
448-
449-
$arrReplace = array
450-
(
451-
'<strong>$1</strong>',
452-
'<em>$1</em>',
453-
'<span style="text-decoration:underline">$1</span>',
454-
"\n\n" . '<div class="code"><p>' . $GLOBALS['TL_LANG']['MSC']['com_code'] . '</p><pre>$1</pre></div>' . "\n\n",
455-
'<span style="color:$1">$2</span>',
456-
"\n\n" . '<blockquote>$1</blockquote>' . "\n\n",
457-
"\n\n" . '<blockquote><p>' . sprintf($GLOBALS['TL_LANG']['MSC']['com_quote'], '$1') . '</p>$2</blockquote>' . "\n\n",
458-
'<img src="$1" alt="" />',
459-
'<a href="$1">$1</a>',
460-
'<a href="$1">$2</a>',
461-
'<a href="mailto:$1">$1</a>',
462-
'<a href="mailto:$1">$2</a>',
463-
'href="http://$1'
464-
);
465-
466-
$strComment = preg_replace($arrSearch, $arrReplace, $strComment);
467-
468-
// Encode e-mail addresses
469-
if (str_contains($strComment, 'mailto:'))
470-
{
471-
$strComment = StringUtil::encodeEmail($strComment);
472-
}
473-
474-
return $strComment;
418+
return (new BbCode())->toHtml($strComment);
475419
}
476420

477421
/**

Diff for: comments-bundle/src/Util/BbCode.php

+229
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of Contao.
7+
*
8+
* (c) Leo Feyer
9+
*
10+
* @license LGPL-3.0-or-later
11+
*/
12+
13+
namespace Contao\CommentsBundle\Util;
14+
15+
use Contao\Idna;
16+
use Contao\StringUtil;
17+
use Contao\Validator;
18+
19+
/**
 * Converts BBCode into HTML in three steps: the input is split into tokens,
 * the tokens are parsed into a tree of Node objects and the tree is compiled
 * back into a string. All text and attribute values are passed through
 * StringUtil::specialchars() when they are written to the output.
 *
 * @internal
 */
final class BbCode
{
    /**
     * Converts text containing BBCode to HTML.
     *
     * Supports the following tags:
     *
     * * [b][/b] bold
     * * [i][/i] italic
     * * [u][/u] underline
     * * [code][/code]
     * * [quote][/quote]
     * * [quote=author][/quote]
     * * [url][/url]
     * * [url=https://…][/url]
     * * [email][/email]
     * * [email=name@example.com][/email]
     *
     * [img] and [color] tokens are recognized by the tokenizer but never
     * rendered: the tags themselves are removed and their content is kept as
     * plain text.
     *
     * @throws \InvalidArgumentException if the input cannot be tokenized
     */
    public function toHtml(string $bbCode): string
    {
        $html = $this->compile($this->parse($this->tokenize($bbCode), $bbCode));

        // Curly braces are emitted as entities.
        // NOTE(review): presumably this keeps "{{…}}" insert tags in comments from being evaluated - confirm.
        return str_replace(['{', '}'], ['&#123;', '&#125;'], $html);
    }

    /**
     * Find BBCode tokens and annotate them with their position/tag/type and
     * attribute. We're only matching tokens in the form '[tag]', '[/tag]' and
     * '[tag=attr]'.
     *
     * The positions are byte offsets into $input as reported by
     * PREG_OFFSET_CAPTURE, which is why the byte-wise strlen()/substr()
     * functions are used throughout this class.
     *
     * @return list<array{start: int, end: int, closing: bool, tag: string, attr: string|null}>
     *
     * @throws \InvalidArgumentException if preg_match_all() reports a failure
     */
    private function tokenize(string $input): array
    {
        if (false === preg_match_all('%\[(/?)(b|i|u|quote|code|url|email|img|color)(?:=([^\[\]]*))?]%', $input, $matches, PREG_OFFSET_CAPTURE)) {
            throw new \InvalidArgumentException('Could not tokenize input.');
        }

        $tokens = [];

        foreach ($matches[0] as $index => [$token, $position]) {
            $attr = $matches[3][$index][0] ?? '';

            $tokens[] = [
                'start' => $position,
                'end' => $position + \strlen($token),
                'closing' => '/' === $matches[1][$index][0],
                'tag' => $matches[2][$index][0],
                // Only a missing or empty attribute becomes null. A plain
                // truthiness check would also discard the attribute "0".
                'attr' => '' !== $attr ? $attr : null,
            ];
        }

        return $tokens;
    }

    /**
     * Parses tokens into a node tree. Input before/after tokens is treated as
     * text.
     *
     * - [code]…[/code] becomes a code node holding the raw text between the
     *   two tokens; it is always attached to the root node. An opening [code]
     *   without a closing token is dropped.
     * - [quote] blocks are always attached to the root node and reset the
     *   stack of open tags.
     * - [b], [i], [u], [url] and [email] nest inside the current node. A tag
     *   that is already open is not opened again, and a closing tag that is
     *   not open is dropped. Closing a tag also closes everything that was
     *   opened after it.
     * - All other tokens ([img], [color]) are dropped.
     *
     * @param list<array{start: int, end: int, closing: bool, tag: string, attr: string|null}> $tokens
     */
    private function parse(array $tokens, string $input): Node
    {
        $root = new Node();
        $node = $root;
        $tags = [];
        $position = 0;

        $addNode = static function (Node $parent, int $type): Node {
            $child = new Node($parent, $type);
            $parent->children[] = $child;

            return $child;
        };

        $numTokens = \count($tokens);

        for ($i = 0; $i < $numTokens; ++$i) {
            $current = $tokens[$i];

            // Text before token
            if (($length = $current['start'] - $position) > 0) {
                $addNode($node, Node::TYPE_TEXT)->setValue(substr($input, $position, $length));
            }

            // Code: look ahead for the closing token and take everything in
            // between verbatim, skipping all tokens inside
            if ('code' === $current['tag'] && !$current['closing']) {
                for ($j = $i + 1; $j < $numTokens; ++$j) {
                    if ('code' === $tokens[$j]['tag'] && $tokens[$j]['closing']) {
                        $addNode($root, Node::TYPE_CODE)->setValue(substr($input, $current['end'], $tokens[$j]['start'] - $current['end']));
                        $position = $tokens[$j]['end'];
                        $i = $j;

                        continue 2;
                    }
                }
            }

            // Blocks
            $onTagStack = \in_array($current['tag'], $tags, true);

            if (\in_array($current['tag'], ['b', 'i', 'u', 'url', 'email'], true)) {
                if (!$current['closing'] && !$onTagStack) {
                    $node = $addNode($node, Node::TYPE_BLOCK)->setTag($current['tag'])->setValue($current['attr']);
                    $tags[] = $current['tag'];
                } elseif ($current['closing'] && $onTagStack) {
                    // Unwind up to and including the matching opening tag
                    do {
                        $node = $node->parent;
                    } while ($current['tag'] !== array_pop($tags));
                }
            } elseif ('quote' === $current['tag']) {
                if (!$current['closing'] && !$onTagStack) {
                    $node = $addNode($root, Node::TYPE_BLOCK)->setTag($current['tag'])->setValue($current['attr']);
                    $tags = [$current['tag']];
                } elseif ($current['closing'] && $onTagStack) {
                    // Quotes are always children of the root node, so closing
                    // one returns to the root. Stepping up only one level
                    // would leave the cursor inside the quote whenever an
                    // inline tag is still open within it, although the tag
                    // stack is cleared here.
                    $node = $root;
                    $tags = [];
                }
            }

            $position = $current['end'];
        }

        // Text after last token
        if ('' !== ($text = substr($input, $position))) {
            $addNode($root, Node::TYPE_TEXT)->setValue($text);
        }

        return $root;
    }

    /**
     * Compiles a node (tree) back into a string.
     *
     * @throws \RuntimeException if the node has an unknown type or an unknown block tag
     */
    private function compile(Node $node): string
    {
        return match ($node->type) {
            Node::TYPE_ROOT => $this->subCompile($node->children),
            Node::TYPE_BLOCK => $this->compileBlock($node),
            Node::TYPE_CODE => sprintf('<div class="code"><p>%s</p><pre>%s</pre></div>', $GLOBALS['TL_LANG']['MSC']['com_code'], StringUtil::specialchars($node->value, true)),
            Node::TYPE_TEXT => StringUtil::specialchars($node->value, true),
            default => throw new \RuntimeException('Invalid node type.'),
        };
    }

    /**
     * Compiles a block node. Blocks whose children compile to an empty
     * string are omitted from the output.
     *
     * @throws \RuntimeException if the block has an unknown tag
     */
    private function compileBlock(Node $node): string
    {
        if ('' === ($children = $this->subCompile($node->children))) {
            return '';
        }

        return match ($node->tag) {
            'b' => sprintf('<strong>%s</strong>', $children),
            'i' => sprintf('<em>%s</em>', $children),
            'u' => sprintf('<span style="text-decoration: underline">%s</span>', $children),
            'quote' => $this->compileQuote($node, $children),
            'email' => $this->compileEmail($node, $children),
            'url' => $this->compileUrl($node, $children),
            default => throw new \RuntimeException('Invalid block value.'),
        };
    }

    /**
     * Compiles a quote block; the author line is only added if the tag had
     * an attribute.
     */
    private function compileQuote(Node $node, string $children): string
    {
        if (null === $node->value) {
            return sprintf('<blockquote>%s</blockquote>', $children);
        }

        return sprintf(
            '<blockquote><p>%s</p>%s</blockquote>',
            sprintf($GLOBALS['TL_LANG']['MSC']['com_quote'], StringUtil::specialchars($node->value, true)),
            $children,
        );
    }

    /**
     * Compiles an e-mail block into a mailto link. The address is taken from
     * the attribute or, without an attribute, from the first child node. If
     * it is not a valid e-mail address, only the escaped title is returned.
     */
    private function compileEmail(Node $node, string $children): string
    {
        $uri = $node->value ?: ($node->getFirstChildValue() ?? '');

        // NOTE(review): $children is already compiled output; escaping it
        // again below presumably turns nested markup into visible text -
        // confirm this is intended.
        $title = $node->value ? $children : $uri;

        try {
            if (Validator::isEmail($uri)) {
                return sprintf('<a href="mailto:%s">%s</a>', StringUtil::specialchars(Idna::encodeEmail($uri), true), StringUtil::specialchars($title, true));
            }
        } catch (\InvalidArgumentException) {
            // Fall through to the plain title
        }

        return StringUtil::specialchars($title, true);
    }

    /**
     * Compiles a URL block into a link. The URL is taken from the attribute
     * or, without an attribute, from the first child node. If it is not a
     * valid URL, only the escaped title is returned.
     */
    private function compileUrl(Node $node, string $children): string
    {
        $uri = $node->value ?: ($node->getFirstChildValue() ?? '');

        // NOTE(review): $children is already compiled output; escaping it
        // again below presumably turns nested markup into visible text -
        // confirm this is intended.
        $title = $node->value ? $children : $uri;

        try {
            if (Validator::isUrl($uri)) {
                return sprintf('<a href="%s" rel="noopener noreferrer nofollow">%s</a>', StringUtil::specialchars(Idna::encodeUrl($uri), true), StringUtil::specialchars($title, true));
            }
        } catch (\InvalidArgumentException) {
            // Fall through to the plain title
        }

        return StringUtil::specialchars($title, true);
    }

    /**
     * Compiles a list of nodes and concatenates the results.
     *
     * @param array<Node> $nodes
     */
    private function subCompile(array $nodes): string
    {
        return implode('', array_map(fn (Node $node): string => $this->compile($node), $nodes));
    }
}

Diff for: comments-bundle/src/Util/Node.php

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of Contao.
7+
*
8+
* (c) Leo Feyer
9+
*
10+
* @license LGPL-3.0-or-later
11+
*/
12+
13+
namespace Contao\CommentsBundle\Util;
14+
15+
/**
 * A node of the tree that BbCode::parse() builds from the token list.
 *
 * @internal
 */
final class Node
{
    public const TYPE_ROOT = 0;
    public const TYPE_TEXT = 1;
    public const TYPE_BLOCK = 2;
    public const TYPE_CODE = 3;

    /**
     * The BBCode tag name; only set via setTag().
     */
    public ?string $tag = null;

    /**
     * The payload of the node; only set via setValue().
     */
    public ?string $value = null;

    /**
     * @var array<Node>
     */
    public array $children = [];

    /**
     * The node this node is attached to, or null if there is none.
     */
    public ?self $parent;

    /**
     * One of the TYPE_* constants.
     */
    public int $type;

    public function __construct(?self $parent = null, int $type = self::TYPE_ROOT)
    {
        $this->parent = $parent;
        $this->type = $type;
    }

    /**
     * Sets the tag name and returns the node for method chaining.
     */
    public function setTag(string $tag): self
    {
        $this->tag = $tag;

        return $this;
    }

    /**
     * Sets the value and returns the node for method chaining.
     */
    public function setValue(?string $value): self
    {
        $this->value = $value;

        return $this;
    }

    /**
     * Returns the value of the first child node or null if the node has no
     * children.
     */
    public function getFirstChildValue(): ?string
    {
        return [] !== $this->children ? $this->children[0]->value : null;
    }
}

0 commit comments

Comments
 (0)