Skip to content

Commit 6d42e66

Browse files
m-vo, ausi, and leofeyer
authored
Merge pull request from GHSA-j55w-hjpj-825g
* harden BBCode parsing * Apply suggestions from code review Co-authored-by: Martin Auswöger <martin@auswoeger.com> * CS * Update comments-bundle/tests/Util/BbCodeTest.php Co-authored-by: Martin Auswöger <martin@auswoeger.com> * Update comments-bundle/src/Util/BbCode.php Co-authored-by: Martin Auswöger <martin@auswoeger.com> * Update comments-bundle/tests/Util/BbCodeTest.php Co-authored-by: Martin Auswöger <martin@auswoeger.com> * Update comments-bundle/tests/Util/BbCodeTest.php * CS --------- Co-authored-by: Martin Auswöger <martin@auswoeger.com> Co-authored-by: Leo Feyer <1192057+leofeyer@users.noreply.github.com>
1 parent 8c41781 commit 6d42e66

File tree

5 files changed

+496
-58
lines changed

5 files changed

+496
-58
lines changed

Diff for: comments-bundle/src/Resources/contao/classes/Comments.php

+2-58
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
namespace Contao;
1212

13+
use Contao\CommentsBundle\Util\BbCode;
1314
use Contao\CoreBundle\EventListener\Widget\HttpUrlListener;
1415
use Contao\CoreBundle\Exception\PageNotFoundException;
1516
use Nyholm\Psr7\Uri;
@@ -417,70 +418,13 @@ protected function renderCommentForm(FrontendTemplate $objTemplate, \stdClass $o
417418
/**
418419
* Replace bbcode and return the HTML string
419420
*
420-
* Supports the following tags:
421-
*
422-
* * [b][/b] bold
423-
* * [i][/i] italic
424-
* * [u][/u] underline
425-
* * [img][/img]
426-
* * [code][/code]
427-
* * [color=#ff0000][/color]
428-
* * [quote][/quote]
429-
* * [quote=tim][/quote]
430-
* * [url][/url]
431-
* * [url=http://][/url]
432-
* * [email][/email]
433-
* * [email=name@example.com][/email]
434-
*
435421
* @param string $strComment
436422
*
437423
* @return string
438424
*/
439425
public function parseBbCode($strComment)
440426
{
441-
$arrSearch = array
442-
(
443-
'@\[b\](.*)\[/b\]@Uis',
444-
'@\[i\](.*)\[/i\]@Uis',
445-
'@\[u\](.*)\[/u\]@Uis',
446-
'@\s*\[code\](.*)\[/code\]\s*@Uis',
447-
'@\[color=([^\]" ]+)\](.*)\[/color\]@Uis',
448-
'@\s*\[quote\](.*)\[/quote\]\s*@Uis',
449-
'@\s*\[quote=([^\]]+)\](.*)\[/quote\]\s*@Uis',
450-
'@\[img\]\s*([^\[" ]+\.(jpe?g|png|gif|bmp|tiff?|ico))\s*\[/img\]@i',
451-
'@\[url\]\s*([^\[" ]+)\s*\[/url\]@i',
452-
'@\[url=([^\]" ]+)\](.*)\[/url\]@Uis',
453-
'@\[email\]\s*([^\[" ]+)\s*\[/email\]@i',
454-
'@\[email=([^\]" ]+)\](.*)\[/email\]@Uis',
455-
'@href="(([a-z0-9]+\.)*[a-z0-9]+\.([a-z]{2}|asia|biz|com|info|name|net|org|tel)(/|"))@i'
456-
);
457-
458-
$arrReplace = array
459-
(
460-
'<strong>$1</strong>',
461-
'<em>$1</em>',
462-
'<span style="text-decoration:underline">$1</span>',
463-
"\n\n" . '<div class="code"><p>' . $GLOBALS['TL_LANG']['MSC']['com_code'] . '</p><pre>$1</pre></div>' . "\n\n",
464-
'<span style="color:$1">$2</span>',
465-
"\n\n" . '<blockquote>$1</blockquote>' . "\n\n",
466-
"\n\n" . '<blockquote><p>' . sprintf($GLOBALS['TL_LANG']['MSC']['com_quote'], '$1') . '</p>$2</blockquote>' . "\n\n",
467-
'<img src="$1" alt="" />',
468-
'<a href="$1">$1</a>',
469-
'<a href="$1">$2</a>',
470-
'<a href="mailto:$1">$1</a>',
471-
'<a href="mailto:$1">$2</a>',
472-
'href="http://$1'
473-
);
474-
475-
$strComment = preg_replace($arrSearch, $arrReplace, $strComment);
476-
477-
// Encode e-mail addresses
478-
if (strpos($strComment, 'mailto:') !== false)
479-
{
480-
$strComment = StringUtil::encodeEmail($strComment);
481-
}
482-
483-
return $strComment;
427+
return (new BbCode())->toHtml($strComment);
484428
}
485429

486430
/**

Diff for: comments-bundle/src/Util/BbCode.php

+229
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of Contao.
7+
*
8+
* (c) Leo Feyer
9+
*
10+
* @license LGPL-3.0-or-later
11+
*/
12+
13+
namespace Contao\CommentsBundle\Util;
14+
15+
use Contao\Idna;
16+
use Contao\StringUtil;
17+
use Contao\Validator;
18+
19+
/**
 * Converts BBCode markup into escaped HTML by tokenizing the input, building
 * a node tree and compiling that tree back into a string.
 *
 * @internal
 */
final class BbCode
{
    /**
     * Converts text containing BBCode to HTML.
     *
     * Supports the following tags:
     *
     *  * [b][/b] bold
     *  * [i][/i] italic
     *  * [u][/u] underline
     *  * [code][/code]
     *  * [quote][/quote]
     *  * [quote=author][/quote]
     *  * [url][/url]
     *  * [url=https://…][/url]
     *  * [email][/email]
     *  * [email=name@example.com][/email]
     *
     * [img] and [color] markers are matched by the tokenizer but have no HTML
     * counterpart: the markers themselves are dropped while the text between
     * them is kept as (escaped) text.
     *
     * @throws \InvalidArgumentException if the input cannot be tokenized
     */
    public function toHtml(string $bbCode): string
    {
        $html = $this->compile($this->parse($this->tokenize($bbCode), $bbCode));

        // Curly braces are always emitted as numeric entities (presumably so
        // the result can never contain insert tag delimiters - TODO confirm).
        return str_replace(['{', '}'], ['&#123;', '&#125;'], $html);
    }

    /**
     * Find BBCode tokens and annotate them with their position/tag/type and
     * attribute. We're only matching tokens in the form '[tag]', '[/tag]' and
     * '[tag=attr]'.
     *
     * Positions are byte offsets into $input (as reported by
     * PREG_OFFSET_CAPTURE), which is why strlen()/substr() are used on them.
     *
     * @return array<int, array{start: int, end: int, closing: bool, tag: string, attr: string|null}>
     *
     * @throws \InvalidArgumentException if the regular expression engine fails
     */
    private function tokenize(string $input): array
    {
        if (false === preg_match_all('%\[(/?)(b|i|u|quote|code|url|email|img|color)(?:=([^\[\]]*))?]%', $input, $matches, PREG_OFFSET_CAPTURE)) {
            throw new \InvalidArgumentException('Could not tokenize input.');
        }

        $tokens = [];

        foreach ($matches[0] as $index => [$token, $position]) {
            $attr = $matches[3][$index][0];

            $tokens[] = [
                'start' => $position,
                'end' => $position + \strlen($token),
                'closing' => '/' === $matches[1][$index][0],
                'tag' => $matches[2][$index][0],
                // Only a missing/empty attribute becomes null. A truthiness
                // check would also discard the valid attribute "0".
                'attr' => '' !== $attr ? $attr : null,
            ];
        }

        return $tokens;
    }

    /**
     * Parses tokens into a node tree. Input before/after tokens is treated as
     * text.
     *
     * Code and quote nodes are always attached to the root node. Text that
     * follows the last token is attached to the root as well, so a tag that
     * is never closed ends up without children.
     *
     * @param array<int, array{start: int, end: int, closing: bool, tag: string, attr: string|null}> $tokens
     */
    private function parse(array $tokens, string $input): Node
    {
        $root = new Node();
        $node = $root;
        $tags = [];
        $position = 0;

        $addNode = static function (Node $parent, $type): Node {
            $node = new Node($parent, $type);
            $parent->children[] = $node;

            return $node;
        };

        $advance = static function (array $token) use (&$position): void {
            $position = $token['end'];
        };

        $numTokens = \count($tokens);

        for ($i = 0; $i < $numTokens; ++$i) {
            $current = $tokens[$i];

            // Text before token
            if (($length = $current['start'] - $position) > 0) {
                $addNode($node, Node::TYPE_TEXT)->setValue(substr($input, $position, $length));
            }

            // Code: everything up to the next closing code token is taken
            // verbatim, any tokens in between are skipped
            if (('code' === $current['tag']) && !$current['closing']) {
                for ($j = $i + 1; $j < $numTokens; ++$j) {
                    if ('code' === $tokens[$j]['tag'] && $tokens[$j]['closing']) {
                        $addNode($root, Node::TYPE_CODE)->setValue(substr($input, $current['end'], $tokens[$j]['start'] - $current['end']));
                        $advance($tokens[$j]);
                        $i = $j;

                        continue 2;
                    }
                }
            }

            // Blocks
            $onTagStack = \in_array($current['tag'], $tags, true);

            if (\in_array($current['tag'], ['b', 'i', 'u', 'url', 'email'], true)) {
                if (!$current['closing'] && !$onTagStack) {
                    $node = $addNode($node, Node::TYPE_BLOCK)->setTag($current['tag'])->setValue($current['attr']);
                    $tags[] = $current['tag'];
                } elseif ($current['closing'] && $onTagStack) {
                    // Implicitly close every tag opened after the matching one
                    do {
                        $node = $node->parent;
                    } while ($current['tag'] !== array_pop($tags));
                }
            } elseif ('quote' === $current['tag']) {
                if (!$current['closing'] && !$onTagStack) {
                    $node = $addNode($root, Node::TYPE_BLOCK)->setTag($current['tag'])->setValue($current['attr']);
                    $tags = [$current['tag']];
                } elseif ($current['closing'] && $onTagStack) {
                    // Quotes are direct children of the root and closing one
                    // empties the tag stack, so continue at the root. Moving
                    // up only one level would leave the cursor inside the
                    // quote whenever an inner tag was still open.
                    $node = $root;
                    $tags = [];
                }
            }

            // Tokens without a branch above (img, color, unmatched closing
            // tags, …) are skipped without producing a node
            $advance($current);
        }

        // Text after last token
        if ('' !== ($text = substr($input, $position))) {
            $addNode($root, Node::TYPE_TEXT)->setValue($text);
        }

        return $root;
    }

    /**
     * Compiles a node (tree) back into a string.
     *
     * Block nodes whose children compile to an empty string produce no
     * output at all.
     *
     * @throws \RuntimeException if the node has an unknown type or block tag
     */
    private function compile(Node $node): string
    {
        if (Node::TYPE_ROOT === $node->type) {
            return $this->subCompile($node->children);
        }

        if (Node::TYPE_BLOCK === $node->type) {
            if ('' === ($children = $this->subCompile($node->children))) {
                return '';
            }

            switch ($node->tag) {
                case 'b':
                    return sprintf('<strong>%s</strong>', $children);

                case 'i':
                    return sprintf('<em>%s</em>', $children);

                case 'u':
                    return sprintf('<span style="text-decoration: underline">%s</span>', $children);

                case 'quote':
                    if (null !== $node->value) {
                        return sprintf(
                            '<blockquote><p>%s</p>%s</blockquote>',
                            sprintf($GLOBALS['TL_LANG']['MSC']['com_quote'], StringUtil::specialchars($node->value, true)),
                            $children
                        );
                    }

                    return sprintf('<blockquote>%s</blockquote>', $children);

                case 'email':
                    // The address is the attribute if given, otherwise the
                    // value of the first child node
                    $uri = $node->value ?: $node->getFirstChildValue() ?? '';
                    $title = empty($node->value) ? $uri : $children;

                    try {
                        if (Validator::isEmail($uri)) {
                            return sprintf('<a href="mailto:%s">%s</a>', StringUtil::specialchars(Idna::encodeEmail($uri), true), StringUtil::specialchars($title, true));
                        }
                    } catch (\InvalidArgumentException $e) {
                        // Intentionally ignored: fall back to the plain title
                    }

                    return StringUtil::specialchars($title, true);

                case 'url':
                    // The target is the attribute if given, otherwise the
                    // value of the first child node
                    $uri = $node->value ?: $node->getFirstChildValue() ?? '';
                    $title = empty($node->value) ? $uri : $children;

                    try {
                        if (Validator::isUrl($uri)) {
                            return sprintf('<a href="%s" rel="noopener noreferrer nofollow">%s</a>', StringUtil::specialchars(Idna::encodeUrl($uri), true), StringUtil::specialchars($title, true));
                        }
                    } catch (\InvalidArgumentException $e) {
                        // Intentionally ignored: fall back to the plain title
                    }

                    return StringUtil::specialchars($title, true);

                default:
                    throw new \RuntimeException('Invalid block value.');
            }
        }

        if (Node::TYPE_CODE === $node->type) {
            return sprintf('<div class="code"><p>%s</p><pre>%s</pre></div>', $GLOBALS['TL_LANG']['MSC']['com_code'], StringUtil::specialchars($node->value, true));
        }

        if (Node::TYPE_TEXT === $node->type) {
            return StringUtil::specialchars($node->value, true);
        }

        throw new \RuntimeException('Invalid node type.');
    }

    /**
     * Compiles a list of nodes and concatenates the results in order.
     *
     * @param array<Node> $nodes
     */
    private function subCompile(array $nodes): string
    {
        return implode('', array_map(fn (Node $node): string => $this->compile($node), $nodes));
    }
}

Diff for: comments-bundle/src/Util/Node.php

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of Contao.
7+
*
8+
* (c) Leo Feyer
9+
*
10+
* @license LGPL-3.0-or-later
11+
*/
12+
13+
namespace Contao\CommentsBundle\Util;
14+
15+
/**
 * A single node of a tree: it knows its parent, carries a type, an optional
 * tag and value, and holds an ordered list of child nodes.
 *
 * @internal
 */
final class Node
{
    public const TYPE_ROOT = 0;
    public const TYPE_TEXT = 1;
    public const TYPE_BLOCK = 2;
    public const TYPE_CODE = 3;

    /**
     * The node this node was created for, or null if it has no parent.
     *
     * @var Node|null
     */
    public $parent;

    /**
     * One of the TYPE_* constants.
     *
     * @var int
     */
    public $type;

    /**
     * The tag name set via setTag(), or null if none was set.
     *
     * @var string|null
     */
    public $tag;

    /**
     * The value set via setValue(), or null if none was set.
     *
     * @var string|null
     */
    public $value;

    /**
     * Child nodes in insertion order. The constructor does not register a
     * node with its parent; callers append to this list themselves.
     *
     * @var array<Node>
     */
    public $children = [];

    /**
     * @param Node|null $parent the parent node, or null for a node without a parent
     * @param int       $type   one of the TYPE_* constants (defaults to TYPE_ROOT)
     */
    public function __construct(?self $parent = null, int $type = self::TYPE_ROOT)
    {
        $this->parent = $parent;
        $this->type = $type;
    }

    /**
     * Sets the tag name and returns the node for chaining.
     */
    public function setTag(string $tag): self
    {
        $this->tag = $tag;

        return $this;
    }

    /**
     * Sets (or clears, when null is given) the value and returns the node
     * for chaining.
     */
    public function setValue(?string $value): self
    {
        $this->value = $value;

        return $this;
    }

    /**
     * Returns the value of the first child node, or null if there are no
     * children or the first child has no value.
     */
    public function getFirstChildValue(): ?string
    {
        if (0 === \count($this->children)) {
            return null;
        }

        return $this->children[0]->value;
    }
}

0 commit comments

Comments
 (0)