Skip to content
This repository has been archived by the owner on Mar 19, 2020. It is now read-only.

Commit

Permalink
Added support for HTML entities
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyBogdanov committed Jan 23, 2018
1 parent 88a1d5c commit 9eb925d
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 28 deletions.
5 changes: 4 additions & 1 deletion classes/Dom.php
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,10 @@ public function __construct($content = null)
$this->nodes = [];

try {
$tokenCollection = (new HtmlTokenizer())->parse($content);
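// The tokenizer does not recognize HTML entities; it leaves them verbatim inside text tokens.
// Decoding the content with html_entity_decode() before tokenizing ensures that text nodes hold plain text.
// Text nodes are then re-encoded with htmlentities() when they are output as HTML.
// NOTE(review): decoding happens before tokenization, so escaped markup such as "&lt;b&gt;" reaches the
// tokenizer as a literal "<b>" - confirm this is the intended behaviour.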
$tokenCollection = (new HtmlTokenizer())->parse(html_entity_decode($content, ENT_NOQUOTES));
} catch (\Exception $e) {
throw static::createInvalidContentException($content);
}
Expand Down
2 changes: 1 addition & 1 deletion classes/Node/Text.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public function __construct(string $content)
*/
public function __toString(): string
{
return htmlspecialchars($this->content);
return htmlentities($this->content, ENT_NOQUOTES | ENT_SUBSTITUTE);
}

/**
Expand Down
119 changes: 100 additions & 19 deletions tests/classes/DomTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\CData();
$token->parse((string) $this->demoCData());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoCData(), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -127,7 +132,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Comment();
$token->parse((string) $this->demoComment());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoComment(), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -138,7 +148,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\DocType();
$token->parse((string) $this->demoDocType());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoDocType(), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -149,7 +164,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Text();
$token->parse((string) $this->demoText());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoText(), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -160,7 +180,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Element();
$token->parse((string) $this->demoElement());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -171,7 +196,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Element();
$token->parse((string) $this->demoElement(true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(true), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -182,7 +212,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Element();
$token->parse((string) $this->demoElement(false, true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(false, true), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -193,7 +228,12 @@ public function demoContent(): array
}],
[(function () {
$token = new Tokens\Element();
$token->parse((string) $this->demoElement(true, true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(true, true), ENT_NOQUOTES));

return $token;
})(), function ($content, array $nodes, callable $filter) {
$this->assertSame(array_map(function (NodeInterface $node) {
Expand All @@ -207,44 +247,84 @@ public function demoContent(): array
$index = 0;

$token = new Tokens\CData();
$token->parse((string) $this->demoCData());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoCData(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Comment();
$token->parse((string) $this->demoComment());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoComment(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\DocType();
$token->parse((string) $this->demoDocType());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoDocType(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Text();
$token->parse((string) $this->demoText());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoText(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoElement());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoElement(true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(true), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoElement(false, true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(false, true), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoElement(true, true));

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoElement(true, true), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoVoidElement());

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoVoidElement(), ENT_NOQUOTES));
$tokenCollection[$index++] = $token;

$token = new Tokens\Element();
$token->parse((string) $this->demoVoidElement(true));
$tokenCollection[$index++] = $token;

/**
* @see Dom::__construct (from string)
*/
$token->parse(html_entity_decode((string) $this->demoVoidElement(true), ENT_NOQUOTES));
$tokenCollection[$index + 1] = $token;

return $tokenCollection;
})(), function ($content, array $nodes, callable $filter, bool $ignoreDocType = false) {
Expand Down Expand Up @@ -472,6 +552,7 @@ public function demoInvalidHTML(): array
* @dataProvider demoInvalidContent()
*
* @param $content
* @throws \ReflectionException
*/
public function testCreateInvalidContentException($content)
{
Expand Down
2 changes: 1 addition & 1 deletion tests/classes/Helper/DemoGeneratorTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ protected function demoDocType(): DocType
*/
protected function demoText(): Text
{
return new Text('demo');
return new Text('d"e" — \'m\'o');
}

/**
Expand Down
10 changes: 4 additions & 6 deletions tests/classes/Node/TextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
namespace SDom\Test\Node;

use PHPUnit\Framework\TestCase;
use SDom\Node\Element;
use SDom\Node\NodeInterface;
use SDom\Node\Text;
use SDom\Test\Helper\DemoGeneratorTrait;
Expand All @@ -19,16 +18,15 @@ class TextTest extends TestCase
{
use DemoGeneratorTrait;

const DEMO_RAW = 'demo <em>demo</em>';
const DEMO_ESCAPED = 'demo &lt;em&gt;demo&lt;/em&gt;';
const DEMO_RAW = 'demo "&mdash;" <em>d\'e\'mo</em>';
const DEMO_ESCAPED = 'demo "&amp;mdash;" &lt;em&gt;d\'e\'mo&lt;/em&gt;';

/**
* @param Element|null $parent
* @return Text
*/
protected function demo(Element $parent = null): Text
protected function demo(): Text
{
return new Text(self::DEMO_RAW, $parent);
return new Text(self::DEMO_RAW);
}

/**
Expand Down

0 comments on commit 9eb925d

Please sign in to comment.