Skip to content

Commit

Permalink
imp: Display feeds description as HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
marienfressinaud committed Jul 29, 2023
1 parent 5c4c1f4 commit 28f2308
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 8 deletions.
2 changes: 1 addition & 1 deletion lib/SpiderBits/src/feeds/AtomParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static function parse(\DOMDocument $dom_document): Feed
}

if ($tagName === 'subtitle') {
$feed->description = trim(htmlspecialchars_decode($value, ENT_QUOTES));
$feed->description = $value;
}

if ($tagName === 'link') {
Expand Down
2 changes: 1 addition & 1 deletion lib/SpiderBits/src/feeds/JsonParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public static function parse(array $json_document): Feed
$feed->type = 'json';

$feed->title = self::getSecureString($json_document, 'title');
$feed->description = self::getSecureString($json_document, 'description');
$feed->description = $json_document['description'] ?? '';

$link_url = self::getSecureString($json_document, 'home_page_url');
if ($link_url) {
Expand Down
2 changes: 1 addition & 1 deletion lib/SpiderBits/src/feeds/RdfParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static function parse(\DOMDocument $dom_document): Feed
}

if ($tagName === 'description') {
$feed->description = trim(htmlspecialchars_decode($value, ENT_QUOTES));
$feed->description = $value;
}

if ($tagName === 'link') {
Expand Down
2 changes: 1 addition & 1 deletion lib/SpiderBits/src/feeds/RssParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static function parse(\DOMDocument $dom_document): Feed
}

if ($tagName === 'description') {
$feed->description = trim(htmlspecialchars_decode($value, ENT_QUOTES));
$feed->description = $value;
}

if ($tagName === 'link') {
Expand Down
16 changes: 14 additions & 2 deletions src/models/Collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,19 @@ public function feedWebsite(): string
*/
public function descriptionAsHtml(): string
{
// NOTE(review): this method is shown as a rendered diff without +/- markers.
// The two statements right below look like the pre-change body that the
// branching code after them replaces — confirm against the repository before
// treating them as live code.
$markdown = new utils\MiniMarkdown();
return $markdown->text($this->description);
if ($this->type === 'collection') {
// Descriptions of "collection"-typed records are rendered with MiniMarkdown.
$markdown = new utils\MiniMarkdown();
return $markdown->text($this->description);
} else {
// Any other type: choose the base URL handed to the sanitizer. The feed
// site URL is preferred, then the feed URL, then an empty string.
if ($this->feed_site_url) {
$site_url = $this->feed_site_url;
} elseif ($this->feed_url) {
$site_url = $this->feed_url;
} else {
$site_url = '';
}

// The description is sanitized as HTML; the sanitizer resolves the href of
// the remaining links against $site_url.
return utils\HtmlSanitizer::sanitizeCollectionDescription($this->description, $site_url);
}
}
}
85 changes: 85 additions & 0 deletions src/utils/HtmlSanitizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

namespace flusio\utils;

/**
* @author Marien Fressinaud <dev@marienfressinaud.fr>
* @license http://www.gnu.org/licenses/agpl-3.0.en.html AGPL
*/
class HtmlSanitizer
{
    /**
     * Elements allowed in a description, each mapped to the list of
     * attributes it is allowed to carry. Passed as the first argument of
     * \SpiderBits\HtmlSanitizer.
     */
    private const ALLOWED_ELEMENTS = [
        'abbr' => [],
        'a' => ['href', 'title'],
        'blockquote' => [],
        'br' => [],
        'caption' => [],
        'code' => [],
        'dd' => [],
        'del' => [],
        'details' => ['open'],
        'div' => [],
        'dl' => [],
        'dt' => [],
        'em' => [],
        'figcaption' => [],
        'figure' => [],
        'i' => [],
        'li' => [],
        'ol' => [],
        'pre' => [],
        'p' => [],
        'q' => [],
        'rp' => [],
        'rt' => [],
        'ruby' => [],
        'small' => [],
        'span' => [],
        'strong' => [],
        'sub' => [],
        'summary' => [],
        'sup' => [],
        'u' => [],
        'ul' => [],
    ];

    /**
     * Second argument of \SpiderBits\HtmlSanitizer.
     *
     * NOTE(review): presumably the elements whose tags are dropped while
     * their text content is kept — confirm against \SpiderBits\HtmlSanitizer.
     */
    private const UNWRAPPED_ELEMENTS = [
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
    ];

    /**
     * Sanitize the HTML of a description and rewrite its links.
     *
     * The description is first cleaned with \SpiderBits\HtmlSanitizer, then
     * wrapped in a <div> and loaded as a DOM. Every <a> element having a
     * href attribute gets its URL absolutized against $base_url and is
     * forced to open in a new tab.
     *
     * @param string $description The raw (untrusted) HTML description.
     * @param string $base_url The URL used to resolve the links' href.
     *
     * @return string The HTML of the resulting DOM (wrapping <div> included).
     */
    public static function sanitizeCollectionDescription(string $description, string $base_url): string
    {
        $sanitizer = new \SpiderBits\HtmlSanitizer(self::ALLOWED_ELEMENTS, self::UNWRAPPED_ELEMENTS);
        $clean_html = $sanitizer->sanitize($description);

        // The wrapping <div> gives the fragment a single root element, as no
        // <html>/<body> nor doctype are implied when loading it.
        $dom = \SpiderBits\Dom::fromText(
            '<div>' . $clean_html . '</div>',
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );

        $links = $dom->select('//a');
        if (!$links) {
            return $dom->html();
        }

        foreach ($links->list() as $link) {
            if ($link instanceof \DOMElement && $link->hasAttribute('href')) {
                self::rewriteLink($link, $base_url);
            }
        }

        return $dom->html();
    }

    /**
     * Absolutize the href of a link and make it open in a new tab.
     *
     * @param \DOMElement $link An <a> element having a href attribute.
     * @param string $base_url The URL used to resolve the href.
     */
    private static function rewriteLink(\DOMElement $link, string $base_url): void
    {
        // The absolutized URL goes through ViewHelpers::protect() before being
        // written back to the attribute node.
        $href = $link->getAttributeNode('href');
        $absolute_url = \SpiderBits\Url::absolutize($href->value, $base_url);
        $href->value = \Minz\Output\ViewHelpers::protect($absolute_url);

        // "target" is appended first, then "rel".
        $extra_attributes = [
            'target' => '_blank',
            'rel' => 'noopener noreferrer',
        ];
        foreach ($extra_attributes as $name => $value) {
            $link->appendChild(new \DOMAttr($name, $value));
        }
    }
}
4 changes: 2 additions & 2 deletions tests/lib/SpiderBits/feeds/FeedTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public function testFromTextWithLaverty(): void

$this->assertSame('rss', $feed->type);
$this->assertSame("Stories by L'avertY on Medium", $feed->title);
$this->assertSame("Stories by L'avertY on Medium", $feed->description);
$this->assertSame("Stories by L&#39;avertY on Medium", $feed->description);
$this->assertSame('https://medium.com/@lavertygrenoble?source=rss-644186d8e612------2', $feed->link);
$this->assertSame(
'https://medium.com/@lavertygrenoble?source=rss-644186d8e612------2',
Expand Down Expand Up @@ -130,7 +130,7 @@ public function testFromTextWithLaverty(): void
$this->assertSame(1614070752, $entry->published_at->getTimestamp());
$this->assertSame(5, count($entry->categories));
$this->assertSame('grenoble', $entry->categories['grenoble']);
$this->assertSame('d4fb468a697843eacd24d83a3e416583f5467b449abf6b2aa87530373563fd25', $feed->hash());
$this->assertSame('00bd4b55adb50cfaa2b8dab14d28df25b3ab23186fe9920103df67c38c208585', $feed->hash());
}

public function testFromTextWithVimeoSudWeb(): void
Expand Down
26 changes: 26 additions & 0 deletions tests/utils/HtmlSanitizerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

namespace flusio\utils;

class HtmlSanitizerTest extends \PHPUnit\Framework\TestCase
{
    /**
     * In the sanitized output, the <h1> tags are gone but their text stays,
     * the <script> element disappears entirely, and the relative link is
     * made absolute and opens in a new tab.
     */
    public function testSanitizeCollectionDescription(): void
    {
        $site_url = 'https://example.com';
        $raw_html = implode("\n", [
            '<h1>Welcome!</h1>',
            '<div>Hello <a href="/world">World</a></div>',
            "<script>alert('oops');</script>",
        ]);
        $expected_html = implode("\n", [
            '<div>Welcome!',
            '<div>Hello <a href="https://example.com/world" target="_blank" rel="noopener noreferrer">World</a></div>',
            '</div>',
        ]);

        $sanitized_html = HtmlSanitizer::sanitizeCollectionDescription($raw_html, $site_url);

        // The output is trimmed so surrounding whitespace does not matter.
        self::assertSame($expected_html, trim($sanitized_html));
    }
}

0 comments on commit 28f2308

Please sign in to comment.