Skip to content

Commit

Permalink
Merge pull request #2 from ausi/fix/response-context-encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Toflar committed May 19, 2021
2 parents 6b28659 + 7980cda commit bd10538
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -140,16 +140,16 @@ protected function compile()
}
elseif ($objEvent->title)
{
$responseContext->setTitle(StringUtil::decodeEntities(Controller::replaceInsertTags($objEvent->title)));
$responseContext->setTitle(StringUtil::getRawDecodedValue($objEvent->title));
}

if ($objEvent->description)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objEvent->description)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValue($objEvent->description));
}
elseif ($objEvent->teaser)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objEvent->teaser)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValueFromHtml($objEvent->teaser));
}

if ($objEvent->robots)
Expand Down
71 changes: 71 additions & 0 deletions core-bundle/src/Resources/contao/library/Contao/StringUtil.php
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,77 @@ public static function ampersand($strString, $blnEncode=true): string
{
return preg_replace('/&(amp;)?/i', ($blnEncode ? '&amp;' : '&'), $strString);
}

/**
 * Returns the raw UTF-8 text an input-encoded value was created from.
 *
 * Insert tags are replaced (or stripped), HTML tags are removed and basic
 * entities as well as encoded entities are decoded. Byte sequences that are
 * not valid UTF-8 are substituted with U+FFFD and any double curly braces
 * left in the result are rewritten to [{] and [}].
 *
 * @param bool $blnRemoveInsertTags True to remove insert tags instead of replacing them
 */
public static function getRawDecodedValue(string $strValue, bool $blnRemoveInsertTags = false): string
{
    $strText = $blnRemoveInsertTags
        ? static::stripInsertTags($strValue)
        : Controller::replaceInsertTags($strValue, false);

    // Drop markup first, then undo the basic entity and the HTML entity encoding
    $strText = static::decodeEntities(static::restoreBasicEntities(strip_tags($strText)));

    // The empty pattern with the "u" modifier only matches if the subject is valid UTF-8
    $blnValidUtf8 = 1 === preg_match('//u', $strText);

    if (!$blnValidUtf8)
    {
        // Temporarily switch the substitute character to U+FFFD and restore it afterwards
        $varPreviousSubstitute = mb_substitute_character();
        mb_substitute_character(0xFFFD);

        $strText = mb_convert_encoding($strText, 'UTF-8', 'UTF-8');

        mb_substitute_character($varPreviousSubstitute);
    }

    // Double curly braces can reappear after decoding (e.g. from &#123;&#123;);
    // presumably they are rewritten so they are not treated as insert tags later on
    return str_replace(['{{', '}}'], ['[{]', '[}]'], $strText);
}

/**
 * Extracts the raw text of an HTML string and normalizes its white space.
 *
 * Block level elements and line break tags become line breaks, all other
 * markup is removed. Insert tags, basic entities and encoded entities are
 * handled the same way as in getRawDecodedValue(). The method is meant for
 * content of fields that have the allowHtml flag enabled.
 *
 * @see StringUtil::getRawDecodedValue()
 *
 * @param bool $blnRemoveInsertTags True to remove insert tags instead of replacing them
 */
public static function getRawDecodedValueFromHtml(string $strValue, bool $blnRemoveInsertTags = false): string
{
    if (false === $blnRemoveInsertTags)
    {
        $strValue = Controller::replaceInsertTags($strValue, false);
    }

    $arrSearch = [
        // Line breaks in the source are turned into plain spaces
        '/[\r\n]+/',
        // Opening and closing tags of block level elements get a line break in front of them
        '/<\/?(?:br|blockquote|div|dl|figcaption|figure|footer|h\d|header|hr|li|p|pre|tr)\b/i',
    ];

    $strText = preg_replace($arrSearch, [' ', "\n$0"], $strValue);

    // Insert tags were replaced above if requested, so whatever is left gets stripped here
    $strText = static::getRawDecodedValue($strText, true);

    // Collapse runs of non-line-break white space, then the white space around line breaks
    $strText = preg_replace(['/[^\S\n]+/', '/\s*\n\s*/'], [' ', "\n"], $strText);

    return trim($strText);
}
}

class_alias(StringUtil::class, 'StringUtil');
4 changes: 2 additions & 2 deletions core-bundle/src/Resources/contao/models/PageModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -314,11 +314,11 @@ public function __set($strKey, $varValue)
switch ($strKey)
{
case 'pageTitle':
$responseContext->setTitle(StringUtil::decodeEntities($varValue));
$responseContext->setTitle(StringUtil::getRawDecodedValue($varValue ?? ''));
break;

case 'description':
$responseContext->setMetaDescription(StringUtil::decodeEntities($varValue));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValue($varValue ?? ''));
break;

case 'robots':
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/Resources/contao/modules/ModuleArticle.php
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,11 @@ protected function compile()

if ($responseContext instanceof WebpageResponseContext)
{
$responseContext->setTitle(StringUtil::decodeEntities($this->title));
$responseContext->setTitle(StringUtil::getRawDecodedValue($this->title ?? ''));

if ($this->teaser)
{
$responseContext->setMetaDescription(Controller::replaceInsertTags($this->teaser));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValueFromHtml($this->teaser));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ class ContaoWebpageResponseContext extends WebpageResponseContext
{
public function __construct(PageModel $pageModel)
{
$title = $pageModel->pageTitle ?: StringUtil::decodeEntities($pageModel->title ?: '');
$title = $pageModel->pageTitle ?: StringUtil::getRawDecodedValue($pageModel->title ?: '');

$this
->setTitle($title ?: '')
->setMetaDescription($pageModel->description ?: '')
->setMetaDescription(StringUtil::getRawDecodedValue($pageModel->description ?: ''))
;

if ($pageModel->robots) {
Expand Down
14 changes: 0 additions & 14 deletions core-bundle/src/Routing/ResponseContext/WebpageResponseContext.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@

namespace Contao\CoreBundle\Routing\ResponseContext;

use Contao\StringUtil;

class WebpageResponseContext extends ResponseContext
{
/**
Expand All @@ -37,8 +35,6 @@ public function getTitle(): string

public function setTitle(string $title): self
{
$title = self::cleanString($title);

$this->title = $title;

return $this;
Expand All @@ -51,8 +47,6 @@ public function getMetaDescription(): string

public function setMetaDescription(string $metaDescription): self
{
$metaDescription = self::cleanString($metaDescription);

$this->metaDescription = $metaDescription;

return $this;
Expand All @@ -69,12 +63,4 @@ public function setMetaRobots(string $metaRobots): self

return $this;
}

/**
 * Reduces a string to a single line of plain text.
 *
 * Tags are stripped, every line feed is replaced with a space and the
 * surrounding white space is trimmed.
 */
protected static function cleanString(string $string): string
{
    return trim(str_replace("\n", ' ', strip_tags($string)));
}
}
50 changes: 50 additions & 0 deletions core-bundle/tests/Contao/StringUtilTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@

namespace Contao\CoreBundle\Tests\Contao;

use Contao\CoreBundle\Security\Authentication\Token\TokenChecker;
use Contao\CoreBundle\Tests\TestCase;
use Contao\StringUtil;
use Contao\System;
use Psr\Log\NullLogger;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\HttpFoundation\RequestStack;

class StringUtilTest extends TestCase
{
Expand All @@ -26,6 +28,10 @@ protected function setUp(): void

$container = new ContainerBuilder();
$container->setParameter('kernel.project_dir', $this->getFixturesDir());
$container->setParameter('kernel.cache_dir', $this->getFixturesDir().'/cache');
$container->setParameter('kernel.debug', false);
$container->set('request_stack', new RequestStack());
$container->set('contao.security.token_checker', $this->createMock(TokenChecker::class));
$container->set('monolog.logger.contao', new NullLogger());

System::setContainer($container);
Expand Down Expand Up @@ -142,4 +148,48 @@ public function trimsplitProvider(): \Generator
['foo', 'bar'],
];
}

/**
 * Checks that an input-encoded value is decoded to the expected raw value.
 *
 * @dataProvider getRawDecodedValueProvider
 */
public function testGetsRawDecodedValues(string $source, string $expected, bool $removeInsertTags = false): void
{
    $actual = StringUtil::getRawDecodedValue($source, $removeInsertTags);

    $this->assertSame($expected, $actual);
}

/**
 * Provides the data sets for the raw decoded value tests.
 *
 * Each data set consists of the source value, the expected result and
 * optionally the flag to remove insert tags instead of replacing them.
 */
public function getRawDecodedValueProvider(): \Generator
{
    yield from [
        // Plain text and insert tags
        ['foobar', 'foobar'],
        ['foo{{email::test@example.com}}bar', 'footest@example.combar'],
        ['foo{{email::test@example.com}}bar', 'foobar', true],
        ['{{date::...}}', '...'],
        ['{{date::...}}', '', true],

        // Entities, basic entities and HTML tags
        ["&lt;&#62;&\u{A0}[lt][gt][&][nbsp]", "<>&\u{A0}<>&\u{A0}", true],
        ['I &lt;3 Contao', 'I <3 Contao'],
        ['Remove unexpected <span>HTML tags', 'Remove unexpected HTML tags'],
        ['Keep non-HTML &lt;tags&#62; intact', 'Keep non-HTML <tags> intact'],

        // Invalid UTF-8 and encoded curly braces
        ["Cont\xE4o invalid UTF-8", "Cont\u{FFFD}o invalid UTF-8"],
        ['&#123;&#123;date&#125;&#125;', '[{]date[}]'],
    ];
}

/**
 * Checks that an HTML value is reduced to the expected raw text.
 *
 * @dataProvider getRawDecodedValueFromHtmlProvider
 */
public function testGetsRawDecodedValuesFromHtml(string $source, string $expected, bool $removeInsertTags = false): void
{
    $actual = StringUtil::getRawDecodedValueFromHtml($source, $removeInsertTags);

    $this->assertSame($expected, $actual);
}

/**
 * Provides the data sets for the HTML variant of the raw decoded value tests.
 *
 * The data sets of getRawDecodedValueProvider() are reused and extended
 * with cases that cover line breaks, white space and block level elements.
 */
public function getRawDecodedValueFromHtmlProvider(): \Generator
{
    yield from $this->getRawDecodedValueProvider();

    yield from [
        // Line break tags and the {{br}} insert tag
        ['foo<br>bar{{br}}baz', "foo\nbar\nbaz"],

        // White space normalization with and without line break tags
        [" \t\r\nfoo \t\r\n \r\n\t bar \t\r\n", 'foo bar'],
        [" \t\r\n<br>foo \t<br>\r\n \r\n\t<br> bar <br>\t\r\n", "foo\nbar"],

        // Block level elements create line breaks, inline elements do not
        [
            '<h1>Headline</h1>Text<ul><li>List 1</li><li>List 2</li></ul><p>Inline<span>text</span> and <a>link</a></p>',
            "Headline\nText\nList 1\nList 2\nInlinetext and link",
        ],
    ];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public function testDecodingAndCleanup(): void

$context = new ContaoWebpageResponseContext($pageModel);

$this->assertSame('We went from Alpha > Omega', $context->getTitle());
$this->assertSame('We went from Alpha > Omega ', $context->getTitle());
$this->assertSame('My description contains HTML.', $context->getMetaDescription());
}
}
6 changes: 3 additions & 3 deletions faq-bundle/src/Resources/contao/modules/ModuleFaqReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,16 @@ protected function compile()
}
elseif ($objFaq->question)
{
$responseContext->setTitle(StringUtil::decodeEntities(Controller::replaceInsertTags($objFaq->title)));
$responseContext->setTitle(StringUtil::getRawDecodedValue($objFaq->question));
}

if ($objFaq->description)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objFaq->description)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValue($objFaq->description));
}
elseif ($objFaq->question)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objFaq->question)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValue($objFaq->question));
}

if ($objFaq->robots)
Expand Down
6 changes: 3 additions & 3 deletions news-bundle/src/Resources/contao/modules/ModuleNewsReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,16 @@ protected function compile()
}
elseif ($objArticle->headline)
{
$responseContext->setTitle(StringUtil::decodeEntities(Controller::replaceInsertTags($objArticle->headline)));
$responseContext->setTitle(StringUtil::getRawDecodedValue($objArticle->headline));
}

if ($objArticle->description)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objArticle->description)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValue($objArticle->description));
}
elseif ($objArticle->teaser)
{
$responseContext->setMetaDescription(StringUtil::decodeEntities(Controller::replaceInsertTags($objArticle->teaser)));
$responseContext->setMetaDescription(StringUtil::getRawDecodedValueFromHtml($objArticle->teaser));
}

if ($objArticle->robots)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ protected function compile()

if ($responseContext instanceof WebpageResponseContext)
{
$responseContext->setTitle(Controller::replaceInsertTags($objNewsletter->subject)); // Already stored decoded
$responseContext->setTitle(StringUtil::getRawDecodedValue($objNewsletter->subject));
}
}

Expand Down

0 comments on commit bd10538

Please sign in to comment.