Skip to content

Commit

Permalink
Ensure clean response context and unify meta handling (see #3014)
Browse files Browse the repository at this point in the history
Description
-----------

This PR

* ensures the new `ResponseContext` does contain clean and unencoded values in preparation for the future (one thing less to migrate)
* unifies the handling of insert tags for meta description. Sometimes it was stripped, sometimes replaced

Commits
-------

3b9922c Ensure clean response context and unify meta handling
6b28659 Move description shortening to template
ee9eedf Add StringUtil::getRawDecodedValue()
f7969ba Add StringUtil::getRawDecodedValueFromHtml()
5821b91 Test and fix getRawDecodedValueFromHtml method
84a80bd Fix tests
7980cda Improve insert tag braces encoding
bd10538 Merge pull request #2 from ausi/fix/response-context-encoding
1054d98 Use our own string util to shorten the description
a5586c0 CS
3bd6984 Improve string util test
cbd11c4 Merge pull request #3 from ausi/fix/improve-string-util-test
4dc2d38 CS
5a5578f Rename to StringUtil methods
207df74 CS
  • Loading branch information
Toflar committed Jun 7, 2021
1 parent e14b8a6 commit 5fa1341
Show file tree
Hide file tree
Showing 14 changed files with 221 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,20 +136,20 @@ protected function compile()
{
if ($objEvent->pageTitle)
{
$responseContext->setTitle($objEvent->pageTitle);
$responseContext->setTitle($objEvent->pageTitle); // Already stored decoded
}
elseif ($objEvent->title)
{
$responseContext->setTitle(strip_tags(StringUtil::stripInsertTags($objEvent->title)));
$responseContext->setTitle(StringUtil::inputEncodedToPlainText($objEvent->title));
}

if ($objEvent->description)
{
$responseContext->setMetaDescription($objEvent->description);
$responseContext->setMetaDescription(StringUtil::inputEncodedToPlainText($objEvent->description));
}
elseif ($objEvent->teaser)
{
$responseContext->setMetaDescription($this->prepareMetaDescription($objEvent->teaser));
$responseContext->setMetaDescription(StringUtil::htmlToPlainText($objEvent->teaser));
}

if ($objEvent->robots)
Expand Down
5 changes: 5 additions & 0 deletions core-bundle/src/Resources/contao/classes/Frontend.php
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,14 @@ public static function getMetaData($strData, $strLanguage)
* @param string $strText
*
* @return string
*
* @deprecated Deprecated since Contao 4.12, to be removed in Contao 5.0.
* Use StringUtil::htmlToPlainText() instead.
*/
protected function prepareMetaDescription($strText)
{
trigger_deprecation('contao/core-bundle', '4.12', 'Using "Contao\Frontend::prepareMetaDescription()" has been deprecated and will no longer work Contao 5.0. Use Contao\StringUtil::htmlToPlainText() instead.');

$strText = $this->replaceInsertTags($strText, false);
$strText = strip_tags($strText);
$strText = str_replace("\n", ' ', $strText);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -739,11 +739,11 @@ protected function doReplace($strBuffer, $blnCache)
switch ($elements[1])
{
case 'pageTitle':
$arrCache[$strTag] = $responseContext->getTitle();
$arrCache[$strTag] = htmlspecialchars($responseContext->getTitle());
break;

case 'description':
$arrCache[$strTag] = $responseContext->getMetaDescription();
$arrCache[$strTag] = htmlspecialchars($responseContext->getMetaDescription());
break;
}
}
Expand Down
84 changes: 84 additions & 0 deletions core-bundle/src/Resources/contao/library/Contao/StringUtil.php
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,90 @@ public static function ampersand($strString, $blnEncode=true): string
{
return preg_replace('/&(amp;)?/i', ($blnEncode ? '&' : '&'), $strString);
}

/**
* Convert an input-encoded string back to the raw UTF-8 value it originated from
*
* It handles all Contao input encoding specifics like basic entities and encoded entities.
*/
public static function revertInputEncoding(string $strValue): string
{
$strValue = static::restoreBasicEntities($strValue);
$strValue = static::decodeEntities($strValue);

// Ensure valid UTF-8
if (preg_match('//u', $strValue) !== 1)
{
$substituteCharacter = mb_substitute_character();
mb_substitute_character(0xFFFD);

$strValue = mb_convert_encoding($strValue, 'UTF-8', 'UTF-8');

mb_substitute_character($substituteCharacter);
}

return $strValue;
}

/**
* Convert an input-encoded string to plain text UTF-8
*
* Strips or replaces insert tags, strips HTML tags, decodes entities, escapes insert tag braces.
*
* @see StringUtil::revertInputEncoding()
*
* @param bool $blnRemoveInsertTags True to remove insert tags instead of replacing them
*/
public static function inputEncodedToPlainText(string $strValue, bool $blnRemoveInsertTags = false): string
{
if ($blnRemoveInsertTags)
{
$strValue = static::stripInsertTags($strValue);
}
else
{
$strValue = Controller::replaceInsertTags($strValue, false);
}

$strValue = strip_tags($strValue);
$strValue = static::revertInputEncoding($strValue);
$strValue = str_replace(array('{{', '}}'), array('[{]', '[}]'), $strValue);

return $strValue;
}

/**
* Convert an HTML string to plain text with normalized white space
*
* It handles all Contao input encoding specifics like insert tags, basic
* entities and encoded entities and is meant to be used with content from
* fields that have the allowHtml flag enabled.
*
* @see StringUtil::inputEncodedToPlainText()
*
* @param bool $blnRemoveInsertTags True to remove insert tags instead of replacing them
*/
public static function htmlToPlainText(string $strValue, bool $blnRemoveInsertTags = false): string
{
if (!$blnRemoveInsertTags)
{
$strValue = Controller::replaceInsertTags($strValue, false);
}

// Add new lines before and after block level elements
$strValue = preg_replace(
array('/[\r\n]+/', '/<\/?(?:br|blockquote|div|dl|figcaption|figure|footer|h\d|header|hr|li|p|pre|tr)\b/i'),
array(' ', "\n$0"),
$strValue
);

$strValue = static::inputEncodedToPlainText($strValue, true);

// Remove duplicate line breaks and spaces
$strValue = trim(preg_replace(array('/[^\S\n]+/', '/\s*\n\s*/'), array(' ', "\n"), $strValue));

return $strValue;
}
}

class_alias(StringUtil::class, 'StringUtil');
4 changes: 2 additions & 2 deletions core-bundle/src/Resources/contao/models/PageModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -314,11 +314,11 @@ public function __set($strKey, $varValue)
switch ($strKey)
{
case 'pageTitle':
$responseContext->setTitle($varValue);
$responseContext->setTitle(StringUtil::inputEncodedToPlainText($varValue ?? ''));
break;

case 'description':
$responseContext->setMetaDescription($varValue);
$responseContext->setMetaDescription(StringUtil::inputEncodedToPlainText($varValue ?? ''));
break;

case 'robots':
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/Resources/contao/modules/ModuleArticle.php
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,11 @@ protected function compile()

if ($responseContext instanceof WebpageResponseContext)
{
$responseContext->setTitle(strip_tags(StringUtil::stripInsertTags($this->title)));
$responseContext->setTitle(StringUtil::inputEncodedToPlainText($this->title ?? ''));

if ($this->teaser)
{
$responseContext->setMetaDescription($this->prepareMetaDescription($this->teaser));
$responseContext->setMetaDescription(StringUtil::htmlToPlainText($this->teaser));
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/Resources/contao/pages/PageRegular.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ protected function prepare($objPage)

// Set the page title and description AFTER the modules have been generated
$this->Template->mainTitle = $objPage->rootPageTitle;
$this->Template->pageTitle = $responseContext->getTitle();
$this->Template->pageTitle = htmlspecialchars($responseContext->getTitle());

// Remove shy-entities (see #2709)
$this->Template->mainTitle = str_replace('[-]', '', $this->Template->mainTitle);
Expand All @@ -221,7 +221,7 @@ protected function prepare($objPage)

// Assign the title and description
$this->Template->title = strip_tags($this->replaceInsertTags($objLayout->titleTag));
$this->Template->description = str_replace(array("\n", "\r", '"'), array(' ', '', ''), $responseContext->getMetaDescription());
$this->Template->description = htmlspecialchars($responseContext->getMetaDescription());

// Body onload and body classes
$this->Template->onload = trim($objLayout->onload);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<?php $this->block('meta'); ?>
<meta name="robots" content="<?= $this->robots ?>">
<meta name="description" content="<?= $this->description ?>">
<meta name="description" content="<?= Contao\StringUtil::substr($this->description, 320) ?>">
<meta name="generator" content="Contao Open Source CMS">
<?php $this->endblock(); ?>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@
namespace Contao\CoreBundle\Routing\ResponseContext;

use Contao\PageModel;
use Contao\StringUtil;

class ContaoWebpageResponseContext extends WebpageResponseContext
{
public function __construct(PageModel $pageModel)
{
$title = $pageModel->pageTitle ?: StringUtil::inputEncodedToPlainText($pageModel->title ?: '');

$this
->setTitle($pageModel->pageTitle ?: $pageModel->title ?: '')
->setMetaDescription(str_replace(["\n", "\r", '"'], [' ', '', ''], $pageModel->description ?: ''))
->setTitle($title ?: '')
->setMetaDescription(StringUtil::inputEncodedToPlainText($pageModel->description ?: ''))
;

if ($pageModel->robots) {
Expand Down
92 changes: 92 additions & 0 deletions core-bundle/tests/Contao/StringUtilTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@

namespace Contao\CoreBundle\Tests\Contao;

use Contao\CoreBundle\Security\Authentication\Token\TokenChecker;
use Contao\CoreBundle\Tests\TestCase;
use Contao\Input;
use Contao\StringUtil;
use Contao\System;
use Psr\Log\NullLogger;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\HttpFoundation\RequestStack;

class StringUtilTest extends TestCase
{
Expand All @@ -26,6 +29,10 @@ protected function setUp(): void

$container = new ContainerBuilder();
$container->setParameter('kernel.project_dir', $this->getFixturesDir());
$container->setParameter('kernel.cache_dir', $this->getFixturesDir().'/cache');
$container->setParameter('kernel.debug', false);
$container->set('request_stack', new RequestStack());
$container->set('contao.security.token_checker', $this->createMock(TokenChecker::class));
$container->set('monolog.logger.contao', new NullLogger());

System::setContainer($container);
Expand Down Expand Up @@ -142,4 +149,89 @@ public function trimsplitProvider(): \Generator
['foo', 'bar'],
];
}

/**
* @dataProvider getRevertInputEncoding
*/
public function testRevertInputEncoding(string $source, string $expected = null): void
{
Input::setGet('value', $source);
$inputEncoded = Input::get('value');
Input::setGet('value', null);

// Test input encoding round trip
$this->assertSame($expected ?? $source, StringUtil::revertInputEncoding($inputEncoded));
}

public function getRevertInputEncoding(): \Generator
{
yield ['foobar'];
yield ['foo{{email::test@example.com}}bar'];
yield ['{{date::...}}'];
yield ["<>&\u{A0}<>&\u{A0}"];
yield ['I <3 Contao'];
yield ['Remove unexpected <span>HTML tags'];
yield ['Keep non-HTML <tags> intact'];
yield ['Basic [&] entities [nbsp]', "Basic & entities \u{A0}"];
yield ["Cont\xE4o invalid UTF-8", "Cont\u{FFFD}o invalid UTF-8"];
}

/**
* @dataProvider getInputEncodedToPlainText
*/
public function testInputEncodedToPlainText(string $source, string $expected, bool $removeInsertTags = false): void
{
$this->assertSame($expected, StringUtil::inputEncodedToPlainText($source, $removeInsertTags));

Input::setGet('value', $expected);
$inputEncoded = Input::get('value');
Input::setGet('value', null);

// Test input encoding round trip
$this->assertSame($expected, StringUtil::inputEncodedToPlainText($inputEncoded, true));
$this->assertSame($expected, StringUtil::inputEncodedToPlainText($inputEncoded, false));
}

public function getInputEncodedToPlainText(): \Generator
{
yield ['foobar', 'foobar'];
yield ['foo{{email::test@example.com}}bar', 'footest@example.combar'];
yield ['foo{{email::test@example.com}}bar', 'foobar', true];
yield ['{{date::...}}', '...'];
yield ['{{date::...}}', '', true];
yield ["&lt;&#62;&\u{A0}[lt][gt][&][nbsp]", "<>&\u{A0}<>&\u{A0}", true];
yield ['I &lt;3 Contao', 'I <3 Contao'];
yield ['Remove unexpected <span>HTML tags', 'Remove unexpected HTML tags'];
yield ['Keep non-HTML &lt;tags&#62; intact', 'Keep non-HTML <tags> intact'];
yield ["Cont\xE4o invalid UTF-8", "Cont\u{FFFD}o invalid UTF-8"];
yield ['&#123;&#123;date&#125;&#125;', '[{]date[}]'];
}

/**
* @dataProvider getHtmlToPlainText
*/
public function testHtmlToPlainText(string $source, string $expected, bool $removeInsertTags = false): void
{
$this->assertSame($expected, StringUtil::htmlToPlainText($source, $removeInsertTags));

Input::setPost('value', str_replace(['&#123;&#123;', '&#125;&#125;'], ['[{]', '[}]'], $source));
$inputXssStripped = str_replace(['&#123;&#123;', '&#125;&#125;'], ['{{', '}}'], Input::postHtml('value', true));
Input::setPost('value', null);

$this->assertSame($expected, StringUtil::htmlToPlainText($inputXssStripped, $removeInsertTags));
}

public function getHtmlToPlainText(): \Generator
{
yield from $this->getInputEncodedToPlainText();

yield ['foo<br>bar{{br}}baz', "foo\nbar\nbaz"];
yield [" \t\r\nfoo \t\r\n \r\n\t bar \t\r\n", 'foo bar'];
yield [" \t\r\n<br>foo \t<br>\r\n \r\n\t<br> bar <br>\t\r\n", "foo\nbar"];

yield [
'<h1>Headline</h1>Text<ul><li>List 1</li><li>List 2</li></ul><p>Inline<span>text</span> and <a>link</a></p><div><div><div>single newline',
"Headline\nText\nList 1\nList 2\nInlinetext and link\nsingle newline",
];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,17 @@ public function testResponseContext(): void
$this->assertSame('My description', $context->getMetaDescription());
$this->assertSame('noindex,nofollow', $context->getMetaRobots());
}

public function testDecodingAndCleanup(): void
{
/** @var PageModel $pageModel */
$pageModel = $this->mockClassWithProperties(PageModel::class);
$pageModel->title = 'We went from Alpha &#62; Omega ';
$pageModel->description = 'My description <strong>contains</strong> HTML<br>.';

$context = new ContaoWebpageResponseContext($pageModel);

$this->assertSame('We went from Alpha > Omega ', $context->getTitle());
$this->assertSame('My description contains HTML.', $context->getMetaDescription());
}
}
8 changes: 4 additions & 4 deletions faq-bundle/src/Resources/contao/modules/ModuleFaqReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,20 @@ protected function compile()
{
if ($objFaq->pageTitle)
{
$responseContext->setTitle($objFaq->pageTitle);
$responseContext->setTitle($objFaq->pageTitle); // Already stored decoded
}
elseif ($objFaq->question)
{
$responseContext->setTitle(strip_tags(StringUtil::stripInsertTags($objFaq->question)));
$responseContext->setTitle(StringUtil::inputEncodedToPlainText($objFaq->question));
}

if ($objFaq->description)
{
$responseContext->setMetaDescription($objFaq->description);
$responseContext->setMetaDescription(StringUtil::inputEncodedToPlainText($objFaq->description));
}
elseif ($objFaq->question)
{
$responseContext->setMetaDescription($this->prepareMetaDescription($objFaq->question));
$responseContext->setMetaDescription(StringUtil::inputEncodedToPlainText($objFaq->question));
}

if ($objFaq->robots)
Expand Down

0 comments on commit 5fa1341

Please sign in to comment.