Skip to content

Commit

Permalink
Do not index preview URLs for searching
Browse files: browse the repository at this point in the history
  • Loading branch information
leofeyer committed Dec 8, 2020
1 parent 946940d commit 17d0e8a
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 1 deletion.
21 changes: 20 additions & 1 deletion core-bundle/src/EventListener/SearchIndexListener.php
Expand Up @@ -71,7 +71,26 @@ public function __invoke(TerminateEvent $event): void
return;
}

$document = Document::createFromRequestResponse($request, $event->getResponse());
$response = $event->getResponse();

// Do not index if the X-Robots-Tag header contains "noindex"
if (false !== strpos($response->headers->get('X-Robots-Tag', ''), 'noindex')) {
return;
}

$document = Document::createFromRequestResponse($request, $response);

try {
$robots = $document->getContentCrawler()->filterXPath('//head/meta[@name="robots"]')->first()->attr('content');

// Do not index if the meta robots tag contains "noindex"
if (false !== strpos($robots, 'noindex')) {
return;
}
} catch (\Exception $e) {
// No meta robots tag found
}

$lds = $document->extractJsonLdScripts();

// If there are no json ld scripts at all, this should not be handled by our indexer
Expand Down
19 changes: 19 additions & 0 deletions core-bundle/tests/EventListener/SearchIndexListenerTest.php
Expand Up @@ -121,5 +121,24 @@ public function getRequestResponse(): \Generator
false,
false,
];

$response = new Response('<html><body><script type="application/ld+json">{"@context":"https:\/\/contao.org\/","@type":"Page","pageId":2,"noSearch":false,"protected":false,"groups":[],"fePreview":false}</script></body></html>', 403);
$response->headers->set('X-Robots-Tag', 'noindex');

yield 'Should not be handled because the X-Robots-Tag header contains "noindex" ' => [
Request::create('/foobar'),
$response,
SearchIndexListener::FEATURE_DELETE | SearchIndexListener::FEATURE_INDEX,
false,
false,
];

yield 'Should not be handled because the meta robots tag contains "noindex" ' => [
Request::create('/foobar'),
new Response('<html><head><meta name="robots" content="noindex,nofollow"/></head><body><script type="application/ld+json">{"@context":"https:\/\/contao.org\/","@type":"Page","pageId":2,"noSearch":false,"protected":false,"groups":[],"fePreview":false}</script></body></html>', 403),
SearchIndexListener::FEATURE_DELETE | SearchIndexListener::FEATURE_INDEX,
false,
false,
];
}
}
3 changes: 3 additions & 0 deletions manager-bundle/src/Resources/skeleton/web/preview.php
Expand Up @@ -30,6 +30,9 @@
// Create the kernel from the current request and let it handle the request
$kernel = ContaoKernel::fromRequest(\dirname(__DIR__), $request);
$response = $kernel->handle($request);

// Prevent preview URLs from being indexed: every response of the preview
// front controller is flagged with "X-Robots-Tag: noindex". The search index
// listener returns early (does not index) when this header contains "noindex".
$response->headers->set('X-Robots-Tag', 'noindex');

// Force no-cache on all responses in the preview front controller
$response->headers->set('Cache-Control', 'no-store');

Expand Down

0 comments on commit 17d0e8a

Please sign in to comment.