Skip to content

Commit

Permalink
Merge pull request from GHSA-9jh5-qf84-x6pr
Browse files Browse the repository at this point in the history
* Ensure confidential headers are not leaked to external URIs when crawling

* CS

---------

Co-authored-by: Leo Feyer <1192057+leofeyer@users.noreply.github.com>
  • Loading branch information
Toflar and leofeyer committed Apr 9, 2024
1 parent 878d28d commit 79b7620
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 22 deletions.
1 change: 1 addition & 0 deletions core-bundle/config/services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ services:
- '@database_connection'
- '@contao.framework'
- '@contao.routing.content_url_generator'
- '@request_stack'

contao.crawl.escargot.search_index_subscriber:
class: Contao\CoreBundle\Crawl\Escargot\Subscriber\SearchIndexSubscriber
Expand Down
5 changes: 1 addition & 4 deletions core-bundle/contao/classes/Crawl.php
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,9 @@ public function run()
}
else
{
// TODO: we need a way to authenticate with a token instead of our own cookie
$session = System::getContainer()->get('request_stack')->getSession();
$clientOptions = array('headers' => array('Cookie' => sprintf('%s=%s', $session->getName(), $session->getId())));

// Closing the session is necessary here as otherwise we run into our own session lock
// TODO: we need a way to authenticate with a token instead of our own cookie
$session->save();
}
}
else
Expand Down
93 changes: 82 additions & 11 deletions core-bundle/src/Crawl/Escargot/Factory.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
use Contao\PageModel;
use Doctrine\DBAL\Connection;
use Nyholm\Psr7\Uri;
use Psr\Http\Message\UriInterface;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Component\HttpClient\ScopingHttpClient;
use Symfony\Component\HttpFoundation\RequestStack;
use Symfony\Component\Routing\Exception\ExceptionInterface;
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
use Symfony\Component\Uid\Uuid;
Expand All @@ -43,15 +46,24 @@ class Factory
private array $subscribers = [];

/**
* @param array<string> $additionalUris
* @var \Closure(array<string, mixed>): HttpClientInterface|null
*/
private readonly \Closure|null $httpClientFactory;

/**
* @param array<string> $additionalUris
* @param \Closure(array<string, mixed>): HttpClientInterface|null $httpClientFactory
*/
public function __construct(
private readonly Connection $connection,
private readonly ContaoFramework $framework,
private readonly ContentUrlGenerator $urlGenerator,
private readonly RequestStack $requestStack,
private readonly array $additionalUris = [],
private readonly array $defaultHttpClientOptions = [],
\Closure|null $httpClientFactory = null,
) {
$this->httpClientFactory = $httpClientFactory ?? static fn (array $defaultOptions) => HttpClient::create($defaultOptions);
}

public function addSubscriber(EscargotSubscriberInterface $subscriber): self
Expand Down Expand Up @@ -162,18 +174,77 @@ public function createFromJobId(string $jobId, QueueInterface $queue, array $sel

private function createHttpClient(array $options = []): HttpClientInterface
{
return HttpClient::create(
array_merge_recursive(
[
'headers' => [
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent' => self::USER_AGENT,
],
'max_duration' => 10, // Ignore requests that take longer than 10 seconds
$options = array_merge_recursive(
[
'headers' => [
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent' => self::USER_AGENT,
],
array_merge_recursive($this->getDefaultHttpClientOptions(), $options),
),
'max_duration' => 10, // Ignore requests that take longer than 10 seconds
],
array_merge_recursive($this->getDefaultHttpClientOptions(), $options),
);

// Make sure confidential headers force a scoped client so external domains do
// not leak data
$cleanOptions = $this->cleanOptionsFromConfidentialData($options);

if ($options === $cleanOptions) {
return ($this->httpClientFactory)($options);
}

$scopedOptionsByRegex = [];

// All options including the confidential headers for our root page collection
foreach ($this->getRootPageUriCollection()->all() as $rootPageUri) {
$scopedOptionsByRegex[preg_quote($this->getOriginFromUri($rootPageUri))] = $options;
}

// Closing the session is necessary here as otherwise we might run into our own
// session lock
if ($this->requestStack->getMainRequest()?->hasSession()) {
$this->requestStack->getMainRequest()->getSession()->save();
}

return new ScopingHttpClient(($this->httpClientFactory)($cleanOptions), $scopedOptionsByRegex);
}

private function getOriginFromUri(UriInterface $uri): string
{
$origin = $uri->getScheme().'://'.$uri->getHost();

if ($uri->getPort()) {
$origin .= ':'.$uri->getPort();
}

return $origin.'/';
}

private function cleanOptionsFromConfidentialData(array $options): array
{
$cleanOptions = [];

foreach ($options as $k => $v) {
if ('headers' === $k) {
foreach ($v as $header => $value) {
if (\in_array(strtolower($header), ['authorization', 'cookie'], true)) {
continue;
}

$cleanOptions['headers'][$header] = $value;
}

continue;
}

if ('basic_auth' === $k || 'bearer_auth' === $k) {
continue;
}

$cleanOptions[$k] = $v;
}

return $cleanOptions;
}

private function registerDefaultSubscribers(Escargot $escargot): void
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/DependencyInjection/ContaoCoreExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ private function handleCrawlConfig(array $config, ContainerBuilder $container):
}

$factory = $container->getDefinition('contao.crawl.escargot.factory');
$factory->setArgument(3, $config['crawl']['additional_uris']);
$factory->setArgument(4, $config['crawl']['default_http_client_options']);
$factory->setArgument(4, $config['crawl']['additional_uris']);
$factory->setArgument(5, $config['crawl']['default_http_client_options']);
}

/**
Expand Down
88 changes: 85 additions & 3 deletions core-bundle/tests/Crawl/Escargot/FactoryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
use Contao\PageModel;
use Doctrine\DBAL\Connection;
use Nyholm\Psr7\Uri;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\MockResponse;
use Symfony\Component\HttpClient\ScopingHttpClient;
use Symfony\Component\HttpFoundation\RequestStack;
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
use Terminal42\Escargot\BaseUriCollection;
use Terminal42\Escargot\Queue\InMemoryQueue;
Expand All @@ -39,7 +43,7 @@ public function testHandlesSubscribersCorrectly(): void
->willReturn('subscriber-2')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class));
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class), new RequestStack());
$factory->addSubscriber($subscriber1);
$factory->addSubscriber($subscriber2);

Expand Down Expand Up @@ -71,6 +75,7 @@ public function testBuildsUriCollectionsCorrectly(): void
$this->createMock(Connection::class),
$this->mockContaoFramework([PageModel::class => $pageModelAdapter]),
$urlGenerator,
new RequestStack(),
['https://example.com'],
);

Expand All @@ -93,7 +98,7 @@ public function testCreatesEscargotCorrectlyWithNewJobId(): void
->willReturn('subscriber-1')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class));
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class), new RequestStack());
$factory->addSubscriber($subscriber1);

$uriCollection = new BaseUriCollection([new Uri('https://contao.org')]);
Expand All @@ -116,7 +121,7 @@ public function testCreatesEscargotCorrectlyWithExistingJobId(): void
->willReturn('subscriber-1')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class));
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), $this->createMock(ContentUrlGenerator::class), new RequestStack());
$factory->addSubscriber($subscriber1);

$queue = new InMemoryQueue();
Expand All @@ -132,4 +137,81 @@ public function testCreatesEscargotCorrectlyWithExistingJobId(): void
$escargot = $factory->createFromJobId($jobId, $queue, ['subscriber-8']);
$this->assertSame(Factory::USER_AGENT, $escargot->getUserAgent());
}

public function testScopesConfidentialHeadersAutomatically(): void
{
$expectedRequests = [
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://contao.org/robots.txt', $url);
$this->assertContains('Cookie: Confidential', $options['headers']);
$this->assertContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://contao.de/robots.txt', $url);
$this->assertContains('Cookie: Confidential', $options['headers']);
$this->assertContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://www.foreign-domain.com/robots.txt', $url);
$this->assertNotContains('Cookie: Confidential', $options['headers']);
$this->assertNotContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
];

$mockClient = new MockHttpClient($expectedRequests);
$clientFactory = static fn (array $defaultOptions) => $mockClient;

$rootPage1 = $this->mockClassWithProperties(PageModel::class, ['dns' => 'contao.org']);
$rootPage2 = $this->mockClassWithProperties(PageModel::class, ['dns' => 'contao.de']);

$urlGenerator = $this->createMock(ContentUrlGenerator::class);
$urlGenerator
->method('generate')
->willReturnCallback(static fn (PageModel $rootPage) => 'https://'.$rootPage->dns)
;

$pageModelAdapter = $this->mockAdapter(['findPublishedRootPages']);
$pageModelAdapter
->method('findPublishedRootPages')
->willReturn([$rootPage1, $rootPage2])
;

$subscriber1 = $this->createMock(EscargotSubscriberInterface::class);
$subscriber1
->method('getName')
->willReturn('subscriber-1')
;

$factory = new Factory(
$this->createMock(Connection::class),
$this->mockContaoFramework([PageModel::class => $pageModelAdapter]),
$urlGenerator,
new RequestStack(),
['https://www.foreign-domain.com'],
[
'headers' => [
'Cookie' => 'Confidential',
],
'auth_basic' => 'username:password',
],
$clientFactory,
);

$factory->addSubscriber($subscriber1);

$escargot = $factory->create($factory->getCrawlUriCollection(), new InMemoryQueue(), ['subscriber-1']);
$escargot->crawl();

$this->assertSame(3, $mockClient->getRequestsCount());
$this->assertInstanceOf(ScopingHttpClient::class, $escargot->getClient());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ public function testSetsTheCrawlOptionsOnTheEscargotFactory(): void

$definition = $container->getDefinition('contao.crawl.escargot.factory');

$this->assertSame(['https://example.com'], $definition->getArgument(3));
$this->assertSame(['proxy' => 'http://localhost:7080', 'headers' => ['Foo' => 'Bar']], $definition->getArgument(4));
$this->assertSame(['https://example.com'], $definition->getArgument(4));
$this->assertSame(['proxy' => 'http://localhost:7080', 'headers' => ['Foo' => 'Bar']], $definition->getArgument(5));
}

public function testConfiguresTheBackupManagerCorrectly(): void
Expand Down

0 comments on commit 79b7620

Please sign in to comment.