Skip to content

Commit

Permalink
Merge pull request from GHSA-9jh5-qf84-x6pr
Browse files Browse the repository at this point in the history
* Ensure confidential headers are not leaked to external URIs when crawling

* CS

---------

Co-authored-by: Leo Feyer <1192057+leofeyer@users.noreply.github.com>
  • Loading branch information
Toflar and leofeyer committed Apr 9, 2024
1 parent b794e14 commit 73a2770
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 22 deletions.
92 changes: 81 additions & 11 deletions core-bundle/src/Crawl/Escargot/Factory.php
Expand Up @@ -17,7 +17,10 @@
use Contao\PageModel;
use Doctrine\DBAL\Connection;
use Nyholm\Psr7\Uri;
use Psr\Http\Message\UriInterface;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Component\HttpClient\ScopingHttpClient;
use Symfony\Component\HttpFoundation\RequestStack;
use Symfony\Component\Uid\Uuid;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Terminal42\Escargot\BaseUriCollection;
Expand All @@ -36,6 +39,7 @@ class Factory

private Connection $connection;
private ContaoFramework $framework;
private RequestStack $requestStack;
private array $defaultHttpClientOptions;

/**
Expand All @@ -48,12 +52,19 @@ class Factory
*/
private array $subscribers = [];

public function __construct(Connection $connection, ContaoFramework $framework, array $additionalUris = [], array $defaultHttpClientOptions = [])
/**
* @var \Closure(array<string, mixed>): HttpClientInterface|null
*/
private ?\Closure $httpClientFactory;

public function __construct(Connection $connection, ContaoFramework $framework, RequestStack $requestStack, array $additionalUris = [], array $defaultHttpClientOptions = [], \Closure $httpClientFactory = null)
{
$this->connection = $connection;
$this->framework = $framework;
$this->requestStack = $requestStack;
$this->additionalUris = $additionalUris;
$this->defaultHttpClientOptions = $defaultHttpClientOptions;
$this->httpClientFactory = $httpClientFactory ?? static fn (array $defaultOptions) => HttpClient::create($defaultOptions);
}

public function addSubscriber(EscargotSubscriberInterface $subscriber): self
Expand Down Expand Up @@ -163,18 +174,77 @@ public function createFromJobId(string $jobId, QueueInterface $queue, array $sel

private function createHttpClient(array $options = []): HttpClientInterface
{
return HttpClient::create(
array_merge_recursive(
[
'headers' => [
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent' => self::USER_AGENT,
],
'max_duration' => 10, // Ignore requests that take longer than 10 seconds
$options = array_merge_recursive(
[
'headers' => [
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent' => self::USER_AGENT,
],
array_merge_recursive($this->getDefaultHttpClientOptions(), $options)
)
'max_duration' => 10, // Ignore requests that take longer than 10 seconds
],
array_merge_recursive($this->getDefaultHttpClientOptions(), $options)
);

// Make sure confidential headers force a scoped client so external domains do not leak data
$cleanOptions = $this->cleanOptionsFromConfidentialData($options);

if ($options === $cleanOptions) {
return ($this->httpClientFactory)($options);
}

$scopedOptionsByRegex = [];

// All options including the confidential headers for our root page collection
foreach ($this->getRootPageUriCollection()->all() as $rootPageUri) {
$scopedOptionsByRegex[preg_quote($this->getOriginFromUri($rootPageUri))] = $options;
}

// Closing the session is necessary here as otherwise we might run into our own session lock
$request = $this->requestStack->getMainRequest();

if ($request && $request->hasSession()) {
$request->getSession()->save();
}

return new ScopingHttpClient(($this->httpClientFactory)($cleanOptions), $scopedOptionsByRegex);
}

private function getOriginFromUri(UriInterface $uri): string
{
$origin = $uri->getScheme().'://'.$uri->getHost();

if ($uri->getPort()) {
$origin .= ':'.$uri->getPort();
}

return $origin.'/';
}

private function cleanOptionsFromConfidentialData(array $options): array
{
$cleanOptions = [];

foreach ($options as $k => $v) {
if ('headers' === $k) {
foreach ($v as $header => $value) {
if (\in_array(strtolower($header), ['authorization', 'cookie'], true)) {
continue;
}

$cleanOptions['headers'][$header] = $value;
}

continue;
}

if ('basic_auth' === $k || 'bearer_auth' === $k) {
continue;
}

$cleanOptions[$k] = $v;
}

return $cleanOptions;
}

private function registerDefaultSubscribers(Escargot $escargot): void
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/DependencyInjection/ContaoCoreExtension.php
Expand Up @@ -277,8 +277,8 @@ private function handleCrawlConfig(array $config, ContainerBuilder $container):
}

$factory = $container->getDefinition('contao.crawl.escargot.factory');
$factory->setArgument(2, $config['crawl']['additional_uris']);
$factory->setArgument(3, $config['crawl']['default_http_client_options']);
$factory->setArgument(3, $config['crawl']['additional_uris']);
$factory->setArgument(4, $config['crawl']['default_http_client_options']);
}

/**
Expand Down
1 change: 1 addition & 0 deletions core-bundle/src/Resources/config/services.yml
Expand Up @@ -82,6 +82,7 @@ services:
arguments:
- '@database_connection'
- '@contao.framework'
- '@request_stack'

contao.crawl.escargot.search_index_subscriber:
class: Contao\CoreBundle\Crawl\Escargot\Subscriber\SearchIndexSubscriber
Expand Down
5 changes: 1 addition & 4 deletions core-bundle/src/Resources/contao/classes/Crawl.php
Expand Up @@ -125,12 +125,9 @@ public function run()
}
else
{
// TODO: we need a way to authenticate with a token instead of our own cookie
$session = System::getContainer()->get('session');
$clientOptions = array('headers' => array('Cookie' => sprintf('%s=%s', $session->getName(), $session->getId())));

// Closing the session is necessary here as otherwise we run into our own session lock
// TODO: we need a way to authenticate with a token instead of our own cookie
$session->save();
}
}
else
Expand Down
90 changes: 87 additions & 3 deletions core-bundle/tests/Crawl/Escargot/FactoryTest.php
Expand Up @@ -18,6 +18,10 @@
use Contao\PageModel;
use Doctrine\DBAL\Connection;
use Nyholm\Psr7\Uri;
use Symfony\Component\HttpClient\MockHttpClient;
use Symfony\Component\HttpClient\Response\MockResponse;
use Symfony\Component\HttpClient\ScopingHttpClient;
use Symfony\Component\HttpFoundation\RequestStack;
use Terminal42\Escargot\BaseUriCollection;
use Terminal42\Escargot\Queue\InMemoryQueue;

Expand All @@ -37,7 +41,7 @@ public function testHandlesSubscribersCorrectly(): void
->willReturn('subscriber-2')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework());
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), new RequestStack());
$factory->addSubscriber($subscriber1);
$factory->addSubscriber($subscriber2);

Expand Down Expand Up @@ -65,6 +69,7 @@ public function testBuildsUriCollectionsCorrectly(): void
$factory = new Factory(
$this->createMock(Connection::class),
$this->mockContaoFramework([PageModel::class => $pageModelAdapter]),
new RequestStack(),
['https://example.com']
);

Expand All @@ -87,7 +92,7 @@ public function testCreatesEscargotCorrectlyWithNewJobId(): void
->willReturn('subscriber-1')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework());
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), new RequestStack());
$factory->addSubscriber($subscriber1);

$uriCollection = new BaseUriCollection([new Uri('https://contao.org')]);
Expand All @@ -110,7 +115,7 @@ public function testCreatesEscargotCorrectlyWithExistingJobId(): void
->willReturn('subscriber-1')
;

$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework());
$factory = new Factory($this->createMock(Connection::class), $this->mockContaoFramework(), new RequestStack());
$factory->addSubscriber($subscriber1);

$queue = new InMemoryQueue();
Expand All @@ -126,4 +131,83 @@ public function testCreatesEscargotCorrectlyWithExistingJobId(): void
$escargot = $factory->createFromJobId($jobId, $queue, ['subscriber-8']);
$this->assertSame(Factory::USER_AGENT, $escargot->getUserAgent());
}

public function testScopesConfidentialHeadersAutomatically(): void
{
$expectedRequests = [
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://contao.org/robots.txt', $url);
$this->assertContains('Cookie: Confidential', $options['headers']);
$this->assertContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://contao.de/robots.txt', $url);
$this->assertContains('Cookie: Confidential', $options['headers']);
$this->assertContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
function (string $method, string $url, array $options): MockResponse {
$this->assertSame('GET', $method);
$this->assertSame('https://www.foreign-domain.com/robots.txt', $url);
$this->assertNotContains('Cookie: Confidential', $options['headers']);
$this->assertNotContains('Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=', $options['headers']);

return new MockResponse();
},
];

$mockClient = new MockHttpClient($expectedRequests);
$clientFactory = static fn (array $defaultOptions) => $mockClient;

$rootPage1 = $this->createMock(PageModel::class);
$rootPage1
->method('getAbsoluteUrl')
->willReturn('https://contao.org')
;

$rootPage2 = $this->createMock(PageModel::class);
$rootPage2
->method('getAbsoluteUrl')
->willReturn('https://contao.de')
;

$pageModelAdapter = $this->mockAdapter(['findPublishedRootPages']);
$pageModelAdapter
->method('findPublishedRootPages')
->willReturn([$rootPage1, $rootPage2])
;

$subscriber1 = $this->createMock(EscargotSubscriberInterface::class);
$subscriber1
->method('getName')
->willReturn('subscriber-1')
;

$factory = new Factory(
$this->createMock(Connection::class),
$this->mockContaoFramework([PageModel::class => $pageModelAdapter]),
new RequestStack(),
['https://www.foreign-domain.com'],
[
'headers' => [
'Cookie' => 'Confidential',
],
'auth_basic' => 'username:password',
],
$clientFactory
);

$factory->addSubscriber($subscriber1);

$escargot = $factory->create($factory->getCrawlUriCollection(), new InMemoryQueue(), ['subscriber-1']);
$escargot->crawl();

$this->assertSame(3, $mockClient->getRequestsCount());
$this->assertInstanceOf(ScopingHttpClient::class, $escargot->getClient());
}
}
Expand Up @@ -338,8 +338,8 @@ public function testSetsTheCrawlOptionsOnTheEscargotFactory(): void

$definition = $container->getDefinition('contao.crawl.escargot.factory');

$this->assertSame(['https://example.com'], $definition->getArgument(2));
$this->assertSame(['proxy' => 'http://localhost:7080', 'headers' => ['Foo' => 'Bar']], $definition->getArgument(3));
$this->assertSame(['https://example.com'], $definition->getArgument(3));
$this->assertSame(['proxy' => 'http://localhost:7080', 'headers' => ['Foo' => 'Bar']], $definition->getArgument(4));
}

public function testConfiguresTheBackupManagerCorrectly(): void
Expand Down

0 comments on commit 73a2770

Please sign in to comment.