Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Escargot 0.6 compat and skip broken link checker #1788

Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
"symfony/var-dumper": "4.4.*",
"symfony/web-profiler-bundle": "4.4.*",
"symfony/yaml": "4.4.*",
"terminal42/escargot": "^0.5.1",
"terminal42/escargot": "^0.6.0",
"terminal42/service-annotation-bundle": "^1.0",
"toflar/psr6-symfony-http-cache-store": "^2.1",
"true/punycode": "^2.1",
Expand Down
2 changes: 1 addition & 1 deletion core-bundle/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
"symfony/twig-bundle": "4.4.*",
"symfony/var-dumper": "4.4.*",
"symfony/yaml": "4.4.*",
"terminal42/escargot": "^0.5.1",
"terminal42/escargot": "^0.6.0",
"terminal42/service-annotation-bundle": "^1.0",
"true/punycode": "^2.1",
"twig/twig": "^2.7",
Expand Down
12 changes: 8 additions & 4 deletions core-bundle/src/Crawl/Escargot/Factory.php
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,10 @@ public function getRootPageUriCollection(): BaseUriCollection
*/
public function create(BaseUriCollection $baseUris, QueueInterface $queue, array $selectedSubscribers, array $clientOptions = []): Escargot
{
$escargot = Escargot::create($baseUris, $queue, $this->createHttpClient($clientOptions));
$escargot = $escargot->withUserAgent(self::USER_AGENT);
$escargot = Escargot::create($baseUris, $queue)
->withHttpClient($this->createHttpClient($clientOptions))
->withUserAgent(self::USER_AGENT)
;

$this->registerDefaultSubscribers($escargot);
$this->registerSubscribers($escargot, $this->validateSubscribers($selectedSubscribers));
Expand All @@ -180,8 +182,10 @@ public function create(BaseUriCollection $baseUris, QueueInterface $queue, array
*/
public function createFromJobId(string $jobId, QueueInterface $queue, array $selectedSubscribers, array $clientOptions = []): Escargot
{
$escargot = Escargot::createFromJobId($jobId, $queue, $this->createHttpClient($clientOptions));
$escargot = $escargot->withUserAgent(self::USER_AGENT);
$escargot = Escargot::createFromJobId($jobId, $queue)
->withHttpClient($this->createHttpClient($clientOptions))
->withUserAgent(self::USER_AGENT)
;

$this->registerDefaultSubscribers($escargot);
$this->registerSubscribers($escargot, $this->validateSubscribers($selectedSubscribers));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class BrokenLinkCheckerSubscriber implements EscargotSubscriberInterface, Escarg
use LoggerAwareTrait;
use SubscriberLoggerTrait;

public const TAG_SKIP = 'skip-broken-link-checker';

/**
* @var TranslatorInterface
*/
Expand All @@ -55,6 +57,16 @@ public function getName(): string

public function shouldRequest(CrawlUri $crawlUri): string
{
if ($crawlUri->hasTag(self::TAG_SKIP)) {
$this->logWithCrawlUri(
$crawlUri,
LogLevel::DEBUG,
'Did not check because it was marked to be skipped using the data-skip-broken-link-checker attribute.'
);

return SubscriberInterface::DECISION_NEGATIVE;
}

// Only check URIs that are part of our base collection or were found on one
$fromBaseUriCollection = $this->escargot->getBaseUris()->containsHost($crawlUri->getUri()->getHost());

Expand Down
12 changes: 9 additions & 3 deletions core-bundle/tests/Command/CrawlCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public function testOptionsConfigureEscargotCorrectly(): void
$client = new MockHttpClient();

// Test defaults
$escargot = Escargot::create($this->createBaseUriCollection(), new InMemoryQueue(), $client);
$escargot = Escargot::create($this->createBaseUriCollection(), new InMemoryQueue())
->withHttpClient($client)
;
$escargotFactory = $this->createValidEscargotFactory($escargot);
$command = new CrawlCommand($escargotFactory, new Filesystem());

Expand All @@ -71,7 +73,9 @@ public function testOptionsConfigureEscargotCorrectly(): void
$this->assertSame(0, $command->getEscargot()->getMaxDepth());

// Test options
$escargot = Escargot::create($this->createBaseUriCollection(), new InMemoryQueue(), $client);
$escargot = Escargot::create($this->createBaseUriCollection(), new InMemoryQueue())
->withHttpClient($client)
;
$escargotFactory = $this->createValidEscargotFactory($escargot);
$command = new CrawlCommand($escargotFactory, new Filesystem());

Expand All @@ -92,7 +96,9 @@ public function testEmitsWarningIfLocalhostIsInCollection(): void

$baseUriCollection = new BaseUriCollection([new Uri('http://localhost')]);

$escargot = Escargot::create($baseUriCollection, new InMemoryQueue(), $client);
$escargot = Escargot::create($baseUriCollection, new InMemoryQueue())
->withHttpClient($client)
;
$escargotFactory = $this->createValidEscargotFactory($escargot, $baseUriCollection);
$command = new CrawlCommand($escargotFactory, new Filesystem());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ public function shouldRequestProvider(): \Generator
(new CrawlUri(new Uri('https://github.com'), 0, true)),
];

yield 'Test skips URIs that were marked to be skipped by the data attribue' => [
(new CrawlUri(new Uri('https://github.com/foobar'), 1, false, new Uri('https://github.com')))->addTag(BrokenLinkCheckerSubscriber::TAG_SKIP),
SubscriberInterface::DECISION_NEGATIVE,
LogLevel::DEBUG,
'Did not check because it was marked to be skipped using the data-skip-broken-link-checker attribute.',
(new CrawlUri(new Uri('https://github.com'), 0, true)),
];

yield 'Test requests if everything is okay' => [
(new CrawlUri(new Uri('https://contao.org/foobar'), 0)),
SubscriberInterface::DECISION_POSITIVE,
Expand Down