Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the back end crawler configurable #6495

Merged
merged 10 commits into from
Nov 9, 2023
48 changes: 45 additions & 3 deletions core-bundle/contao/classes/Crawl.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ public function run()

$factory = System::getContainer()->get('contao.crawl.escargot.factory');
$subscriberNames = $factory->getSubscriberNames();

$subscribersWidget = $this->generateSubscribersWidget($subscriberNames);
$maxDepthWidget = $this->generateMaxDepthWidget();
$memberWidget = null;

if (System::getContainer()->getParameter('contao.search.index_protected'))
Expand All @@ -73,6 +75,7 @@ public function run()
$template = new BackendTemplate('be_crawl');
$template->isActive = $this->isActive();
$template->subscribersWidget = $subscribersWidget;
$template->maxDepthWidget = $maxDepthWidget;
$template->memberWidget = $memberWidget;

if (!$this->isActive())
Expand Down Expand Up @@ -188,13 +191,17 @@ public function run()
Controller::redirect(str_replace('&jobId=' . $jobId, '', Environment::get('requestUri')));
}

$concurrency = System::getContainer()->getParameter('contao.backend.crawl_concurrency');

// Configure the crawler for the back end: the concurrency is read from the contao.backend.crawl_concurrency parameter and the maximum depth from the form widget
$escargot = $escargot
->withConcurrency(5)
->withMaxDepth(10)
->withMaxRequests(20)
->withConcurrency($concurrency)
->withMaxDepth($maxDepthWidget->value)
->withMaxRequests(20 * $concurrency)
->withLogger($this->createLogger($factory, $activeSubscribers, $jobId, $debugLogPath));

$template->hint = sprintf($GLOBALS['TL_LANG']['tl_maintenance']['crawlHint'], $concurrency, 'contao.backend.crawl_concurrency');

if (Environment::get('isAjaxRequest'))
{
// Start crawling
Expand Down Expand Up @@ -360,6 +367,41 @@ private function generateSubscribersWidget(array $subscriberNames): Widget
return $widget;
}

/**
 * Builds the select menu used to choose the maximum crawl depth.
 *
 * The menu offers the depths 3 to 10. When isActive() returns true, the
 * widget is validated and $this->valid is set to false if the validation
 * reported errors.
 *
 * @return Widget The configured (and possibly validated) select menu
 */
private function generateMaxDepthWidget(): Widget
{
    $fieldName = 'crawl_depth';

    // One option per selectable depth, keyed by the depth itself
    $depthOptions = array();

    foreach (range(3, 10) as $depth)
    {
        $depthOptions[$depth] = array('value' => $depth, 'label' => $depth);
    }

    $select = new SelectMenu();
    $select->id = $fieldName;
    $select->name = $fieldName;
    $select->label = $GLOBALS['TL_LANG']['tl_maintenance']['crawlDepth'][0];
    $select->setInputCallback($this->getInputCallback($fieldName));
    $select->options = $depthOptions;

    // Nothing to validate unless isActive() reports true
    if (!$this->isActive())
    {
        return $select;
    }

    $select->validate();

    if ($select->hasErrors())
    {
        $this->valid = false;
    }

    return $select;
}

private function generateMemberWidget(): Widget
{
$name = 'crawl_member';
Expand Down
9 changes: 9 additions & 0 deletions core-bundle/contao/languages/en/tl_maintenance.xlf
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,21 @@
<trans-unit id="tl_maintenance.crawlSubscribers.1">
<source>The crawler crawls all URLs it finds. Here you can decide what to do with these results.</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlDepth.0">
<source>Maximum depth</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlDepth.1">
<source>The maximum depth to crawl.</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlMember.0">
<source>Front end member</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlMember.1">
<source>Automatically log in a front end member to index protected pages.</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlHint">
<source>Your website is currently crawled with %d concurrent requests. If your server can handle more than %1$d concurrent requests, ask your system administrator to increase the &lt;code&gt;%s&lt;/code&gt; setting in the system configuration to speed up the crawling process.</source>
</trans-unit>
<trans-unit id="tl_maintenance.crawlWaitToBeFinished">
<source>The crawler is currently working. Please wait for it to finish to see the results.</source>
</trans-unit>
Expand Down
15 changes: 11 additions & 4 deletions core-bundle/contao/templates/backend/be_crawl.html5
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
</div>
<p class="progress-count">0 / 0</p>
<div class="results running">
<p class="crawl-hint show-when-running"><?= $this->hint ?></p>
<?php foreach ($this->activeSubscribers as $subscriber): ?>
<h3><?= $this->trans('tl_maintenance.crawlSubscriberNames.'.$subscriber->getName()) ?></h3>
<div class="result" data-subscriber="<?= $subscriber->getName() ?>">
Expand All @@ -31,22 +32,28 @@
<input type="hidden" name="do" value="maintenance">
<input type="hidden" name="act" value="crawl">
<input type="hidden" name="rt" value="<?= $this->requestToken ?>">
<div class="tl_tbox">
<div>
<fieldset class="tl_tbox">
<div class="widget">
<?= $this->subscribersWidget->parse() ?>
<?php if (!$this->subscribersWidget->hasErrors()): ?>
<p class="tl_help tl_tip"><?= $this->trans('tl_maintenance.crawlSubscribers.1') ?></p>
<?php endif; ?>
</div>
<div class="widget w50">
<?= $this->maxDepthWidget->parse() ?>
<?php if (!$this->maxDepthWidget->hasErrors()): ?>
<p class="tl_help tl_tip"><?= $this->trans('tl_maintenance.crawlDepth.1') ?></p>
<?php endif; ?>
</div>
<?php if ($this->memberWidget): ?>
<div>
<div class="widget w50 clr">
<?= $this->memberWidget->parse() ?>
<?php if (!$this->memberWidget->hasErrors()): ?>
<p class="tl_help tl_tip"><?= $this->trans('tl_maintenance.crawlMember.1') ?></p>
<?php endif; ?>
</div>
<?php endif; ?>
</div>
</fieldset>
</div>
<div class="tl_submit_container">
<button type="submit" class="tl_submit"><?= $this->trans('tl_maintenance.startCrawling') ?></button>
Expand Down
2 changes: 2 additions & 0 deletions core-bundle/contao/themes/flexible/backend.574019e7.css

Large diffs are not rendered by default.

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions core-bundle/contao/themes/flexible/backend.eea42749.css

This file was deleted.

This file was deleted.

2 changes: 1 addition & 1 deletion core-bundle/contao/themes/flexible/entrypoints.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"entrypoints": {
"backend": {
"css": [
"/system/themes/flexible/backend.eea42749.css"
"/system/themes/flexible/backend.574019e7.css"
]
},
"confirm": {
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/contao/themes/flexible/manifest.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"backend.css": "/system/themes/flexible/backend.eea42749.css",
"backend.css": "/system/themes/flexible/backend.574019e7.css",
"confirm.css": "/system/themes/flexible/confirm.5231eaa5.css",
"conflict.css": "/system/themes/flexible/conflict.41a64ff6.css",
"diff.css": "/system/themes/flexible/diff.028ed04c.css",
Expand All @@ -8,7 +8,7 @@
"popup.css": "/system/themes/flexible/popup.751f0537.css",
"tinymce.css": "/system/themes/flexible/tinymce.e5009f94.css",
"tinymce-dark.css": "/system/themes/flexible/tinymce-dark.596023db.css",
"backend.eea42749.css.map": "/system/themes/flexible/backend.eea42749.css.map",
"backend.574019e7.css.map": "/system/themes/flexible/backend.574019e7.css.map",
"confirm.5231eaa5.css.map": "/system/themes/flexible/confirm.5231eaa5.css.map",
"conflict.41a64ff6.css.map": "/system/themes/flexible/conflict.41a64ff6.css.map",
"diff.028ed04c.css.map": "/system/themes/flexible/diff.028ed04c.css.map",
Expand Down
15 changes: 10 additions & 5 deletions core-bundle/contao/themes/flexible/styles/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -834,15 +834,14 @@ h2.sub_headline {
}

#tl_crawl .tl_tbox {
margin-top: 0;
padding-top: 0;
}

#tl_crawl .tl_tbox > div {
max-width: 562px;
padding-right: 0;
padding-left: 0;
}

#tl_crawl .tl_checkbox_container {
margin-top: 9px;
margin-top: 6px;
}

#tl_crawl .inner {
Expand Down Expand Up @@ -891,6 +890,11 @@ h2.sub_headline {
margin-bottom: 6px;
}

#tl_crawl .crawl-hint {
margin-top: -2px;
line-height: 1.3;
}

#tl_crawl .subscriber-log {
display: none;
padding: 5px 0;
Expand Down Expand Up @@ -2646,6 +2650,7 @@ fieldset.tl_tbox, fieldset.tl_box {
border-top: none;
border-left: 0;
border-right: 0;
margin-inline: 0;
}

fieldset.tl_tbox.nolegend, fieldset.tl_box.nolegend {
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/src/Command/CrawlCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ protected function configure(): void
->addArgument('job', InputArgument::OPTIONAL, 'An optional existing job ID')
->addOption('queue', null, InputArgument::OPTIONAL, 'Queue to use ("memory" or "doctrine")', 'memory')
->addOption('subscribers', 's', InputOption::VALUE_REQUIRED | InputOption::VALUE_IS_ARRAY, 'A list of subscribers to enable', $this->escargotFactory->getSubscriberNames())
->addOption('concurrency', 'c', InputOption::VALUE_REQUIRED, 'The number of concurrent requests that are going to be executed', '10')
->addOption('concurrency', 'c', InputOption::VALUE_REQUIRED, 'The number of concurrent requests that are going to be executed', '5')
->addOption('delay', null, InputOption::VALUE_REQUIRED, 'The number of microseconds to wait between requests (0 = throttling is disabled)', '0')
->addOption('max-requests', null, InputOption::VALUE_REQUIRED, 'The maximum number of requests to execute (0 = no limit)', '0')
->addOption('max-depth', null, InputOption::VALUE_REQUIRED, 'The maximum depth to crawl for (0 = no limit)', '10')
->addOption('max-depth', null, InputOption::VALUE_REQUIRED, 'The maximum depth to crawl for (0 = no limit)', '3')
->addOption('no-progress', null, InputOption::VALUE_NONE, 'Disables the progress bar output')
->addOption('enable-debug-csv', null, InputOption::VALUE_NONE, 'Writes the crawl debug log into a separate CSV file')
->addOption('debug-csv-path', null, InputOption::VALUE_REQUIRED, 'The path of the debug log CSV file', Path::join(getcwd(), 'crawl_debug_log.csv'))
Expand Down
5 changes: 5 additions & 0 deletions core-bundle/src/DependencyInjection/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,11 @@ static function (array $attributes): array {
->example('/admin')
->defaultValue('/contao')
->end()
->integerNode('crawl_concurrency')
->info('The number of concurrent requests that are executed. Defaults to 5.')
->min(1)
->defaultValue(5)
->end()
->end()
;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ public function load(array $configs, ContainerBuilder $container): void
$container->setParameter('contao.backend.custom_js', $config['backend']['custom_js']);
$container->setParameter('contao.backend.badge_title', $config['backend']['badge_title']);
$container->setParameter('contao.backend.route_prefix', $config['backend']['route_prefix']);
$container->setParameter('contao.backend.crawl_concurrency', $config['backend']['crawl_concurrency']);
$container->setParameter('contao.intl.locales', $config['intl']['locales']);
$container->setParameter('contao.intl.enabled_locales', $config['intl']['enabled_locales']);
$container->setParameter('contao.intl.countries', $config['intl']['countries']);
Expand Down
4 changes: 2 additions & 2 deletions core-bundle/tests/Command/CrawlCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ public function testOptionsConfigureEscargotCorrectly(): void
$code = $tester->execute([]);

$this->assertSame(0, $code);
$this->assertSame(10, $command->getEscargot()->getConcurrency());
$this->assertSame(5, $command->getEscargot()->getConcurrency());
$this->assertSame(0, $command->getEscargot()->getRequestDelay());
$this->assertSame(0, $command->getEscargot()->getMaxRequests());
$this->assertSame(10, $command->getEscargot()->getMaxDepth());
$this->assertSame(3, $command->getEscargot()->getMaxDepth());

// Test options
$escargot = Escargot::create($this->getBaseUriCollection(), new InMemoryQueue())->withHttpClient($client);
Expand Down