Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IBX-143: Refactored reindexing command to be DBAL 2.13-compatible #3094

Merged
merged 8 commits into from
Apr 9, 2021
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"hautelook/templated-uri-bundle": "^2.1",
"pagerfanta/pagerfanta": "^2.0",
"ocramius/proxy-manager": "^2.1",
"doctrine/dbal": "^2.13.0",
"doctrine/orm": "^2.7",
"doctrine/doctrine-bundle": "~1.6",
"liip/imagine-bundle": "^2.1",
Expand Down
135 changes: 33 additions & 102 deletions eZ/Bundle/EzPublishCoreBundle/Command/ReindexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
namespace eZ\Bundle\EzPublishCoreBundle\Command;

use eZ\Publish\Core\Base\Exceptions\InvalidArgumentException;
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
use eZ\Publish\Core\Search\Common\Indexer;
use eZ\Publish\Core\Search\Common\IncrementalIndexer;
use Doctrine\DBAL\Driver\Statement;
use eZ\Publish\SPI\Persistence\Content\Location\Handler as LocationHandler;
use eZ\Publish\SPI\Search\Content\IndexerGateway;
use Generator;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputInterface;
Expand All @@ -21,16 +22,12 @@
use Symfony\Component\Process\ProcessBuilder;
use RuntimeException;
use DateTime;
use PDO;

class ReindexCommand extends ContainerAwareCommand
{
/** @var \eZ\Publish\Core\Search\Common\Indexer|\eZ\Publish\Core\Search\Common\IncrementalIndexer */
private $searchIndexer;

/** @var \Doctrine\DBAL\Connection */
private $connection;

/** @var string */
private $phpPath;

Expand All @@ -49,6 +46,20 @@ class ReindexCommand extends ContainerAwareCommand
/** @var string */
private $projectDir;

/** @var \eZ\Publish\SPI\Search\Content\IndexerGateway */
private $gateway;

/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
alongosz marked this conversation as resolved.
Show resolved Hide resolved
private $locationHandler;
alongosz marked this conversation as resolved.
Show resolved Hide resolved

public function __construct(IndexerGateway $gateway, LocationHandler $locationHandler)
{
$this->gateway = $gateway;
$this->locationHandler = $locationHandler;

parent::__construct();
}

/**
* Initialize objects required by {@see execute()}.
*
Expand All @@ -59,7 +70,6 @@ public function initialize(InputInterface $input, OutputInterface $output)
{
parent::initialize($input, $output);
$this->searchIndexer = $this->getContainer()->get('ezpublish.spi.search.indexer');
$this->connection = $this->getContainer()->get('ezpublish.api.storage_engine.legacy.connection');
$this->logger = $this->getContainer()->get('logger');
$this->env = $this->getContainer()->getParameter('kernel.environment');
$this->isDebug = $this->getContainer()->getParameter('kernel.debug');
Expand Down Expand Up @@ -200,16 +210,18 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
}

if ($since = $input->getOption('since')) {
$stmt = $this->getStatementContentSince(new DateTime($since));
$count = (int)$this->getStatementContentSince(new DateTime($since), true)->fetchColumn();
$count = $this->gateway->countContentSince(new DateTime($since));
$generator = $this->gateway->getContentSince(new DateTime($since), $iterationCount);
$purge = false;
} elseif ($locationId = (int) $input->getOption('subtree')) {
$stmt = $this->getStatementSubtree($locationId);
$count = (int) $this->getStatementSubtree($locationId, true)->fetchColumn();
/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
$location = $this->locationHandler->load($locationId);
$count = $this->gateway->countContentInSubtree($location->pathString);
$generator = $this->gateway->getContentInSubtree($location->pathString, $iterationCount);
$purge = false;
} else {
$stmt = $this->getStatementContentAll();
$count = (int) $this->getStatementContentAll(true)->fetchColumn();
$count = $this->gateway->countAllContent();
$generator = $this->gateway->getAllContent($iterationCount);
$purge = !$input->getOption('no-purge');
}

Expand Down Expand Up @@ -242,10 +254,15 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
$progress->start($iterations);

if ($processCount > 1) {
$this->runParallelProcess($progress, $stmt, (int) $processCount, (int) $iterationCount, $commit);
$this->runParallelProcess(
$progress,
$generator,
(int)$processCount,
$commit
);
} else {
// if we only have one process, or less iterations to warrant running several, we index it all inline
foreach ($this->fetchIteration($stmt, $iterationCount) as $contentIds) {
foreach ($generator as $contentIds) {
$this->searchIndexer->updateSearchIndex($contentIds, $commit);
$progress->advance(1);
}
Expand All @@ -266,14 +283,12 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
*/
private function runParallelProcess(
ProgressBar $progress,
Statement $stmt,
Generator $generator,
int $processCount,
int $iterationCount,
bool $commit
): void {
/** @var \Symfony\Component\Process\Process[]|null[] */
$processes = array_fill(0, $processCount, null);
$generator = $this->fetchIteration($stmt, $iterationCount);
do {
/** @var \Symfony\Component\Process\Process $process */
foreach ($processes as $key => $process) {
Expand Down Expand Up @@ -312,90 +327,6 @@ private function runParallelProcess(
} while (!empty($processes));
}

/**
* @param DateTime $since
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*/
private function getStatementContentSince(DateTime $since, $count = false)
{
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(c.id)' : 'c.id')
->from('ezcontentobject', 'c')
->where('c.status = :status')->andWhere('c.modified >= :since')
->orderBy('c.modified')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
->setParameter('since', $since->getTimestamp(), PDO::PARAM_INT);

return $q->execute();
}

/**
* @param mixed $locationId
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*
* @throws \eZ\Publish\API\Repository\Exceptions\NotFoundException
*/
private function getStatementSubtree($locationId, $count = false)
{
/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
$locationHandler = $this->getContainer()->get('ezpublish.spi.persistence.location_handler');
$location = $locationHandler->load($locationId);
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(DISTINCT c.id)' : 'DISTINCT c.id')
->from('ezcontentobject', 'c')
->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
->where('c.status = :status')
->andWhere('t.path_string LIKE :path')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
->setParameter('path', $location->pathString . '%', PDO::PARAM_STR);

return $q->execute();
}

/**
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*/
private function getStatementContentAll($count = false)
{
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(c.id)' : 'c.id')
->from('ezcontentobject', 'c')
->where('c.status = :status')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);

return $q->execute();
}

/**
* @param \Doctrine\DBAL\Driver\Statement $stmt
* @param int $iterationCount
*
* @return \Generator Return an array of arrays, each array contains content id's of $iterationCount.
*/
private function fetchIteration(Statement $stmt, $iterationCount)
{
do {
$contentIds = [];
for ($i = 0; $i < $iterationCount; ++$i) {
if ($contentId = $stmt->fetch(PDO::FETCH_COLUMN)) {
$contentIds[] = $contentId;
} elseif (empty($contentIds)) {
return;
} else {
break;
}
}

yield $contentIds;
} while (!empty($contentId));
}

/**
* @param array $contentIds
*
Expand Down
6 changes: 6 additions & 0 deletions eZ/Bundle/EzPublishCoreBundle/Resources/config/commands.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,9 @@ services:
- "@ezpublish.siteaccess"
tags:
- { name: console.command }

eZ\Bundle\EzPublishCoreBundle\Command\ReindexCommand:
autowire: true
autoconfigure: true
arguments:
$locationHandler: '@ezpublish.spi.persistence.location_handler'
3 changes: 3 additions & 0 deletions eZ/Bundle/EzPublishCoreBundle/Resources/config/papi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ services:
- '@ezpublish.spi.search'
tags:
- { name: kernel.event_subscriber }

eZ\Publish\SPI\Search\Content\IndexerGateway:
alias: eZ\Publish\Core\Search\Legacy\Content\IndexerGateway
2 changes: 1 addition & 1 deletion eZ/Publish/API/Repository/Tests/BaseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ public function createUserWithPolicies($login, array $policiesData, RoleLimitati
*
* @throws \ErrorException
*/
protected function getRawDatabaseConnection()
protected function getRawDatabaseConnection(): Connection
{
$connection = $this
->getSetupFactory()
Expand Down
147 changes: 147 additions & 0 deletions eZ/Publish/Core/Search/Legacy/Content/IndexerGateway.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<?php

/**
* @copyright Copyright (C) eZ Systems AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace eZ\Publish\Core\Search\Legacy\Content;

use DateTimeInterface;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Driver\ResultStatement;
use Doctrine\DBAL\ParameterType;
use Doctrine\DBAL\Query\QueryBuilder;
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
use eZ\Publish\SPI\Search\Content\IndexerGateway as SPIIndexerGateway;
use Generator;

/**
 * Doctrine DBAL based gateway feeding the search indexer with content IDs.
 *
 * Every query selects IDs from the "ezcontentobject" table restricted to
 * rows whose status equals ContentInfo::STATUS_PUBLISHED. The "get*" methods
 * stream the IDs in batches, the "count*" methods return how many IDs the
 * corresponding "get*" query selects.
 *
 * @internal
 */
final class IndexerGateway implements SPIIndexerGateway
{
    /** @var \Doctrine\DBAL\Connection */
    private $connection;

    public function __construct(Connection $connection)
    {
        $this->connection = $connection;
    }

    /**
     * Streams IDs of published content modified at or after $since, ordered by modification time.
     *
     * @param \DateTimeInterface $since lower (inclusive) bound, compared as a Unix timestamp
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator yields arrays of content IDs
     */
    public function getContentSince(DateTimeInterface $since, int $iterationCount): Generator
    {
        $query = $this->buildQueryForContentSince($since);
        // Ordering is applied here only, so the counting variant of the query stays free of it.
        $query->orderBy('c.modified');

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts published content modified at or after $since.
     */
    public function countContentSince(DateTimeInterface $since): int
    {
        return $this->fetchCount($this->buildQueryForContentSince($since));
    }

    /**
     * Streams distinct IDs of published content having a location whose path string starts with $locationPath.
     *
     * @param string $locationPath path string prefix of the subtree root location
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator yields arrays of content IDs
     */
    public function getContentInSubtree(string $locationPath, int $iterationCount): Generator
    {
        $query = $this->buildQueryForContentInSubtree($locationPath);

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts distinct published content having a location whose path string starts with $locationPath.
     */
    public function countContentInSubtree(string $locationPath): int
    {
        return $this->fetchCount($this->buildQueryForContentInSubtree($locationPath));
    }

    /**
     * Streams IDs of all published content.
     *
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator yields arrays of content IDs
     */
    public function getAllContent(int $iterationCount): Generator
    {
        $query = $this->buildQueryForAllContent();

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts all published content.
     */
    public function countAllContent(): int
    {
        return $this->fetchCount($this->buildQueryForAllContent());
    }

    /**
     * Builds the (unordered) ID query for published content modified at or after $since.
     */
    private function buildQueryForContentSince(DateTimeInterface $since): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')
            ->andWhere('c.modified >= :since')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER)
            ->setParameter('since', $since->getTimestamp(), ParameterType::INTEGER);
    }

    /**
     * Builds the ID query for published content located under the given path string.
     *
     * DISTINCT is used because the join with "ezcontentobject_tree" produces
     * one row per matching location of the same content item.
     */
    private function buildQueryForContentInSubtree(string $locationPath): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('DISTINCT c.id')
            ->from('ezcontentobject', 'c')
            ->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
            ->where('c.status = :status')
            ->andWhere('t.path_string LIKE :path')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER)
            ->setParameter('path', $locationPath . '%', ParameterType::STRING);
    }

    /**
     * Builds the ID query for all published content.
     */
    private function buildQueryForAllContent(): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER);
    }

    /**
     * Turns an ID-selecting query into its counting equivalent.
     *
     * The passed builder is modified in place and returned.
     *
     * @throws \Doctrine\DBAL\Exception
     */
    private function buildCountingQuery(QueryBuilder $query): QueryBuilder
    {
        $databasePlatform = $this->connection->getDatabasePlatform();

        // wrap existing select part in count expression
        $query->select(
            $databasePlatform->getCountExpression(
                $query->getQueryPart('select')[0]
            )
        );

        return $query;
    }

    /**
     * Executes the counting variant of the given ID query and returns its result as an integer.
     *
     * @throws \Doctrine\DBAL\Exception
     */
    private function fetchCount(QueryBuilder $query): int
    {
        return (int)$this->buildCountingQuery($query)->execute()->fetchOne();
    }

    /**
     * Reads the first column of every remaining row and yields the values in batches.
     *
     * Each yielded array holds at most $iterationCount values; only the last
     * one may be shorter, and no empty array is ever yielded. Values are passed
     * on as returned by the driver, without casting.
     *
     * @param int $iterationCount batch size; values lower than 1 are treated as 1
     *
     * @return \Generator yields arrays of content IDs
     */
    private function fetchIteration(ResultStatement $statement, int $iterationCount): Generator
    {
        // A non-positive size could never fill a batch; clamp it so every fetched row is still emitted.
        $batchSize = max(1, $iterationCount);

        $contentIds = [];
        // Compare strictly against false so that a falsy column value
        // (e.g. '0') is not mistaken for the end of the result set.
        while (false !== ($contentId = $statement->fetchOne())) {
            $contentIds[] = $contentId;

            if (count($contentIds) >= $batchSize) {
                yield $contentIds;
                $contentIds = [];
            }
        }

        // Flush the trailing, partially filled batch.
        if (!empty($contentIds)) {
            yield $contentIds;
        }
    }
}
4 changes: 4 additions & 0 deletions eZ/Publish/Core/settings/search_engines/legacy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,7 @@ services:
tags:
- {name: ezpublish.searchEngineIndexer, alias: legacy}
lazy: true

eZ\Publish\Core\Search\Legacy\Content\IndexerGateway:
arguments:
$connection: '@ezpublish.persistence.connection'
Loading