Skip to content

Commit

Permalink
IBX-143: Refactored reindexing command to be DBAL 2.13-compatible
Browse files Browse the repository at this point in the history
For more details see:
* #3094
* https://issues.ibexa.co/browse/IBX-143

Changes:

* Bumped doctrine/dbal to ^2.13.0

* Extracted Indexer Doctrine Gateway from ReindexCommand

* [Tests] Added integration test coverage for IndexerGateway

Co-Authored-By: Adam Wójs <adamwojs@users.noreply.github.com>
Co-Authored-By: Łukasz Serwatka <lserwatka@users.noreply.github.com>
Co-Authored-By: Tomasz Kryszan <ciastektk@users.noreply.github.com>
  • Loading branch information
4 people committed Apr 9, 2021
1 parent a18ac4c commit 9f1c87e
Show file tree
Hide file tree
Showing 11 changed files with 410 additions and 103 deletions.
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"hautelook/templated-uri-bundle": "^2.1",
"pagerfanta/pagerfanta": "^2.0",
"ocramius/proxy-manager": "^2.1",
"doctrine/dbal": "^2.13.0",
"doctrine/orm": "^2.7",
"doctrine/doctrine-bundle": "~1.6",
"liip/imagine-bundle": "^2.1",
Expand Down
135 changes: 33 additions & 102 deletions eZ/Bundle/EzPublishCoreBundle/Command/ReindexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
namespace eZ\Bundle\EzPublishCoreBundle\Command;

use eZ\Publish\Core\Base\Exceptions\InvalidArgumentException;
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
use eZ\Publish\Core\Search\Common\Indexer;
use eZ\Publish\Core\Search\Common\IncrementalIndexer;
use Doctrine\DBAL\Driver\Statement;
use eZ\Publish\SPI\Persistence\Content\Location\Handler as LocationHandler;
use eZ\Publish\SPI\Search\Content\IndexerGateway;
use Generator;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputInterface;
Expand All @@ -21,16 +22,12 @@
use Symfony\Component\Process\ProcessBuilder;
use RuntimeException;
use DateTime;
use PDO;

class ReindexCommand extends ContainerAwareCommand
{
/** @var \eZ\Publish\Core\Search\Common\Indexer|\eZ\Publish\Core\Search\Common\IncrementalIndexer */
private $searchIndexer;

/** @var \Doctrine\DBAL\Connection */
private $connection;

/** @var string */
private $phpPath;

Expand All @@ -49,6 +46,20 @@ class ReindexCommand extends ContainerAwareCommand
/** @var string */
private $projectDir;

/** @var \eZ\Publish\SPI\Search\Content\IndexerGateway */
private $gateway;

/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
private $locationHandler;

/**
 * Stores the injected collaborators and then runs the parent command constructor.
 *
 * @param \eZ\Publish\SPI\Search\Content\IndexerGateway $gateway used by indexIncrementally() to count and list content IDs
 * @param \eZ\Publish\SPI\Persistence\Content\Location\Handler $locationHandler used to load the Location given via the "subtree" option
 */
public function __construct(IndexerGateway $gateway, LocationHandler $locationHandler)
{
// Properties are assigned before the parent constructor is invoked.
$this->gateway = $gateway;
$this->locationHandler = $locationHandler;

parent::__construct();
}

/**
* Initialize objects required by {@see execute()}.
*
Expand All @@ -59,7 +70,6 @@ public function initialize(InputInterface $input, OutputInterface $output)
{
parent::initialize($input, $output);
$this->searchIndexer = $this->getContainer()->get('ezpublish.spi.search.indexer');
$this->connection = $this->getContainer()->get('ezpublish.api.storage_engine.legacy.connection');
$this->logger = $this->getContainer()->get('logger');
$this->env = $this->getContainer()->getParameter('kernel.environment');
$this->isDebug = $this->getContainer()->getParameter('kernel.debug');
Expand Down Expand Up @@ -200,16 +210,18 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
}

if ($since = $input->getOption('since')) {
$stmt = $this->getStatementContentSince(new DateTime($since));
$count = (int)$this->getStatementContentSince(new DateTime($since), true)->fetchColumn();
$count = $this->gateway->countContentSince(new DateTime($since));
$generator = $this->gateway->getContentSince(new DateTime($since), $iterationCount);
$purge = false;
} elseif ($locationId = (int) $input->getOption('subtree')) {
$stmt = $this->getStatementSubtree($locationId);
$count = (int) $this->getStatementSubtree($locationId, true)->fetchColumn();
/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
$location = $this->locationHandler->load($locationId);
$count = $this->gateway->countContentInSubtree($location->pathString);
$generator = $this->gateway->getContentInSubtree($location->pathString, $iterationCount);
$purge = false;
} else {
$stmt = $this->getStatementContentAll();
$count = (int) $this->getStatementContentAll(true)->fetchColumn();
$count = $this->gateway->countAllContent();
$generator = $this->gateway->getAllContent($iterationCount);
$purge = !$input->getOption('no-purge');
}

Expand Down Expand Up @@ -242,10 +254,15 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
$progress->start($iterations);

if ($processCount > 1) {
$this->runParallelProcess($progress, $stmt, (int) $processCount, (int) $iterationCount, $commit);
$this->runParallelProcess(
$progress,
$generator,
(int)$processCount,
$commit
);
} else {
// if we only have one process, or less iterations to warrant running several, we index it all inline
foreach ($this->fetchIteration($stmt, $iterationCount) as $contentIds) {
foreach ($generator as $contentIds) {
$this->searchIndexer->updateSearchIndex($contentIds, $commit);
$progress->advance(1);
}
Expand All @@ -266,14 +283,12 @@ protected function indexIncrementally(InputInterface $input, OutputInterface $ou
*/
private function runParallelProcess(
ProgressBar $progress,
Statement $stmt,
Generator $generator,
int $processCount,
int $iterationCount,
bool $commit
): void {
/** @var \Symfony\Component\Process\Process[]|null[] */
$processes = array_fill(0, $processCount, null);
$generator = $this->fetchIteration($stmt, $iterationCount);
do {
/** @var \Symfony\Component\Process\Process $process */
foreach ($processes as $key => $process) {
Expand Down Expand Up @@ -312,90 +327,6 @@ private function runParallelProcess(
} while (!empty($processes));
}

/**
* @param DateTime $since
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*/
private function getStatementContentSince(DateTime $since, $count = false)
{
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(c.id)' : 'c.id')
->from('ezcontentobject', 'c')
->where('c.status = :status')->andWhere('c.modified >= :since')
->orderBy('c.modified')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
->setParameter('since', $since->getTimestamp(), PDO::PARAM_INT);

return $q->execute();
}

/**
* @param mixed $locationId
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*
* @throws \eZ\Publish\API\Repository\Exceptions\NotFoundException
*/
private function getStatementSubtree($locationId, $count = false)
{
/** @var \eZ\Publish\SPI\Persistence\Content\Location\Handler */
$locationHandler = $this->getContainer()->get('ezpublish.spi.persistence.location_handler');
$location = $locationHandler->load($locationId);
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(DISTINCT c.id)' : 'DISTINCT c.id')
->from('ezcontentobject', 'c')
->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
->where('c.status = :status')
->andWhere('t.path_string LIKE :path')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
->setParameter('path', $location->pathString . '%', PDO::PARAM_STR);

return $q->execute();
}

/**
* @param bool $count
*
* @return \Doctrine\DBAL\Driver\Statement
*/
private function getStatementContentAll($count = false)
{
$q = $this->connection->createQueryBuilder()
->select($count ? 'count(c.id)' : 'c.id')
->from('ezcontentobject', 'c')
->where('c.status = :status')
->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);

return $q->execute();
}

/**
* @param \Doctrine\DBAL\Driver\Statement $stmt
* @param int $iterationCount
*
* @return \Generator Return an array of arrays, each array contains content id's of $iterationCount.
*/
private function fetchIteration(Statement $stmt, $iterationCount)
{
do {
$contentIds = [];
for ($i = 0; $i < $iterationCount; ++$i) {
if ($contentId = $stmt->fetch(PDO::FETCH_COLUMN)) {
$contentIds[] = $contentId;
} elseif (empty($contentIds)) {
return;
} else {
break;
}
}

yield $contentIds;
} while (!empty($contentId));
}

/**
* @param array $contentIds
*
Expand Down
6 changes: 6 additions & 0 deletions eZ/Bundle/EzPublishCoreBundle/Resources/config/commands.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,9 @@ services:
- "@ezpublish.siteaccess"
tags:
- { name: console.command }

eZ\Bundle\EzPublishCoreBundle\Command\ReindexCommand:
autowire: true
autoconfigure: true
arguments:
$locationHandler: '@ezpublish.spi.persistence.location_handler'
3 changes: 3 additions & 0 deletions eZ/Bundle/EzPublishCoreBundle/Resources/config/papi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ services:
- '@ezpublish.spi.search'
tags:
- { name: kernel.event_subscriber }

eZ\Publish\SPI\Search\Content\IndexerGateway:
alias: eZ\Publish\Core\Search\Legacy\Content\IndexerGateway
2 changes: 1 addition & 1 deletion eZ/Publish/API/Repository/Tests/BaseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ public function createUserWithPolicies($login, array $policiesData, RoleLimitati
*
* @throws \ErrorException
*/
protected function getRawDatabaseConnection()
protected function getRawDatabaseConnection(): Connection
{
$connection = $this
->getSetupFactory()
Expand Down
147 changes: 147 additions & 0 deletions eZ/Publish/Core/Search/Legacy/Content/IndexerGateway.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<?php

/**
* @copyright Copyright (C) eZ Systems AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace eZ\Publish\Core\Search\Legacy\Content;

use DateTimeInterface;
use Doctrine\DBAL\Connection;
use Doctrine\DBAL\Driver\ResultStatement;
use Doctrine\DBAL\ParameterType;
use Doctrine\DBAL\Query\QueryBuilder;
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
use eZ\Publish\SPI\Search\Content\IndexerGateway as SPIIndexerGateway;
use Generator;

/**
 * Doctrine DBAL based gateway listing and counting IDs of published Content items
 * stored in the "ezcontentobject" table, for the purpose of search index rebuilding.
 *
 * Every "get*" method is a generator: the underlying query is executed only once
 * iteration starts, and IDs are yielded in batches (arrays) of at most $iterationCount items.
 * Every "count*" method runs the same selection wrapped in a count expression.
 *
 * @internal
 */
final class IndexerGateway implements SPIIndexerGateway
{
    /** @var \Doctrine\DBAL\Connection */
    private $connection;

    public function __construct(Connection $connection)
    {
        $this->connection = $connection;
    }

    /**
     * Yields batches of IDs of published Content modified at or after $since, ordered by modification time.
     *
     * @param \DateTimeInterface $since lower bound (inclusive), compared as a Unix timestamp
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator generator of arrays of Content IDs
     */
    public function getContentSince(DateTimeInterface $since, int $iterationCount): Generator
    {
        $query = $this->buildQueryForContentSince($since);
        // ordering is applied here only, so that the counting variant stays free of ORDER BY
        $query->orderBy('c.modified');

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts published Content modified at or after $since.
     */
    public function countContentSince(DateTimeInterface $since): int
    {
        $query = $this->buildCountingQuery(
            $this->buildQueryForContentSince($since)
        );

        return (int)$query->execute()->fetchOne();
    }

    /**
     * Yields batches of distinct IDs of published Content having a Location
     * whose path string starts with $locationPath.
     *
     * @param string $locationPath path string prefix to match
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator generator of arrays of Content IDs
     */
    public function getContentInSubtree(string $locationPath, int $iterationCount): Generator
    {
        $query = $this->buildQueryForContentInSubtree($locationPath);

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts distinct published Content having a Location whose path string starts with $locationPath.
     */
    public function countContentInSubtree(string $locationPath): int
    {
        $query = $this->buildCountingQuery(
            $this->buildQueryForContentInSubtree($locationPath)
        );

        return (int)$query->execute()->fetchOne();
    }

    /**
     * Yields batches of IDs of all published Content.
     *
     * @param int $iterationCount maximum number of IDs per yielded batch
     *
     * @return \Generator generator of arrays of Content IDs
     */
    public function getAllContent(int $iterationCount): Generator
    {
        $query = $this->buildQueryForAllContent();

        yield from $this->fetchIteration($query->execute(), $iterationCount);
    }

    /**
     * Counts all published Content.
     */
    public function countAllContent(): int
    {
        $query = $this->buildCountingQuery(
            $this->buildQueryForAllContent()
        );

        return (int)$query->execute()->fetchOne();
    }

    /**
     * Builds the (unordered) query selecting IDs of published Content modified at or after $since.
     */
    private function buildQueryForContentSince(DateTimeInterface $since): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')->andWhere('c.modified >= :since')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER)
            ->setParameter('since', $since->getTimestamp(), ParameterType::INTEGER);
    }

    /**
     * Builds the query selecting distinct IDs of published Content joined with
     * "ezcontentobject_tree" rows whose path_string matches "$locationPath%".
     *
     * DISTINCT is used because the join produces one row per matching tree entry.
     */
    private function buildQueryForContentInSubtree(string $locationPath): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('DISTINCT c.id')
            ->from('ezcontentobject', 'c')
            ->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
            ->where('c.status = :status')
            ->andWhere('t.path_string LIKE :path')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER)
            ->setParameter('path', $locationPath . '%', ParameterType::STRING);
    }

    /**
     * Builds the query selecting IDs of all published Content.
     */
    private function buildQueryForAllContent(): QueryBuilder
    {
        return $this->connection->createQueryBuilder()
            ->select('c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, ParameterType::INTEGER);
    }

    /**
     * Turns a selecting query into a counting one by wrapping its first select
     * expression in the database platform's count expression.
     *
     * The given QueryBuilder instance is modified in place and returned.
     *
     * @throws \Doctrine\DBAL\Exception
     */
    private function buildCountingQuery(QueryBuilder $query): QueryBuilder
    {
        $databasePlatform = $this->connection->getDatabasePlatform();

        // wrap existing select part in count expression
        $query->select(
            $databasePlatform->getCountExpression(
                $query->getQueryPart('select')[0]
            )
        );

        return $query;
    }

    /**
     * Reads the first column of every remaining row of $statement and yields the
     * values grouped into arrays of at most $iterationCount items.
     *
     * Nothing is yielded for an empty result; the last batch may be shorter than the others.
     * A non-positive $iterationCount results in single-element batches rather than an empty one.
     *
     * NOTE(review): the ResultStatement contract itself does not declare fetchOne(); presumably the
     * object returned by QueryBuilder::execute() under DBAL 2.13 provides it - confirm before
     * tightening the type hint.
     *
     * @return \Generator generator of arrays of Content IDs
     */
    private function fetchIteration(ResultStatement $statement, int $iterationCount): Generator
    {
        $contentIds = [];

        // fetchOne() signals the end of the result set with boolean false; compare strictly so
        // that a falsy column value is never mistaken for the end of data
        while (($contentId = $statement->fetchOne()) !== false) {
            $contentIds[] = $contentId;

            if (count($contentIds) >= $iterationCount) {
                yield $contentIds;
                $contentIds = [];
            }
        }

        // flush the trailing, incomplete batch
        if (!empty($contentIds)) {
            yield $contentIds;
        }
    }
}
4 changes: 4 additions & 0 deletions eZ/Publish/Core/settings/search_engines/legacy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,7 @@ services:
tags:
- {name: ezpublish.searchEngineIndexer, alias: legacy}
lazy: true

eZ\Publish\Core\Search\Legacy\Content\IndexerGateway:
arguments:
$connection: '@ezpublish.persistence.connection'
Loading

0 comments on commit 9f1c87e

Please sign in to comment.