Skip to content

Commit

Permalink
Work on indexer (#12253)
Browse files Browse the repository at this point in the history
* Merge some unset()

* Lots of work on the indexer itself:

- Type safe comparisons
- a few simplifications
- a few performance optimizations

* Some Elvis operators and a missing @since annotation

* One more StringHelper...

* Codestyle...

* To avoid potentially breaking BC, reverted to the `!empty($var)` operation, but written in the non-inverted style `(bool) $var`

* Formatting

* Fix formatting

* Revert this change for now. Look into it later again.

* Codesniffer...

* Removed repeated `use`

* Some extra work according to @andrepereiradasilva's notes

* Additional changes according to reviewer's comments

* Code style

* Fix misbehavior caused by not correctly handling the single-object case of a mixed variable (array|object)

* Pass the float value through db->escape, as a bare float may produce problems for some locales.

* "Make values float again" - (pun intended)
  • Loading branch information
frankmayer authored and ReLater committed Sep 1, 2018
1 parent d0798c6 commit 8ac2ff3
Show file tree
Hide file tree
Showing 16 changed files with 178 additions and 158 deletions.
11 changes: 4 additions & 7 deletions administrator/components/com_finder/helpers/indexer/adapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public function __construct(&$subject, $config)
/**
* Method to get the adapter state and push it into the indexer.
*
* @return boolean True on success.
* @return void
*
* @since 2.5
* @throws Exception on error.
Expand Down Expand Up @@ -312,7 +312,7 @@ abstract protected function index(FinderIndexerResult $item);
*
* @param integer $id The ID of the item to reindex.
*
* @return boolean True on success.
* @return void
*
* @since 2.5
* @throws Exception on database error.
Expand Down Expand Up @@ -902,12 +902,9 @@ protected function pluginDisable($pks)
protected function translateState($item, $category = null)
{
// If category is present, factor in its states as well
if ($category !== null)
if ($category !== null && $category == 0)
{
if ($category == 0)
{
$item = 0;
}
$item = 0;
}

// Translate the state
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,7 @@ public function index($item, $format = 'html')
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
$ip = str_replace(array('/', '-'), ' ', $ip);
}

// Tokenize a string of content and add it to the database.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public function index($item, $format = 'html')
$isNew = empty($link->link_id) ? true : false;

// Check the signatures. If they match, the item is up to date.
if (!$isNew && $curSig == $oldSig)
if (!$isNew && $curSig === $oldSig)
{
return $linkId;
}
Expand Down Expand Up @@ -208,8 +208,7 @@ public function index($item, $format = 'html')
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
$ip = str_replace(array('/', '-'), ' ', $ip);
}

// Tokenize a string of content and add it to the database.
Expand Down Expand Up @@ -328,7 +327,7 @@ public function index($item, $format = 'html')
' WHERE ta.term_id = 0'
);

if ($db->loadRow() == null)
if ($db->loadRow() === null)
{
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_terms') .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public function index($item, $format = 'html')
$isNew = empty($link->link_id) ? true : false;

// Check the signatures. If they match, the item is up to date.
if (!$isNew && $curSig == $oldSig)
if (!$isNew && $curSig === $oldSig)
{
return $linkId;
}
Expand Down Expand Up @@ -216,8 +216,7 @@ public function index($item, $format = 'html')
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
$ip = str_replace(array('/', '-'), ' ', $ip);
}

// Tokenize a string of content and add it to the database.
Expand Down
29 changes: 20 additions & 9 deletions administrator/components/com_finder/helpers/indexer/helper.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static function parse($input, $format = 'html')
* @param string $lang The language of the input.
* @param boolean $phrase Flag to indicate whether input could be a phrase. [optional]
*
* @return array An array of FinderIndexerToken objects.
* @return array|FinderIndexerToken An array of FinderIndexerToken objects or a single FinderIndexerToken object.
*
* @since 2.5
*/
Expand Down Expand Up @@ -124,14 +124,14 @@ public static function tokenize($input, $lang, $phrase = false)
for ($i = 0, $n = count($terms); $i < $n; $i++)
{
$charMatches = array();
$charCount = preg_match_all('#[\p{Han}]#mui', $terms[$i], $charMatches);
$charCount = preg_match_all('#[\p{Han}]#mui', $terms[$i], $charMatches);

// Split apart any groups of Chinese characters.
for ($j = 0; $j < $charCount; $j++)
{
$tSplit = StringHelper::str_ireplace($charMatches[0][$j], '', $terms[$i], false);

if (!empty($tSplit))
if ((bool) $tSplit)
{
$terms[$i] = $tSplit;
}
Expand Down Expand Up @@ -177,7 +177,12 @@ public static function tokenize($input, $lang, $phrase = false)
if ($i2 < $n && isset($tokens[$i2]))
{
// Tokenize the two word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $lang, $lang === 'zh' ? '' : ' ');
$token = new FinderIndexerToken(
array(
$tokens[$i]->term,
$tokens[$i2]->term
), $lang, $lang === 'zh' ? '' : ' '
);
$token->derived = true;

// Add the token to the stack.
Expand All @@ -188,7 +193,13 @@ public static function tokenize($input, $lang, $phrase = false)
if ($i3 < $n && isset($tokens[$i3]))
{
// Tokenize the three word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $lang, $lang === 'zh' ? '' : ' ');
$token = new FinderIndexerToken(
array(
$tokens[$i]->term,
$tokens[$i2]->term,
$tokens[$i3]->term
), $lang, $lang === 'zh' ? '' : ' '
);
$token->derived = true;

// Add the token to the stack.
Expand Down Expand Up @@ -265,7 +276,7 @@ public static function addContentType($title, $mime = null)
{
static $types;

$db = JFactory::getDbo();
$db = JFactory::getDbo();
$query = $db->getQuery(true);

// Check if the types are loaded.
Expand Down Expand Up @@ -432,7 +443,7 @@ public static function getContentPath($url)
}

// Build the relative route.
$uri = $router->build($url);
$uri = $router->build($url);
$route = $uri->toString(array('path', 'query', 'fragment'));
$route = str_replace(JUri::base(true) . '/', '', $route);

Expand All @@ -450,7 +461,7 @@ public static function getContentPath($url)
* @since 2.5
* @throws Exception on database error.
*/
public static function getContentExtras(FinderIndexerResult &$item)
public static function getContentExtras(FinderIndexerResult $item)
{
// Get the event dispatcher.
$dispatcher = JEventDispatcher::getInstance();
Expand Down Expand Up @@ -504,7 +515,7 @@ public static function prepareContent($text, $params = null, FinderIndexerResult
}

// Create a mock content object.
$content = JTable::getInstance('Content');
$content = JTable::getInstance('Content');
$content->text = $text;

if ($item)
Expand Down
35 changes: 19 additions & 16 deletions administrator/components/com_finder/helpers/indexer/indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public static function getInstance()
public static function getState()
{
// First, try to load from the internal state.
if (!empty(static::$state))
if ((bool) static::$state)
{
return static::$state;
}
Expand Down Expand Up @@ -443,8 +443,7 @@ protected function tokenizeToDb($input, $context, $lang, $format)
// Parse, tokenise and add tokens to the database.
$count = $this->tokenizeToDbShort($string, $context, $lang, $format, $count);

unset($string);
unset($tokens);
unset($string, $tokens);
}

return $count;
Expand Down Expand Up @@ -513,36 +512,40 @@ protected function addTokensToDb($tokens, $context = '')

$query = clone $this->addTokensToDbQueryTemplate;

// Check if a single FinderIndexerToken object was given and make it to be an array of FinderIndexerToken objects
$tokens = is_array($tokens) ? $tokens : array($tokens);

// Count the number of token values.
$values = 0;

// Break into chunks of no more than 1000 items
$chunks = array_chunk($tokens, 1000);

foreach ($chunks as $tokens)
// Iterate through the tokens to create SQL value sets.
if (!is_a($tokens, 'FinderIndexerToken'))
{
$query->clear('values');

// Iterate through the tokens to create SQL value sets.
foreach ($tokens as $token)
{
$query->values(
$db->quote($token->term) . ', '
. $db->quote($token->stem) . ', '
. (int) $token->common . ', '
. (int) $token->phrase . ', '
. (float) $token->weight . ', '
. $db->escape((float) $token->weight) . ', '
. (int) $context . ', '
. $db->quote($token->language)
);
++$values;
}

$db->setQuery($query)->execute();
}
else
{
$query->values(
$db->quote($tokens->term) . ', '
. $db->quote($tokens->stem) . ', '
. (int) $tokens->common . ', '
. (int) $tokens->phrase . ', '
. $db->escape((float) $tokens->weight) . ', '
. (int) $context . ', '
. $db->quote($tokens->language)
);
++$values;
}
$db->setQuery($query)->execute();

return $values;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ protected function process($input)
* @param string $endTag String representing the end tag.
*
* @return string with blocks removed.
*
* @since 3.4
*/
private function removeBlocks($input, $startTag, $endTag)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ class FinderIndexerParserRtf extends FinderIndexerParser
protected function process($input)
{
// Remove embedded pictures.
$input = preg_replace('#{\\\pict[^}]*}#mis', '', $input);
$input = preg_replace('#{\\\pict[^}]*}#mi', '', $input);

// Remove control characters.
$input = str_replace(array('{', '}', "\\\n"), array(' ', ' ', "\n"), $input);
$input = preg_replace('#\\\([^;]+?);#mis', ' ', $input);
$input = preg_replace('#\\\[\'a-zA-Z0-9]+#mis', ' ', $input);
$input = preg_replace('#\\\([^;]+?);#m', ' ', $input);
$input = preg_replace('#\\\[\'a-zA-Z0-9]+#mi', ' ', $input);

return $input;
}
Expand Down

0 comments on commit 8ac2ff3

Please sign in to comment.