Skip to content

Commit

Permalink
Smart Search: Use UTF8-aware functions when indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
Hackwar committed Aug 28, 2023
1 parent beec751 commit aec9621
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 14 deletions.
4 changes: 2 additions & 2 deletions administrator/components/com_finder/src/Indexer/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -836,12 +836,12 @@ protected function tokenizeToDb($input, $context, $lang, $format, $count = 0)
*/
if (!feof($input)) {
// Find the last space character.
$ls = strrpos($buffer, ' ');
$ls = StringHelper::strrpos($buffer, ' ');

// Adjust string based on the last space character.
if ($ls) {
// Truncate the string to the last space character.
$string = substr($buffer, 0, $ls);
$string = StringHelper::substr($buffer, 0, $ls);

// Adjust the buffer based on the last space for the next iteration and trim.
$buffer = StringHelper::trim(substr($buffer, $ls));
Expand Down
11 changes: 6 additions & 5 deletions administrator/components/com_finder/src/Indexer/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

use Joomla\CMS\Filter\InputFilter;
use Joomla\CMS\Language\Text;
use Joomla\String\StringHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
Expand Down Expand Up @@ -80,26 +81,26 @@ public static function getInstance($format)
public function parse($input)
{
// If the input is less than 2KB we can parse it in one go.
if (strlen($input) <= 2048) {
if (StringHelper::strlen($input) <= 2048) {
return $this->process($input);
}

// Input is longer than 2KB so parse it in chunks of 2KB or less.
$start = 0;
$end = strlen($input);
$end = StringHelper::strlen($input);
$chunk = 2048;
$return = null;

while ($start < $end) {
// Setup the string.
$string = substr($input, $start, $chunk);
$string = StringHelper::substr($input, $start, $chunk);

// Find the last space character if we aren't at the end.
$ls = (($start + $chunk) < $end ? strrpos($string, ' ') : false);
$ls = (($start + $chunk) < $end ? StringHelper::strrpos($string, ' ') : false);

// Truncate to the last space character.
if ($ls !== false) {
$string = substr($string, 0, $ls);
$string = StringHelper::substr($string, 0, $ls);
}

// Adjust the start position for the next iteration.
Expand Down
15 changes: 8 additions & 7 deletions administrator/components/com_finder/src/Indexer/Parser/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespace Joomla\Component\Finder\Administrator\Indexer\Parser;

use Joomla\Component\Finder\Administrator\Indexer\Parser;
use Joomla\String\StringHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
Expand Down Expand Up @@ -117,11 +118,11 @@ private function removeBlocks($input, $startTag, $endTag)
{
$return = '';
$offset = 0;
$startTagLength = strlen($startTag);
$endTagLength = strlen($endTag);
$startTagLength = StringHelper::strlen($startTag);
$endTagLength = StringHelper::strlen($endTag);

// Find the first start tag.
$start = stripos($input, $startTag);
$start = StringHelper::stripos($input, $startTag);

// If no start tags were found, return the string unchanged.
if ($start === false) {
Expand All @@ -131,10 +132,10 @@ private function removeBlocks($input, $startTag, $endTag)
// Look for all blocks defined by the start and end tags.
while ($start !== false) {
// Accumulate the substring up to the start tag.
$return .= substr($input, $offset, $start - $offset) . ' ';
$return .= StringHelper::substr($input, $offset, $start - $offset) . ' ';

// Look for an end tag corresponding to the start tag.
$end = stripos($input, $endTag, $start + $startTagLength);
$end = StringHelper::stripos($input, $endTag, $start + $startTagLength);

// If no corresponding end tag, leave the string alone.
if ($end === false) {
Expand All @@ -147,11 +148,11 @@ private function removeBlocks($input, $startTag, $endTag)
$offset = $end + $endTagLength;

// Look for the next start tag and loop.
$start = stripos($input, $startTag, $offset);
$start = StringHelper::stripos($input, $startTag, $offset);
}

// Add in the final substring after the last end tag.
$return .= substr($input, $offset);
$return .= StringHelper::substr($input, $offset);

return $return;
}
Expand Down

0 comments on commit aec9621

Please sign in to comment.