Skip to content

Commit

Permalink
[TASK] Add special wrap character option for term regex
Browse files Browse the repository at this point in the history
  • Loading branch information
featdd committed Feb 3, 2021
1 parent 63ce083 commit df1fdec
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 3 deletions.
19 changes: 16 additions & 3 deletions Classes/Service/ParserService.php
Expand Up @@ -41,13 +41,18 @@ class ParserService implements SingletonInterface
public const REGEX_DELIMITER = '/';

/**
* tags to be always ignored by parsing
* @var string[]
*/
public static $alwaysIgnoreParentTags = [
'a',
'script',
];

/**
 * Extra regex alternatives appended to the boundary groups that wrap a term.
 *
 * Filled in the constructor from the "parsingSpecialWrapCharacters" setting:
 * every configured entry is escaped with preg_quote() and appended with a
 * leading "|", so the value is either empty or looks like "|x|y".
 *
 * @var string
 */
public static $additionalRegexWrapCharacters = '';

/**
* @var ContentObjectRenderer
*/
Expand Down Expand Up @@ -108,6 +113,14 @@ public function __construct()
$this->typoScriptConfiguration['persistence.']['storagePid'])
);

// Comma separated list of extra characters that may wrap a term; empty
// entries are dropped by trimExplode().
$parsingSpecialWrapCharacters = GeneralUtility::trimExplode(
    ',',
    $this->settings['parsingSpecialWrapCharacters'] ?? '',
    true
);

// Each character becomes one more alternative ("|<char>") in the boundary
// groups of the term regex. The regex delimiter is passed to preg_quote() so
// that a configured "/" is escaped too and cannot terminate the pattern early.
foreach ($parsingSpecialWrapCharacters as $parsingSpecialWrapCharacter) {
    self::$additionalRegexWrapCharacters .= '|' . preg_quote($parsingSpecialWrapCharacter, self::REGEX_DELIMITER);
}

try {
/** @var \TYPO3\CMS\Core\Context\Context $context */
$context = ObjectUtility::makeInstance(Context::class);
Expand Down Expand Up @@ -457,9 +470,9 @@ protected function regexParser(string $text, Term $term, int &$replacements, Clo
* i = case-insensitive matching (only added when the term is not case sensitive)
*/
$regex = self::REGEX_DELIMITER .
'(^|\G|[\s\>[:punct:]]|\<br*\>)' .
'(^|\G|[\s\>[:punct:]]|\<br*\>' . self::$additionalRegexWrapCharacters . ')' .
'(' . preg_quote($term->getName(), self::REGEX_DELIMITER) . ')' .
'($|[\s\<[:punct:]]|\<br*\>)' .
'($|[\s\<[:punct:]]|\<br*\>' . self::$additionalRegexWrapCharacters . ')' .
'(?![^<]*>|[^<>]*<\/)' .
self::REGEX_DELIMITER .
(false === $term->isCaseSensitive() ? 'i' : '');
Expand Down
2 changes: 2 additions & 0 deletions Configuration/TypoScript/constants.txt
Expand Up @@ -21,6 +21,8 @@ plugin.tx_dpnglossary {
parsingPids = 0
# cat=dpn_glossary/settings/a; type=string; label=Pages (comma list) which should not be parsed for glossary terms
parsingExcludePidList =
# cat=dpn_glossary/settings/a; type=string; label=Additional special characters to respect in parser regular expression
parsingSpecialWrapCharacters =
# cat=dpn_glossary/settings/a; type=integer; label=Maximum replacements for each term (default: -1 = any)
maxReplacementPerPage = -1
# cat=dpn_glossary/settings/a; type=boolean; label=Respect replacement counter when parsing synonyms (default: off)
Expand Down
2 changes: 2 additions & 0 deletions Configuration/TypoScript/setup.txt
Expand Up @@ -27,6 +27,8 @@ plugin.tx_dpnglossary {
parsingPids = {$plugin.tx_dpnglossary.settings.parsingPids}
# pages where terms should NOT be searched
parsingExcludePidList = {$plugin.tx_dpnglossary.settings.parsingExcludePidList}
# special characters for the regex
parsingSpecialWrapCharacters = {$plugin.tx_dpnglossary.settings.parsingSpecialWrapCharacters}
# max replacements per page, default = -1 (any matches)
maxReplacementPerPage = {$plugin.tx_dpnglossary.settings.maxReplacementPerPage}
# respect max replacement when parsing synonyms
Expand Down
25 changes: 25 additions & 0 deletions Documentation/Configuration/ExtensionSettings/Index.rst
Expand Up @@ -221,4 +221,29 @@ plugin.tx_dpnglossary.settings:
Default
1

.. container:: table-row

Constant
settings.priorisedSynonymParsing

Data Type
boolean

Description
Parse for synonyms before the actual term

Default
1

.. container:: table-row

Constant
settings.parsingSpecialWrapCharacters

Data Type
string

Description
Comma-separated list of additional special characters that are allowed to wrap a term (directly precede or follow it) in the parser's regular expression

.. ###### END~OF~TABLE ######

0 comments on commit df1fdec

Please sign in to comment.