diff --git a/bundle/Command/ConvertXmlTextToRichTextCommand.php b/bundle/Command/ConvertXmlTextToRichTextCommand.php
index ffea3c33..efffff61 100644
--- a/bundle/Command/ConvertXmlTextToRichTextCommand.php
+++ b/bundle/Command/ConvertXmlTextToRichTextCommand.php
@@ -64,7 +64,13 @@ protected function configure()
'disable-duplicate-id-check',
null,
InputOption::VALUE_NONE,
- 'Disable the check for duplicate html ids in every attribute. This might increase execution time on large databases'
+ 'Disable the check for duplicate html ids in every attribute. This might decrease execution time on large databases'
+ )
+ ->addOption(
+ 'disable-id-value-check',
+ null,
+ InputOption::VALUE_NONE,
+ 'Disable the check for non-validating id/name values. This might decrease execution time on large databases'
)
->addOption(
'test-content-object',
@@ -123,7 +129,7 @@ protected function execute(InputInterface $input, OutputInterface $output)
$dryRun = true;
}
- $this->convertFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), $output);
+ $this->convertFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), !$input->getOption('disable-id-value-check'), $output);
}
protected function getContentTypeIds($contentTypeIdentifiers)
@@ -330,7 +336,7 @@ protected function updateFieldRow($dryRun, $id, $version, $datatext)
}
}
- protected function convertFields($dryRun, $contentId, $checkDuplicateIds, OutputInterface $output)
+ protected function convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, OutputInterface $output)
{
$count = $this->getRowCountOfContentObjectAttributes('ezxmltext', $contentId);
@@ -345,7 +351,7 @@ protected function convertFields($dryRun, $contentId, $checkDuplicateIds, Output
$inputValue = $row['data_text'];
}
- $converted = $this->converter->convert($this->createDocument($inputValue), $checkDuplicateIds, $row['id']);
+ $converted = $this->converter->convert($this->createDocument($inputValue), $checkDuplicateIds, $checkIdValues, $row['id']);
$this->updateFieldRow($dryRun, $row['id'], $row['version'], $converted);
diff --git a/lib/FieldType/XmlText/Converter/RichText.php b/lib/FieldType/XmlText/Converter/RichText.php
index a6e7c7db..48001378 100644
--- a/lib/FieldType/XmlText/Converter/RichText.php
+++ b/lib/FieldType/XmlText/Converter/RichText.php
@@ -157,9 +157,44 @@ protected function reportNonUniqueIds(DOMDocument $document, $contentFieldId)
$id = $node->attributes->getNamedItem('id')->nodeValue;
// id has format "duplicated_id_foo_bar_idm45226413447104" where "foo_bar" is the duplicated id
$duplicatedId = substr($id, strlen('duplicated_id_'), strrpos($id, '_') - strlen('duplicated_id_'));
- if ($this->logger !== null) {
- $this->logger->warning("Duplicated id in original ezxmltext for contentobject_attribute.id=$contentFieldId, automatically generated new id : $duplicatedId --> $id");
- }
+ $this->logger->warning("Duplicated id in original ezxmltext for contentobject_attribute.id=$contentFieldId, automatically generated new id : $duplicatedId --> $id");
+ }
+ }
+
+ protected function validateAttributeValues(DOMDocument $document, $contentFieldId)
+ {
+ $xpath = new DOMXPath($document);
+ $whitelist1st = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_';
+ $replaceStr1st = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+
+ $whitelist = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-';
+ $replaceStr = '';
+ /*
+ * We want to pick elements which has id value
+ * #1 not starting with a..z or '_'
+ * #2 not a..z, '0..9', '_' or '-' after 1st character
+ * So, no xpath v2 to our disposal...
+ * 1st line : we check the 1st char(substring) in id, converts it to 'a' if it in whitelist(translate), then check if it string now starts with 'a'(starts-with), then we invert result(not)
+ * : So we replace first char with 'a' if it is whitelisted, then we select the element if id value does not start with 'a'
+ * 2nd line: now we check remaining(omit 1st char) part of string (substring), removes any character that *is* whitelisted(translate), then check if there are any non-whitelisted characters left(string-lenght)
+ * 3rd line: Due to the not() in 1st line, we pick all elements not matching that 1st line. That also includes elements not having a xml:id at all..
+ * : So, we want to make sure we only pick elements which has a xml:id attribute.
+ */
+ $nodes = $xpath->query("//*[
+ (
+ not(starts-with(translate(substring(@xml:id, 1, 1), '$whitelist1st', '$replaceStr1st'), 'a'))
+ or string-length(translate(substring(@xml:id, 2), '$whitelist', '$replaceStr')) > 0
+ ) and string-length(@xml:id) > 0]");
+
+ if ($contentFieldId === null) {
+ $contentFieldId = '[unknown]';
+ }
+ foreach ($nodes as $node) {
+ $orgValue = $node->attributes->getNamedItem('id')->nodeValue;
+ $newValue = 'rewrite_' . $node->attributes->getNamedItem('id')->nodeValue;
+ $newValue = preg_replace("/[^$whitelist]/", '_', $newValue);
+ $node->attributes->getNamedItem('id')->nodeValue = $newValue;
+ $this->logger->warning("Replaced non-validating id value in richtext for contentobject_attribute.id=$contentFieldId, changed from : $orgValue --> $newValue");
}
}
@@ -317,10 +352,11 @@ protected function checkEmptyEmbedTags(DOMDocument $inputDocument)
*
* @param DOMDocument $inputDocument
* @param bool $checkDuplicateIds
+ * @param bool $checkIdValues
* @param null|int $contentFieldId
* @return string
*/
- public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $contentFieldId = null)
+ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $checkIdValues = false, $contentFieldId = null)
{
$this->removeComments($inputDocument);
@@ -329,6 +365,9 @@ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false,
if ($checkDuplicateIds) {
$this->reportNonUniqueIds($convertedDocument, $contentFieldId);
}
+ if ($checkIdValues) {
+ $this->validateAttributeValues($convertedDocument, $contentFieldId);
+ }
// Needed by some disabled output escaping (eg. legacy ezxml paragraph elements)
$convertedDocumentNormalized = new DOMDocument();
@@ -339,7 +378,7 @@ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false,
$result = $convertedDocumentNormalized->saveXML();
- if (!empty($errors) && $this->logger !== null) {
+ if (!empty($errors)) {
$this->logger->error(
"Validation errors when converting ezxmltext for contentobject_attribute.id=$contentFieldId",
['result' => $result, 'errors' => $errors, 'xmlString' => $inputDocument->saveXML()]
diff --git a/tests/lib/FieldType/Converter/RichTextTest.php b/tests/lib/FieldType/Converter/RichTextTest.php
index 56cc6b88..4644414a 100644
--- a/tests/lib/FieldType/Converter/RichTextTest.php
+++ b/tests/lib/FieldType/Converter/RichTextTest.php
@@ -137,7 +137,7 @@ public function testConvert($inputFilePath, $outputFilePath)
$richText = new RichText($apiRepositoryStub);
$richText->setImageContentTypes([27]);
- $result = $richText->convert($inputDocument, true);
+ $result = $richText->convert($inputDocument, true, true);
$convertedDocument = $this->createDocument($result, false);
$expectedDocument = $this->createDocument($outputFilePath);
diff --git a/tests/lib/FieldType/Converter/_fixtures/richtext/input/041-anchor_invalid_id.xml b/tests/lib/FieldType/Converter/_fixtures/richtext/input/041-anchor_invalid_id.xml
new file mode 100644
index 00000000..b2743b38
--- /dev/null
+++ b/tests/lib/FieldType/Converter/_fixtures/richtext/input/041-anchor_invalid_id.xml
@@ -0,0 +1,33 @@
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+
diff --git a/tests/lib/FieldType/Converter/_fixtures/richtext/output/041-anchor_invalid_id.xml b/tests/lib/FieldType/Converter/_fixtures/richtext/output/041-anchor_invalid_id.xml
new file mode 100644
index 00000000..889158bc
--- /dev/null
+++ b/tests/lib/FieldType/Converter/_fixtures/richtext/output/041-anchor_invalid_id.xml
@@ -0,0 +1,34 @@
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+ Here is an anchor
+
+
+