Skip to content
Permalink
Browse files Browse the repository at this point in the history
Merge pull request from GHSA-fqx8-v33p-4qcc
Advisory fix 1
  • Loading branch information
darylldoyle committed Feb 13, 2022
2 parents 307b420 + 689e869 commit 17e12ba
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 72 deletions.
123 changes: 98 additions & 25 deletions src/Sanitizer.php
Expand Up @@ -214,13 +214,10 @@ public function sanitize($dirty)
$this->elementReferenceResolver->collect();
$elementsToRemove = $this->elementReferenceResolver->getElementsToRemove();

// Grab all the elements
$allElements = $this->xmlDocument->getElementsByTagName("*");

// remove doctype after node elements have been analyzed
$this->removeDoctype();
// Start the cleaning proccess
$this->startClean($allElements, $elementsToRemove);
$this->startClean($this->xmlDocument->childNodes, $elementsToRemove);

// Save cleaned XML to a variable
if ($this->removeXMLTag) {
Expand Down Expand Up @@ -316,33 +313,63 @@ protected function startClean(\DOMNodeList $elements, array $elementsToRemove)
continue;
}

// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}

$this->cleanHrefs($currentElement);

$this->cleanXlinkHrefs($currentElement);

$this->cleanAttributesOnWhitelist($currentElement);

if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
if ($currentElement instanceof \DOMElement) {
// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}

$this->cleanHrefs( $currentElement );

$this->cleanXlinkHrefs( $currentElement );

$this->cleanAttributesOnWhitelist($currentElement);

if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
}

// Strip out font elements that will break out of foreign content.
if (strtolower($currentElement->tagName) === 'font') {
$breaksOutOfForeignContent = false;
for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) {
// get attribute name
$attrName = $currentElement->attributes->item( $x )->name;

if (in_array($attrName, ['face', 'color', 'size'])) {
$breaksOutOfForeignContent = true;
}
}

if ($breaksOutOfForeignContent) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
}
}

$this->cleanUnsafeNodes($currentElement);

if ($currentElement->hasChildNodes()) {
$this->startClean($currentElement->childNodes, $elementsToRemove);
}
}
}
Expand Down Expand Up @@ -627,4 +654,50 @@ public function setUseNestingLimit($limit)
{
$this->useNestingLimit = (int) $limit;
}

/**
* Determines whether a node is safe or not.
*
* @param \DOMNode $node
* @return bool
*/
protected function isNodeSafe(\DOMNode $node) {
$safeNodes = [
'#text',
];

if (!in_array($node->nodeName, $safeNodes, true)) {
return false;
}

return true;
}

/**
* Remove nodes that are either invalid or malformed.
*
* @param \DOMNode $currentElement The current element.
*/
protected function cleanUnsafeNodes(\DOMNode $currentElement) {
// If the element doesn't have a tagname, remove it and continue with next iteration
if (!property_exists($currentElement, 'tagName')) {
if (!$this->isNodeSafe($currentElement)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
'line' => $currentElement->getLineNo(),
);

return;
}
}

if ( $currentElement->childNodes && $currentElement->childNodes->length > 0 ) {
for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) {
/** @var \DOMElement $childElement */
$childElement = $currentElement->childNodes->item($j);
$this->cleanUnsafeNodes($childElement);
}
}
}
}
46 changes: 0 additions & 46 deletions src/data/AllowedTags.php
Expand Up @@ -29,56 +29,35 @@ public static function getTags()
'article',
'aside',
'audio',
'b',
'bdi',
'bdo',
'big',
'blink',
'blockquote',
'body',
'br',
'button',
'canvas',
'caption',
'center',
'cite',
'code',
'col',
'colgroup',
'content',
'data',
'datalist',
'dd',
'decorator',
'del',
'details',
'dfn',
'dir',
'div',
'dl',
'dt',
'element',
'em',
'fieldset',
'figcaption',
'figure',
'font',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'head',
'header',
'hgroup',
'hr',
'html',
'i',
'image',
'img',
'input',
'ins',
'kbd',
Expand All @@ -89,52 +68,27 @@ public static function getTags()
'map',
'mark',
'marquee',
'menu',
'menuitem',
'meter',
'nav',
'nobr',
'ol',
'optgroup',
'option',
'output',
'p',
'pre',
'progress',
'q',
'rp',
'rt',
'ruby',
's',
'samp',
'section',
'select',
'shadow',
'small',
'source',
'spacer',
'span',
'strike',
'strong',
'style',
'sub',
'summary',
'sup',
'table',
'tbody',
'td',
'template',
'textarea',
'tfoot',
'th',
'thead',
'time',
'tr',
'track',
'tt',
'u',
'ul',
'var',
'video',
'wbr',

Expand Down
13 changes: 13 additions & 0 deletions tests/SanitizerTest.php
Expand Up @@ -294,4 +294,17 @@ public function testLargeUseDOSattacksAreNullified()

self::assertXmlStringEqualsXmlString($expected, $cleanData);
}

public function testInvalidNodesAreHandled()
{
$dataDirectory = __DIR__ . '/data';
$initialData = file_get_contents($dataDirectory . '/htmlTest.svg');
$expected = file_get_contents($dataDirectory . '/htmlClean.svg');

$sanitizer = new Sanitizer();
$sanitizer->minify(false);
$cleanData = $sanitizer->sanitize($initialData);

self::assertXmlStringEqualsXmlString($expected, $cleanData);
}
}
2 changes: 1 addition & 1 deletion tests/data/entityClean.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions tests/data/htmlClean.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions tests/data/htmlTest.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 17e12ba

Please sign in to comment.