Skip to content

Commit

Permalink
Use a whitelist to strip out potential issues rather than a regex.
Browse files Browse the repository at this point in the history
The method will leave empty <use/> elements. That’s no real issue though as far as I can see.
  • Loading branch information
darylldoyle committed Nov 6, 2019
1 parent a85dda1 commit 6421560
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 34 deletions.
84 changes: 50 additions & 34 deletions src/Sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
class Sanitizer
{

/**
* Regex to catch script and data values in attributes
*/
const SCRIPT_REGEX = '/(?:\w+script|data)(?:\s)?:/xi';

/**
* @var \DOMDocument
*/
Expand Down Expand Up @@ -372,23 +367,12 @@ protected function cleanAttributesOnWhitelist(\DOMElement $element)
protected function cleanXlinkHrefs(\DOMElement $element)
{
$xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
$xlinks = $this->preFilterHrefValues($xlinks);
if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) {
if (!in_array(substr($xlinks, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);


}
if (false === $this->isHrefSafeValue($xlinks)) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}

Expand All @@ -400,8 +384,7 @@ protected function cleanXlinkHrefs(\DOMElement $element)
protected function cleanHrefs(\DOMElement $element)
{
$href = $element->getAttribute('href');
$href = $this->preFilterHrefValues($href);
if (preg_match(self::SCRIPT_REGEX, $href) === 1) {
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute('href');
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
Expand All @@ -410,16 +393,49 @@ protected function cleanHrefs(\DOMElement $element)
}
}

/**
* Only allow basic chars to run through the Script Regex.
*
* This should stop people from hiding scripts with hidden charachters etc.
*
* @param $value
* @return string|string[]|null
*/
protected function preFilterHrefValues($value) {
return preg_replace( '/[^a-zA-Z0-9:\-\(\)\']/', '', $value );
/**
* Only allow whitelisted starts to be within the href.
*
* This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
* This stops the need for us to use a complicated script regex.
*
* @param $value
* @return bool
*/
protected function isHrefSafeValue($value) {

// Allow fragment identifiers.
if ('#' === substr($value, 0, 1)) {
return true;
}

// Allow relative URIs.
if ('/' === substr($value, 0, 1)) {
return true;
}

// Allow HTTPS domains.
if ('https://' === substr($value, 0, 8)) {
return true;
}

// Allow HTTP domains.
if ('http://' === substr($value, 0, 7)) {
return true;
}

// Allow known data URIs.
if (in_array(substr($value, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
return true;
}

return false;
}

/**
Expand Down
1 change: 1 addition & 0 deletions tests/data/useClean.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 6421560

Please sign in to comment.