Skip to content

Commit

Permalink
Merge pull request #34 from darylldoyle/fix/31-xss
Browse files Browse the repository at this point in the history
Use whitelist to fix any issues with scripts/data embeds in href value
  • Loading branch information
darylldoyle committed Nov 7, 2019
2 parents 51ca4b7 + 6421560 commit 4cf8d0f
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 22 deletions.
74 changes: 52 additions & 22 deletions src/Sanitizer.php
Expand Up @@ -18,11 +18,6 @@
class Sanitizer
{

/**
* Regex to catch script and data values in attributes
*/
const SCRIPT_REGEX = '/(?:\w+script|data)(?:\s)?:/xi';

/**
* @var \DOMDocument
*/
Expand Down Expand Up @@ -372,22 +367,12 @@ protected function cleanAttributesOnWhitelist(\DOMElement $element)
protected function cleanXlinkHrefs(\DOMElement $element)
{
$xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) {
if (!in_array(substr($xlinks, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);


}
if (false === $this->isHrefSafeValue($xlinks)) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}

Expand All @@ -399,7 +384,7 @@ protected function cleanXlinkHrefs(\DOMElement $element)
protected function cleanHrefs(\DOMElement $element)
{
$href = $element->getAttribute('href');
if (preg_match(self::SCRIPT_REGEX, $href) === 1) {
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute('href');
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
Expand All @@ -408,6 +393,51 @@ protected function cleanHrefs(\DOMElement $element)
}
}

/**
* Only allow whitelisted starts to be within the href.
*
* This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
* This stops the need for us to use a complicated script regex.
*
* @param $value
* @return bool
*/
protected function isHrefSafeValue($value) {

// Allow fragment identifiers.
if ('#' === substr($value, 0, 1)) {
return true;
}

// Allow relative URIs.
if ('/' === substr($value, 0, 1)) {
return true;
}

// Allow HTTPS domains.
if ('https://' === substr($value, 0, 8)) {
return true;
}

// Allow HTTP domains.
if ('http://' === substr($value, 0, 7)) {
return true;
}

// Allow known data URIs.
if (in_array(substr($value, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
return true;
}

return false;
}

/**
* Removes non-printable ASCII characters from string & trims it
*
Expand Down
1 change: 1 addition & 0 deletions tests/data/hrefCleanOne.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions tests/data/hrefTestOne.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions tests/data/useClean.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 4cf8d0f

Please sign in to comment.