Skip to content

Commit

Permalink
fix(core): traverse and sanitize content of unsafe elements
Browse files Browse the repository at this point in the history
In the past, the sanitizer would remove unsafe elements, but still
traverse and sanitize (and potentially preserve) their content. This was
problematic in the case of `<style></style>` tags, whose content would
be converted to HTML text nodes.

In order to fix this, the sanitizer's behavior was changed in #25879 to
ignore the content of _all_ unsafe elements. While this fixed the
problem with `<style></style>` tags, it unnecessarily removed the
contents for _any_ unsafe element. This was an unneeded breaking change.

This commit partially restores the old sanitizer behavior (namely
traversing content of unsafe elements), but introduces a list of
elements whose content should not be traversed if the elements
themselves are considered unsafe. Currently, this list contains `style`
and `script`.

Related to #25879 and #26007.

Fixes #28427
  • Loading branch information
gkalpak committed Feb 18, 2019
1 parent 6b511a3 commit 37b6644
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 24 deletions.
25 changes: 16 additions & 9 deletions packages/core/src/sanitization/html_sanitizer.ts
Expand Up @@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(

export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);

// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
//
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
// don't want to preserve the content, if the elements themselves are going to be removed.
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style');

/**
* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
* attributes.
Expand All @@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
// However this code never accesses properties off of `document` before deleting its contents
// again, so it shouldn't be vulnerable to DOM clobbering.
let current: Node = el.firstChild !;
let elementValid = true;
let traverseContent = true;
while (current) {
if (current.nodeType === Node.ELEMENT_NODE) {
elementValid = this.startElement(current as Element);
traverseContent = this.startElement(current as Element);
} else if (current.nodeType === Node.TEXT_NODE) {
this.chars(current.nodeValue !);
} else {
// Strip non-element, non-text nodes.
this.sanitizedSomething = true;
}
if (elementValid && current.firstChild) {
if (traverseContent && current.firstChild) {
current = current.firstChild !;
continue;
}
Expand All @@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
}

/**
* Outputs only valid Elements.
*
* Invalid elements are skipped.
* Sanitizes an opening element tag (if valid) and returns whether the element's contents should
* be traversed. Element content must always be traversed (even if the element itself is not
* valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
*
* @param element element to sanitize
* Returns true if the element is valid.
* @param element The element to sanitize.
* @return True if the element's contents should be traversed.
*/
private startElement(element: Element): boolean {
const tagName = element.nodeName.toLowerCase();
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
this.sanitizedSomething = true;
return false;
return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
}
this.buf.push('<');
this.buf.push(tagName);
Expand Down
31 changes: 16 additions & 15 deletions packages/core/test/sanitization/html_sanitizer_spec.ts
Expand Up @@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p>Hello <br> World</p>');
});

it('supports removal of namespaced elements',
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); });
it('supports namespaced elements',
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });

it('supports namespaced attributes', () => {
expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
Expand Down Expand Up @@ -85,42 +85,36 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p alt="% &amp; &#34; !">Hello</p>'); // NB: quote encoded as ASCII &#34;.
});

describe('should strip dangerous elements and its content', () => {
describe('should strip dangerous elements (but traverse its content)', () => {
const dangerousTags = [
'form',
'frame',
'object',
'textarea',
'button',
'option',
'select',
'script',
'style',
];

for (const tag of dangerousTags) {
it(`${tag}`,
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
}
const dangerousSelfClosingTags = [
'frameset',
'embed',
'input',
'param',
'base',
'basefont',
'param',
'embed',
'frameset',
'input',
'link',
'param',
];

for (const tag of dangerousSelfClosingTags) {
it(`${tag}`, () => {
expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
});
}

it(`swallows frame entirely`, () => {
expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
});
});

describe('should strip dangerous attributes', () => {
Expand All @@ -133,6 +127,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
}
});

it('ignores content of script elements', () => {
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
.toEqual('<div>hi</div>');
expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
});

it('ignores content of style elements', () => {
expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
.toEqual('<div>hi</div>');
Expand Down

0 comments on commit 37b6644

Please sign in to comment.