fix(core): traverse and sanitize content of unsafe elements

In the past, the sanitizer would remove unsafe elements, but still traverse and sanitize (and potentially preserve) their content. This was problematic in the case of `<style></style>` tags, whose content would be converted to HTML text nodes. In order to fix this, the sanitizer's behavior was changed in #25879 to ignore the content of _all_ unsafe elements. While this fixed the problem with `<style></style>` tags, it unnecessarily removed the contents for _any_ unsafe element. This was an unneeded breaking change. This commit partially restores the old sanitizer behavior (namely traversing content of unsafe elements), but introduces a list of elements whose content should not be traversed if the elements themselves are considered unsafe. Currently, this list contains `style` and `script`. Related to #25879 and #26007. Fixes #28427
angular · Feb 18, 2019 · 37b6644 · 37b6644
1 parent 6b511a3
commit 37b6644
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 24 deletions.
diff --git a/packages/core/src/sanitization/html_sanitizer.ts b/packages/core/src/sanitization/html_sanitizer.ts
@@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(
 
 export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);
 
+// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
+//
+// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
+// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
+// don't want to preserve the content, if the elements themselves are going to be removed.
+const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style');
+
 /**
  * SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
  * attributes.
@@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
     // However this code never accesses properties off of `document` before deleting its contents
     // again, so it shouldn't be vulnerable to DOM clobbering.
     let current: Node = el.firstChild !;
-    let elementValid = true;
+    let traverseContent = true;
     while (current) {
       if (current.nodeType === Node.ELEMENT_NODE) {
-        elementValid = this.startElement(current as Element);
+        traverseContent = this.startElement(current as Element);
       } else if (current.nodeType === Node.TEXT_NODE) {
         this.chars(current.nodeValue !);
       } else {
         // Strip non-element, non-text nodes.
         this.sanitizedSomething = true;
       }
-      if (elementValid && current.firstChild) {
+      if (traverseContent && current.firstChild) {
         current = current.firstChild !;
         continue;
       }
@@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
   }
 
   /**
-   * Outputs only valid Elements.
-   *
-   * Invalid elements are skipped.
+   * Sanitizes an opening element tag (if valid) and returns whether the element's contents should
+   * be traversed. Element content must always be traversed (even if the element itself is not
+   * valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
    *
-   * @param element element to sanitize
-   * Returns true if the element is valid.
+   * @param element The element to sanitize.
+   * @return True if the element's contents should be traversed.
    */
   private startElement(element: Element): boolean {
     const tagName = element.nodeName.toLowerCase();
     if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
       this.sanitizedSomething = true;
-      return false;
+      return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
     }
     this.buf.push('<');
     this.buf.push(tagName);

diff --git a/packages/core/test/sanitization/html_sanitizer_spec.ts b/packages/core/test/sanitization/html_sanitizer_spec.ts
@@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
           .toEqual('<p>Hello <br> World</p>');
     });
 
-    it('supports removal of namespaced elements',
-       () => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); });
+    it('supports namespaced elements',
+       () => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });
 
     it('supports namespaced attributes', () => {
       expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
@@ -85,42 +85,36 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
           .toEqual('<p alt="% &amp; &#34; !">Hello</p>');  // NB: quote encoded as ASCII &#34;.
     });
 
-    describe('should strip dangerous elements and its content', () => {
+    describe('should strip dangerous elements (but traverse its content)', () => {
       const dangerousTags = [
         'form',
+        'frame',
         'object',
         'textarea',
         'button',
         'option',
         'select',
-        'script',
-        'style',
       ];
 
       for (const tag of dangerousTags) {
         it(`${tag}`,
-           () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
+           () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
       }
       const dangerousSelfClosingTags = [
-        'frameset',
-        'embed',
-        'input',
-        'param',
         'base',
         'basefont',
-        'param',
+        'embed',
+        'frameset',
+        'input',
         'link',
+        'param',
       ];
 
       for (const tag of dangerousSelfClosingTags) {
         it(`${tag}`, () => {
           expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
         });
       }
-
-      it(`swallows frame entirely`, () => {
-        expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
-      });
     });
 
     describe('should strip dangerous attributes', () => {
@@ -133,6 +127,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
       }
     });
 
+    it('ignores content of script elements', () => {
+      expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
+      expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
+          .toEqual('<div>hi</div>');
+      expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
+    });
+
     it('ignores content of style elements', () => {
       expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
           .toEqual('<div>hi</div>');