Skip to content

Commit

Permalink
Merge pull request #9927 from ckeditor/ck/9861
Browse files Browse the repository at this point in the history
Other (engine): Fixed parsing leading HTML comments by `HtmlDataProcessor.toView()`. Closes #9861.
  • Loading branch information
ma2ciek committed Jun 25, 2021
2 parents c72859f + 0acdb82 commit 12dc7ba
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 5 deletions.
37 changes: 33 additions & 4 deletions packages/ckeditor5-engine/src/dataprocessor/htmldataprocessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* @module engine/dataprocessor/htmldataprocessor
*/

/* globals document, DOMParser */
/* globals document, DOMParser, Node */

import BasicHtmlWriter from './basichtmlwriter';
import DomConverter from '../view/domconverter';
Expand Down Expand Up @@ -119,10 +119,39 @@ export default class HtmlDataProcessor {
_toDom( data ) {
const document = this._domParser.parseFromString( data, 'text/html' );
const fragment = document.createDocumentFragment();
const nodes = document.body.childNodes;

while ( nodes.length > 0 ) {
fragment.appendChild( nodes[ 0 ] );
// The rules for parsing an HTML string can be read on https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml.
//
// In short, parsing tokens in an HTML string starts with the so-called "initial" insertion mode. When a DOM parser is in this
// state and encounters a comment node, it inserts this comment node as the last child of the newly-created `HTMLDocument` object.
// The parser then proceeds to successive insertion modes while parsing subsequent tokens and appends other nodes (like <html>,
// <head>, <body>) to the `HTMLDocument` object. As a result, the leading comments from the HTML string become the first nodes in
// the `HTMLDocument` object, but not in the <body> collection, because they are ultimately located before the <html> element.
//
// Therefore, to prevent such leading comments from disappearing, they are all moved from the `HTMLDocument` object to the document
// fragment, until the <html> element is encountered.
//
// See: https://github.com/ckeditor/ckeditor5/issues/9861.
let documentChildNode = document.firstChild;

while ( !documentChildNode.isSameNode( document.documentElement ) ) {
const node = documentChildNode;

documentChildNode = documentChildNode.nextSibling;

// It seems that `DOMParser#parseFromString()` adds only comment nodes directly to the `HTMLDocument` object, before the <html>
// node. The condition below is just to be sure we are moving only comment nodes.

/* istanbul ignore else */
if ( node.nodeType == Node.COMMENT_NODE ) {
fragment.appendChild( node );
}
}

const bodyChildNodes = document.body.childNodes;

while ( bodyChildNodes.length > 0 ) {
fragment.appendChild( bodyChildNodes[ 0 ] );
}

return fragment;
Expand Down
150 changes: 149 additions & 1 deletion packages/ckeditor5-engine/tests/dataprocessor/htmldataprocessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
*/

/* globals setTimeout, window */
/* globals setTimeout, window, Node */

import HtmlDataProcessor from '../../src/dataprocessor/htmldataprocessor';
import xssTemplates from '../../tests/dataprocessor/_utils/xsstemplates';
Expand Down Expand Up @@ -94,6 +94,154 @@ describe( 'HtmlDataProcessor', () => {
} );
} );

describe( '_toDom()', () => {
	// Asserts that `node` is a DOM comment node holding exactly `expectedData`.
	const expectComment = ( node, expectedData ) => {
		expect( node.nodeType ).to.equal( Node.COMMENT_NODE );
		expect( node.data ).to.equal( expectedData );
	};

	// Asserts that `node` is a DOM element that serializes to exactly `expectedHtml`.
	const expectElement = ( node, expectedHtml ) => {
		expect( node.nodeType ).to.equal( Node.ELEMENT_NODE );
		expect( node.outerHTML ).to.equal( expectedHtml );
	};

	it( 'should insert nested comment nodes into <body> collection', () => {
		// The input is built from the same pieces that are later used as the expected serialization,
		// since the parsed structure is expected to round-trip unchanged.
		const paragraphHtml =
			'<p>' +
				'<!-- Comment 2 -->' +
				'Paragraph' +
				'<!-- Comment 3 -->' +
			'</p>';

		const divHtml =
			'<div>' +
				'<!-- Comment 1 -->' +
				paragraphHtml +
				'<!-- Comment 4 -->' +
			'</div>';

		const fragment = dataProcessor._toDom( divHtml );

		const div = Array.from( fragment.childNodes )[ 0 ];
		const divChildren = Array.from( div.childNodes );
		const paragraph = divChildren[ 1 ];
		const paragraphChildren = Array.from( paragraph.childNodes );
		const textNode = paragraphChildren[ 1 ];

		expect( fragment.childNodes.length ).to.equal( 1 );
		expect( div.childNodes.length ).to.equal( 3 );
		expect( paragraph.childNodes.length ).to.equal( 3 );

		expectComment( divChildren[ 0 ], ' Comment 1 ' );
		expectComment( paragraphChildren[ 0 ], ' Comment 2 ' );
		expectComment( paragraphChildren[ 2 ], ' Comment 3 ' );
		expectComment( divChildren[ 2 ], ' Comment 4 ' );

		expect( textNode.nodeType ).to.equal( Node.TEXT_NODE );
		expect( textNode.data ).to.equal( 'Paragraph' );

		expectElement( div, divHtml );
		expectElement( paragraph, paragraphHtml );
	} );

	it( 'should insert leading comment nodes from HTML string into <body> collection #1', () => {
		const html =
			'<!-- Comment 1 -->' +
			'<!-- Comment 2 -->' +
			'<h2>Heading</h2>' +
			'<p>Paragraph</p>' +
			'<!-- Comment 3 -->' +
			'<!-- Comment 4 -->';

		const fragment = dataProcessor._toDom( html );
		const children = Array.from( fragment.childNodes );

		expect( fragment.childNodes.length ).to.equal( 6 );

		// Leading comments come first, trailing comments come last.
		expectComment( children[ 0 ], ' Comment 1 ' );
		expectComment( children[ 1 ], ' Comment 2 ' );
		expectComment( children[ 4 ], ' Comment 3 ' );
		expectComment( children[ 5 ], ' Comment 4 ' );

		expectElement( children[ 2 ], '<h2>Heading</h2>' );
		expectElement( children[ 3 ], '<p>Paragraph</p>' );
	} );

	it( 'should insert leading comment nodes from HTML string into <body> collection #2', () => {
		// DOMParser places the <meta> element in the <head>. Every comment node that follows it, up to the first node
		// that is not valid inside the <head> (the <h2> here), is inserted into the <head> as well. Consequently,
		// <!-- Comment 3 --> and <!-- Comment 4 -->, located between <meta> and <h2>, are not part of the result.
		const html =
			'<!-- Comment 1 -->' +
			'<!-- Comment 2 -->' +
			'<meta>' + // Inserted into the <head> by DOMParser#parseFromString().
			'<!-- Comment 3 -->' + // Inserted into the <head> by DOMParser#parseFromString().
			'<!-- Comment 4 -->' + // Inserted into the <head> by DOMParser#parseFromString().
			'<h2>Heading</h2>' +
			'<p>Paragraph</p>' +
			'<!-- Comment 5 -->' +
			'<!-- Comment 6 -->';

		const fragment = dataProcessor._toDom( html );
		const children = Array.from( fragment.childNodes );

		expect( fragment.childNodes.length ).to.equal( 6 );

		expectComment( children[ 0 ], ' Comment 1 ' );
		expectComment( children[ 1 ], ' Comment 2 ' );
		expectComment( children[ 4 ], ' Comment 5 ' );
		expectComment( children[ 5 ], ' Comment 6 ' );

		expectElement( children[ 2 ], '<h2>Heading</h2>' );
		expectElement( children[ 3 ], '<p>Paragraph</p>' );
	} );
} );

describe( 'toData()', () => {
it( 'should return empty string when empty DocumentFragment is passed', () => {
const fragment = new ViewDocumentFragment( viewDocument );
Expand Down

0 comments on commit 12dc7ba

Please sign in to comment.