Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Commit

Permalink
Merge pull request #30 from ckeditor/t/29
Browse files Browse the repository at this point in the history
Internal: Moved parsing and spacing normalization functions to separate files. Closes #29.
  • Loading branch information
Reinmar authored Nov 6, 2018
2 parents d2ccc38 + 3c89aa8 commit 7aec088
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 55 deletions.
46 changes: 3 additions & 43 deletions src/filters/utils.js → src/filters/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
*/

/**
* @module paste-from-office/filters/utils
* @module paste-from-office/filters/parse
*/

/* globals DOMParser */

import DomConverter from '@ckeditor/ckeditor5-engine/src/view/domconverter';
import { NBSP_FILLER } from '@ckeditor/ckeditor5-engine/src/view/filler';

import { normalizeSpacing, normalizeSpacerunSpans } from './space';

/**
* Parses provided HTML extracting contents of `<body>` and `<style>` tags.
*
Expand Down Expand Up @@ -89,45 +91,3 @@ function extractStyles( htmlDocument ) {
stylesString: stylesString.join( ' ' )
};
}

// Protects trailing spaces from being stripped during further DOM/View processing
// (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
//
// First, whitespace-only Safari "space spans" are collapsed. Then every space directly preceding
// an element's closing tag, or directly preceding the Word specific empty `<o:p></o:p>` tag,
// is replaced with `&nbsp;`.
//
// @param {String} htmlString HTML string in which spacing should be normalized.
// @returns {String} Input HTML with spaces normalized.
function normalizeSpacing( htmlString ) {
	// Two passes are needed to cover nested spans: the first pass collapses the inner span,
	// which turns the outer one into a whitespace-only span for the second pass.
	const afterFirstPass = normalizeSafariSpaceSpans( htmlString );
	const afterSecondPass = normalizeSafariSpaceSpans( afterFirstPass );

	const closingTagsFixed = afterSecondPass.replace( / <\//g, '\u00A0</' );

	return closingTagsFixed.replace( / <o:p><\/o:p>/g, '\u00A0<o:p></o:p>' );
}

// Collapses the whitespace-only spans generated by Safari when content is pasted from Word
// (`<span class="Apple-converted-space"> </span>`, as well as attribute-less `<span> </span>`).
//
// A span holding exactly one whitespace character is replaced with a single regular space. A span holding
// a longer whitespace sequence is replaced with `&nbsp; ` pairs trimmed to the length of that sequence.
// This prevents spaces from being removed during further DOM/View processing
// (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
//
// This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util} but uses
// regular spaces / &nbsp; sequence for replacement.
//
// @param {String} htmlString HTML string in which spacing should be normalized.
// @returns {String} Input HTML with spaces normalized.
function normalizeSafariSpaceSpans( htmlString ) {
	const spaceSpanPattern = /<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g;

	return htmlString.replace( spaceSpanPattern, ( wholeSpan, whitespace ) => {
		const count = whitespace.length;

		if ( count === 1 ) {
			return ' ';
		}

		// `count` pairs are always enough to cut `count` characters from.
		return '\u00A0 '.repeat( count ).slice( 0, count );
	} );
}

// Normalizes spacing in special Word `spacerun spans` (`<span style='mso-spacerun:yes'>\s+</span>`) by replacing
// their content with `&nbsp; ` pairs trimmed to the length of the original content. This prevents spaces from
// being removed during further DOM/View processing
// (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
//
// Spans which are empty or whose first child carries no text data are left untouched.
//
// @param {Document} htmlDocument Native `Document` object in which spacing should be normalized.
function normalizeSpacerunSpans( htmlDocument ) {
	htmlDocument.querySelectorAll( 'span[style*=spacerun]' ).forEach( el => {
		const firstChild = el.childNodes[ 0 ];

		// An empty span has no child nodes and an element child has no `data` property.
		// Reading `.data.length` in these cases would throw, so there is nothing to normalize.
		if ( !firstChild || typeof firstChild.data !== 'string' ) {
			return;
		}

		// Use the first child's `data.length` instead of `el.innerText.length`. For `el.innerText.length` which
		// contains spaces mixed with `&nbsp;` Edge browser returns incorrect length.
		const innerTextLength = firstChild.data.length;

		el.innerHTML = '\u00A0 '.repeat( innerTextLength ).slice( 0, innerTextLength );
	} );
}
55 changes: 55 additions & 0 deletions src/filters/space.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module paste-from-office/filters/space
*/

/**
 * Protects trailing spaces from being stripped during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
 *
 * First, whitespace-only Safari "space spans" are collapsed. Then every space directly preceding
 * an element's closing tag, or directly preceding the Word specific empty `<o:p></o:p>` tag,
 * is replaced with `&nbsp;`.
 *
 * @param {String} htmlString HTML string in which spacing should be normalized.
 * @returns {String} Input HTML with spaces normalized.
 */
export function normalizeSpacing( htmlString ) {
	// Nested spans need two passes: once the inner span is collapsed, the outer one
	// becomes a whitespace-only span which the second pass can pick up.
	let html = normalizeSafariSpaceSpans( htmlString );

	html = normalizeSafariSpaceSpans( html );
	html = html.replace( / <\//g, '\u00A0</' );
	html = html.replace( / <o:p><\/o:p>/g, '\u00A0<o:p></o:p>' );

	return html;
}

/**
 * Normalizes spacing in special Word `spacerun spans` (`<span style='mso-spacerun:yes'>\s+</span>`) by replacing
 * their content with `&nbsp; ` pairs trimmed to the length of the original content. This prevents spaces from
 * being removed during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
 *
 * Spans which are empty or whose first child carries no text data are left untouched.
 *
 * @param {Document} htmlDocument Native `Document` object in which spacing should be normalized.
 */
export function normalizeSpacerunSpans( htmlDocument ) {
	htmlDocument.querySelectorAll( 'span[style*=spacerun]' ).forEach( el => {
		const firstChild = el.childNodes[ 0 ];

		// An empty span has no child nodes and an element child has no `data` property.
		// Reading `.data.length` in these cases would throw, so there is nothing to normalize.
		if ( !firstChild || typeof firstChild.data !== 'string' ) {
			return;
		}

		// Use the first child's `data.length` instead of `el.innerText.length`. For `el.innerText.length` which
		// contains spaces mixed with `&nbsp;` Edge browser returns incorrect length.
		const innerTextLength = firstChild.data.length;

		el.innerHTML = '\u00A0 '.repeat( innerTextLength ).slice( 0, innerTextLength );
	} );
}

// Collapses the whitespace-only spans generated by Safari when content is pasted from Word
// (`<span class="Apple-converted-space"> </span>`, as well as attribute-less `<span> </span>`).
//
// A span holding exactly one whitespace character is replaced with a single regular space. A span holding
// a longer whitespace sequence is replaced with `&nbsp; ` pairs trimmed to the length of that sequence.
// This prevents spaces from being removed during further DOM/View processing
// (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
//
// This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util} but uses
// regular spaces / &nbsp; sequence for replacement.
//
// @param {String} htmlString HTML string in which spacing should be normalized.
// @returns {String} Input HTML with spaces normalized.
function normalizeSafariSpaceSpans( htmlString ) {
	const toSpacing = ( matchedSpan, run ) => {
		const runLength = run.length;
		const nbspPairs = '\u00A0 '.repeat( runLength );

		// A lone whitespace character needs no protection; longer runs get alternating `&nbsp;` and space.
		return runLength === 1 ? ' ' : nbspPairs.slice( 0, runLength );
	};

	return htmlString.replace( /<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g, toSpacing );
}

2 changes: 1 addition & 1 deletion src/pastefromoffice.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import Plugin from '@ckeditor/ckeditor5-core/src/plugin';
import Clipboard from '@ckeditor/ckeditor5-clipboard/src/clipboard';

import { parseHtml } from './filters/utils';
import { parseHtml } from './filters/parse';
import { transformListItemLikeElementsIntoLists } from './filters/list';

/**
Expand Down
22 changes: 11 additions & 11 deletions tests/filters/utils.js → tests/filters/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

import DocumentFragment from '@ckeditor/ckeditor5-engine/src/view/documentfragment';

import { parseHtml } from '../../src/filters/utils';
import { parseHtml } from '../../src/filters/parse';

describe( 'Filters', () => {
describe( 'Utils', () => {
describe( 'parse', () => {
describe( 'parseHtml', () => {
it( 'correctly parses HTML with body and one style tag', () => {
const html = '<head><style>p { color: red; } a { font-size: 12px; }</style></head><body><p>Foo Bar</p></body>';
Expand Down Expand Up @@ -77,20 +77,20 @@ describe( 'Filters', () => {

expect( stylesString ).to.equal( '' );
} );
} );

it( 'correctly parses HTML with body contents and empty style tag', () => {
const html = '<head><style></style></head><body><p>Foo Bar</p></body>';
const { body, bodyString, styles, stylesString } = parseHtml( html );
it( 'correctly parses HTML with body contents and empty style tag', () => {
const html = '<head><style></style></head><body><p>Foo Bar</p></body>';
const { body, bodyString, styles, stylesString } = parseHtml( html );

expect( body ).to.instanceof( DocumentFragment );
expect( body.childCount ).to.equal( 1 );
expect( body ).to.instanceof( DocumentFragment );
expect( body.childCount ).to.equal( 1 );

expect( bodyString ).to.equal( '<p>Foo Bar</p>' );
expect( bodyString ).to.equal( '<p>Foo Bar</p>' );

expect( styles.length ).to.equal( 0 );
expect( styles.length ).to.equal( 0 );

expect( stylesString ).to.equal( '' );
expect( stylesString ).to.equal( '' );
} );
} );
} );
} );
61 changes: 61 additions & 0 deletions tests/filters/space.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/* globals DOMParser */

import { normalizeSpacing, normalizeSpacerunSpans } from '../../src/filters/space';

// Tests for the spacing normalization helpers exported by `src/filters/space`.
// NOTE(review): these fixtures depend on the exact number of consecutive spaces in the string
// literals - verify against the repository that no whitespace run has been collapsed.
describe( 'Filters', () => {
describe( 'space', () => {
// `normalizeSpacing()` works on an HTML string and returns a new string.
describe( 'normalizeSpacing', () => {
it( 'should replace last space before closing tag with NBSP', () => {
const input = '<p>Foo </p><p><span> Bar </span> Baz </p>';
const expected = '<p>Foo\u00A0</p><p><span> Bar \u00A0</span> Baz\u00A0</p>';

expect( normalizeSpacing( input ) ).to.equal( expected );
} );

it( 'should replace last space before special "o:p" tag with NBSP', () => {
const input = '<p>Foo <o:p></o:p><span> <o:p></o:p> Bar</span></p>';
const expected = '<p>Foo \u00A0<o:p></o:p><span>\u00A0<o:p></o:p> Bar</span></p>';

expect( normalizeSpacing( input ) ).to.equal( expected );
} );

// Covers both the span with the `Apple-converted-space` class and the attribute-less span.
it( 'should normalize Safari "space spans"', () => {
const input = '<p>Foo <span class="Apple-converted-space"> </span> Baz <span> </span></p>';
const expected = '<p>Foo \u00A0 \u00A0 Baz \u00A0\u00A0</p>';

expect( normalizeSpacing( input ) ).to.equal( expected );
} );

// A "space span" placed inside another "space span".
it( 'should normalize nested Safari "space spans"', () => {
const input = '<p> Foo <span class="Apple-converted-space"> <span class="Apple-converted-space"> </span></span> Baz</p>';
const expected = '<p> Foo \u00A0 \u00A0 \u00A0 Baz</p>';

expect( normalizeSpacing( input ) ).to.equal( expected );
} );
} );

// `normalizeSpacerunSpans()` works on a native `Document` and modifies it in place,
// so the input has to be parsed first and the result is read back from `body.innerHTML`.
describe( 'normalizeSpacerunSpans', () => {
it( 'should normalize spaces inside special "span.spacerun" elements', () => {
const input = '<p> <span style=\'mso-spacerun:yes\'> </span>Foo</p>' +
'<p> Baz <span style=\'mso-spacerun:yes\'> </span></p>';

const expected = '<p> <span style="mso-spacerun:yes">&nbsp; &nbsp;</span>Foo</p>' +
'<p> Baz <span style="mso-spacerun:yes">&nbsp; &nbsp; &nbsp; </span></p>';

const domParser = new DOMParser();
const htmlDocument = domParser.parseFromString( input, 'text/html' );

// Sanity check: the parsed markup must differ from the expected one before normalization.
// Single quotes are changed to double quotes and `: ` to `:` before comparing - presumably to
// even out differences in how browsers serialize the `style` attribute (TODO confirm).
expect( htmlDocument.body.innerHTML.replace( /'/g, '"' ).replace( /: /g, ':' ) ).to.not.equal( expected );

normalizeSpacerunSpans( htmlDocument );

// The same serialization clean-up is applied to the normalized document.
expect( htmlDocument.body.innerHTML.replace( /'/g, '"' ).replace( /: /g, ':' ) ).to.equal( expected );
} );
} );
} );
} );

0 comments on commit 7aec088

Please sign in to comment.