WordPress · ellatrix · Mar 28, 2024 · Feb 29, 2024 · Feb 29, 2024 · Mar 28, 2024
diff --git a/packages/blocks/src/api/raw-handling/blockquote-normaliser.js b/packages/blocks/src/api/raw-handling/blockquote-normaliser.js
@@ -3,10 +3,12 @@
  */
 import normaliseBlocks from './normalise-blocks';
 
-export default function blockquoteNormaliser( node ) {
-	if ( node.nodeName !== 'BLOCKQUOTE' ) {
-		return;
-	}
+export default function blockquoteNormaliser( options ) {
+	return ( node ) => {
+		if ( node.nodeName !== 'BLOCKQUOTE' ) {
+			return;
+		}
 
-	node.innerHTML = normaliseBlocks( node.innerHTML );
+		node.innerHTML = normaliseBlocks( node.innerHTML, options );
+	};
 }
diff --git a/packages/blocks/src/api/raw-handling/index.js b/packages/blocks/src/api/raw-handling/index.js
@@ -65,11 +65,11 @@ export function rawHandler( { HTML = '' } ) {
 				figureContentReducer,
 				// Needed to create the quote block, which cannot handle text
 				// without wrapper paragraphs.
-				blockquoteNormaliser,
+				blockquoteNormaliser( { raw: true } ),
 			];
 
 			piece = deepFilterHTML( piece, filters, blockContentSchema );
-			piece = normaliseBlocks( piece );
+			piece = normaliseBlocks( piece, { raw: true } );
 
 			return htmlToBlocks( piece, rawHandler );
 		} )

diff --git a/packages/blocks/src/api/raw-handling/normalise-blocks.js b/packages/blocks/src/api/raw-handling/normalise-blocks.js
@@ -3,7 +3,7 @@
  */
 import { isEmpty, isPhrasingContent } from '@wordpress/dom';
 
-export default function normaliseBlocks( HTML ) {
+export default function normaliseBlocks( HTML, options = {} ) {
 	const decuDoc = document.implementation.createHTMLDocument( '' );
 	const accuDoc = document.implementation.createHTMLDocument( '' );
 
@@ -47,7 +47,7 @@ export default function normaliseBlocks( HTML ) {
 				}
 			} else if ( node.nodeName === 'P' ) {
 				// Only append non-empty paragraph nodes.
-				if ( isEmpty( node ) ) {
+				if ( isEmpty( node ) && ! options.raw ) {
 					decu.removeChild( node );
 				} else {
 					accu.appendChild( node );

diff --git a/packages/blocks/src/api/raw-handling/paste-handler.js b/packages/blocks/src/api/raw-handling/paste-handler.js
@@ -191,7 +191,7 @@ export function pasteHandler( {
 				commentRemover,
 				iframeRemover,
 				figureContentReducer,
-				blockquoteNormaliser,
+				blockquoteNormaliser(),
 				divNormaliser,
 			];
 

diff --git a/packages/blocks/src/api/raw-handling/shortcode-converter.js b/packages/blocks/src/api/raw-handling/shortcode-converter.js
@@ -14,6 +14,9 @@ import { applyBuiltInValidationFixes } from '../parser/apply-built-in-validation
 const castArray = ( maybeArray ) =>
 	Array.isArray( maybeArray ) ? maybeArray : [ maybeArray ];
 
+const beforeLineRegexp = /(\n|<p>)\s*$/;
+const afterLineRegexp = /^\s*(\n|<\/p>)/;
+
 function segmentHTMLToShortcodeBlock(
 	HTML,
 	lastIndex = 0,
@@ -56,8 +59,8 @@ function segmentHTMLToShortcodeBlock(
 		if (
 			! match.shortcode.content?.includes( '<' ) &&
 			! (
-				/(\n|<p>)\s*$/.test( beforeHTML ) &&
-				/^\s*(\n|<\/p>)/.test( afterHTML )
+				beforeLineRegexp.test( beforeHTML ) &&
+				afterLineRegexp.test( afterHTML )
 			)
 		) {
 			return segmentHTMLToShortcodeBlock( HTML, lastIndex );
@@ -143,9 +146,13 @@ function segmentHTMLToShortcodeBlock(
 		}
 
 		return [
-			...segmentHTMLToShortcodeBlock( beforeHTML ),
+			...segmentHTMLToShortcodeBlock(
+				beforeHTML.replace( beforeLineRegexp, '' )
+			),
 			...blocks,
-			...segmentHTMLToShortcodeBlock( afterHTML ),
+			...segmentHTMLToShortcodeBlock(
+				afterHTML.replace( afterLineRegexp, '' )
+			),
 		];
 	}
 

diff --git a/packages/blocks/src/api/raw-handling/special-comment-converter.js b/packages/blocks/src/api/raw-handling/special-comment-converter.js
@@ -23,58 +23,20 @@ export default function specialCommentConverter( node, doc ) {
 		return;
 	}
 
-	if ( node.nodeValue === 'nextpage' ) {
-		replace( node, createNextpage( doc ) );
+	if (
+		node.nodeValue !== 'nextpage' &&
+		node.nodeValue.indexOf( 'more' ) !== 0
+	) {
 		return;
 	}
 
-	if ( node.nodeValue.indexOf( 'more' ) === 0 ) {
-		moreCommentConverter( node, doc );
-	}
-}
-
-/**
- * Convert `<!--more-->` as well as the `<!--more Some text-->` variant
- * and its `<!--noteaser-->` companion into the custom element
- * described in `specialCommentConverter()`.
- *
- * @param {Node}     node The node to be processed.
- * @param {Document} doc  The document of the node.
- * @return {void}
- */
-function moreCommentConverter( node, doc ) {
-	// Grab any custom text in the comment.
-	const customText = node.nodeValue.slice( 4 ).trim();
-
-	/*
-	 * When a `<!--more-->` comment is found, we need to look for any
-	 * `<!--noteaser-->` sibling, but it may not be a direct sibling
-	 * (whitespace typically lies in between)
-	 */
-	let sibling = node;
-	let noTeaser = false;
-	while ( ( sibling = sibling.nextSibling ) ) {
-		if (
-			sibling.nodeType === sibling.COMMENT_NODE &&
-			sibling.nodeValue === 'noteaser'
-		) {
-			noTeaser = true;
-			remove( sibling );
-			break;
-		}
-	}
-
-	const moreBlock = createMore( customText, noTeaser, doc );
+	const block = createBlock( node, doc );
 
 	// If our `<!--more-->` comment is in the middle of a paragraph, we should
 	// split the paragraph in two and insert the more block in between. If not,
 	// the more block will eventually end up being inserted after the paragraph.
-	if (
-		! node.parentNode ||
-		node.parentNode.nodeName !== 'P' ||
-		node.parentNode.childNodes.length === 1
-	) {
-		replace( node, moreBlock );
+	if ( ! node.parentNode || node.parentNode.nodeName !== 'P' ) {
+		replace( node, block );
 	} else {
 		const childNodes = Array.from( node.parentNode.childNodes );
 		const nodeIndex = childNodes.indexOf( node );
@@ -93,7 +55,7 @@ function moreCommentConverter( node, doc ) {
 		// Split the original parent node and insert our more block
 		[
 			childNodes.slice( 0, nodeIndex ).reduce( paragraphBuilder, null ),
-			moreBlock,
+			block,
 			childNodes.slice( nodeIndex + 1 ).reduce( paragraphBuilder, null ),
 		].forEach(
 			( element ) =>
@@ -105,7 +67,35 @@ function moreCommentConverter( node, doc ) {
 	}
 }
 
-function createMore( customText, noTeaser, doc ) {
+function createBlock( commentNode, doc ) {
+	if ( commentNode.nodeValue === 'nextpage' ) {
+		const node = doc.createElement( 'wp-block' );
+		node.dataset.block = 'core/nextpage';
+
+		return node;
+	}
+
+	// Grab any custom text in the comment.
+	const customText = commentNode.nodeValue.slice( 4 ).trim();
+
+	/*
+	 * When a `<!--more-->` comment is found, we need to look for any
+	 * `<!--noteaser-->` sibling, but it may not be a direct sibling
+	 * (whitespace typically lies in between)
+	 */
+	let sibling = commentNode;
+	let noTeaser = false;
+	while ( ( sibling = sibling.nextSibling ) ) {
+		if (
+			sibling.nodeType === sibling.COMMENT_NODE &&
+			sibling.nodeValue === 'noteaser'
+		) {
+			noTeaser = true;
+			remove( sibling );
+			break;
+		}
+	}
+
 	const node = doc.createElement( 'wp-block' );
 	node.dataset.block = 'core/more';
 	if ( customText ) {
@@ -117,10 +107,3 @@ function createMore( customText, noTeaser, doc ) {
 	}
 	return node;
 }
-
-function createNextpage( doc ) {
-	const node = doc.createElement( 'wp-block' );
-	node.dataset.block = 'core/nextpage';
-
-	return node;
-}
diff --git a/packages/blocks/src/api/raw-handling/test/blockquote-normaliser.js b/packages/blocks/src/api/raw-handling/test/blockquote-normaliser.js
@@ -8,7 +8,7 @@ describe( 'blockquoteNormaliser', () => {
 	it( 'should normalise blockquote', () => {
 		const input = '<blockquote>test</blockquote>';
 		const output = '<blockquote><p>test</p></blockquote>';
-		expect( deepFilterHTML( input, [ blockquoteNormaliser ] ) ).toEqual(
+		expect( deepFilterHTML( input, [ blockquoteNormaliser() ] ) ).toEqual(
 			output
 		);
 	} );

diff --git a/packages/blocks/src/api/raw-handling/test/special-comment-converter.js b/packages/blocks/src/api/raw-handling/test/special-comment-converter.js
@@ -8,22 +8,22 @@ describe( 'specialCommentConverter', () => {
 	it( 'should convert a single "more" comment into a basic block', () => {
 		expect(
 			deepFilterHTML( '<p><!--more--></p>', [ specialCommentConverter ] )
-		).toEqual( '<p></p><wp-block data-block="core/more"></wp-block>' );
+		).toEqual( '<wp-block data-block="core/more"></wp-block>' );
 	} );
 	it( 'should convert a single "nextpage" comment into a basic block', () => {
 		expect(
 			deepFilterHTML( '<p><!--nextpage--></p>', [
 				specialCommentConverter,
 			] )
-		).toEqual( '<p></p><wp-block data-block="core/nextpage"></wp-block>' );
+		).toEqual( '<wp-block data-block="core/nextpage"></wp-block>' );
 	} );
 	it( 'should convert two comments into a block', () => {
 		expect(
 			deepFilterHTML( '<p><!--more--><!--noteaser--></p>', [
 				specialCommentConverter,
 			] )
 		).toEqual(
-			'<p></p><wp-block data-block="core/more" data-no-teaser=""></wp-block>'
+			'<wp-block data-block="core/more" data-no-teaser=""></wp-block>'
 		);
 	} );
 	it( 'should pass custom text to the block', () => {
@@ -33,7 +33,7 @@ describe( 'specialCommentConverter', () => {
 				[ specialCommentConverter ]
 			)
 		).toEqual(
-			'<p></p><wp-block data-block="core/more" data-custom-text="Read all about it!" data-no-teaser=""></wp-block>'
+			'<wp-block data-block="core/more" data-custom-text="Read all about it!" data-no-teaser=""></wp-block>'
 		);
 	} );
 	it( 'should not break content order', () => {
@@ -95,7 +95,7 @@ describe( 'specialCommentConverter', () => {
 				[ specialCommentConverter ]
 			);
 			expect( output ).toEqual(
-				'<p></p><wp-block data-block="core/more" data-no-teaser=""></wp-block>'
+				'<wp-block data-block="core/more" data-no-teaser=""></wp-block>'
 			);
 		} );
 		it( 'should not break content order', () => {
@@ -108,7 +108,7 @@ describe( 'specialCommentConverter', () => {
 			);
 			expect( output ).toEqual(
 				`<p>First paragraph.</p>
-				<p></p><wp-block data-block=\"core/more\"></wp-block>
+				<wp-block data-block=\"core/more\"></wp-block>
 				<p>Second paragraph</p>
 				<p>Third paragraph</p>`
 			);
@@ -124,9 +124,9 @@ describe( 'specialCommentConverter', () => {
 			);
 			expect( output ).toEqual(
 				`<p>First page.</p>
-				<p></p><wp-block data-block=\"core/nextpage\"></wp-block>
+				<wp-block data-block=\"core/nextpage\"></wp-block>
 				<p>Second page</p>
-				<p></p><wp-block data-block=\"core/nextpage\"></wp-block>
+				<wp-block data-block=\"core/nextpage\"></wp-block>
 				<p>Third page</p>`
 			);
 		} );

diff --git a/test/integration/__snapshots__/blocks-raw-handling.test.js.snap b/test/integration/__snapshots__/blocks-raw-handling.test.js.snap
@@ -198,3 +198,21 @@ exports[`rawHandler should preserve alignment 1`] = `
 <p class="has-text-align-center">center</p>
 <!-- /wp:paragraph -->"
 `;
+
+exports[`rawHandler should preserve all paragraphs 1`] = `
+"<!-- wp:paragraph -->
+<p></p>
+<!-- /wp:paragraph -->
+
+<!-- wp:paragraph -->
+<p>&nbsp;&nbsp;</p>
+<!-- /wp:paragraph -->
+
+<!-- wp:paragraph -->
+<p></p>
+<!-- /wp:paragraph -->
+
+<!-- wp:paragraph -->
+<p></p>
+<!-- /wp:paragraph -->"
+`;
diff --git a/test/integration/blocks-raw-handling.test.js b/test/integration/blocks-raw-handling.test.js
@@ -560,4 +560,12 @@ describe( 'rawHandler', () => {
 		const HTML = '<p style="text-align:center">center</p>';
 		expect( serialize( rawHandler( { HTML } ) ) ).toMatchSnapshot();
 	} );
+
+	it( 'should preserve all paragraphs', () => {
+		const HTML = `<p></p>
+<p>&nbsp;&nbsp;</p>
+<p class="p"></p>
+<p style="border: 1px solid tomato;"></p>`;
+		expect( serialize( rawHandler( { HTML } ) ) ).toMatchSnapshot();
+	} );
 } );