diff --git a/src/wp-includes/kses.php b/src/wp-includes/kses.php index a9e8bbdd3ec5f..0ef5803ebc7b4 100644 --- a/src/wp-includes/kses.php +++ b/src/wp-includes/kses.php @@ -988,6 +988,9 @@ function wp_kses_split( $content, $allowed_html, $allowed_protocols ) { (|$)) # - Normative HTML comments. | ]*> # - Closing tags with invalid tag names. + | + ]*> # - Invalid markup declaration nodes. Not all invalid nodes + # are matched so as to avoid breaking legacy behaviors. ) | (<[^>]*(>|$)|>) # Tag-like spans of text. @@ -1114,22 +1117,30 @@ function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) { } /* - * When a closing tag appears with a name that isn't a valid tag name, - * it must be interpreted as an HTML comment. It extends until the - * first `>` character after the initial opening `` + * and then transforms the entire span into an HTML comment. * * Preserve these comments and do not treat them like tags. + * + * @see https://html.spec.whatwg.org/#bogus-comment-state */ - if ( 1 === preg_match( '~^]*>$~', $content ) ) { - $content = substr( $content, 2, -1 ); - $transformed = null; + if ( 1 === preg_match( '~^(?:]*>|]*>)$~', $content ) ) { + /** + * Since the pattern matches `` and also ``, this will + * preserve the type of the cleaned-up token in the output. + */ + $opener = $content[1]; + $content = substr( $content, 2, -1 ); - while ( $transformed !== $content ) { - $transformed = wp_kses( $content, $allowed_html, $allowed_protocols ); - $content = $transformed; - } + do { + $prev = $content; + $content = wp_kses( $content, $allowed_html, $allowed_protocols ); + } while ( $prev !== $content ); - return ""; + // Recombine the modified inner content with the original token structure. + return "<{$opener}{$content}>"; } /* diff --git a/tests/phpunit/tests/kses.php b/tests/phpunit/tests/kses.php index 36bf2baf123d3..ea65a89092c07 100644 --- a/tests/phpunit/tests/kses.php +++ b/tests/phpunit/tests/kses.php @@ -1936,11 +1936,13 @@ public function filter_wp_kses_object_added_in_html_filter( $tags, $context ) { * * @ticket 61009 * + * @dataProvider data_html_containing_various_kinds_of_html_comments + * * @param string $html_comment HTML containing a comment; must not be a valid comment * but must be syntax which a browser interprets as a comment. * @param string $expected_output How `wp_kses()` ought to transform the comment. */ - public function wp_kses_preserves_html_comments( $html_comment, $expected_output ) { + public function test_wp_kses_preserves_html_comments( $html_comment, $expected_output ) { $this->assertSame( $expected_output, wp_kses( $html_comment, array() ), @@ -1957,6 +1959,7 @@ public static function data_html_containing_various_kinds_of_html_comments() { return array( 'Normative HTML comment' => array( 'beforeafter', 'beforeafter' ), 'Closing tag with invalid tag name' => array( 'beforeafter', 'beforeafter' ), + 'Incorrectly opened comment (Markup declaration)' => array( 'beforeafter', 'beforeafter' ), ); }