Skip to content

Commit

Permalink
HTML API: Backports from Core
Browse files Browse the repository at this point in the history
 - Support a few extra invalid comment syntaxes. WordPress/wordpress-develop#4256
 - Invalidate bookmarks which have been eliminated by enqueued changes. WordPress/wordpress-develop#4116
 - Expose whether the currently-matched tag has the self-closing flag. WordPress/wordpress-develop#4266
 - Avoid double-writing an attribute value if given case-variations of the name. WordPress/wordpress-develop#4337
  • Loading branch information
dmsnell committed Apr 20, 2023
1 parent 70d4266 commit 3d7902e
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 21 deletions.
125 changes: 111 additions & 14 deletions lib/compat/wordpress-6.2/html-api/class-wp-html-tag-processor.php
Expand Up @@ -971,6 +971,7 @@ private function skip_script_data() {
* closing `>`; these are left for other methods.
*
* @since 6.2.0
* @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
*
* @return bool Whether a tag was found before the end of the document.
*/
Expand Down Expand Up @@ -1039,13 +1040,42 @@ private function parse_next_tag() {
'-' === $html[ $at + 2 ] &&
'-' === $html[ $at + 3 ]
) {
$closer_at = strpos( $html, '-->', $at + 4 );
if ( false === $closer_at ) {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
return false;
}

$at = $closer_at + 3;
continue;
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$at = $closer_at + $span_of_dashes + 1;
continue;
}

/*
* Comments may be closed by either a --> or an invalid --!>.
* The first occurrence closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
*/
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
return false;
}

if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$at = $closer_at + 3;
continue 2;
}

if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$at = $closer_at + 4;
continue 2;
}
}
}

/*
Expand Down Expand Up @@ -1104,9 +1134,19 @@ private function parse_next_tag() {
continue;
}

/*
* </> is a missing end tag name, which is ignored.
*
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
$at++;
continue;
}

/*
* <? transitions to a bogus comment state – skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
Expand All @@ -1118,6 +1158,22 @@ private function parse_next_tag() {
continue;
}

/*
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
return false;
}

$at = $closer_at + 1;
continue;
}

++$at;
}

Expand Down Expand Up @@ -1404,6 +1460,7 @@ private function class_name_updates_to_attributes_updates() {
* Applies attribute updates to HTML document.
*
* @since 6.2.0
* @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
*
* @return void
*/
Expand Down Expand Up @@ -1434,7 +1491,7 @@ private function apply_attributes_updates() {
* Adjust bookmark locations to account for how the text
* replacements adjust offsets in the input document.
*/
foreach ( $this->bookmarks as $bookmark ) {
foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
/*
* Each lexical update which appears before the bookmark's endpoints
* might shift the offsets for those endpoints. Loop through each change
Expand All @@ -1445,20 +1502,22 @@ private function apply_attributes_updates() {
$tail_delta = 0;

foreach ( $this->lexical_updates as $diff ) {
$update_head = $bookmark->start >= $diff->start;
$update_tail = $bookmark->end >= $diff->start;

if ( ! $update_head && ! $update_tail ) {
if ( $bookmark->start < $diff->start && $bookmark->end < $diff->start ) {
break;
}

if ( $bookmark->start >= $diff->start && $bookmark->end < $diff->end ) {
$this->release_bookmark( $bookmark_name );
continue 2;
}

$delta = strlen( $diff->text ) - ( $diff->end - $diff->start );

if ( $update_head ) {
if ( $bookmark->start >= $diff->start ) {
$head_delta += $delta;
}

if ( $update_tail ) {
if ( $bookmark->end >= $diff->end ) {
$tail_delta += $delta;
}
}
Expand All @@ -1470,6 +1529,18 @@ private function apply_attributes_updates() {
$this->lexical_updates = array();
}

/**
 * Reports whether a bookmark is currently stored under the given name.
 *
 * The lookup is a key-existence test against the processor's bookmark
 * collection, so a name counts as present as long as its key is there.
 *
 * @since 6.3.0
 *
 * @param string $bookmark_name Name of the bookmark to look for.
 * @return bool True if an entry with that name is present, false otherwise.
 */
public function has_bookmark( $bookmark_name ) {
	$is_known_bookmark = array_key_exists( $bookmark_name, $this->bookmarks );

	return $is_known_bookmark;
}

/**
* Move the internal cursor in the Tag Processor to a given bookmark's location.
*
Expand Down Expand Up @@ -1744,6 +1815,31 @@ public function get_tag() {
return strtoupper( $tag_name );
}

/**
 * Reports whether the currently matched tag was written with the self-closing flag.
 *
 * This is a purely syntactic check: it answers whether a `/` sits directly
 * before the recorded end of the tag, not whether the element actually
 * self-closes.
 *
 * How the flag is treated depends on the kind of element:
 *
 *  - HTML elements ought not to carry the flag, and on them it is ignored.
 *  - For void elements that is harmless, since they "self close" automatically.
 *  - For non-void HTML elements it causes problems if someone intends the
 *    self-closing form to stand in for the element with an empty body.
 *  - For HTML foreign elements and custom elements the flag determines
 *    whether they self-close or not.
 *
 * @since 6.3.0
 *
 * @return bool Whether the currently matched tag contains the self-closing flag.
 */
public function has_self_closing_flag() {
	/*
	 * Short-circuit evaluation keeps the string lookup from running when no
	 * tag name offset is recorded; in that case the answer is always false.
	 */
	return (
		$this->tag_name_starts_at &&
		'/' === $this->html[ $this->tag_ends_at - 1 ]
	);
}

/**
* Indicates if the current tag token is a tag closer.
*
Expand Down Expand Up @@ -1775,6 +1871,7 @@ public function is_tag_closer() {
* For string attributes, the value is escaped using the `esc_attr` function.
*
* @since 6.2.0
* @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names.
*
* @param string $name The attribute name to target.
* @param string|bool $value The new attribute value.
Expand Down Expand Up @@ -1867,8 +1964,8 @@ public function set_attribute( $name, $value ) {
*
* Result: <div id="new"/>
*/
$existing_attribute = $this->attributes[ $comparable_name ];
$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
$existing_attribute = $this->attributes[ $comparable_name ];

Check warning on line 1967 in lib/compat/wordpress-6.2/html-api/class-wp-html-tag-processor.php

View workflow job for this annotation

GitHub Actions / PHP coding standards

Equals sign not aligned with surrounding assignments; expected 24 spaces but found 1 space
$this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
$existing_attribute->start,
$existing_attribute->end,
$updated_attribute
Expand Down
Expand Up @@ -971,6 +971,7 @@ private function skip_script_data() {
* closing `>`; these are left for other methods.
*
* @since 6.2.0
* @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
*
* @return bool Whether a tag was found before the end of the document.
*/
Expand Down Expand Up @@ -1039,13 +1040,42 @@ private function parse_next_tag() {
'-' === $html[ $at + 2 ] &&
'-' === $html[ $at + 3 ]
) {
$closer_at = strpos( $html, '-->', $at + 4 );
if ( false === $closer_at ) {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
return false;
}

$at = $closer_at + 3;
continue;
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$at = $closer_at + $span_of_dashes + 1;
continue;
}

/*
* Comments may be closed by either a --> or an invalid --!>.
* The first occurrence closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
*/
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
return false;
}

if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$at = $closer_at + 3;
continue 2;
}

if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$at = $closer_at + 4;
continue 2;
}
}
}

/*
Expand Down Expand Up @@ -1104,9 +1134,19 @@ private function parse_next_tag() {
continue;
}

/*
* </> is a missing end tag name, which is ignored.
*
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
$at++;
continue;
}

/*
* <? transitions to a bogus comment state – skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
Expand All @@ -1118,6 +1158,22 @@ private function parse_next_tag() {
continue;
}

/*
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
return false;
}

$at = $closer_at + 1;
continue;
}

++$at;
}

Expand Down Expand Up @@ -1815,6 +1871,7 @@ public function is_tag_closer() {
* For string attributes, the value is escaped using the `esc_attr` function.
*
* @since 6.2.0
* @since 6.2.1 Fix: Only create a single update for multiple calls with case-variant attribute names.
*
* @param string $name The attribute name to target.
* @param string|bool $value The new attribute value.
Expand Down Expand Up @@ -1907,8 +1964,8 @@ public function set_attribute( $name, $value ) {
*
* Result: <div id="new"/>
*/
$existing_attribute = $this->attributes[ $comparable_name ];
$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
$existing_attribute = $this->attributes[ $comparable_name ];

Check warning on line 1967 in lib/compat/wordpress-6.3/html-api/class-gutenberg-html-tag-processor-6-3.php

View workflow job for this annotation

GitHub Actions / PHP coding standards

Equals sign not aligned with surrounding assignments; expected 24 spaces but found 1 space
$this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
$existing_attribute->start,
$existing_attribute->end,
$updated_attribute
Expand Down

0 comments on commit 3d7902e

Please sign in to comment.