Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,14 @@ public function next_block_attribute() {
return isset( $this->block_attribute_paths[ $this->block_attribute_index ] );
}

/**
 * Gets the path of the currently matched block attribute.
 *
 * Looks up the entry of $this->block_attribute_paths stored at
 * $this->block_attribute_index.
 *
 * @return mixed The stored path entry, or false when the list of paths is
 *               null or holds no entry at the current index.
 */
protected function get_block_attribute_path() {
	// `??` covers both the null list and the missing (or null) entry.
	return $this->block_attribute_paths[ $this->block_attribute_index ] ?? false;
}

/**
* Gets the key of the currently matched block attribute.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private function next_url_in_text_node() {
private function next_url_attribute() {
$tag = $this->get_tag();

if ( ! array_key_exists( $tag, self::URL_ATTRIBUTES ) ) {
if ( ! array_key_exists( $tag, self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM ) ) {
return false;
}

Expand All @@ -142,7 +142,7 @@ private function next_url_attribute() {
* for the current token. The last element is the attribute we'll
* inspect in the while() loop below.
*/
$this->inspecting_html_attributes = self::URL_ATTRIBUTES[ $tag ];
$this->inspecting_html_attributes = self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $tag ];
} else {
/**
* Forget the attribute we've inspected on the previous call to
Expand Down Expand Up @@ -184,22 +184,75 @@ private function next_url_attribute() {
private function next_url_block_attribute() {
while ( $this->next_block_attribute() ) {
$url_maybe = $this->get_block_attribute_value();
/*
* Do not use the base URL for block attributes, to avoid false positives.
* When a base URL is present, any word is a valid URL relative to the
* base URL.
* When a base URL is missing, the string must start with a protocol to
* be considered a URL.
if ( ! is_string( $url_maybe ) ||
count( $this->get_block_attribute_path() ) > 1
) {
// @TODO: support arrays, objects, and other non-string data structures.
continue;
}

/**
* Decide whether the current block attribute holds a URL.
*
* Known URL attributes can be assumed to hold a URL and be
* parsed with the base URL. For example, a "/about-us" value
* in a wp:navigation-link block's `url` attribute is a
* relative URL to the `/about-us` page.
*
* Other attributes may or may not contain URLs, but we cannot assume
* they do. A value `/about-us` could be a relative URL or a class name.
* In those cases, we'll let go of relative URLs and only detect
* absolute URLs to avoid treating every string as a URL. This requires
* parsing without a base URL.
*/
$is_relative_url_block_attribute = (
isset( self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ] ) &&
in_array( $this->get_block_attribute_key(), self::BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $this->get_block_name() ], true )
);

/**
* Filters whether a block attribute is known to contain a relative URL.
*
* This filter allows extending the list of block attributes that are
* recognized as containing URLs. When a block attribute is marked as
* a known URL attribute, it will be parsed with the base URL, allowing
* relative URLs to be properly resolved.
*
* @since 6.8.0
*
* @param bool $is_relative_url_block_attribute Whether the block attribute is known to contain a relative URL.
* @param array $context {
* Context information about the block attribute.
*
* @type string $block_name The name of the block (e.g., 'wp:image', 'wp:button').
* @type string $attribute_name The name of the attribute (e.g., 'url', 'href').
* }
*/
if ( is_string( $url_maybe ) ) {
$is_relative_url_block_attribute = apply_filters(
'url_processor_is_relative_url_block_attribute',
$is_relative_url_block_attribute,
array(
'block_name' => $this->get_block_name(),
'attribute_name' => $this->get_block_attribute_key(),
)
);

$parsed_url = false;
if ( $is_relative_url_block_attribute ) {
// Known relative URL attribute – let's parse with the base URL.
$parsed_url = WPURL::parse( $url_maybe, $this->base_url_string );
} else {
// Other attributes – let's parse without a base URL (and only detect absolute URLs).
$parsed_url = WPURL::parse( $url_maybe );
if ( false !== $parsed_url ) {
$this->raw_url = $url_maybe;
$this->parsed_url = $parsed_url;
}

return true;
}
if ( false === $parsed_url ) {
continue;
}

$this->raw_url = $url_maybe;
$this->parsed_url = $parsed_url;
return true;
}

return false;
Expand Down Expand Up @@ -362,6 +415,26 @@ public function get_inspected_attribute_name() {
return $this->inspecting_html_attributes[ count( $this->inspecting_html_attributes ) - 1 ];
}

/**
 * A list of block attributes that are known to contain URLs.
 *
 * Keys are block names, values are the attribute names whose string value
 * is parsed together with the base URL, which means a relative value such
 * as "/about-us" is reported as a URL.
 *
 * Only list attributes that always hold a URL. The button block's
 * `linkTarget` attribute is intentionally absent: it holds a link target
 * such as `_blank`, which would otherwise be resolved against the base URL
 * and reported as a relative URL.
 *
 * It covers WordPress core blocks as of WordPress version 6.9. It can be
 * extended by plugins and themes via the "url_processor_is_relative_url_block_attribute"
 * filter.
 *
 * @var array
 */
public const BLOCK_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
	'wp:button'             => array( 'url' ),
	'wp:cover'              => array( 'url' ),
	'wp:embed'              => array( 'url' ),
	'wp:gallery'            => array( 'url', 'fullUrl' ),
	'wp:image'              => array( 'url', 'src', 'href' ),
	'wp:media-text'         => array( 'mediaUrl', 'href' ),
	'wp:navigation-link'    => array( 'url' ),
	'wp:navigation-submenu' => array( 'url' ),
	'wp:rss'                => array( 'feedURL' ),
);

/**
* A list of HTML attributes meant to contain URLs, as defined in the HTML specification.
Expand All @@ -370,7 +443,7 @@ public function get_inspected_attribute_name() {
* See https://html.spec.whatwg.org/multipage/indices.html#attributes-1.
* See https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value.
*/
public const URL_ATTRIBUTES = array(
public const HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM = array(
'A' => array( 'href' ),
'APPLET' => array( 'codebase', 'archive' ),
'AREA' => array( 'href' ),
Expand Down Expand Up @@ -405,7 +478,7 @@ public function get_inspected_attribute_name() {
* @TODO: Either explicitly support these attributes, or explicitly drop support for
* handling their subsyntax. A generic URL matcher might be good enough.
*/
public const URL_ATTRIBUTES_WITH_SUBSYNTAX = array(
public const HTML_ATTRIBUTES_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
'*' => array( 'style' ), // background(), background-image().
'APPLET' => array( 'archive' ),
'IMG' => array( 'srcset' ),
Expand All @@ -425,7 +498,7 @@ public function get_inspected_attribute_name() {
* @TODO: Either explicitly support these tags, or explicitly drop support for
* handling their subsyntax. A generic URL matcher might be good enough.
*/
public const URL_CONTAINING_TAGS_WITH_SUBSYNTAX = array(
public const HTML_TAGS_WITH_SUBSYNTAX_TO_ACCEPT_RELATIVE_URLS_FROM = array(
'STYLE',
'SCRIPT',
);
Expand Down
59 changes: 42 additions & 17 deletions components/DataLiberation/Tests/BlockMarkupUrlProcessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,96 @@ public function test_next_url_in_current_token_returns_false_when_no_url_is_foun
*
* @dataProvider provider_test_finds_next_url
*/
public function test_next_url_finds_the_url( $expected_result, $markup, $base_url = 'https://wordpress.org' ) {
public function test_next_url_finds_the_url( $expected_raw_url, $expected_absolute_url, $markup, $base_url = 'https://wordpress.org' ) {
$p = new BlockMarkupUrlProcessor( $markup, $base_url );
$this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' );
$this->assertEquals( $expected_result, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
$this->assertEquals( $expected_raw_url, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
$this->assertEquals( $expected_absolute_url, $p->get_parsed_url()->toString(), 'Found a URL in the markup, but it wasn\'t the expected one.' );
}

public static function provider_test_finds_next_url() {
return array(
'In the <a> tag' => array(
'https://wordpress.org',
'https://wordpress.org/',
'<a href="https://wordpress.org">',
),
'In the second block attribute, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "src": "https://mysite.com/wp-content/image.png"} -->',
'In the wp:image url attribute when it is the first block attribute and contains a relative URL' => array(
'/wp-content/image.png',
'https://wordpress.org/wp-content/image.png',
'<!-- wp:image {"url": "/wp-content/image.png"} -->',
),
'In the first block attribute, when it contains just the URL' => array(
'In the wp:image url attribute when it is the second block attribute and contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"src": "https://mysite.com/wp-content/image.png"} -->',
),
'In a block attribute, in a nested object, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
),
'In a block attribute, in an array, when it contains just the URL' => array(
'https://mysite.com/wp-content/image.png',
'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
'<!-- wp:image {"class": "wp-bold", "url": "https://mysite.com/wp-content/image.png"} -->',
),
'In a text node, when it contains a well-formed absolute URL' => array(
'https://wordpress.org',
'https://wordpress.org/',
'Have you seen https://wordpress.org? ',
),
'In a text node after a tag' => array(
'wordpress.org',
'https://wordpress.org/',
'<p>Have you seen wordpress.org',
),
'In a text node, when it contains a protocol-relative absolute URL' => array(
'//wordpress.org',
'https://wordpress.org/',
'Have you seen //wordpress.org? ',
),
'In a text node, when it contains a domain-only absolute URL' => array(
'wordpress.org',
'https://wordpress.org/',
'Have you seen wordpress.org? ',
),
'In a text node, when it contains a domain-only absolute URL with path' => array(
'wordpress.org/plugins',
'https://wordpress.org/plugins',
'Have you seen wordpress.org/plugins? ',
),
'Matches an empty string in <a href=""> as a valid relative URL when given a base URL' => array(
'',
'https://wordpress.org/',
'<a href=""></a>',
'https://wordpress.org',
'https://wordpress.org/',
),
'Skips over an empty string in <a href=""> when not given a base URL' => array(
'https://developer.w.org',
'https://developer.w.org/',
'<a href=""></a><a href="https://developer.w.org"></a>',
null,
),
'Skips over a class name in the <a> tag' => array(
'https://developer.w.org',
'https://developer.w.org/',
'<a class="http://example.com" href="https://developer.w.org"></a>',
null,
),
);
}

/**
 * Asserts that next_url() finds no URL at all in the given markup.
 *
 * @dataProvider provider_test_finds_next_negative_url
 *
 * @param string $markup   Markup in which no URL should be detected.
 * @param string $base_url Base URL passed to the processor.
 */
public function test_next_url_finds_the_negative_url( $markup, $base_url = 'https://wordpress.org' ) {
	$p = new BlockMarkupUrlProcessor( $markup, $base_url );
	// The assertion fails when a URL IS found, so the message must say so.
	$this->assertFalse( $p->next_url(), 'Found a URL in the markup, but none was expected.' );
}

/**
 * Supplies markup samples for test_next_url_finds_the_negative_url().
 *
 * @return array Data sets keyed by description; each one holds a single markup string.
 */
public static function provider_test_finds_next_negative_url() {
	$data_sets = array();

	// The URL sits one level down, inside a nested object.
	$data_sets['In a block attribute, in a nested object, when it contains just the URL'] = array(
		'<!-- wp:image {"class": "wp-bold", "meta": { "src": "https://mysite.com/wp-content/image.png" } } -->',
	);

	// The URL is an element of an array.
	$data_sets['In a block attribute, in an array, when it contains just the URL'] = array(
		'<!-- wp:image {"class": "wp-bold", "srcs": [ "https://mysite.com/wp-content/image.png" ] } -->',
	);

	return $data_sets;
}

/**
* @dataProvider provider_test_parse_url_with_base_url
*/
Expand Down Expand Up @@ -180,7 +204,7 @@ public static function provider_test_set_url_examples() {
public function test_set_url_complex_test_case() {
$p = new BlockMarkupUrlProcessor(
<<<HTML
<!-- wp:image {"src": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
<!-- wp:image {"url": "https://mysite.com/wp-content/image.png", "meta": {"src": "https://mysite.com/wp-content/image.png"}} -->
<img src="https://mysite.com/wp-content/image.png">
<!-- /wp:image -->

Expand All @@ -204,9 +228,10 @@ public function test_set_url_complex_test_case() {
$p->set_url( 'https://site-export.internal', WPURL::parse( 'https://site-export.internal' ) );
}

// meta.src is a nested property and not supported yet
$this->assertEquals(
<<<HTML
<!-- wp:image {"src":"https:\/\/site-export.internal","meta":{"src":"https:\/\/site-export.internal"}} -->
<!-- wp:image {"url":"https:\/\/site-export.internal","meta":{"src":"https:\/\/mysite.com\/wp-content\/image.png"}} -->
<img src="https://site-export.internal">
<!-- /wp:image -->

Expand Down
Loading