Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
d332205
Kickoff migrating URLs in CSS
adamziel Oct 21, 2025
adb07a9
Support Unicode escapes
adamziel Oct 21, 2025
40380e5
Simplify the replacements, format the code
adamziel Oct 21, 2025
f6710aa
Improve clarity of the CSSUrlProcessor
adamziel Oct 22, 2025
ff59ffd
Test CSS unicode escapes decoder
adamziel Oct 22, 2025
0813667
Ditch regexp
adamziel Oct 22, 2025
3b69730
PHPCS
adamziel Oct 22, 2025
95a1302
Do not allocate memory for every match optimistically
adamziel Oct 22, 2025
e98c3ba
Test for data URI
adamziel Oct 22, 2025
3bdbda6
Skip data URIs in the replacement logic
adamziel Oct 22, 2025
8a5e734
Optimize get_parsed_url() for data uris
adamziel Oct 22, 2025
3739a95
Simplify the CSS URL Processor
adamziel Oct 22, 2025
0d5d95f
Move URL parsing from CSS processor to BlockMarkupURLProcessor
adamziel Oct 22, 2025
5feafb5
Use wp.org as a test domain
adamziel Oct 22, 2025
c387bd5
Simplify the css processor integration
adamziel Oct 23, 2025
2b2170b
Add a generic CSS Processor
adamziel Oct 24, 2025
ee3ed64
Simplify consume_string()
adamziel Oct 24, 2025
cd32ab2
Pass most CSS tokenizer test cases
adamziel Oct 24, 2025
4b75739
Less failures
adamziel Oct 24, 2025
d3d1b07
1 last failure
adamziel Oct 24, 2025
0245453
Remove the offending fuzzer test
adamziel Oct 24, 2025
8996fd4
Adjust details
adamziel Oct 28, 2025
38f89af
Use codepoints instead of bytes for decoding idents
adamziel Oct 29, 2025
2382057
Use the bundled unicode decoder
adamziel Oct 29, 2025
663db21
Do not concat to repr when consuming numeric values
adamziel Oct 29, 2025
c647699
Comments, renaming for clarity
adamziel Oct 29, 2025
8227327
Simplify consume_ident_sequence()
adamziel Oct 29, 2025
2182023
Fix inconsistencies in CSSProcessor
adamziel Oct 29, 2025
20947cb
Simplify is_valid_escape
adamziel Oct 29, 2025
43301e6
Simplify would_next_3_code_points_start_an_ident
adamziel Oct 29, 2025
2c0b357
Simplify is_ident_code_point_at
adamziel Oct 29, 2025
46ce619
Reformat and further simplify
adamziel Oct 29, 2025
eeba23f
Reformat and further simplify
adamziel Oct 29, 2025
b427f9d
PHPCS
adamziel Oct 29, 2025
b7fcbef
Merge branch 'trunk' into migrate-urls-in-css
adamziel Oct 31, 2025
e288522
use css processor in css url processor
adamziel Oct 31, 2025
2340e19
Merge branch 'trunk' into migrate-urls-in-css
adamziel Oct 31, 2025
09a6d6d
Simplify the css url processor
adamziel Oct 31, 2025
e0c5010
Fix last failing test
adamziel Nov 1, 2025
5583d5e
Test data URI parsing and memory treatment
adamziel Nov 1, 2025
562475c
Preliminary test suite cleanup with a more thorough rewrite coming
adamziel Nov 1, 2025
c32a771
More readable escaping test cases
adamziel Nov 1, 2025
25019d8
More readable escaping test cases
adamziel Nov 1, 2025
0c4d80e
Additional tests for set() and one larger CSS snippet
adamziel Nov 1, 2025
65e6ff6
Delete components/DataLiberation/Tests/css-test-cases.php
adamziel Nov 1, 2025
c5c0f4a
Delete generate-css-tests.mjs
adamziel Nov 1, 2025
3817238
Rollback obsolete changes
adamziel Nov 1, 2025
616bffc
Remove unneeded WP polyfills, rename class to CSSURLProcessor
adamziel Nov 1, 2025
ed95beb
Remove unrelated chante
adamziel Nov 1, 2025
5135a2a
brush up the test_comprehensive_url_replacement_in_complex_css case
adamziel Nov 1, 2025
f89769c
format
adamziel Nov 1, 2025
cb4b4ed
format
adamziel Nov 1, 2025
a1afc11
Fix URL replacement in BlockMarkupUrlProcessorTest
adamziel Nov 2, 2025
123f36d
Remove extra llm changes
adamziel Nov 2, 2025
0716d74
Merge branch 'trunk' into migrate-urls-in-css
adamziel Nov 2, 2025
015ea44
Enhance memory usage tests in CSSUrlProcessorTest
adamziel Nov 2, 2025
7a64cdf
Fix unit tests
adamziel Nov 2, 2025
ff7d636
Move CSSProcessor to its own namespace
adamziel Nov 2, 2025
052ad8a
Add comments
adamziel Nov 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@

use Rowbot\URL\URL;
use WordPress\DataLiberation\URL\URLInTextProcessor;
use WordPress\DataLiberation\URL\CSSURLProcessor;
use WordPress\DataLiberation\URL\WPURL;
use WordPress\DataLiberation\URL\ConvertedUrl;

use function WordPress\DataLiberation\URL\urldecode_n;

/**
* Reports all the URLs in the imported post and enables rewriting them.
Expand All @@ -23,6 +21,8 @@ class BlockMarkupUrlProcessor extends BlockMarkupProcessor {
private $base_url_object;
private $url_in_text_processor;
private $url_in_text_node_updated;
private $css_url_processor;
private $css_url_processor_updated;

/**
* The list of names of URL-related HTML attributes that may be available on
Expand Down Expand Up @@ -52,6 +52,14 @@ public function get_updated_html(): string {
$this->url_in_text_node_updated = false;
}

if ( $this->css_url_processor_updated ) {
if ( null !== $this->css_url_processor ) {
$updated_css = $this->css_url_processor->get_updated_css();
$this->set_attribute( 'style', $updated_css );
}
$this->css_url_processor_updated = false;
}

return parent::get_updated_html();
}

Expand All @@ -70,8 +78,11 @@ public function next_token(): bool {
$this->parsed_url = null;
$this->inspecting_html_attributes = null;
$this->url_in_text_processor = null;
// Do not reset url_in_text_node_updated – it's reset in get_updated_html() which
// is called in parent::next_token().
$this->css_url_processor = null;
/*
* Do not reset url_in_text_node_updated or css_url_processor_updated – they're reset
* in get_updated_html() which is called in parent::next_token().
*/

return parent::next_token();
}
Expand Down Expand Up @@ -111,7 +122,7 @@ private function next_url_in_text_node() {
* way to recognize a substring "WordPress.org" as a URL. We might
* get some false positives this way, e.g. in this string:
*
* > And that's how you build a theme.Now let's take a look at..."
* > And that's how you build a theme.Now let's take a look at..."
*
* `theme.Now` would be recognized as a URL. It's up to the API consumer
* to filter out such false positives e.g. by checking the domain against
Expand All @@ -130,20 +141,75 @@ private function next_url_in_text_node() {
return false;
}

/**
 * Moves on to the next usable CSS URL for the current tag token.
 *
 * When no CSS URL processor is set yet, one is created from the value of the
 * tag's `style` attribute; an already-set processor is reused as-is. URLs
 * reported as data URIs and URLs that WPURL::parse() rejects are skipped.
 *
 * On success, `$this->raw_url` and `$this->parsed_url` describe the URL
 * that was found.
 *
 * @return bool True when a URL was found, false otherwise.
 */
private function next_url_in_css() {
	// Only tag tokens are inspected for CSS URLs.
	if ( '#tag' !== $this->get_token_type() ) {
		return false;
	}

	// Lazily create the CSS URL processor from the `style` attribute value.
	if ( null === $this->css_url_processor ) {
		$style = $this->get_attribute( 'style' );
		if ( ! is_string( $style ) ) {
			return false;
		}

		$this->css_url_processor = new CSSURLProcessor( $style );
	}

	$css = $this->css_url_processor;
	while ( $css->next_url() ) {
		/*
		 * Data URIs are passed over: they may be really large and they
		 * don't have a hostname to migrate.
		 */
		if ( ! $css->is_data_uri() ) {
			$this->raw_url    = $css->get_raw_url();
			$this->parsed_url = WPURL::parse( $this->raw_url, $this->base_url_string );

			// Stop at the first URL that parses; otherwise keep scanning.
			if ( false !== $this->parsed_url ) {
				return true;
			}
		}
	}

	return false;
}

private function next_url_attribute() {
$tag = $this->get_tag();

if ( ! array_key_exists( $tag, self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM ) ) {
return false;
// Check if we have a style attribute with CSS URLs to process.
if ( null !== $this->css_url_processor ) {
if ( $this->next_url_in_css() ) {
return true;
}
// Done with CSS URLs in this attribute, apply any pending updates and move on.
$this->get_updated_html();
$this->css_url_processor = null;
}

if ( null === $this->inspecting_html_attributes ) {
/**
* Initialize the list on the first call to next_url_attribute()
* for the current token. The last element is the attribute we'll
* inspect in the while() loop below.
*/
$this->inspecting_html_attributes = self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $tag ];
if ( array_key_exists( $tag, self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM ) ) {
/**
* Initialize the list on the first call to next_url_attribute()
* for the current token. The last element is the attribute we'll
* inspect in the while() loop below.
*/
$this->inspecting_html_attributes = self::HTML_ATTRIBUTES_TO_ACCEPT_RELATIVE_URLS_FROM[ $tag ];
// Add style attribute to the list if it exists.
if ( null !== $this->get_attribute( 'style' ) ) {
$this->inspecting_html_attributes[] = 'style';
}
} elseif ( null !== $this->get_attribute( 'style' ) ) {
$this->inspecting_html_attributes = array( 'style' );
} else {
return false;
}
} else {
/**
* Forget the attribute we've inspected on the previous call to
Expand All @@ -160,6 +226,18 @@ private function next_url_attribute() {
continue;
}

// Rewrite any CSS `url()` declarations in the `style` attribute.
if ( 'style' === $attr ) {
$this->css_url_processor = new CSSURLProcessor( $url_maybe );
if ( $this->next_url_in_css() ) {
return true;
}
// No CSS URLs found, move to next attribute.
$this->css_url_processor = null;
array_pop( $this->inspecting_html_attributes );
continue;
}

/*
* Use base URL to resolve known URI attributes as we are certain we're
* dealing with URI values.
Expand Down Expand Up @@ -277,6 +355,12 @@ public function set_url( $raw_url, $parsed_url ) {
$this->parsed_url = $parsed_url;
switch ( parent::get_token_type() ) {
case '#tag':
// Check if we're processing a CSS URL.
if ( null !== $this->css_url_processor ) {
$this->css_url_processor_updated = true;
return $this->css_url_processor->set_raw_url( $raw_url );
}

$attr = $this->get_inspected_attribute_name();
if ( false === $attr ) {
return false;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

namespace WordPress\DataLiberation\URL;
namespace WordPress\DataLiberation\CSS;

use function WordPress\Encoding\codepoint_to_utf8_bytes;
use function WordPress\Encoding\compat\_wp_scan_utf8;
Expand Down Expand Up @@ -742,6 +742,32 @@ public function get_token_value() {
return $this->token_value;
}

/**
 * Tells whether the current token's value begins with "data:".
 *
 * The check looks at the five bytes of `$this->css` starting at the recorded
 * token value offset. The letters of "data" are matched in either case; no
 * escape decoding is performed here. No substring is extracted for the
 * comparison.
 *
 * @return bool True when the value starts with "data:" (case-insensitive).
 *              False when no token value span is recorded, when the span is
 *              shorter than five bytes, or when the prefix does not match.
 */
public function is_data_uri(): bool {
	$start  = $this->token_value_starts_at;
	$length = $this->token_value_length;

	// Without a value span of at least strlen( 'data:' ) bytes there is nothing to match.
	if ( null === $start || null === $length || $length < 5 ) {
		return false;
	}

	// Accepted lower/upper spellings for each of the four leading letters.
	$letters = array(
		array( 'd', 'D' ),
		array( 'a', 'A' ),
		array( 't', 'T' ),
		array( 'a', 'A' ),
	);

	foreach ( $letters as $index => $variants ) {
		$byte = $this->css[ $start + $index ];
		if ( $variants[0] !== $byte && $variants[1] !== $byte ) {
			return false;
		}
	}

	// The scheme name must be immediately followed by a colon.
	return ':' === $this->css[ $start + 4 ];
}

/**
* Gets the token start at.
*
Expand Down Expand Up @@ -812,27 +838,26 @@ public function get_token_value_length(): ?int {
* @return bool Whether the value was successfully updated.
*/
public function set_token_value( string $new_value ): bool {
// Only URL tokens are currently supported.
if ( self::TOKEN_URL !== $this->token_type ) {
return false;
}

// Ensure we have valid token value boundaries.
if ( null === $this->token_value_starts_at || null === $this->token_value_length ) {
return false;
// Only URL and string tokens are currently supported.
switch ( $this->token_type ) {
case self::TOKEN_URL:
$this->lexical_updates[] = array(
'start' => $this->token_value_starts_at,
'length' => $this->token_value_length,
'text' => $this->escape_url_value( $new_value ),
);
return true;
case self::TOKEN_STRING:
$this->lexical_updates[] = array(
'start' => $this->token_starts_at,
'length' => $this->token_length,
'text' => $this->escape_url_value( $new_value ),
);
return true;
default:
_doing_it_wrong( __METHOD__, 'set_token_value() only supports URL and string tokens. Got token type: ' . $this->token_type, '1.0.0' );
return false;
}

// Escape the URL value for unquoted URL syntax.
$escaped_value = $this->escape_url_value( $new_value );

// Queue the lexical update.
$this->lexical_updates[] = array(
'start' => $this->token_value_starts_at,
'length' => $this->token_value_length,
'text' => $escaped_value,
);

return true;
}

/**
Expand Down
Loading