Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 52 additions & 29 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -2274,12 +2274,31 @@ function sanitize_title_for_query( $title ) {
* @param string $title The title to be sanitized.
* @param string $raw_title Optional. Not used. Default empty.
* @param string $context Optional. The operation for which the string is sanitized.
* When set to 'save', additional entities are converted to hyphens
* or stripped entirely. Default 'display'.
* When set to 'save', HTML entities are decoded to raw UTF-8 and
* Unicode dash punctuation and separators are converted to hyphens.
* Default 'display'.
* @return string The sanitized title.
*/
function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
$title = strip_tags( $title );

if ( 'save' === $context ) {
/*
* Decode HTML entities to raw UTF-8, ensuring all representations of the same
* character are treated identically.
*/
$title = WP_HTML_Decoder::decode_text_node( $title );

$title = str_replace( '&', '', $title );

if ( _wp_can_use_pcre_u() ) {
$title = preg_replace( '~[\p{Pd}\p{Z}]~u', '-', $title );
}

// Convert forward slash to hyphen.
$title = str_replace( '/', '-', $title );
}

// Preserve escaped octets.
$title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title );
// Remove percent signs that are not part of an octet.
Expand All @@ -2297,12 +2316,38 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
$title = strtolower( $title );

if ( 'save' === $context ) {
// Convert &nbsp, non-breaking hyphen, &ndash, and &mdash to hyphens.
/*
* Convert known dash punctuation and space separator variants to hyphens.
*
* These are the percent-encoded UTF-8 forms produced by utf8_uri_encode().
* When _wp_can_use_pcre_u() is true, raw UTF-8 dash/space chars were already
* replaced by PCRE above, so these str_replace() calls become no-ops for those.
* They remain necessary to handle inputs that arrived as pre-encoded percent
* sequences.
*/
$title = str_replace( array( '%c2%a0', '%e2%80%91', '%e2%80%93', '%e2%80%94' ), '-', $title );
// Convert &nbsp, non-breaking hyphen, &ndash, and &mdash HTML entities to hyphens.
$title = str_replace( array( ' ', '‑', ' ', '–', '–', '—', '—' ), '-', $title );
// Convert forward slash to hyphen.
$title = str_replace( '/', '-', $title );

// Convert space separator variants (percent-encoded) to hyphen.
$title = str_replace(
array(
'%e2%80%80', // En quad.
'%e2%80%81', // Em quad.
'%e2%80%82', // En space.
'%e2%80%83', // Em space.
'%e2%80%84', // Three-per-em space.
'%e2%80%85', // Four-per-em space.
'%e2%80%86', // Six-per-em space.
'%e2%80%87', // Figure space.
'%e2%80%88', // Punctuation space.
'%e2%80%89', // Thin space.
'%e2%80%8a', // Hair space.
'%e2%80%a8', // Line separator.
'%e2%80%a9', // Paragraph separator.
'%e2%80%af', // Narrow no-break space.
),
'-',
$title
);

// Strip these characters entirely.
$title = str_replace(
Expand Down Expand Up @@ -2361,28 +2406,6 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
$title
);

// Convert non-visible characters that display with a width to hyphen.
$title = str_replace(
array(
'%e2%80%80', // En quad.
'%e2%80%81', // Em quad.
'%e2%80%82', // En space.
'%e2%80%83', // Em space.
'%e2%80%84', // Three-per-em space.
'%e2%80%85', // Four-per-em space.
'%e2%80%86', // Six-per-em space.
'%e2%80%87', // Figure space.
'%e2%80%88', // Punctuation space.
'%e2%80%89', // Thin space.
'%e2%80%8a', // Hair space.
'%e2%80%a8', // Line separator.
'%e2%80%a9', // Paragraph separator.
'%e2%80%af', // Narrow no-break space.
),
'-',
$title
);

// Convert &times to 'x'.
$title = str_replace( '%c3%97', 'x', $title );
}
Expand Down
41 changes: 41 additions & 0 deletions tests/phpunit/tests/formatting/sanitizeTitleWithDashes.php
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,47 @@ public function data_converts_non_visible_characters_with_width_to_hyphen() {
);
}

/**
 * Ensures hexadecimal numeric character references for the no-break space
 * (`&#xA0;`) are converted to hyphens when sanitizing in the 'save' context.
 *
 * The input deliberately uses the entity form rather than the raw U+00A0
 * character, so that the entity path is what this test exercises. The
 * right single quotation mark (U+2019) is expected to be removed from
 * the result.
 *
 * @ticket 64151
 */
public function test_replaces_hex_nbsp_entity() {
	$this->assertSame( 'dont-break-the-space', sanitize_title_with_dashes( "don\u{2019}t&#xA0;break&#xA0;the&#xA0;space", '', 'save' ) );
}

/**
 * Ensures hexadecimal numeric character references for the en dash
 * (`&#x2013;`) and em dash (`&#x2014;`) are converted to hyphens when
 * sanitizing in the 'save' context.
 *
 * The inputs use the entity form rather than the raw dash characters so
 * that the entity path is what this test exercises. Runs of hyphens and
 * spaces around the dash are expected to collapse into a single hyphen.
 *
 * @ticket 64151
 */
public function test_replaces_hex_ndash_mdash_entities() {
	$this->assertSame( 'do-the-dash', sanitize_title_with_dashes( 'Do &#x2013; the Dash', '', 'save' ) );
	$this->assertSame( 'do-the-dash', sanitize_title_with_dashes( 'Do &#x2014; the Dash', '', 'save' ) );
}

/**
 * Ensures the hexadecimal numeric character reference for the non-breaking
 * hyphen (`&#x2011;`) is converted to a hyphen when sanitizing in the
 * 'save' context.
 *
 * The input uses the entity form rather than the raw U+2011 character so
 * that the entity path is what this test exercises.
 *
 * @ticket 64151
 */
public function test_replaces_hex_non_breaking_hyphen_entity() {
	$this->assertSame( 'do-the-dash', sanitize_title_with_dashes( 'Do &#x2011; the Dash', '', 'save' ) );
}

/**
 * Ensures dash punctuation beyond the en dash, em dash, and non-breaking
 * hyphen is also turned into a plain hyphen in the 'save' context.
 *
 * Code points covered: U+2012 (figure dash), U+2015 (horizontal bar),
 * and U+2010 (hyphen).
 *
 * @ticket 64151
 */
public function test_replaces_additional_dash_punctuation() {
	$expected = 'do-the-dash';

	// U+2012 FIGURE DASH.
	$with_figure_dash = sanitize_title_with_dashes( "Do \u{2012} the Dash", '', 'save' );
	$this->assertSame( $expected, $with_figure_dash );

	// U+2015 HORIZONTAL BAR.
	$with_horizontal_bar = sanitize_title_with_dashes( "Do \u{2015} the Dash", '', 'save' );
	$this->assertSame( $expected, $with_horizontal_bar );

	// U+2010 HYPHEN.
	$with_hyphen = sanitize_title_with_dashes( "Do \u{2010} the Dash", '', 'save' );
	$this->assertSame( $expected, $with_hyphen );
}

/**
 * Ensures Unicode space separators that the percent-encoded replacement
 * list does not enumerate are still turned into hyphens in the 'save'
 * context.
 *
 * Code points covered: U+1680 (Ogham space mark), U+205F (medium
 * mathematical space), and U+3000 (ideographic space). Each code point is
 * asserted exactly once.
 *
 * @ticket 64151
 */
public function test_replaces_additional_space_separators() {
	// U+1680 OGHAM SPACE MARK.
	$this->assertSame( 'do-the-space', sanitize_title_with_dashes( "Do \u{1680} the Space", '', 'save' ) );
	// U+205F MEDIUM MATHEMATICAL SPACE.
	$this->assertSame( 'do-the-space', sanitize_title_with_dashes( "Do \u{205F} the Space", '', 'save' ) );
	// U+3000 IDEOGRAPHIC SPACE.
	$this->assertSame( 'do-the-space', sanitize_title_with_dashes( "Do \u{3000} the Space", '', 'save' ) );
}

/**
* @ticket 47912
* @dataProvider data_non_visible_characters_with_width_to_hyphen_when_not_save
Expand Down
Loading