Skip to content

Commit

Permalink
Permalinks: Sanitize non-visible characters inside `sanitize_title_wi…
Browse files Browse the repository at this point in the history
…th_dashes()`.

This change prevents non-visible characters in titles from creating encoded values in permalinks, opting instead for the following replacement strategy:

* Non-visible non-zero-width characters are replaced with hyphens
* Non-visible zero-width characters are removed entirely

Included with this change are 64 additional PHPUnit assertions to confirm that only the targeted non-visible characters are sanitized as intended.

Before this change, permalinks would unintentionally contain percent-encoded values wherever these non-visible characters appeared in the title. After this change, these non-visible characters are intentionally stripped out or replaced with hyphens.

Props costdev, dhanendran, hellofromtonya, paaljoachim, peterwilsoncc, poena, sergeybiryukov.

Fixes #47912.

git-svn-id: https://develop.svn.wordpress.org/trunk@51984 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
JJJ committed Nov 2, 2021
1 parent d7518d1 commit 8f9eea8
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 0 deletions.
34 changes: 34 additions & 0 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -2288,11 +2288,45 @@ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'displa
'%cc%80',
'%cc%84',
'%cc%8c',
// Non-visible characters that display without a width.
'%e2%80%8b',
'%e2%80%8c',
'%e2%80%8d',
'%e2%80%8e',
'%e2%80%8f',
'%e2%80%aa',
'%e2%80%ab',
'%e2%80%ac',
'%e2%80%ad',
'%e2%80%ae',
'%ef%bb%bf',
),
'',
$title
);

// Convert non-visible characters that display with a width to hyphen.
$title = str_replace(
array(
'%e2%80%80',
'%e2%80%81',
'%e2%80%82',
'%e2%80%83',
'%e2%80%84',
'%e2%80%85',
'%e2%80%86',
'%e2%80%87',
'%e2%80%88',
'%e2%80%89',
'%e2%80%8a',
'%e2%80%a8',
'%e2%80%a9',
'%e2%80%af',
),
'-',
$title
);

// Convert &times to 'x'.
$title = str_replace( '%c3%97', 'x', $title );
}
Expand Down
210 changes: 210 additions & 0 deletions tests/phpunit/tests/formatting/sanitizeTitleWithDashes.php
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,214 @@ function test_replaces_acute_accents() {
$this->assertSame( 'aaaa', sanitize_title_with_dashes( 'ááa´aˊ', '', 'save' ) );
}

/**
 * Checks that zero-width non-visible characters are removed from the title
 * when it is sanitized in the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_removes_non_visible_characters_without_width
 *
 * @param string $title    The title to be sanitized.
 * @param string $expected Optional. Expected sanitized title. Default empty string.
 */
public function test_removes_non_visible_characters_without_width( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title, '', 'save' );

	$this->assertSame( $expected, $actual );
}

/**
 * Data provider for test_removes_non_visible_characters_without_width().
 *
 * The "only ..." datasets supply just a title, so the test falls back to its
 * default expected value (an empty string).
 *
 * @return array[] Datasets keyed by a human-readable description.
 */
public function data_removes_non_visible_characters_without_width() {
	// Percent-encoded UTF-8 for U+200B-U+200F, U+202A-U+202E and U+FEFF.
	$zero_width_sequences = array(
		'%e2%80%8b',
		'%e2%80%8c',
		'%e2%80%8d',
		'%e2%80%8e',
		'%e2%80%8f',
		'%e2%80%aa',
		'%e2%80%ab',
		'%e2%80%ac',
		'%e2%80%ad',
		'%e2%80%ae',
		'%ef%bb%bf',
	);

	$datasets = array();

	// Only the non-visible characters.
	foreach ( $zero_width_sequences as $sequence ) {
		$datasets[ 'only ' . $sequence ] = array( $sequence );
	}

	// Non-visible characters within the title.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %ef%bb%bfin middle of title',
		'expected' => 'nonvisible-in-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%8bNonvisible at start of title',
		'expected' => 'nonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%8b',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%ef%bb%bf %e2%80%aerandomly %e2%80%8ein the %e2%80%8e title%e2%80%8e',
		'expected' => 'nonvisible-randomly-in-the-title',
	);

	return $datasets;
}

/**
 * Checks how zero-width non-visible characters are handled when the title is
 * sanitized without passing the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_non_visible_characters_without_width_when_not_save
 *
 * @param string $title    The title to be sanitized.
 * @param string $expected Optional. Expected sanitized title. Default empty string.
 */
public function test_non_visible_characters_without_width_when_not_save( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title );

	$this->assertSame( $expected, $actual );
}

/**
 * Data provider for test_non_visible_characters_without_width_when_not_save().
 *
 * Every dataset expects the percent-encoded sequences to still be present in
 * the sanitized title.
 *
 * @return array[] Datasets keyed by a human-readable description.
 */
public function data_non_visible_characters_without_width_when_not_save() {
	// Percent-encoded UTF-8 for U+200B-U+200F, U+202A-U+202E and U+FEFF.
	$zero_width_sequences = array(
		'%e2%80%8b',
		'%e2%80%8c',
		'%e2%80%8d',
		'%e2%80%8e',
		'%e2%80%8f',
		'%e2%80%aa',
		'%e2%80%ab',
		'%e2%80%ac',
		'%e2%80%ad',
		'%e2%80%ae',
		'%ef%bb%bf',
	);

	$datasets = array();

	// Just the non-visible characters: the title and the expected result are the same.
	foreach ( $zero_width_sequences as $sequence ) {
		$datasets[ 'only ' . $sequence ] = array( $sequence, $sequence );
	}

	// Non-visible characters within the title.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %ef%bb%bfin middle of title',
		'expected' => 'nonvisible-%ef%bb%bfin-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%8bNonvisible at start of title',
		'expected' => '%e2%80%8bnonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%8b',
		'expected' => 'nonvisible-at-end-of-title-%e2%80%8b',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%ef%bb%bf %e2%80%aerandomly %e2%80%8ein the %e2%80%8e title%e2%80%8e',
		'expected' => 'nonvisible%ef%bb%bf-%e2%80%aerandomly-%e2%80%8ein-the-%e2%80%8e-title%e2%80%8e',
	);

	return $datasets;
}

/**
 * Checks that non-visible characters that occupy width are converted to
 * hyphens when the title is sanitized in the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_converts_non_visible_characters_with_width_to_hyphen
 *
 * @param string $title    The title to be sanitized.
 * @param string $expected Optional. Expected sanitized title. Default empty string.
 */
public function test_converts_non_visible_characters_with_width_to_hyphen( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title, '', 'save' );

	$this->assertSame( $expected, $actual );
}

/**
 * Data provider for test_converts_non_visible_characters_with_width_to_hyphen().
 *
 * The "only ..." datasets supply just a title, so the test falls back to its
 * default expected value (an empty string).
 *
 * @return array[] Datasets keyed by a human-readable description.
 */
public function data_converts_non_visible_characters_with_width_to_hyphen() {
	// Percent-encoded UTF-8 for U+2000-U+200A, U+2028, U+2029 and U+202F.
	$width_sequences = array(
		'%e2%80%80',
		'%e2%80%81',
		'%e2%80%82',
		'%e2%80%83',
		'%e2%80%84',
		'%e2%80%85',
		'%e2%80%86',
		'%e2%80%87',
		'%e2%80%88',
		'%e2%80%89',
		'%e2%80%8a',
		'%e2%80%a8',
		'%e2%80%a9',
		'%e2%80%af',
	);

	$datasets = array();

	// Only the non-visible characters.
	foreach ( $width_sequences as $sequence ) {
		$datasets[ 'only ' . $sequence ] = array( $sequence );
	}

	// Non-visible characters within the title.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %e2%80%82 in middle of title',
		'expected' => 'nonvisible-in-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%83Nonvisible at start of title',
		'expected' => 'nonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%81',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['two end of title']   = array(
		'title'    => 'Nonvisible at end of title %e2%80%81 %e2%80%af',
		'expected' => 'nonvisible-at-end-of-title',
	);
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%e2%80%80 %e2%80%a9randomly %e2%80%87in the %e2%80%a8 title%e2%80%af',
		'expected' => 'nonvisible-randomly-in-the-title',
	);

	return $datasets;
}

/**
 * Checks how non-visible characters that occupy width are handled when the
 * title is sanitized without passing the 'save' context.
 *
 * @ticket 47912
 * @dataProvider data_non_visible_characters_with_width_to_hyphen_when_not_save
 *
 * @param string $title    The title to be sanitized.
 * @param string $expected Optional. Expected sanitized title. Default empty string.
 */
public function test_non_visible_characters_with_width_to_hyphen_when_not_save( $title, $expected = '' ) {
	$actual = sanitize_title_with_dashes( $title );

	$this->assertSame( $expected, $actual );
}

/**
 * Data provider for test_non_visible_characters_with_width_to_hyphen_when_not_save().
 *
 * Covers the non-visible characters that occupy width (the same sequences used
 * by the 'save' context hyphenation test). Every dataset expects the
 * percent-encoded sequences to still be present in the sanitized title.
 *
 * @return array[] Datasets keyed by a human-readable description.
 */
public function data_non_visible_characters_with_width_to_hyphen_when_not_save() {
	// Percent-encoded UTF-8 for U+2000-U+200A, U+2028, U+2029 and U+202F.
	// These must be the with-width sequences; the zero-width ones are
	// covered by the separate "without width" provider.
	$width_sequences = array(
		'%e2%80%80',
		'%e2%80%81',
		'%e2%80%82',
		'%e2%80%83',
		'%e2%80%84',
		'%e2%80%85',
		'%e2%80%86',
		'%e2%80%87',
		'%e2%80%88',
		'%e2%80%89',
		'%e2%80%8a',
		'%e2%80%a8',
		'%e2%80%a9',
		'%e2%80%af',
	);

	$datasets = array();

	// Just the non-visible characters: the title and the expected result are the same.
	foreach ( $width_sequences as $sequence ) {
		$datasets[ 'only ' . $sequence ] = array( $sequence, $sequence );
	}

	// Non-visible characters within the title.
	$datasets['in middle of title'] = array(
		'title'    => 'Nonvisible %e2%80%82 in middle of title',
		'expected' => 'nonvisible-%e2%80%82-in-middle-of-title',
	);
	$datasets['at start of title']  = array(
		'title'    => '%e2%80%83Nonvisible at start of title',
		'expected' => '%e2%80%83nonvisible-at-start-of-title',
	);
	$datasets['at end of title']    = array(
		'title'    => 'Nonvisible at end of title %e2%80%81',
		'expected' => 'nonvisible-at-end-of-title-%e2%80%81',
	);
	// Uses the same title as the 'save' context test so both contexts are compared on identical input.
	$datasets['randomly in title']  = array(
		'title'    => 'Nonvisible%e2%80%80 %e2%80%a9randomly %e2%80%87in the %e2%80%a8 title%e2%80%af',
		'expected' => 'nonvisible%e2%80%80-%e2%80%a9randomly-%e2%80%87in-the-%e2%80%a8-title%e2%80%af',
	);

	return $datasets;
}
}

0 comments on commit 8f9eea8

Please sign in to comment.