Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion src/wp-includes/class-wp-query.php
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,36 @@ protected function parse_search( &$query_vars ) {
$query_vars['s'] = urldecode( $query_vars['s'] );
}
// There are no line breaks in <input /> fields.
$query_vars['s'] = str_replace( array( "\r", "\n" ), '', $query_vars['s'] );
$query_vars['s'] = str_replace( array( "\r", "\n" ), '', $query_vars['s'] );
/*
* Normalize Unicode whitespace (Zs category) to a regular half-width space
* so that CJK ideographic spaces and other Unicode space separators are
* recognized as word separators in the search query.
*
* @see https://core.trac.wordpress.org/ticket/44296
*/
$query_vars['s'] = str_replace(
array(
"\u{00A0}", // No-Break Space.
"\u{1680}", // Ogham Space Mark.
"\u{2000}", // En Quad.
"\u{2001}", // Em Quad.
"\u{2002}", // En Space.
"\u{2003}", // Em Space.
"\u{2004}", // Three-Per-Em Space.
"\u{2005}", // Four-Per-Em Space.
"\u{2006}", // Six-Per-Em Space.
"\u{2007}", // Figure Space.
"\u{2008}", // Punctuation Space.
"\u{2009}", // Thin Space.
"\u{200A}", // Hair Space.
"\u{202F}", // Narrow No-Break Space.
"\u{205F}", // Medium Mathematical Space.
"\u{3000}", // Ideographic Space (CJK).
),
' ',
$query_vars['s']
);
$query_vars['search_terms_count'] = 1;
if ( ! empty( $query_vars['sentence'] ) ) {
$query_vars['search_terms'] = array( $query_vars['s'] );
Expand Down
33 changes: 33 additions & 0 deletions tests/phpunit/tests/query/search.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,39 @@ public function filter_wp_search_stopwords() {
return array();
}

/**
 * Verifies that a search string delimited by ideographic spaces (U+3000) is
 * split into separate terms while the filter_wp_search_stopwords() callback
 * is hooked onto the 'wp_search_stopwords' filter.
 *
 * The query is expected to report a term count of 5, and a parsed term list
 * of four entries that does not include the single-letter "a".
 *
 * @ticket 44296
 */
public function test_ideographic_space_separator_with_no_stopwords() {
	$stopwords_callback = array( $this, 'filter_wp_search_stopwords' );
	$search_string      = "This\u{3000}is\u{3000}a\u{3000}search\u{3000}term";

	// The callback is only attached for the duration of the query construction.
	add_filter( 'wp_search_stopwords', $stopwords_callback );
	$search_query = new WP_Query( array( 's' => $search_string ) );
	remove_filter( 'wp_search_stopwords', $stopwords_callback );

	$expected_terms = array( 'This', 'is', 'search', 'term' );

	$this->assertSame( 5, $search_query->get( 'search_terms_count' ) );
	$this->assertSame( $expected_terms, $search_query->get( 'search_terms' ) );
}

/**
 * Verifies that a Unicode space separator other than the ideographic space
 * also splits a search string into separate terms.
 *
 * U+2003 (Em Space) is used as the representative character; the query is
 * expected to yield exactly the two terms 'search' and 'term'.
 *
 * @ticket 44296
 */
public function test_unicode_space_separators_are_treated_as_separators() {
	$stopwords_callback = array( $this, 'filter_wp_search_stopwords' );

	// Two words joined by a single U+2003 Em Space.
	$search_string = "search\u{2003}term";

	// The callback is only attached for the duration of the query construction.
	add_filter( 'wp_search_stopwords', $stopwords_callback );
	$search_query = new WP_Query( array( 's' => $search_string ) );
	remove_filter( 'wp_search_stopwords', $stopwords_callback );

	$expected_terms = array( 'search', 'term' );

	$this->assertSame( 2, $search_query->get( 'search_terms_count' ) );
	$this->assertSame( $expected_terms, $search_query->get( 'search_terms' ) );
}

/**
* @ticket 38099
*/
Expand Down
Loading