Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
<?php
/**
* Main WordPress Formatting API.
*
* Handles many functions for formatting output.
*
* @package WordPress
*/
/**
* Replaces common plain text characters with formatted entities.
*
* Returns given text with transformations of quotes into smart quotes, apostrophes,
* dashes, ellipses, the trademark symbol, and the multiplication symbol.
*
* As an example,
*
* 'cause today's effort makes it worth tomorrow's "holiday" ...
*
* Becomes:
*
* &#8217;cause today&#8217;s effort makes it worth tomorrow&#8217;s &#8220;holiday&#8221; &#8230;
*
* Code within certain HTML blocks is skipped.
*
* Do not use this function before the {@see 'init'} action hook; everything will break.
*
* @since 0.71
*
* @global array $wp_cockneyreplace Array of formatted entities for certain common phrases.
* @global array $shortcode_tags
*
* @param string $text The text to be formatted.
* @param bool $reset Set to true for unit testing. Translated patterns will reset.
* @return string The string replaced with HTML entities.
*/
function wptexturize( $text, $reset = false ) {
global $wp_cockneyreplace, $shortcode_tags;
// Static state persists between calls, so the translated entities and the pattern
// lists below are built only once (or rebuilt when $reset is true).
// The three *_flag values are HTML-comment placeholders: the dynamic patterns insert
// them, and they are swapped for the real entities after wptexturize_primes() has run.
static $static_characters = null,
$static_replacements = null,
$dynamic_characters = null,
$dynamic_replacements = null,
$default_no_texturize_tags = null,
$default_no_texturize_shortcodes = null,
$run_texturize = true,
$apos = null,
$prime = null,
$double_prime = null,
$opening_quote = null,
$closing_quote = null,
$opening_single_quote = null,
$closing_single_quote = null,
$open_q_flag = '<!--oq-->',
$open_sq_flag = '<!--osq-->',
$apos_flag = '<!--apos-->';
// If there's nothing to do, just stop.
if ( empty( $text ) || false === $run_texturize ) {
return $text;
}
// Set up static variables. Run once only.
if ( $reset || ! isset( $static_characters ) ) {
/**
* Filters whether to skip running wptexturize().
*
* Returning false from the filter will effectively short-circuit wptexturize()
* and return the original text passed to the function instead.
*
* The filter runs only once, the first time wptexturize() is called.
*
* @since 4.0.0
*
* @see wptexturize()
*
* @param bool $run_texturize Whether to short-circuit wptexturize().
*/
$run_texturize = apply_filters( 'run_wptexturize', $run_texturize );
// $run_texturize is static, so a false result here also makes every later call
// return early through the check at the top of the function.
if ( false === $run_texturize ) {
return $text;
}
/* translators: Opening curly double quote. */
$opening_quote = _x( '&#8220;', 'opening curly double quote' );
/* translators: Closing curly double quote. */
$closing_quote = _x( '&#8221;', 'closing curly double quote' );
/* translators: Apostrophe, for example in 'cause or can't. */
$apos = _x( '&#8217;', 'apostrophe' );
/* translators: Prime, for example in 9' (nine feet). */
$prime = _x( '&#8242;', 'prime' );
/* translators: Double prime, for example in 9" (nine inches). */
$double_prime = _x( '&#8243;', 'double prime' );
/* translators: Opening curly single quote. */
$opening_single_quote = _x( '&#8216;', 'opening curly single quote' );
/* translators: Closing curly single quote. */
$closing_single_quote = _x( '&#8217;', 'closing curly single quote' );
// The dash entities are plain locals: they are only needed while the dash patterns are built below.
/* translators: En dash. */
$en_dash = _x( '&#8211;', 'en dash' );
/* translators: Em dash. */
$em_dash = _x( '&#8212;', 'em dash' );
$default_no_texturize_tags = array( 'pre', 'code', 'kbd', 'style', 'script', 'tt' );
$default_no_texturize_shortcodes = array( 'code' );
// If a plugin has provided an autocorrect array, use it.
if ( isset( $wp_cockneyreplace ) ) {
$cockney = array_keys( $wp_cockneyreplace );
$cockneyreplace = array_values( $wp_cockneyreplace );
} else {
/*
* translators: This is a comma-separated list of words that defy the syntax of quotations in normal use,
* for example... 'We do not have enough words yet'... is a typical quoted phrase. But when we write
* lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes.
*/
$cockney = explode(
',',
_x(
"'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em",
'Comma-separated list of words to texturize in your language'
)
);
$cockneyreplace = explode(
',',
_x(
'&#8217;tain&#8217;t,&#8217;twere,&#8217;twas,&#8217;tis,&#8217;twill,&#8217;til,&#8217;bout,&#8217;nuff,&#8217;round,&#8217;cause,&#8217;em',
'Comma-separated list of replacement words in your language'
)
);
}
// Literal (non-regex) replacements, applied with str_replace() before any pattern runs.
$static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney );
$static_replacements = array_merge( array( '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );
// Pattern-based replacements of characters.
// Sort the remaining patterns into several arrays for performance tuning.
$dynamic_characters = array(
'apos' => array(),
'quote' => array(),
'dash' => array(),
);
$dynamic_replacements = array(
'apos' => array(),
'quote' => array(),
'dash' => array(),
);
// $dynamic maps pattern => replacement for one group at a time; its insertion order is the order in which the patterns run.
$dynamic = array();
$spaces = wp_spaces_regexp();
// Each pattern below is skipped when the translated entity equals the plain character, because the replacement would be a no-op.
// '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
if ( "'" !== $apos || "'" !== $closing_single_quote ) {
$dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
}
if ( "'" !== $apos || '"' !== $closing_quote ) {
$dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
}
// '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0.
if ( "'" !== $apos ) {
$dynamic['/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/'] = $apos_flag;
}
// Quoted numbers like '0.42'.
if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
}
// Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
if ( "'" !== $opening_single_quote ) {
$dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
}
// Apostrophe in a word. No spaces, double apostrophes, or other punctuation.
if ( "'" !== $apos ) {
$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
}
// Store the apostrophe group, then reuse $dynamic for the next group.
$dynamic_characters['apos'] = array_keys( $dynamic );
$dynamic_replacements['apos'] = array_values( $dynamic );
$dynamic = array();
// Quoted numbers like "42".
if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
}
// Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
if ( '"' !== $opening_quote ) {
$dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
}
$dynamic_characters['quote'] = array_keys( $dynamic );
$dynamic_replacements['quote'] = array_values( $dynamic );
$dynamic = array();
// Dashes and spaces.
$dynamic['/---/'] = $em_dash;
$dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/' ] = $em_dash;
// A double hyphen directly after "xn" is left alone (presumably to protect "xn--" IDN/Punycode prefixes; confirm).
$dynamic['/(?<!xn)--/'] = $en_dash;
$dynamic[ '/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/' ] = $en_dash;
$dynamic_characters['dash'] = array_keys( $dynamic );
$dynamic_replacements['dash'] = array_values( $dynamic );
}
// Must do this every time in case plugins use these filters in a context sensitive manner.
/**
* Filters the list of HTML elements not to texturize.
*
* @since 2.8.0
*
* @param string[] $default_no_texturize_tags An array of HTML element names.
*/
$no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
/**
* Filters the list of shortcodes not to texturize.
*
* @since 2.8.0
*
* @param string[] $default_no_texturize_shortcodes An array of shortcode names.
*/
$no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );
// Stacks of currently open no-texturize elements; text is only texturized while both are empty.
$no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array();
// Look for shortcodes and HTML elements.
// First collect every name that follows "[" or "[/" in the text...
preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches );
// ...then keep only the registered shortcode names, so the split regex covers just the shortcodes that actually occur.
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
$found_shortcodes = ! empty( $tagnames );
$shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';
$regex = _get_wptexturize_split_regex( $shortcode_regex );
// Split into alternating text and delimiter fragments; PREG_SPLIT_NO_EMPTY guarantees every fragment has at least one character.
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
// Iterate by reference so that each fragment is rewritten in place inside $textarr.
foreach ( $textarr as &$curl ) {
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
$first = $curl[0];
if ( '<' === $first ) {
if ( '<!--' === substr( $curl, 0, 4 ) ) {
// This is an HTML comment delimiter.
continue;
} else {
// This is an HTML element delimiter.
// Replace each & with &#038; unless it already looks like an entity.
$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
}
} elseif ( '' === trim( $curl ) ) {
// This is a newline between delimiters. Performance improves when we check this.
continue;
} elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
// This is a shortcode delimiter.
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
// Looks like a normal shortcode.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} else {
// Looks like an escaped shortcode.
continue;
}
} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
$curl = str_replace( $static_characters, $static_replacements, $curl );
// Each pattern group is only run when its trigger character is present in the fragment.
if ( false !== strpos( $curl, "'" ) ) {
$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
$curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
// The placeholder flags are resolved only after prime detection, which splits the text on the opening-quote flag.
$curl = str_replace( $apos_flag, $apos, $curl );
$curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
}
if ( false !== strpos( $curl, '"' ) ) {
$curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
$curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
$curl = str_replace( $open_q_flag, $opening_quote, $curl );
}
if ( false !== strpos( $curl, '-' ) ) {
$curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
}
// 9x9 (times), but never 0x9999.
if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) {
// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
}
// Replace each & with &#038; unless it already looks like an entity.
$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );
}
}
// Reassemble the fragments (delimiters included) into a single string.
return implode( '', $textarr );
}
/**
 * Resolves each remaining straight quote character into either a prime or a closing quote.
 *
 * The text is split on the opening-quote marker so that every piece after the first
 * starts inside a quotation. Within such a piece, a quote character that could be
 * either "7 feet" (prime) or the end of the quotation is decided by a small set of
 * heuristics; everywhere else, a quote directly after a digit becomes a prime and a
 * quote before punctuation, whitespace or the end of the text becomes a closing quote.
 *
 * @since 4.3.0
 *
 * @param string $haystack    The plain text to be searched.
 * @param string $needle      The character to search for such as ' or ".
 * @param string $prime       The prime char to use for replacement.
 * @param string $open_quote  The opening quote char. Opening quote replacement must be
 *                            accomplished already.
 * @param string $close_quote The closing quote char to use for replacement.
 * @return string The $haystack value after primes and quotes replacements.
 */
function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
	$spaces = wp_spaces_regexp();

	// Temporary marker for a quote character that may be a prime or a closing quote.
	$flag = '<!--wp-prime-or-quote-->';

	$quote_pattern    = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|" . $spaces . ')/';
	$prime_pattern    = "/(?<=\\d)$needle/";
	$flag_after_digit = "/(?<=\\d)$flag/";
	$flag_no_digit    = "/(?<!\\d)$flag/";

	$segments = explode( $open_quote, $haystack );

	foreach ( $segments as $index => $segment ) {
		// Nothing to decide when the quote character does not occur in this piece.
		if ( false === strpos( $segment, $needle ) ) {
			continue;
		}

		// Pieces after the first follow an opening quote; they still need a closing quote
		// unless one is already present.
		$awaiting_close = ( 0 !== $index && 0 === substr_count( $segment, $close_quote ) );

		if ( ! $awaiting_close ) {
			// Conventional replacement: primes first, then closing quotes.
			$segment = preg_replace( $prime_pattern, $prime, $segment );
			$segment = preg_replace( $quote_pattern, $close_quote, $segment );
		} else {
			// Mark every closing-quote candidate, then decide what each one is.
			$segment = preg_replace( $quote_pattern, $flag, $segment, -1, $candidates );

			if ( $candidates > 1 ) {
				// Several candidates: those not preceded by a digit cannot be primes.
				$segment = preg_replace( $flag_no_digit, $close_quote, $segment, -1, $settled );

				if ( 0 === $settled ) {
					// Every candidate follows a digit. Prefer the rightmost one that is
					// followed by a period; otherwise fall back to the rightmost candidate.
					// The fallback is the case most likely to misfire (see bug #18549).
					$flag_period = "$flag.";

					if ( substr_count( $segment, $flag_period ) > 0 ) {
						$offset = strrpos( $segment, $flag_period );
					} else {
						$offset = strrpos( $segment, $flag );
					}

					$segment = substr_replace( $segment, $close_quote, $offset, strlen( $flag ) );
				}

				// Whatever is left gets the conventional treatment.
				$segment = preg_replace( $prime_pattern, $prime, $segment );
				$segment = preg_replace( $flag_after_digit, $prime, $segment );
				$segment = str_replace( $flag, $close_quote, $segment );
			} elseif ( 1 == $candidates ) {
				// A single candidate wins over the prime interpretation.
				$segment = str_replace( $flag, $close_quote, $segment );
				$segment = preg_replace( $prime_pattern, $prime, $segment );
			} else {
				// No candidates at all: only primes can apply.
				$segment = preg_replace( $prime_pattern, $prime, $segment );
			}
		}

		// Any double quote that survived all of the above is treated as a closing quote.
		if ( '"' === $needle && false !== strpos( $segment, '"' ) ) {
			$segment = str_replace( '"', $close_quote, $segment );
		}

		$segments[ $index ] = $segment;
	}

	return implode( $open_quote, $segments );
}
/**
 * Searches for disabled element tags. Pushes element to stack on tag open
 * and pops on tag close.
 *
 * Assumes first char of `$text` is tag opening and last char is tag closing.
 * Assumes second char of `$text` is optionally `/` to indicate closing as in `</html>`.
 *
 * The tag name ends at the first whitespace character of any kind (space, tab,
 * newline, ...), so `<pre\nclass="x">` and `[code\tlang="php"]` are recognized
 * the same way as their space-separated forms.
 *
 * @since 2.9.0
 * @access private
 *
 * @param string   $text              Text to check. Must be a tag like `<html>` or `[shortcode]`.
 * @param string[] $stack             Array of open tag elements. Passed by reference and updated in place.
 * @param string[] $disabled_elements Array of tag names to match against. Spaces are not allowed in tag names.
 */
function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) {
	// Is it an opening tag or closing tag?
	if ( isset( $text[1] ) && '/' !== $text[1] ) {
		$opening_tag = true;
		$name_offset = 1;
	} elseif ( 0 === count( $stack ) ) {
		// A closing tag cannot close anything while the stack is empty. Just stop.
		return;
	} else {
		$opening_tag = false;
		$name_offset = 2;
	}

	/*
	 * Parse out the tag name.
	 *
	 * Strip the leading delimiter(s) and the final character (the closing `>` or `]`),
	 * then keep everything up to the first whitespace character. Looking only for a
	 * literal space here would turn `<pre\nclass="x">` into the bogus name
	 * `pre\nclass="x"`, and the element would never be treated as disabled.
	 */
	$tag = (string) substr( $text, $name_offset, -1 );
	$tag = substr( $tag, 0, strcspn( $tag, " \t\n\r\f\v" ) );

	// Only elements on the disabled list are tracked.
	if ( ! in_array( $tag, $disabled_elements, true ) ) {
		return;
	}

	if ( $opening_tag ) {
		/*
		 * This disables texturize until we find a closing tag of our type
		 * (e.g. <pre>) even if there was invalid nesting before that.
		 *
		 * Example: in the case <pre>sadsadasd</code>"baba"</pre>
		 * "baba" won't be texturized.
		 */
		array_push( $stack, $tag );
	} elseif ( end( $stack ) === $tag ) {
		// Only the innermost open element may be closed; mismatched closers are ignored.
		array_pop( $stack );
	}
}
/**
 * Replaces double line breaks with paragraph elements.
 *
 * Runs an ordered series of regular-expression passes that turn blank-line separated
 * text into `<p>` elements. Single line breaks that remain afterwards become
 * `<br />` tags unless `$br` is falsy.
 *
 * @since 0.71
 *
 * @param string $text The text which has to be formatted.
 * @param bool   $br   Optional. If set, this will convert all remaining line breaks
 *                     after paragraphing. Line breaks within `<script>`, `<style>`,
 *                     and `<svg>` tags are not affected. Default true.
 * @return string Text which has been converted into correct paragraph tags.
 */
function wpautop( $text, $br = true ) {
	// Whitespace-only input produces no markup at all.
	if ( '' === trim( $text ) ) {
		return '';
	}

	// Placeholder => original `<pre>...</pre>` markup, restored at the very end.
	$preserved_pre = array();

	// Just to make things a little easier, pad the end.
	$text .= "\n";

	/*
	 * Pre tags shouldn't be touched by autop.
	 * Swap each one for a placeholder now and bring the original back after autop.
	 */
	if ( false !== strpos( $text, '<pre' ) ) {
		$segments = explode( '</pre>', $text );
		$tail     = array_pop( $segments );
		$text     = '';
		$i        = 0;

		foreach ( $segments as $segment ) {
			$pre_start = strpos( $segment, '<pre' );

			if ( false === $pre_start ) {
				// Malformed HTML? Keep the segment as it is.
				$text .= $segment;
				continue;
			}

			$name                   = "<pre wp-pre-tag-$i></pre>";
			$preserved_pre[ $name ] = substr( $segment, $pre_start ) . '</pre>';

			$text .= substr( $segment, 0, $pre_start ) . $name;
			$i++;
		}

		$text .= $tail;
	}

	// Change multiple <br>'s into two line breaks, which will turn into paragraphs.
	$text = preg_replace( '|<br\s*/?>\s*<br\s*/?>|', "\n\n", $text );

	$block_tags = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';

	// Add a double line break above block-level opening tags.
	$text = preg_replace( '!(<' . $block_tags . '[\s/>])!', "\n\n$1", $text );

	// Add a double line break below block-level closing tags.
	$text = preg_replace( '!(</' . $block_tags . '>)!', "$1\n\n", $text );

	// Add a double line break after hr tags, which are self closing.
	$text = preg_replace( '!(<hr\s*?/?>)!', "$1\n\n", $text );

	// Standardize newline characters to "\n".
	$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

	// Find newlines in all elements and add placeholders.
	$text = wp_replace_in_html_tags( $text, array( "\n" => ' <!-- wpnl --> ' ) );

	// Collapse line breaks before and after <option> elements so they don't get autop'd.
	if ( false !== strpos( $text, '<option' ) ) {
		$text = preg_replace( '|\s*<option|', '<option', $text );
		$text = preg_replace( '|</option>\s*|', '</option>', $text );
	}

	/*
	 * Collapse line breaks inside <object> elements, before <param> and <embed> elements
	 * so they don't get autop'd.
	 */
	if ( false !== strpos( $text, '</object>' ) ) {
		$text = preg_replace( '|(<object[^>]*>)\s*|', '$1', $text );
		$text = preg_replace( '|\s*</object>|', '</object>', $text );
		$text = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $text );
	}

	/*
	 * Collapse line breaks inside <audio> and <video> elements,
	 * before and after <source> and <track> elements.
	 */
	if ( false !== strpos( $text, '<source' ) || false !== strpos( $text, '<track' ) ) {
		$text = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $text );
		$text = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $text );
		$text = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $text );
	}

	// Collapse line breaks before and after <figcaption> elements.
	if ( false !== strpos( $text, '<figcaption' ) ) {
		$text = preg_replace( '|\s*(<figcaption[^>]*>)|', '$1', $text );
		$text = preg_replace( '|</figcaption>\s*|', '</figcaption>', $text );
	}

	// Remove more than two contiguous line breaks.
	$text = preg_replace( "/\n\n+/", "\n\n", $text );

	// Split up the contents into an array of strings, separated by double line breaks.
	$chunks = preg_split( '/\n\s*\n/', $text, -1, PREG_SPLIT_NO_EMPTY );

	// Rebuild the content from scratch, wrapping every chunk in a <p>.
	$text = '';
	foreach ( $chunks as $chunk ) {
		$text .= '<p>' . trim( $chunk, "\n" ) . "</p>\n";
	}

	// Under certain strange conditions it could create a P of entirely whitespace.
	$text = preg_replace( '|<p>\s*</p>|', '', $text );

	// Add a closing <p> inside <div>, <address>, or <form> tag if missing.
	$text = preg_replace( '!<p>([^<]+)</(div|address|form)>!', '<p>$1</p></$2>', $text );

	// If an opening or closing block element tag is wrapped in a <p>, unwrap it.
	$text = preg_replace( '!<p>\s*(</?' . $block_tags . '[^>]*>)\s*</p>!', '$1', $text );

	// In some cases <li> may get wrapped in <p>, fix them.
	$text = preg_replace( '|<p>(<li.+?)</p>|', '$1', $text );

	// If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
	$text = preg_replace( '|<p><blockquote([^>]*)>|i', '<blockquote$1><p>', $text );
	$text = str_replace( '</blockquote></p>', '</p></blockquote>', $text );

	// If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
	$text = preg_replace( '!<p>\s*(</?' . $block_tags . '[^>]*>)!', '$1', $text );

	// If an opening or closing block element tag is followed by a closing <p> tag, remove it.
	$text = preg_replace( '!(</?' . $block_tags . '[^>]*>)\s*</p>!', '$1', $text );

	// Optionally insert line breaks.
	if ( $br ) {
		// Replace newlines that shouldn't be touched with a placeholder.
		$text = preg_replace_callback( '/<(script|style|svg).*?<\/\\1>/s', '_autop_newline_preservation_helper', $text );

		// Normalize <br>
		$text = str_replace( array( '<br>', '<br/>' ), '<br />', $text );

		// Replace any new line characters that aren't preceded by a <br /> with a <br />.
		$text = preg_replace( '|(?<!<br />)\s*\n|', "<br />\n", $text );

		// Replace newline placeholders with newlines.
		$text = str_replace( '<WPPreserveNewline />', "\n", $text );
	}

	// If a <br /> tag is after an opening or closing block tag, remove it.
	$text = preg_replace( '!(</?' . $block_tags . '[^>]*>)\s*<br />!', '$1', $text );

	// If a <br /> tag is before a subset of opening or closing block tags, remove it.
	$text = preg_replace( '!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $text );
	$text = preg_replace( "|\n</p>$|", '</p>', $text );

	// Replace placeholder <pre> tags with their original content.
	if ( ! empty( $preserved_pre ) ) {
		$text = str_replace( array_keys( $preserved_pre ), array_values( $preserved_pre ), $text );
	}

	// Restore newlines in all elements.
	if ( false !== strpos( $text, '<!-- wpnl -->' ) ) {
		$text = str_replace( array( ' <!-- wpnl --> ', '<!-- wpnl -->' ), "\n", $text );
	}

	return $text;
}
/**
 * Separates HTML elements and comments from the text.
 *
 * The input is split on the pattern from get_html_split_regex() with the matched
 * delimiters kept in the result, so text pieces sit at even indexes and the
 * elements/comments that separated them at odd indexes.
 *
 * @since 4.2.4
 *
 * @param string $input The text which has to be formatted.
 * @return string[] Array of the formatted text.
 */
function wp_html_split( $input ) {
	$element_pattern = get_html_split_regex();

	return preg_split( $element_pattern, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
}
/**
 * Retrieves the regular expression for an HTML element.
 *
 * The pattern matches, as a single captured delimiter, either an HTML comment,
 * a CDATA section, or an ordinary element starting at `<`. Unterminated comments,
 * CDATA sections and elements match through to the end of the input. The pattern
 * is assembled once and cached for the rest of the request.
 *
 * @since 4.4.0
 *
 * @return string The regular expression
 */
function get_html_split_regex() {
	static $regex;

	if ( isset( $regex ) ) {
		return $regex;
	}

	// Remainder of an HTML comment, matched right after the opening "<".
	$comment_rest = implode(
		'',
		array(
			'!',        // Start of comment, after the <.
			'(?:',      // Unrolled loop: consume everything until --> is found.
			'-(?!->)',  // A dash that does not begin the end of the comment.
			'[^\-]*+',  // Consume non-dashes.
			')*+',      // Loop possessively.
			'(?:-->)?', // End of comment. If not found, match all input.
		)
	);

	// Remainder of a CDATA section, matched right after the opening "<".
	$cdata_rest = implode(
		'',
		array(
			'!\[CDATA\[', // Start of CDATA section, after the <.
			'[^\]]*+',    // Consume non-].
			'(?:',        // Unrolled loop: consume everything until ]]> is found.
			'](?!]>)',    // One ] that does not begin the end of the section.
			'[^\]]*+',    // Consume non-].
			')*+',        // Loop possessively.
			'(?:]]>)?',   // End of CDATA section. If not found, match all input.
		)
	);

	// Condition plus "yes" branch: is this an escaped construct, and if so which kind?
	$escaped = implode(
		'',
		array(
			'(?=',        // Is the element escaped?
			'!--',
			'|',
			'!\[CDATA\[',
			')',
			'(?(?=!-)',   // If yes, which type?
			$comment_rest,
			'|',
			$cdata_rest,
			')',
		)
	);

	$regex = implode(
		'',
		array(
			'/(',      // Capture the entire match.
			'<',       // Find start of element.
			'(?',      // Conditional expression follows.
			$escaped,  // Find end of escaped element.
			'|',       // ...else...
			'[^>]*>?', // Find end of normal element.
			')',
			')/',
		)
	);

	return $regex;
}
/**
 * Retrieves the combined regular expression for HTML and shortcodes.
 *
 * The HTML part (comments and elements) is built once and cached; the optional
 * shortcode pattern is appended as an alternative on every call.
 *
 * @access private
 * @ignore
 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
 * @since 4.4.0
 *
 * @param string $shortcode_regex Optional. The result from _get_wptexturize_shortcode_regex().
 * @return string The regular expression
 */
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
	static $html_regex;

	if ( ! isset( $html_regex ) ) {
		// Remainder of an HTML comment, matched right after the opening "<".
		$comment_regex = implode(
			'',
			array(
				'!',        // Start of comment, after the <.
				'(?:',      // Unrolled loop: consume everything until --> is found.
				'-(?!->)',  // A dash that does not begin the end of the comment.
				'[^\-]*+',  // Consume non-dashes.
				')*+',      // Loop possessively.
				'(?:-->)?', // End of comment. If not found, match all input.
			)
		);

		// Needs replaced with wp_html_split() per Shortcode API Roadmap.
		$html_regex = implode(
			'',
			array(
				'<',            // Find start of element.
				'(?(?=!--)',    // Is this a comment?
				$comment_regex, // Find end of comment.
				'|',
				'[^>]*>?',      // Find end of element. If not found, match all input.
				')',
			)
		);
	}

	// Without a shortcode pattern, only HTML delimiters are captured.
	$alternatives = empty( $shortcode_regex ) ? $html_regex : $html_regex . '|' . $shortcode_regex;

	return '/(' . $alternatives . ')/';
}
/**
 * Retrieves the regular expression for shortcodes.
 *
 * The returned fragment has no delimiters or anchors; callers embed it in a
 * larger pattern. It matches an opening, closing or escaped (`[[...]]`) tag of
 * one of the given shortcode names.
 *
 * @access private
 * @ignore
 * @since 4.4.0
 *
 * @param string[] $tagnames Array of shortcodes to find.
 * @return string The regular expression
 */
function _get_wptexturize_shortcode_regex( $tagnames ) {
	$quoted_names = array_map( 'preg_quote', $tagnames );

	// Excerpt of get_shortcode_regex(): a name must be followed by whitespace, "]" or "/".
	$name_pattern = '(?:' . implode( '|', $quoted_names ) . ')(?=[\\s\\]\\/])';

	$fragments = array(
		'\[',           // Find start of shortcode.
		'[\/\[]?',      // Shortcodes may begin with [/ or [[.
		$name_pattern,  // Only match registered shortcodes, because performance.
		'(?:',
		'[^\[\]<>]+',   // Shortcodes do not contain other shortcodes. Quantifier critical.
		'|',
		'<[^\[\]>]*>',  // HTML elements permitted. Prevents matching ] before >.
		')*+',          // Possessive critical.
		'\]',           // Find end of shortcode.
		'\]?',          // Shortcodes may end with ]].
	);

	return implode( '', $fragments );
}
/**
 * Replaces characters or phrases within HTML elements only.
 *
 * The text between elements is never modified. When nothing inside any element
 * matches, the original string is returned as it was passed in.
 *
 * @since 4.2.3
 *
 * @param string $haystack      The text which has to be formatted.
 * @param array  $replace_pairs In the form array('from' => 'to', ...).
 * @return string The formatted text.
 */
function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
	// Split into text (even indexes) and elements (odd indexes).
	$pieces      = wp_html_split( $haystack );
	$modified    = false;
	$single_pair = ( 1 === count( $replace_pairs ) );
	$piece_count = count( $pieces );

	if ( $single_pair ) {
		// Fast path for one pair: a plain str_replace() per element.
		$replace = reset( $replace_pairs );
		$needle  = key( $replace_pairs );

		// Visit the delimiters (elements) only.
		for ( $index = 1; $index < $piece_count; $index += 2 ) {
			if ( false === strpos( $pieces[ $index ], $needle ) ) {
				continue;
			}

			$pieces[ $index ] = str_replace( $needle, $replace, $pieces[ $index ] );
			$modified         = true;
		}
	} else {
		$needles = array_keys( $replace_pairs );

		// Visit the delimiters (elements) only.
		for ( $index = 1; $index < $piece_count; $index += 2 ) {
			foreach ( $needles as $needle ) {
				if ( false !== strpos( $pieces[ $index ], $needle ) ) {
					// One strtr() applies every pair, so move on to the next element right away.
					$pieces[ $index ] = strtr( $pieces[ $index ], $replace_pairs );
					$modified         = true;
					break;
				}
			}
		}
	}

	return $modified ? implode( $pieces ) : $haystack;
}
/**
 * Newline preservation help function for wpautop().
 *
 * Swaps every newline in the whole match for the `<WPPreserveNewline />`
 * placeholder, which wpautop() turns back into a newline later on.
 *
 * @since 3.1.0
 * @access private
 *
 * @param array $matches preg_replace_callback matches array
 * @return string
 */
function _autop_newline_preservation_helper( $matches ) {
	$whole_match = $matches[0];

	return str_replace( "\n", '<WPPreserveNewline />', $whole_match );
}
/**
 * Don't auto-p wrap shortcodes that stand alone.
 *
 * Removes a `<p>...</p>` wrapper whose only content, apart from surrounding
 * whitespace, is a single registered shortcode (self-closing or enclosing).
 *
 * @since 2.9.0
 *
 * @global array $shortcode_tags
 *
 * @param string $text The content.
 * @return string The filtered content.
 */
function shortcode_unautop( $text ) {
	global $shortcode_tags;

	// Without registered shortcodes there is nothing to unwrap.
	if ( ! is_array( $shortcode_tags ) || empty( $shortcode_tags ) ) {
		return $text;
	}

	$quoted_names = array_map( 'preg_quote', array_keys( $shortcode_tags ) );
	$tagregexp    = implode( '|', $quoted_names );
	$spaces       = wp_spaces_regexp();

	$fragments = array(
		'/',
		'<p>',                        // Opening paragraph.
		'(?:' . $spaces . ')*+',      // Optional leading whitespace.
		'(',                          // 1: The shortcode.
		'\\[',                        // Opening bracket.
		"($tagregexp)",               // 2: Shortcode name.
		'(?![\\w-])',                 // Not followed by word character or hyphen.
		// Unrolled loop: inside the opening shortcode tag.
		'[^\\]\\/]*',                 // Not a closing bracket or forward slash.
		'(?:',
		'\\/(?!\\])',                 // A forward slash not followed by a closing bracket.
		'[^\\]\\/]*',                 // Not a closing bracket or forward slash.
		')*?',
		'(?:',
		'\\/\\]',                     // Self closing tag and closing bracket.
		'|',
		'\\]',                        // Closing bracket.
		'(?:',                        // Unrolled loop: optionally, anything between the opening and closing shortcode tags.
		'[^\\[]*+',                   // Not an opening bracket.
		'(?:',
		'\\[(?!\\/\\2\\])',           // An opening bracket not followed by the closing shortcode tag.
		'[^\\[]*+',                   // Not an opening bracket.
		')*+',
		'\\[\\/\\2\\]',               // Closing shortcode tag.
		')?',
		')',
		')',
		'(?:' . $spaces . ')*+',      // Optional trailing whitespace.
		'<\\/p>',                     // Closing paragraph.
		'/',
	);

	$pattern = implode( '', $fragments );

	// Keep only the shortcode itself (capture group 1).
	return preg_replace( $pattern, '$1', $text );
}
/**
 * Checks to see if a string is utf8 encoded.
 *
 * Each lead byte must be followed by the number of continuation bytes
 * (`10bbbbbb`) that its bit pattern announces.
 *
 * NOTE: This check is structural only and still accepts the legacy 5- and 6-byte
 * lead-byte forms, although UTF-8 byte sequences have a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8( $str ) {
	// Measure the length in bytes, regardless of any mbstring function overloading.
	mbstring_binary_safe_encoding();
	$length = strlen( $str );
	reset_mbstring_encoding();

	// Lead-byte forms as array( mask, expected bits, number of continuation bytes ).
	$lead_byte_forms = array(
		array( 0x80, 0x00, 0 ), // 0bbbbbbb
		array( 0xE0, 0xC0, 1 ), // 110bbbbb
		array( 0xF0, 0xE0, 2 ), // 1110bbbb
		array( 0xF8, 0xF0, 3 ), // 11110bbb
		array( 0xFC, 0xF8, 4 ), // 111110bb
		array( 0xFE, 0xFC, 5 ), // 1111110b
	);

	$i = 0;

	while ( $i < $length ) {
		$c = ord( $str[ $i ] );
		$n = -1;

		foreach ( $lead_byte_forms as $form ) {
			if ( ( $c & $form[0] ) === $form[1] ) {
				$n = $form[2];
				break;
			}
		}

		if ( $n < 0 ) {
			return false; // Does not match any model.
		}

		// The next n bytes must exist and all match 10bbbbbb.
		for ( $remaining = $n; $remaining > 0; $remaining-- ) {
			$i++;

			if ( $i === $length || 0x80 !== ( ord( $str[ $i ] ) & 0xC0 ) ) {
				return false;
			}
		}

		$i++;
	}

	return true;
}
/**
 * Converts a number of special characters into their HTML entities.
 *
 * Specifically deals with: `&`, `<`, `>`, `"`, and `'`.
 *
 * `$quote_style` can be set to ENT_COMPAT to encode `"` to
 * `&quot;`, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.2.2
 * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
 * @access private
 *
 * @param string       $string        The text which is to be encoded.
 * @param int|string   $quote_style   Optional. Converts double quotes if set to ENT_COMPAT,
 *                                    both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES.
 *                                    Converts single and double quotes, as well as converting HTML
 *                                    named entities (that are not also XML named entities) to their
 *                                    code points if set to ENT_XML1. Also compatible with old values;
 *                                    converting single quotes if set to 'single',
 *                                    double if set to 'double' or both if otherwise set.
 *                                    Default is ENT_NOQUOTES.
 * @param false|string $charset       Optional. The character encoding of the string. Default false.
 * @param bool         $double_encode Optional. Whether to encode existing HTML entities. Default false.
 * @return string The encoded text with HTML entities.
 */
function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	// Site charset, remembered between calls to avoid repeated wp_load_alloptions() lookups.
	static $_charset = null;

	$string = (string) $string;

	if ( '' === $string ) {
		return '';
	}

	// No special characters at all: skip the rest of the work.
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// Account for the previous behavior of the function when the $quote_style is not an accepted value.
	$accepted_styles = array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' );

	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ENT_XML1 === $quote_style ) {
		$quote_style = ENT_QUOTES | ENT_XML1;
	} elseif ( ! in_array( $quote_style, $accepted_styles, true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// Fall back to the site charset when none was passed in.
	if ( ! $charset ) {
		if ( ! isset( $_charset ) ) {
			$alloptions = wp_load_alloptions();
			$_charset   = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
		}

		$charset = $_charset;
	}

	// Normalize the common spellings of UTF-8.
	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ), true ) ) {
		$charset = 'UTF-8';
	}

	// Map the legacy string values onto flags; 'single' needs an extra pass after encoding.
	$encode_single_only = ( 'single' === $quote_style );

	if ( 'double' === $quote_style ) {
		$quote_style = ENT_COMPAT;
	} elseif ( $encode_single_only ) {
		$quote_style = ENT_NOQUOTES;
	}

	if ( ! $double_encode ) {
		// Guarantee every &entity; is valid, convert &garbage; into &amp;garbage;
		$entity_context = ( $quote_style & ENT_XML1 ) ? 'xml' : 'html';
		$string         = wp_kses_normalize_entities( $string, $entity_context );
	}

	$string = htmlspecialchars( $string, $quote_style, $charset, $double_encode );

	// Back-compat: 'single' encodes single quotes only.
	if ( $encode_single_only ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
/**
 * Decodes the HTML entities for `&`, `<`, `>`, `"`, and `'` back into plain characters.
 *
 * `<`, `>` and `&` entities are always decoded. Quote entities are decoded according to
 * `$quote_style`: ENT_COMPAT decodes `"` entities only, ENT_QUOTES decodes both `"` and `'`
 * entities, and the default ENT_NOQUOTES decodes neither.
 *
 * Zero-padded numeric entities (e.g. `&#0039;`) are first rewritten to their canonical
 * form, then all recognized entities are replaced in a single pass.
 *
 * @since 2.8.0
 *
 * @param string     $string      The text which is to be decoded.
 * @param string|int $quote_style Optional. ENT_COMPAT converts double quotes, ENT_QUOTES converts
 *                                both single and double quotes, ENT_NOQUOTES converts neither.
 *                                Also compatible with old _wp_specialchars() values: 'single'
 *                                converts single quotes, 'double' converts double quotes, and any
 *                                other non-empty value converts both. Default is ENT_NOQUOTES.
 * @return string The decoded text without HTML entities.
 */
function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
	$string = (string) $string;

	if ( '' === $string ) {
		return '';
	}

	// Every entity starts with an ampersand; without one there is nothing to decode.
	if ( false === strpos( $string, '&' ) ) {
		return $string;
	}

	// Match the previous behavior of _wp_specialchars() when the $quote_style is not an accepted value.
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	$decode_single = ( ENT_QUOTES === $quote_style || 'single' === $quote_style );
	$decode_double = ( ENT_QUOTES === $quote_style || ENT_COMPAT === $quote_style || 'double' === $quote_style );

	// Pattern => canonical entity, used to strip zero padding from numeric entities.
	$canonical_entities = array();
	// Canonical entity => decoded character.
	$decoded_characters = array();

	if ( $decode_single ) {
		$canonical_entities['/&#0*39;/']   = '&#039;';
		$canonical_entities['/&#x0*27;/i'] = '&#x27;';

		$decoded_characters['&#039;'] = '\'';
		$decoded_characters['&#x27;'] = '\'';
	}

	if ( $decode_double ) {
		$canonical_entities['/&#0*34;/']   = '&#034;';
		$canonical_entities['/&#x0*22;/i'] = '&#x22;';

		$decoded_characters['&quot;'] = '"';
		$decoded_characters['&#034;'] = '"';
		$decoded_characters['&#x22;'] = '"';
	}

	// `<`, `>` and `&` are decoded for every quote style.
	$canonical_entities['/&#0*60;/']   = '&#060;';
	$canonical_entities['/&#0*62;/']   = '&#062;';
	$canonical_entities['/&#0*38;/']   = '&#038;';
	$canonical_entities['/&#x0*26;/i'] = '&#x26;';

	$decoded_characters['&lt;']   = '<';
	$decoded_characters['&#060;'] = '<';
	$decoded_characters['&gt;']   = '>';
	$decoded_characters['&#062;'] = '>';
	$decoded_characters['&amp;']  = '&';
	$decoded_characters['&#038;'] = '&';
	$decoded_characters['&#x26;'] = '&';

	// Remove zero padding on numeric entities.
	$string = preg_replace( array_keys( $canonical_entities ), array_values( $canonical_entities ), $string );

	// Replace entities according to the translation table.
	return strtr( $string, $decoded_characters );
}
/**
 * Checks a string for invalid UTF-8.
 *
 * The check only runs when the site's 'blog_charset' option is one of the UTF-8 spellings
 * and the installed PCRE library supports the `u` modifier; otherwise the string is
 * returned as is. A string that fails validation yields an empty string, unless `$strip`
 * is set and iconv() is available, in which case the result of re-encoding the string
 * through iconv() is returned.
 *
 * @since 2.8.0
 *
 * @param string $string The text which is to be checked.
 * @param bool   $strip  Optional. Whether to attempt to strip out invalid UTF8. Default false.
 * @return string The checked text.
 */
function wp_check_invalid_utf8( $string, $strip = false ) {
	// Both lookups are cached in statics so they happen at most once per request.
	static $is_utf8   = null;
	static $utf8_pcre = null;

	$string = (string) $string;

	if ( '' === $string ) {
		return '';
	}

	// Determine (once) whether the site charset is UTF-8 at all.
	if ( null === $is_utf8 ) {
		$utf8_spellings = array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' );
		$is_utf8        = in_array( get_option( 'blog_charset' ), $utf8_spellings, true );
	}

	if ( ! $is_utf8 ) {
		return $string;
	}

	// Determine (once) whether the installed PCRE library supports UTF-8 mode.
	if ( null === $utf8_pcre ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}

	// We can't demand utf8 in the PCRE installation, so just return the string in those cases.
	if ( ! $utf8_pcre ) {
		return $string;
	}

	// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $string.
	$matched = @preg_match( '/^./us', $string );

	if ( 1 === $matched ) {
		return $string;
	}

	// Invalid UTF-8: give up unless stripping was requested (not recommended) and is possible.
	if ( ! $strip || ! function_exists( 'iconv' ) ) {
		return '';
	}

	return iconv( 'utf-8', 'utf-8', $string );
}
/**
 * Percent-encodes the non-ASCII bytes of a UTF-8 string for use in a URI.
 *
 * Bytes below 128 are copied through unchanged, or passed through rawurlencode() when
 * `$encode_ascii_characters` is set. Bytes of 128 and above are collected into groups of
 * 2, 3 or 4 (chosen from the value of the group's first byte) and each group is emitted
 * as `%xx` escapes once it is complete.
 *
 * When `$length` is non-zero, encoding stops before the output would grow beyond that
 * many bytes; a multi-byte group is either emitted whole or not at all.
 *
 * @since 1.5.0
 * @since 5.8.3 Added the `encode_ascii_characters` parameter.
 *
 * @param string $utf8_string             String to encode.
 * @param int    $length                  Max length of the encoded string. 0 means no limit.
 * @param bool   $encode_ascii_characters Whether to encode ascii characters such as < " '
 * @return string String with Unicode encoded for URI.
 */
function utf8_uri_encode( $utf8_string, $length = 0, $encode_ascii_characters = false ) {
	$encoded         = '';
	$encoded_length  = 0;
	$pending_octets  = array();
	$expected_octets = 1;

	// Measure the input in bytes regardless of any mbstring function overloading.
	mbstring_binary_safe_encoding();
	$byte_count = strlen( $utf8_string );
	reset_mbstring_encoding();

	for ( $offset = 0; $offset < $byte_count; $offset++ ) {
		$byte = ord( $utf8_string[ $offset ] );

		// Single-byte (ASCII) character.
		if ( $byte < 128 ) {
			$piece = chr( $byte );
			if ( $encode_ascii_characters ) {
				$piece = rawurlencode( $piece );
			}
			$piece_length = strlen( $piece );

			if ( $length && ( $encoded_length + $piece_length ) > $length ) {
				break;
			}

			$encoded        .= $piece;
			$encoded_length += $piece_length;
			continue;
		}

		// First byte of a new group: its value decides how many bytes the group has.
		if ( 0 == count( $pending_octets ) ) {
			if ( $byte < 224 ) {
				$expected_octets = 2;
			} elseif ( $byte < 240 ) {
				$expected_octets = 3;
			} else {
				$expected_octets = 4;
			}
		}

		$pending_octets[] = $byte;

		// Each byte of the group becomes a three-character `%xx` escape.
		if ( $length && ( $encoded_length + ( $expected_octets * 3 ) ) > $length ) {
			break;
		}

		// Group complete: emit it and start over.
		if ( count( $pending_octets ) == $expected_octets ) {
			foreach ( $pending_octets as $octet ) {
				$encoded .= '%' . dechex( $octet );
			}

			$encoded_length += $expected_octets * 3;

			$pending_octets  = array();
			$expected_octets = 1;
		}
	}

	return $encoded;
}
/**
* Converts all accent characters to ASCII characters.
*
* If there are no accent characters, then the string given is just returned.
*
* **Accent characters converted:**
*
* Currency signs:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | ------------------- |
* | U+00A3 | £ | (empty) | British Pound sign |
* | U+20AC | € | E | Euro sign |
*
* Decompositions for Latin-1 Supplement:
*
* | Code | Glyph | Replacement | Description |
* | ------- | ----- | ----------- | -------------------------------------- |
* | U+00AA | ª | a | Feminine ordinal indicator |
* | U+00BA | º | o | Masculine ordinal indicator |
* | U+00C0 | À | A | Latin capital letter A with grave |
* | U+00C1 | Á | A | Latin capital letter A with acute |
* | U+00C2 | Â | A | Latin capital letter A with circumflex |
* | U+00C3 | Ã | A | Latin capital letter A with tilde |
* | U+00C4 | Ä | A | Latin capital letter A with diaeresis |
* | U+00C5 | Å | A | Latin capital letter A with ring above |
* | U+00C6 | Æ | AE | Latin capital letter AE |
* | U+00C7 | Ç | C | Latin capital letter C with cedilla |
* | U+00C8 | È | E | Latin capital letter E with grave |
* | U+00C9 | É | E | Latin capital letter E with acute |
* | U+00CA | Ê | E | Latin capital letter E with circumflex |
* | U+00CB | Ë | E | Latin capital letter E with diaeresis |
* | U+00CC | Ì | I | Latin capital letter I with grave |
* | U+00CD | Í | I | Latin capital letter I with acute |
* | U+00CE | Î | I | Latin capital letter I with circumflex |
* | U+00CF | Ï | I | Latin capital letter I with diaeresis |
* | U+00D0 | Ð | D | Latin capital letter Eth |
* | U+00D1 | Ñ | N | Latin capital letter N with tilde |
* | U+00D2 | Ò | O | Latin capital letter O with grave |
* | U+00D3 | Ó | O | Latin capital letter O with acute |
* | U+00D4 | Ô | O | Latin capital letter O with circumflex |
* | U+00D5 | Õ | O | Latin capital letter O with tilde |
* | U+00D6 | Ö | O | Latin capital letter O with diaeresis |
* | U+00D8 | Ø | O | Latin capital letter O with stroke |
* | U+00D9 | Ù | U | Latin capital letter U with grave |
* | U+00DA | Ú | U | Latin capital letter U with acute |
* | U+00DB | Û | U | Latin capital letter U with circumflex |
* | U+00DC | Ü | U | Latin capital letter U with diaeresis |
* | U+00DD | Ý | Y | Latin capital letter Y with acute |
* | U+00DE | Þ | TH | Latin capital letter Thorn |
* | U+00DF | ß | s | Latin small letter sharp s |
* | U+00E0 | à | a | Latin small letter a with grave |
* | U+00E1 | á | a | Latin small letter a with acute |
* | U+00E2 | â | a | Latin small letter a with circumflex |
* | U+00E3 | ã | a | Latin small letter a with tilde |
* | U+00E4 | ä | a | Latin small letter a with diaeresis |
* | U+00E5 | å | a | Latin small letter a with ring above |
* | U+00E6 | æ | ae | Latin small letter ae |
* | U+00E7 | ç | c | Latin small letter c with cedilla |
* | U+00E8 | è | e | Latin small letter e with grave |
* | U+00E9 | é | e | Latin small letter e with acute |
* | U+00EA | ê | e | Latin small letter e with circumflex |
* | U+00EB | ë | e | Latin small letter e with diaeresis |
* | U+00EC | ì | i | Latin small letter i with grave |
* | U+00ED | í | i | Latin small letter i with acute |
* | U+00EE | î | i | Latin small letter i with circumflex |
* | U+00EF | ï | i | Latin small letter i with diaeresis |
* | U+00F0 | ð | d | Latin small letter Eth |
* | U+00F1 | ñ | n | Latin small letter n with tilde |
* | U+00F2 | ò | o | Latin small letter o with grave |
* | U+00F3 | ó | o | Latin small letter o with acute |
* | U+00F4 | ô | o | Latin small letter o with circumflex |
* | U+00F5 | õ | o | Latin small letter o with tilde |
* | U+00F6 | ö | o | Latin small letter o with diaeresis |
* | U+00F8 | ø | o | Latin small letter o with stroke |
* | U+00F9 | ù | u | Latin small letter u with grave |
* | U+00FA | ú | u | Latin small letter u with acute |
* | U+00FB | û | u | Latin small letter u with circumflex |
* | U+00FC | ü | u | Latin small letter u with diaeresis |
* | U+00FD | ý | y | Latin small letter y with acute |
* | U+00FE | þ | th | Latin small letter Thorn |
* | U+00FF | ÿ | y | Latin small letter y with diaeresis |
*
* Decompositions for Latin Extended-A:
*
* | Code | Glyph | Replacement | Description |
* | ------- | ----- | ----------- | ------------------------------------------------- |
* | U+0100 | Ā | A | Latin capital letter A with macron |
* | U+0101 | ā | a | Latin small letter a with macron |
* | U+0102 | Ă | A | Latin capital letter A with breve |
* | U+0103 | ă | a | Latin small letter a with breve |
* | U+0104 | Ą | A | Latin capital letter A with ogonek |
* | U+0105 | ą | a | Latin small letter a with ogonek |
* | U+0106 | Ć | C | Latin capital letter C with acute |
* | U+0107 | ć | c | Latin small letter c with acute |
* | U+0108 | Ĉ | C | Latin capital letter C with circumflex |
* | U+0109 | ĉ | c | Latin small letter c with circumflex |
* | U+010A | Ċ | C | Latin capital letter C with dot above |
* | U+010B | ċ | c | Latin small letter c with dot above |
* | U+010C | Č | C | Latin capital letter C with caron |
* | U+010D | č | c | Latin small letter c with caron |
* | U+010E | Ď | D | Latin capital letter D with caron |
* | U+010F | ď | d | Latin small letter d with caron |
* | U+0110 | Đ | D | Latin capital letter D with stroke |
* | U+0111 | đ | d | Latin small letter d with stroke |
* | U+0112 | Ē | E | Latin capital letter E with macron |
* | U+0113 | ē | e | Latin small letter e with macron |
* | U+0114 | Ĕ | E | Latin capital letter E with breve |
* | U+0115 | ĕ | e | Latin small letter e with breve |
* | U+0116 | Ė | E | Latin capital letter E with dot above |
* | U+0117 | ė | e | Latin small letter e with dot above |
* | U+0118 | Ę | E | Latin capital letter E with ogonek |
* | U+0119 | ę | e | Latin small letter e with ogonek |
* | U+011A | Ě | E | Latin capital letter E with caron |
* | U+011B | ě | e | Latin small letter e with caron |
* | U+011C | Ĝ | G | Latin capital letter G with circumflex |
* | U+011D | ĝ | g | Latin small letter g with circumflex |
* | U+011E | Ğ | G | Latin capital letter G with breve |
* | U+011F | ğ | g | Latin small letter g with breve |
* | U+0120 | Ġ | G | Latin capital letter G with dot above |
* | U+0121 | ġ | g | Latin small letter g with dot above |
* | U+0122 | Ģ | G | Latin capital letter G with cedilla |
* | U+0123 | ģ | g | Latin small letter g with cedilla |
* | U+0124 | Ĥ | H | Latin capital letter H with circumflex |
* | U+0125 | ĥ | h | Latin small letter h with circumflex |
* | U+0126 | Ħ | H | Latin capital letter H with stroke |
* | U+0127 | ħ | h | Latin small letter h with stroke |
* | U+0128 | Ĩ | I | Latin capital letter I with tilde |
* | U+0129 | ĩ | i | Latin small letter i with tilde |
* | U+012A | Ī | I | Latin capital letter I with macron |
* | U+012B | ī | i | Latin small letter i with macron |
* | U+012C | Ĭ | I | Latin capital letter I with breve |
* | U+012D | ĭ | i | Latin small letter i with breve |
* | U+012E | Į | I | Latin capital letter I with ogonek |
* | U+012F | į | i | Latin small letter i with ogonek |
* | U+0130 | İ | I | Latin capital letter I with dot above |
* | U+0131 | ı | i | Latin small letter dotless i |
* | U+0132 | IJ | IJ | Latin capital ligature IJ |
* | U+0133 | ij | ij | Latin small ligature ij |
* | U+0134 | Ĵ | J | Latin capital letter J with circumflex |
* | U+0135 | ĵ | j | Latin small letter j with circumflex |
* | U+0136 | Ķ | K | Latin capital letter K with cedilla |
* | U+0137 | ķ | k | Latin small letter k with cedilla |
* | U+0138 | ĸ | k | Latin small letter Kra |
* | U+0139 | Ĺ | L | Latin capital letter L with acute |
* | U+013A | ĺ | l | Latin small letter l with acute |
* | U+013B | Ļ | L | Latin capital letter L with cedilla |
* | U+013C | ļ | l | Latin small letter l with cedilla |
* | U+013D | Ľ | L | Latin capital letter L with caron |
* | U+013E | ľ | l | Latin small letter l with caron |
* | U+013F | Ŀ | L | Latin capital letter L with middle dot |
* | U+0140 | ŀ | l | Latin small letter l with middle dot |
* | U+0141 | Ł | L | Latin capital letter L with stroke |
* | U+0142 | ł | l | Latin small letter l with stroke |
* | U+0143 | Ń | N | Latin capital letter N with acute |
* | U+0144 | ń | n | Latin small letter n with acute |
* | U+0145 | Ņ | N | Latin capital letter N with cedilla |
* | U+0146 | ņ | n | Latin small letter n with cedilla |
* | U+0147 | Ň | N | Latin capital letter N with caron |
* | U+0148 | ň | n | Latin small letter n with caron |
* | U+0149 | ʼn | n | Latin small letter n preceded by apostrophe |
* | U+014A | Ŋ | N | Latin capital letter Eng |
* | U+014B | ŋ | n | Latin small letter Eng |
* | U+014C | Ō | O | Latin capital letter O with macron |
* | U+014D | ō | o | Latin small letter o with macron |
* | U+014E | Ŏ | O | Latin capital letter O with breve |
* | U+014F | ŏ | o | Latin small letter o with breve |
* | U+0150 | Ő | O | Latin capital letter O with double acute |
* | U+0151 | ő | o | Latin small letter o with double acute |
* | U+0152 | Œ | OE | Latin capital ligature OE |
* | U+0153 | œ | oe | Latin small ligature oe |
* | U+0154 | Ŕ | R | Latin capital letter R with acute |
* | U+0155 | ŕ | r | Latin small letter r with acute |
* | U+0156 | Ŗ | R | Latin capital letter R with cedilla |
* | U+0157 | ŗ | r | Latin small letter r with cedilla |
* | U+0158 | Ř | R | Latin capital letter R with caron |
* | U+0159 | ř | r | Latin small letter r with caron |
* | U+015A | Ś | S | Latin capital letter S with acute |
* | U+015B | ś | s | Latin small letter s with acute |
* | U+015C | Ŝ | S | Latin capital letter S with circumflex |
* | U+015D | ŝ | s | Latin small letter s with circumflex |
* | U+015E | Ş | S | Latin capital letter S with cedilla |
* | U+015F | ş | s | Latin small letter s with cedilla |
* | U+0160 | Š | S | Latin capital letter S with caron |
* | U+0161 | š | s | Latin small letter s with caron |
* | U+0162 | Ţ | T | Latin capital letter T with cedilla |
* | U+0163 | ţ | t | Latin small letter t with cedilla |
* | U+0164 | Ť | T | Latin capital letter T with caron |
* | U+0165 | ť | t | Latin small letter t with caron |
* | U+0166 | Ŧ | T | Latin capital letter T with stroke |
* | U+0167 | ŧ | t | Latin small letter t with stroke |
* | U+0168 | Ũ | U | Latin capital letter U with tilde |
* | U+0169 | ũ | u | Latin small letter u with tilde |
* | U+016A | Ū | U | Latin capital letter U with macron |
* | U+016B | ū | u | Latin small letter u with macron |
* | U+016C | Ŭ | U | Latin capital letter U with breve |
* | U+016D | ŭ | u | Latin small letter u with breve |
* | U+016E | Ů | U | Latin capital letter U with ring above |
* | U+016F | ů | u | Latin small letter u with ring above |
* | U+0170 | Ű | U | Latin capital letter U with double acute |
* | U+0171 | ű | u | Latin small letter u with double acute |
* | U+0172 | Ų | U | Latin capital letter U with ogonek |
* | U+0173 | ų | u | Latin small letter u with ogonek |
* | U+0174 | Ŵ | W | Latin capital letter W with circumflex |
* | U+0175 | ŵ | w | Latin small letter w with circumflex |
* | U+0176 | Ŷ | Y | Latin capital letter Y with circumflex |
* | U+0177 | ŷ | y | Latin small letter y with circumflex |
* | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis |
* | U+0179 | Ź | Z | Latin capital letter Z with acute |
* | U+017A | ź | z | Latin small letter z with acute |
* | U+017B | Ż | Z | Latin capital letter Z with dot above |
* | U+017C | ż | z | Latin small letter z with dot above |
* | U+017D | Ž | Z | Latin capital letter Z with caron |
* | U+017E | ž | z | Latin small letter z with caron |
* | U+017F | ſ | s | Latin small letter long s |
* | U+01A0 | Ơ | O | Latin capital letter O with horn |
* | U+01A1 | ơ | o | Latin small letter o with horn |
* | U+01AF | Ư | U | Latin capital letter U with horn |
* | U+01B0 | ư | u | Latin small letter u with horn |
* | U+01CD | Ǎ | A | Latin capital letter A with caron |
* | U+01CE | ǎ | a | Latin small letter a with caron |
* | U+01CF | Ǐ | I | Latin capital letter I with caron |
* | U+01D0 | ǐ | i | Latin small letter i with caron |
* | U+01D1 | Ǒ | O | Latin capital letter O with caron |
* | U+01D2 | ǒ | o | Latin small letter o with caron |
* | U+01D3 | Ǔ | U | Latin capital letter U with caron |
* | U+01D4 | ǔ | u | Latin small letter u with caron |
* | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron |
* | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron |
* | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute |
* | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute |
* | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron |
* | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron |
* | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave |
* | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave |
*
* Decompositions for Latin Extended-B:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | ----------------------------------------- |
* | U+0218 | Ș | S | Latin capital letter S with comma below |
* | U+0219 | ș | s | Latin small letter s with comma below |
* | U+021A | Ț | T | Latin capital letter T with comma below |
* | U+021B | ț | t | Latin small letter t with comma below |
*
* Vowels with diacritic (Chinese, Hanyu Pinyin):
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | ----------------------------------------------------- |
* | U+0251 | ɑ | a | Latin small letter alpha |
* | U+1EA0 | Ạ | A | Latin capital letter A with dot below |
* | U+1EA1 | ạ | a | Latin small letter a with dot below |
* | U+1EA2 | Ả | A | Latin capital letter A with hook above |
* | U+1EA3 | ả | a | Latin small letter a with hook above |
* | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute |
* | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute |
* | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave |
* | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave |
* | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above |
* | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above |
* | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde |
* | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde |
* | U+1EAC | Ậ | A | Latin capital letter A with circumflex and dot below |
* | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below |
* | U+1EAE | Ắ | A | Latin capital letter A with breve and acute |
* | U+1EAF | ắ | a | Latin small letter a with breve and acute |
* | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave |
* | U+1EB1 | ằ | a | Latin small letter a with breve and grave |
* | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above |
* | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above |
* | U+1EB4 | Ẵ | A | Latin capital letter A with breve and tilde |
* | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde |
* | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below |
* | U+1EB7 | ặ | a | Latin small letter a with breve and dot below |
* | U+1EB8 | Ẹ | E | Latin capital letter E with dot below |
* | U+1EB9 | ẹ | e | Latin small letter e with dot below |
* | U+1EBA | Ẻ | E | Latin capital letter E with hook above |
* | U+1EBB | ẻ | e | Latin small letter e with hook above |
* | U+1EBC | Ẽ | E | Latin capital letter E with tilde |
* | U+1EBD | ẽ | e | Latin small letter e with tilde |
* | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute |
* | U+1EBF | ế | e | Latin small letter e with circumflex and acute |
* | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave |
* | U+1EC1 | ề | e | Latin small letter e with circumflex and grave |
* | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above |
* | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above |
* | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde |
* | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde |
* | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below |
* | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below |
* | U+1EC8 | Ỉ | I | Latin capital letter I with hook above |
* | U+1EC9 | ỉ | i | Latin small letter i with hook above |
* | U+1ECA | Ị | I | Latin capital letter I with dot below |
* | U+1ECB | ị | i | Latin small letter i with dot below |
* | U+1ECC | Ọ | O | Latin capital letter O with dot below |
* | U+1ECD | ọ | o | Latin small letter o with dot below |
* | U+1ECE | Ỏ | O | Latin capital letter O with hook above |
* | U+1ECF | ỏ | o | Latin small letter o with hook above |
* | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute |
* | U+1ED1 | ố | o | Latin small letter o with circumflex and acute |
* | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave |
* | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave |
* | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above |
* | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above |
* | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde |
* | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde |
* | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below |
* | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below |
* | U+1EDA | Ớ | O | Latin capital letter O with horn and acute |
* | U+1EDB | ớ | o | Latin small letter o with horn and acute |
* | U+1EDC | Ờ | O | Latin capital letter O with horn and grave |
* | U+1EDD | ờ | o | Latin small letter o with horn and grave |
* | U+1EDE | Ở | O | Latin capital letter O with horn and hook above |
* | U+1EDF | ở | o | Latin small letter o with horn and hook above |
* | U+1EE0 | Ỡ | O | Latin capital letter O with horn and tilde |
* | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde |
* | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below |
* | U+1EE3 | ợ | o | Latin small letter o with horn and dot below |
* | U+1EE4 | Ụ | U | Latin capital letter U with dot below |
* | U+1EE5 | ụ | u | Latin small letter u with dot below |
* | U+1EE6 | Ủ | U | Latin capital letter U with hook above |
* | U+1EE7 | ủ | u | Latin small letter u with hook above |
* | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute |
* | U+1EE9 | ứ | u | Latin small letter u with horn and acute |
* | U+1EEA | Ừ | U | Latin capital letter U with horn and grave |
* | U+1EEB | ừ | u | Latin small letter u with horn and grave |
* | U+1EEC | Ử | U | Latin capital letter U with horn and hook above |
* | U+1EED | ử | u | Latin small letter u with horn and hook above |
* | U+1EEE | Ữ | U | Latin capital letter U with horn and tilde |
* | U+1EEF | ữ | u | Latin small letter u with horn and tilde |
* | U+1EF0 | Ự | U | Latin capital letter U with horn and dot below |
* | U+1EF1 | ự | u | Latin small letter u with horn and dot below |
* | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave |
* | U+1EF3 | ỳ | y | Latin small letter y with grave |
* | U+1EF4 | Ỵ | Y | Latin capital letter Y with dot below |
* | U+1EF5 | ỵ | y | Latin small letter y with dot below |
* | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above |
* | U+1EF7 | ỷ | y | Latin small letter y with hook above |
* | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde |
* | U+1EF9 | ỹ | y | Latin small letter y with tilde |
*
* German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`),
* German (Switzerland) informal (`de_CH_informal`), and German (Austria) (`de_AT`) locales:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis |
* | U+00E4 | ä | ae | Latin small letter a with diaeresis |
* | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis |
* | U+00F6 | ö | oe | Latin small letter o with diaeresis |
* | U+00DC | Ü | Ue | Latin capital letter U with diaeresis |
* | U+00FC | ü | ue | Latin small letter u with diaeresis |
* | U+00DF | ß | ss | Latin small letter sharp s |
*
* Danish (`da_DK`) locale:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00C6 | Æ | Ae | Latin capital letter AE |
* | U+00E6 | æ | ae | Latin small letter ae |
* | U+00D8 | Ø | Oe | Latin capital letter O with stroke |
* | U+00F8 | ø | oe | Latin small letter o with stroke |
* | U+00C5 | Å | Aa | Latin capital letter A with ring above |
* | U+00E5 | å | aa | Latin small letter a with ring above |
*
* Catalan (`ca`) locale:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+00B7 | l·l | ll | Flown dot (between two Ls) |
*
* Serbian (`sr_RS`) and Bosnian (`bs_BA`) locales:
*
* | Code | Glyph | Replacement | Description |
* | -------- | ----- | ----------- | --------------------------------------- |
* | U+0110 | Đ | DJ | Latin capital letter D with stroke |
* | U+0111 | đ | dj | Latin small letter d with stroke |
*
* @since 1.2.1
* @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`.
* @since 4.7.0 Added locale support for `sr_RS`.
* @since 4.8.0 Added locale support for `bs_BA`.
* @since 5.7.0 Added locale support for `de_AT`.
* @since 6.0.0 Added the `$locale` parameter.
* @since 6.1.0 Added Unicode NFC encoding normalization support.
*
* @param string $string Text that might have accent characters.
* @param string $locale Optional. The locale to use for accent removal. Some character
* replacements depend on the locale being used (e.g. 'de_DE').
* Defaults to the current locale.
* @return string Filtered string with replaced "nice" characters.
*/
function remove_accents( $string, $locale = '' ) {
if ( ! preg_match( '/[\x80-\xff]/', $string ) ) {
return $string;
}
if ( seems_utf8( $string ) ) {
// Unicode sequence normalization from NFD (Normalization Form Decomposed)
// to NFC (Normalization Form [Pre]Composed), the encoding used in this function.
if ( function_exists( 'normalizer_is_normalized' )
&& function_exists( 'normalizer_normalize' )
) {
if ( ! normalizer_is_normalized( $string ) ) {
$string = normalizer_normalize( $string );
}
}
$chars = array(
// Decompositions for Latin-1 Supplement.
'ª' => 'a',
'º' => 'o',
'À' => 'A',
'Á' => 'A',
'Â' => 'A',
'Ã' => 'A',
'Ä' => 'A',
'Å' => 'A',
'Æ' => 'AE',
'Ç' => 'C',
'È' => 'E',
'É' => 'E',
'Ê' => 'E',
'Ë' => 'E',
'Ì' => 'I',
'Í' => 'I',
'Î' => 'I',
'Ï' => 'I',
'Ð' => 'D',
'Ñ' => 'N',
'Ò' => 'O',
'Ó' => 'O',
'Ô' => 'O',
'Õ' => 'O',
'Ö' => 'O',
'Ù' => 'U',
'Ú' => 'U',
'Û' => 'U',
'Ü' => 'U',
'Ý' => 'Y',
'Þ' => 'TH',
'ß' => 's',
'à' => 'a',
'á' => 'a',
'â' => 'a',
'ã' => 'a',
'ä' => 'a',
'å' => 'a',
'æ' => 'ae',
'ç' => 'c',
'è' => 'e',
'é' => 'e',
'ê' => 'e',
'ë' => 'e',
'ì' => 'i',
'í' => 'i',
'î' => 'i',
'ï' => 'i',
'ð' => 'd',
'ñ' => 'n',
'ò' => 'o',
'ó' => 'o',
'ô' => 'o',
'õ' => 'o',
'ö' => 'o',
'ø' => 'o',
'ù' => 'u',
'ú' => 'u',
'û' => 'u',
'ü' => 'u',
'ý' => 'y',
'þ' => 'th',
'ÿ' => 'y',
'Ø' => 'O',
// Decompositions for Latin Extended-A.
'Ā' => 'A',
'ā' => 'a',
'Ă' => 'A',
'ă' => 'a',
'Ą' => 'A',
'ą' => 'a',
'Ć' => 'C',
'ć' => 'c',
'Ĉ' => 'C',
'ĉ' => 'c',
'Ċ' => 'C',
'ċ' => 'c',
'Č' => 'C',
'č' => 'c',
'Ď' => 'D',
'ď' => 'd',
'Đ' => 'D',
'đ' => 'd',
'Ē' => 'E',
'ē' => 'e',
'Ĕ' => 'E',
'ĕ' => 'e',
'Ė' => 'E',
'ė' => 'e',
'Ę' => 'E',
'ę' => 'e',
'Ě' => 'E',
'ě' => 'e',
'Ĝ' => 'G',
'ĝ' => 'g',
'Ğ' => 'G',
'ğ' => 'g',
'Ġ' => 'G',
'ġ' => 'g',
'Ģ' => 'G',
'ģ' => 'g',
'Ĥ' => 'H',
'ĥ' => 'h',
'Ħ' => 'H',
'ħ' => 'h',
'Ĩ' => 'I',
'ĩ' => 'i',
'Ī' => 'I',
'ī' => 'i',
'Ĭ' => 'I',
'ĭ' => 'i',
'Į' => 'I',
'į' => 'i',
'İ' => 'I',
'ı' => 'i',
'IJ' => 'IJ',
'ij' => 'ij',
'Ĵ' => 'J',
'ĵ' => 'j',
'Ķ' => 'K',
'ķ' => 'k',
'ĸ' => 'k',
'Ĺ' => 'L',
'ĺ' => 'l',
'Ļ' => 'L',
'ļ' => 'l',
'Ľ' => 'L',
'ľ' => 'l',
'Ŀ' => 'L',
'ŀ' => 'l',
'Ł' => 'L',
'ł' => 'l',
'Ń' => 'N',
'ń' => 'n',
'Ņ' => 'N',
'ņ' => 'n',
'Ň' => 'N',
'ň' => 'n',
'ʼn' => 'n',
'Ŋ' => 'N',
'ŋ' => 'n',
'Ō' => 'O',
'ō' => 'o',
'Ŏ' => 'O',
'ŏ' => 'o',
'Ő' => 'O',
'ő' => 'o',
'Œ' => 'OE',
'œ' => 'oe',
'Ŕ' => 'R',
'ŕ' => 'r',
'Ŗ' => 'R',
'ŗ' => 'r',
'Ř' => 'R',
'ř' => 'r',
'Ś' => 'S',
'ś' => 's',
'Ŝ' => 'S',
'ŝ' => 's',
'Ş' => 'S',
'ş' => 's',
'Š' => 'S',
'š' => 's',
'Ţ' => 'T',
'ţ' => 't',
'Ť' => 'T',
'ť' => 't',
'Ŧ' => 'T',
'ŧ' => 't',
'Ũ' => 'U',
'ũ' => 'u',
'Ū' => 'U',
'ū' => 'u',
'Ŭ' => 'U',
'ŭ' => 'u',
'Ů' => 'U',
'ů' => 'u',
'Ű' => 'U',
'ű' => 'u',
'Ų' => 'U',
'ų' => 'u',
'Ŵ' => 'W',
'ŵ' => 'w',
'Ŷ' => 'Y',
'ŷ' => 'y',
'Ÿ' => 'Y',
'Ź' => 'Z',
'ź' => 'z',
'Ż' => 'Z',
'ż' => 'z',
'Ž' => 'Z',
'ž' => 'z',
'ſ' => 's',
// Decompositions for Latin Extended-B.
'Ș' => 'S',
'ș' => 's',
'Ț' => 'T',
'ț' => 't',
// Euro sign.
'€' => 'E',
// GBP (Pound) sign.
'£' => '',
// Vowels with diacritic (Vietnamese).
// Unmarked.
'Ơ' => 'O',
'ơ' => 'o',
'Ư' => 'U',
'ư' => 'u',
// Grave accent.
'Ầ' => 'A',
'ầ' => 'a',
'Ằ' => 'A',
'ằ' => 'a',
'Ề' => 'E',
'ề' => 'e',
'Ồ' => 'O',
'ồ' => 'o',
'Ờ' => 'O',
'ờ' => 'o',
'Ừ' => 'U',
'ừ' => 'u',
'Ỳ' => 'Y',
'ỳ' => 'y',
// Hook.
'Ả' => 'A',
'ả' => 'a',
'Ẩ' => 'A',
'ẩ' => 'a',
'Ẳ' => 'A',
'ẳ' => 'a',
'Ẻ' => 'E',
'ẻ' => 'e',
'Ể' => 'E',
'ể' => 'e',
'Ỉ' => 'I',
'ỉ' => 'i',
'Ỏ' => 'O',
'ỏ' => 'o',
'Ổ' => 'O',
'ổ' => 'o',
'Ở' => 'O',
'ở' => 'o',
'Ủ' => 'U',
'ủ' => 'u',
'Ử' => 'U',
'ử' => 'u',
'Ỷ' => 'Y',
'ỷ' => 'y',
// Tilde.
'Ẫ' => 'A',
'ẫ' => 'a',
'Ẵ' => 'A',
'ẵ' => 'a',
'Ẽ' => 'E',
'ẽ' => 'e',
'Ễ' => 'E',
'ễ' => 'e',
'Ỗ' => 'O',
'ỗ' => 'o',
'Ỡ' => 'O',
'ỡ' => 'o',
'Ữ' => 'U',
'ữ' => 'u',
'Ỹ' => 'Y',
'ỹ' => 'y',
// Acute accent.
'Ấ' => 'A',
'ấ' => 'a',
'Ắ' => 'A',
'ắ' => 'a',
'Ế' => 'E',
'ế' => 'e',
'Ố' => 'O',
'ố' => 'o',
'Ớ' => 'O',
'ớ' => 'o',
'Ứ' => 'U',
'ứ' => 'u',
// Dot below.
'Ạ' => 'A',
'ạ' => 'a',
'Ậ' => 'A',
'ậ' => 'a',
'Ặ' => 'A',
'ặ' => 'a',
'Ẹ' => 'E',
'ẹ' => 'e',
'Ệ' => 'E',
'ệ' => 'e',
'Ị' => 'I',
'ị' => 'i',
'Ọ' => 'O',
'ọ' => 'o',
'Ộ' => 'O',
'ộ' => 'o',
'Ợ' => 'O',
'ợ' => 'o',
'Ụ' => 'U',
'ụ' => 'u',
'Ự' => 'U',
'ự' => 'u',
'Ỵ' => 'Y',
'ỵ' => 'y',
// Vowels with diacritic (Chinese, Hanyu Pinyin).
'ɑ' => 'a',
// Macron.
'Ǖ' => 'U',
'ǖ' => 'u',
// Acute accent.
'Ǘ' => 'U',
'ǘ' => 'u',
// Caron.
'Ǎ' => 'A',
'ǎ' => 'a',
'Ǐ' => 'I',
'ǐ' => 'i',
'Ǒ' => 'O',
'ǒ' => 'o',
'Ǔ' => 'U',
'ǔ' => 'u',
'Ǚ' => 'U',
'ǚ' => 'u',
// Grave accent.
'Ǜ' => 'U',
'ǜ' => 'u',
);
// Used for locale-specific rules.
if ( empty( $locale ) ) {
$locale = get_locale();
}
/*
* German has various locales (de_DE, de_CH, de_AT, ...) with formal and informal variants.
* There is no 3-letter locale like 'def', so checking for 'de' instead of 'de_' is safe,
* since 'de' itself would be a valid locale too.
*/
if ( str_starts_with( $locale, 'de' ) ) {
$chars['Ä'] = 'Ae';
$chars['ä'] = 'ae';
$chars['Ö'] = 'Oe';
$chars['ö'] = 'oe';
$chars['Ü'] = 'Ue';
$chars['ü'] = 'ue';
$chars['ß'] = 'ss';
} elseif ( 'da_DK' === $locale ) {
$chars['Æ'] = 'Ae';
$chars['æ'] = 'ae';
$chars['Ø'] = 'Oe';
$chars['ø'] = 'oe';
$chars['Å'] = 'Aa';
$chars['å'] = 'aa';
} elseif ( 'ca' === $locale ) {
$chars['l·l'] = 'll';
} elseif ( 'sr_RS' === $locale || 'bs_BA' === $locale ) {
$chars['Đ'] = 'DJ';
$chars['đ'] = 'dj';
}
$string = strtr( $string, $chars );
} else {
$chars = array();
// Assume ISO-8859-1 if not UTF-8.
$chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e"
. "\x9f\xa2\xa5\xb5\xc0\xc1\xc2"
. "\xc3\xc4\xc5\xc7\xc8\xc9\xca"
. "\xcb\xcc\xcd\xce\xcf\xd1\xd2"
. "\xd3\xd4\xd5\xd6\xd8\xd9\xda"
. "\xdb\xdc\xdd\xe0\xe1\xe2\xe3"
. "\xe4\xe5\xe7\xe8\xe9\xea\xeb"
. "\xec\xed\xee\xef\xf1\xf2\xf3"
. "\xf4\xf5\xf6\xf8\xf9\xfa\xfb"
. "\xfc\xfd\xff";
$chars['out'] = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';
$string = strtr( $string, $chars['in'], $chars['out'] );
$double_chars = array();
$double_chars['in'] = array( "\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe" );
$double_chars['out'] = array( 'OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th' );
$string = str_replace( $double_chars['in'], $double_chars['out'], $string );
}
return $string;
}
/**
 * Sanitizes a filename so it is safe to store and to handle on the command line.
 *
 * Accented characters are transliterated through remove_accents(). Characters that
 * are illegal in filenames on certain operating systems, or that need special
 * escaping at the command line, are removed. Whitespace and consecutive dashes
 * collapse into a single dash, and periods, dashes and underscores are trimmed
 * from both ends of the name.
 *
 * When the name carries more than one extension, every intermediate extension that
 * looks like a file type but matches none of the patterns returned by
 * get_allowed_mime_types() gets a trailing underscore.
 *
 * It is not guaranteed that this function will return a filename that is allowed
 * to be uploaded.
 *
 * @since 2.1.0
 *
 * @param string $filename The filename to be sanitized.
 * @return string The sanitized filename.
 */
function sanitize_file_name( $filename ) {
	// Probe the installed PCRE library for UTF-8 support once; the static keeps the answer.
	static $utf8_pcre = null;
	if ( null === $utf8_pcre ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}

	$filename_raw = $filename;
	$filename     = remove_accents( $filename );

	$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', '’', '«', '»', '”', '“', chr( 0 ) );

	// A name that is not valid UTF-8 gets its base name rebuilt as a dashed slug.
	if ( ! seems_utf8( $filename ) ) {
		$extension_part = pathinfo( $filename, PATHINFO_EXTENSION );
		$name_part      = pathinfo( $filename, PATHINFO_FILENAME );
		$filename       = sanitize_title_with_dashes( $name_part ) . '.' . $extension_part;
	}

	// Turn non-breaking spaces into plain spaces when the 'u' modifier is usable.
	if ( $utf8_pcre ) {
		$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
	}

	/**
	 * Filters the list of characters to remove from a filename.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $special_chars Array of characters to remove.
	 * @param string   $filename_raw  The original filename to be sanitized.
	 */
	$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );

	$filename = str_replace( $special_chars, '', $filename );
	/*
	 * With the default list above, '%' and '+' are already gone at this point, so this
	 * replacement only has an effect when a filter has taken them out of the list.
	 */
	$filename = str_replace( array( '%20', '+' ), '-', $filename );
	$filename = trim( preg_replace( '/[\r\n\t -]+/', '-', $filename ), '.-_' );

	// A name without any period that is itself a known extension gets a placeholder base name.
	if ( strpos( $filename, '.' ) === false ) {
		$filetype = wp_check_filetype( 'test.' . $filename, wp_get_mime_types() );
		if ( $filetype['ext'] === $filename ) {
			$filename = 'unnamed-file.' . $filetype['ext'];
		}
	}

	// Split the filename into a base and extension[s].
	$parts = explode( '.', $filename );

	// Zero or one extension: nothing further to inspect.
	if ( count( $parts ) <= 2 ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
	}

	// Several extensions: keep the first piece as the base and the last as the real extension.
	$filename  = array_shift( $parts );
	$extension = array_pop( $parts );
	$mimes     = get_allowed_mime_types();

	/*
	 * Walk the intermediate extensions. Each one made of 2 - 5 letters, optionally
	 * followed by a single digit, that matches no allowed extension pattern is
	 * postfixed with an underscore.
	 */
	foreach ( (array) $parts as $part ) {
		$filename .= '.' . $part;

		if ( ! preg_match( '/^[a-zA-Z]{2,5}\d?$/', $part ) ) {
			continue;
		}

		$is_allowed = false;
		foreach ( $mimes as $ext_preg => $mime_match ) {
			if ( preg_match( '!^(' . $ext_preg . ')$!i', $part ) ) {
				$is_allowed = true;
				break;
			}
		}

		if ( ! $is_allowed ) {
			$filename .= '_';
		}
	}

	$filename .= '.' . $extension;

	/**
	 * Filters a sanitized filename string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $filename     Sanitized filename.
	 * @param string $filename_raw The filename prior to sanitization.
	 */
	return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
}
/**
 * Sanitizes a username, stripping out unsafe characters.
 *
 * Tags, percent-encoded octets and entities are removed. With `$strict` enabled,
 * only ASCII letters, digits, `_`, space, `.`, `-` and `@` are kept. The result,
 * the raw username and the value of `$strict` are then handed to the
 * {@see 'sanitize_user'} filter.
 *
 * @since 2.0.0
 *
 * @param string $username The username to be sanitized.
 * @param bool   $strict   Optional. If set limits $username to specific characters.
 *                         Default false.
 * @return string The sanitized username, after passing through filters.
 */
function sanitize_user( $username, $strict = false ) {
	$raw_username = $username;

	// Drop markup first, then transliterate accented characters.
	$username = remove_accents( wp_strip_all_tags( $username ) );

	// Remove percent-encoded octets such as %20.
	$username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );

	// Remove anything shaped like an entity (&...;).
	$username = preg_replace( '/&.+?;/', '', $username );

	// Strict mode reduces the name to a portable ASCII subset.
	if ( $strict ) {
		$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
	}

	// Trim the ends, then squeeze every run of whitespace down to one space.
	$username = preg_replace( '|\s+|', ' ', trim( $username ) );

	/**
	 * Filters a sanitized username string.
	 *
	 * @since 2.0.1
	 *
	 * @param string $username     Sanitized username.
	 * @param string $raw_username The username prior to sanitization.
	 * @param bool   $strict       Whether to limit the sanitization to specific characters.
	 */
	return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
}
/**
 * Sanitizes a string key.
 *
 * Keys are used as internal identifiers. The key is lowercased and every character
 * other than a-z, 0-9, underscore and dash is removed. A non-scalar key yields an
 * empty string before filtering.
 *
 * @since 3.0.0
 *
 * @param string $key String key.
 * @return string Sanitized key.
 */
function sanitize_key( $key ) {
	$sanitized_key = is_scalar( $key )
		? preg_replace( '/[^a-z0-9_\-]/', '', strtolower( $key ) )
		: '';

	/**
	 * Filters a sanitized key string.
	 *
	 * @since 3.0.0
	 *
	 * @param string $sanitized_key Sanitized key.
	 * @param string $key           The key prior to sanitization.
	 */
	return apply_filters( 'sanitize_key', $sanitized_key, $key );
}
/**
 * Sanitizes a string into a slug, which can be used in URLs or HTML attributes.
 *
 * In the 'save' context the title first runs through remove_accents(). All further
 * clean-up is delegated to callbacks attached to the {@see 'sanitize_title'} filter.
 *
 * If the filtered title is an empty string or `false`, `$fallback_title` is returned
 * instead.
 *
 * @since 1.0.0
 *
 * @param string $title          The string to be sanitized.
 * @param string $fallback_title Optional. A title to use if $title is empty. Default empty.
 * @param string $context        Optional. The operation for which the string is sanitized.
 *                               When set to 'save', the string runs through remove_accents().
 *                               Default 'save'.
 * @return string The sanitized string.
 */
function sanitize_title( $title, $fallback_title = '', $context = 'save' ) {
	$raw_title = $title;

	// Only titles that are being saved get their accents folded.
	$prepared_title = ( 'save' === $context ) ? remove_accents( $title ) : $title;

	/**
	 * Filters a sanitized title string.
	 *
	 * @since 1.2.0
	 *
	 * @param string $title     Sanitized title.
	 * @param string $raw_title The title prior to sanitization.
	 * @param string $context   The context for which the title is being sanitized.
	 */
	$title = apply_filters( 'sanitize_title', $prepared_title, $raw_title, $context );

	// Nothing usable came back from the filters: use the fallback.
	if ( '' === $title || false === $title ) {
		return $fallback_title;
	}

	return $title;
}
/**
 * Sanitizes a title with the 'query' context.
 *
 * Used for querying the database for a value from URL. Delegates to sanitize_title()
 * with an empty fallback title.
 *
 * @since 3.1.0
 *
 * @param string $title The string to be sanitized.
 * @return string The sanitized string.
 */
function sanitize_title_for_query( $title ) {
	$fallback_title = '';
	$context        = 'query';

	return sanitize_title( $title, $fallback_title, $context );
}
/**
 * Sanitizes a title, replacing whitespace and a few other characters with dashes.
 *
 * The output is limited to percent-encoded octets, lowercase alphanumeric
 * characters, underscore (_) and dash (-). Whitespace and periods become dashes,
 * and consecutive dashes collapse into one.
 *
 * @since 1.2.0
 *
 * @param string $title     The title to be sanitized.
 * @param string $raw_title Optional. Not used. Default empty.
 * @param string $context   Optional. The operation for which the string is sanitized.
 *                          When set to 'save', additional entities are converted to hyphens
 *                          or stripped entirely. Default 'display'.
 * @return string The sanitized title.
 */
function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
	$title = strip_tags( $title );

	// Shield well-formed percent-encoded octets, drop every other percent sign, then restore the octets.
	$title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title );
	$title = str_replace( '%', '', $title );
	$title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title );

	if ( seems_utf8( $title ) ) {
		if ( function_exists( 'mb_strtolower' ) ) {
			$title = mb_strtolower( $title, 'UTF-8' );
		}
		// NOTE(review): the 200 is presumably a length cap - confirm against utf8_uri_encode().
		$title = utf8_uri_encode( $title, 200 );
	}

	$title = strtolower( $title );

	if ( 'save' === $context ) {
		// Percent-encoded &nbsp, &ndash, and &mdash.
		$encoded_dash_like = array( '%c2%a0', '%e2%80%93', '%e2%80%94' );

		// The same three characters written as HTML entities.
		$entity_dash_like = array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' );

		// Sequences that are removed without a replacement.
		$stripped_sequences = array(
			// Soft hyphens.
			'%c2%ad',
			// &iexcl and &iquest.
			'%c2%a1',
			'%c2%bf',
			// Angle quotes.
			'%c2%ab',
			'%c2%bb',
			'%e2%80%b9',
			'%e2%80%ba',
			// Curly quotes.
			'%e2%80%98',
			'%e2%80%99',
			'%e2%80%9c',
			'%e2%80%9d',
			'%e2%80%9a',
			'%e2%80%9b',
			'%e2%80%9e',
			'%e2%80%9f',
			// Bullet.
			'%e2%80%a2',
			// &copy, &reg, &deg, &hellip, and &trade.
			'%c2%a9',
			'%c2%ae',
			'%c2%b0',
			'%e2%80%a6',
			'%e2%84%a2',
			// Acute accents.
			'%c2%b4',
			'%cb%8a',
			'%cc%81',
			'%cd%81',
			// Grave accent, macron, caron.
			'%cc%80',
			'%cc%84',
			'%cc%8c',
			// Non-visible characters that display without a width.
			'%e2%80%8b', // Zero width space.
			'%e2%80%8c', // Zero width non-joiner.
			'%e2%80%8d', // Zero width joiner.
			'%e2%80%8e', // Left-to-right mark.
			'%e2%80%8f', // Right-to-left mark.
			'%e2%80%aa', // Left-to-right embedding.
			'%e2%80%ab', // Right-to-left embedding.
			'%e2%80%ac', // Pop directional formatting.
			'%e2%80%ad', // Left-to-right override.
			'%e2%80%ae', // Right-to-left override.
			'%ef%bb%bf', // Byte order mark.
			'%ef%bf%bc', // Object replacement character.
		);

		// Non-visible characters that display with a width; these become hyphens.
		$wide_invisible_sequences = array(
			'%e2%80%80', // En quad.
			'%e2%80%81', // Em quad.
			'%e2%80%82', // En space.
			'%e2%80%83', // Em space.
			'%e2%80%84', // Three-per-em space.
			'%e2%80%85', // Four-per-em space.
			'%e2%80%86', // Six-per-em space.
			'%e2%80%87', // Figure space.
			'%e2%80%88', // Punctuation space.
			'%e2%80%89', // Thin space.
			'%e2%80%8a', // Hair space.
			'%e2%80%a8', // Line separator.
			'%e2%80%a9', // Paragraph separator.
			'%e2%80%af', // Narrow no-break space.
		);

		// The replacements run in this fixed order.
		$title = str_replace( $encoded_dash_like, '-', $title );
		$title = str_replace( $entity_dash_like, '-', $title );
		// Convert forward slash to hyphen.
		$title = str_replace( '/', '-', $title );
		$title = str_replace( $stripped_sequences, '', $title );
		$title = str_replace( $wide_invisible_sequences, '-', $title );
		// Convert &times to 'x'.
		$title = str_replace( '%c3%97', 'x', $title );
	}

	// Remove any remaining entities.
	$title = preg_replace( '/&.+?;/', '', $title );

	// Periods become dashes; everything outside the allowed set is dropped.
	$title = str_replace( '.', '-', $title );
	$title = preg_replace( '/[^%a-z0-9 _-]/', '', $title );

	// Whitespace runs become a dash, dash runs become a single dash.
	$title = preg_replace( '/\s+/', '-', $title );
	$title = preg_replace( '|-+|', '-', $title );

	return trim( $title, '-' );
}
/**
 * Ensures a string is a valid SQL 'order by' clause.
 *
 * Accepts one or more columns (plain or backtick-quoted), each with or without a
 * sort order (ASC / DESC), separated by commas,
 * e.g. 'column_1', 'column_1, column_2', 'column_1 ASC, column_2 DESC' etc.
 *
 * Also accepts 'RAND()'.
 *
 * @since 2.5.1
 *
 * @param string $orderby Order by clause to be validated.
 * @return string|false Returns $orderby if valid, false otherwise.
 */
function sanitize_sql_orderby( $orderby ) {
	// One or more column names, each with an optional direction, separated by commas.
	$column_list_pattern = '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i';

	// The RAND() call on its own, with optional whitespace.
	$random_pattern = '/^\s*RAND\(\s*\)\s*$/i';

	if ( ! preg_match( $column_list_pattern, $orderby ) && ! preg_match( $random_pattern, $orderby ) ) {
		return false;
	}

	return $orderby;
}
/**
 * Sanitizes an HTML classname to ensure it only contains valid characters.
 *
 * Percent-encoded octets are removed, then the string is stripped down to
 * A-Z, a-z, 0-9, '_' and '-'. If nothing is left and a fallback was supplied,
 * the sanitized fallback is returned instead.
 *
 * @todo Expand to support the full range of CDATA that a class attribute can contain.
 *
 * @since 2.8.0
 *
 * @param string $class    The classname to be sanitized
 * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
 *                         Defaults to an empty string.
 * @return string The sanitized value
 */
function sanitize_html_class( $class, $fallback = '' ) {
	// First remove %-encoded octets, then everything outside A-Z, a-z, 0-9, '_', '-'.
	$without_octets = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class );
	$sanitized      = preg_replace( '/[^A-Za-z0-9_-]/', '', $without_octets );

	// Nothing survived: sanitize and return the fallback, if there is one.
	if ( $fallback && '' === $sanitized ) {
		return sanitize_html_class( $fallback );
	}

	/**
	 * Filters a sanitized HTML class string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $sanitized The sanitized HTML class.
	 * @param string $class     HTML class before sanitization.
	 * @param string $fallback  The fallback string.
	 */
	return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback );
}
/**
 * Converts lone & characters into `&#038;` (a.k.a. `&amp;`)
 *
 * An ampersand is left alone when it starts a numeric reference (`&#...`) or is
 * followed by something shaped like a named entity.
 *
 * @since 0.71
 *
 * @param string $content    String of characters to be converted.
 * @param string $deprecated Not used.
 * @return string Converted string.
 */
function convert_chars( $content, $deprecated = '' ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '0.71' );
	}

	// No ampersand at all: the content is returned as it came in.
	if ( false === strpos( $content, '&' ) ) {
		return $content;
	}

	return preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );
}
/**
 * Converts invalid Unicode references range to valid range.
 *
 * Numeric references `&#128;` through `&#159;` are replaced according to the table
 * below; the references with no counterpart in the table are removed.
 *
 * @since 4.3.0
 *
 * @param string $content String with entities that need converting.
 * @return string Converted string.
 */
function convert_invalid_entities( $content ) {
	// Every key in the table starts with '&#1', so content without that prefix needs no work.
	if ( false === strpos( $content, '&#1' ) ) {
		return $content;
	}

	$replacements = array(
		'&#128;' => '&#8364;', // The Euro sign.
		'&#129;' => '',
		'&#130;' => '&#8218;', // These are Windows CP1252 specific characters.
		'&#131;' => '&#402;',  // They would look weird on non-Windows browsers.
		'&#132;' => '&#8222;',
		'&#133;' => '&#8230;',
		'&#134;' => '&#8224;',
		'&#135;' => '&#8225;',
		'&#136;' => '&#710;',
		'&#137;' => '&#8240;',
		'&#138;' => '&#352;',
		'&#139;' => '&#8249;',
		'&#140;' => '&#338;',
		'&#141;' => '',
		'&#142;' => '&#381;',
		'&#143;' => '',
		'&#144;' => '',
		'&#145;' => '&#8216;',
		'&#146;' => '&#8217;',
		'&#147;' => '&#8220;',
		'&#148;' => '&#8221;',
		'&#149;' => '&#8226;',
		'&#150;' => '&#8211;',
		'&#151;' => '&#8212;',
		'&#152;' => '&#732;',
		'&#153;' => '&#8482;',
		'&#154;' => '&#353;',
		'&#155;' => '&#8250;',
		'&#156;' => '&#339;',
		'&#157;' => '',
		'&#158;' => '&#382;',
		'&#159;' => '&#376;',
	);

	return strtr( $content, $replacements );
}
/**
 * Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
 *
 * The option is only read when `$force` is not set.
 *
 * @since 0.71
 *
 * @param string $text  Text to be balanced
 * @param bool   $force If true, forces balancing, ignoring the value of the option. Default false.
 * @return string Balanced text
 */
function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid
	// Neither forced nor enabled through the option: hand the text back untouched.
	if ( ! $force && 1 !== (int) get_option( 'use_balanceTags' ) ) {
		return $text;
	}

	return force_balance_tags( $text );
}
/**
 * Balances tags of string using a modified stack.
 *
 * Walks the text tag by tag. Opening tags are pushed on a stack, closing tags pop
 * it (closing any tags left open in between), closing tags with no opener are
 * dropped, known single-entity tags are written in self-closing form, and whatever
 * is still open at the end of the text is closed.
 *
 * A literal `< !--` typed by the author is preserved: it is swapped for a
 * placeholder before processing and restored afterwards, so it is not mistaken
 * for a comment opener during the final clean-up.
 *
 * @since 2.0.4
 * @since 5.3.0 Improve accuracy and add support for custom element tags.
 *
 * @author Leonard Lin <leonard@acm.org>
 * @license GPL
 * @copyright November 4, 2001
 * @version 1.1
 * @todo Make better - change loop condition to $text in 1.2
 * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
 *      1.1  Fixed handling of append/stack pop order of end text
 *           Added Cleaning Hooks
 *      1.0  First Version
 *
 * @param string $text Text to be balanced.
 * @return string Balanced text.
 */
function force_balance_tags( $text ) {
	$tagstack  = array();
	$stacksize = 0;
	$tagqueue  = '';
	$newtext   = '';

	// Known single-entity/self-closing tags.
	$single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr' );

	// Tags that can be immediately nested within themselves.
	$nestable_tags = array( 'article', 'aside', 'blockquote', 'details', 'div', 'figure', 'object', 'q', 'section', 'span' );

	/*
	 * Placeholder that stands in for a literal '< !--' while the text is processed.
	 * It has to differ from '< !--' itself: if search and replacement were the same
	 * string, the protection would do nothing and the clean-up at the end would turn
	 * the author's '< !--' into a real comment opener. str_repeat() is used so the
	 * run of spaces is explicit.
	 */
	$literal_comment_placeholder = '<' . str_repeat( ' ', 4 ) . '!--';

	// WP bug fix for comments - in case you REALLY meant to type '< !--'.
	$text = str_replace( '< !--', $literal_comment_placeholder, $text );

	// WP bug fix for LOVE <3 (and other situations with '<' before a number).
	$text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text );

	/**
	 * Matches supported tags.
	 *
	 * To get the pattern as a string without the comments paste into a PHP
	 * REPL like `php -a`.
	 *
	 * @see https://html.spec.whatwg.org/#elements-2
	 * @see https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
	 *
	 * @example
	 * ~# php -a
	 * php > $s = [paste copied contents of expression below including parentheses];
	 * php > echo $s;
	 */
	$tag_pattern = (
		'#<' . // Start with an opening bracket.
		'(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash.
		'(' . // Group 2 - Tag name.
			// Custom element tags have more lenient rules than HTML tag names.
			'(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' .
				'|' .
			// Traditional tag rules approximate HTML tag names.
			'(?:[\w:]+)' .
		')' .
		'(?:' .
			// We either immediately close the tag with its '>' and have nothing here.
			'\s*' .
			'(/?)' . // Group 3 - "attributes" for empty tag.
				'|' .
			// Or we must start with space characters to separate the tag name from the attributes (or whitespace).
			'(\s+)' . // Group 4 - Pre-attribute whitespace.
			'([^>]*)' . // Group 5 - Attributes.
		')' .
		'>#' // End with a closing bracket.
	);

	// Each pass consumes the text up to and including the first remaining tag.
	while ( preg_match( $tag_pattern, $text, $regex ) ) {
		$full_match        = $regex[0];
		$has_leading_slash = ! empty( $regex[1] );
		$tag_name          = $regex[2];
		$tag               = strtolower( $tag_name );
		$is_single_tag     = in_array( $tag, $single_tags, true );
		$pre_attribute_ws  = isset( $regex[4] ) ? $regex[4] : '';
		$attributes        = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] );
		$has_self_closer   = '/' === substr( $attributes, -1 );

		// Flush closing tags queued by the previous pass before the text that follows them.
		$newtext .= $tagqueue;

		$i = strpos( $text, $full_match );
		$l = strlen( $full_match );

		// Clear the shifter.
		$tagqueue = '';

		if ( $has_leading_slash ) { // End tag.
			// If too many closing tags.
			if ( $stacksize <= 0 ) {
				$tag = '';
				// Or close to be safe $tag = '/' . $tag.

				// If stacktop value = tag close value, then pop.
			} elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag.
				$tag = '</' . $tag . '>'; // Close tag.
				array_pop( $tagstack );
				$stacksize--;
			} else { // Closing tag not at top, search for it.
				for ( $j = $stacksize - 1; $j >= 0; $j-- ) {
					if ( $tagstack[ $j ] === $tag ) {
						// Add tag to tagqueue: close everything from the top down to the match.
						for ( $k = $stacksize - 1; $k >= $j; $k-- ) {
							$tagqueue .= '</' . array_pop( $tagstack ) . '>';
							$stacksize--;
						}
						break;
					}
				}
				// The closing tag itself is dropped; the queue, if any, carries the closers.
				$tag = '';
			}
		} else { // Begin tag.
			if ( $has_self_closer ) { // If it presents itself as a self-closing tag...
				// ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such
				// and immediately close it with a closing tag (the tag will encapsulate no text as a result).
				if ( ! $is_single_tag ) {
					$attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag";
				}
			} elseif ( $is_single_tag ) { // Else if it's a known single-entity tag but it doesn't close itself, do so.
				$pre_attribute_ws = ' ';
				$attributes      .= '/';
			} else { // It's not a single-entity tag.
				// If the top of the stack is the same as the tag we want to push, close previous tag.
				if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) {
					$tagqueue = '</' . array_pop( $tagstack ) . '>';
					$stacksize--;
				}
				$stacksize = array_push( $tagstack, $tag );
			}

			// Attributes.
			if ( $has_self_closer && $is_single_tag ) {
				// We need some space - avoid <br/> and prefer <br />.
				$pre_attribute_ws = ' ';
			}

			$tag = '<' . $tag . $pre_attribute_ws . $attributes . '>';

			// If already queuing a close tag, then put this tag on too.
			if ( ! empty( $tagqueue ) ) {
				$tagqueue .= $tag;
				$tag       = '';
			}
		}

		// Emit the text before the tag plus the rewritten tag, then continue after the original tag.
		$newtext .= substr( $text, 0, $i ) . $tag;
		$text     = substr( $text, $i + $l );
	}

	// Clear tag queue.
	$newtext .= $tagqueue;

	// Add remaining text.
	$newtext .= $text;

	// Add remaining tags to close.
	while ( null !== ( $x = array_pop( $tagstack ) ) ) {
		$newtext .= '</' . $x . '>';
	}

	// WP fix for the bug with HTML comments.
	$newtext = str_replace( '< !--', '<!--', $newtext );

	// Restore the literal '< !--' that was protected before processing.
	$newtext = str_replace( $literal_comment_placeholder, '< !--', $newtext );

	return $newtext;
}
/**
 * Acts on text which is about to be edited.
 *
 * The content always passes through the {@see 'format_to_edit'} filter. Unless
 * `$rich_text` is set, the filtered content is then run through esc_textarea().
 *
 * @since 0.71
 * @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity.
 *
 * @param string $content   The text about to be edited.
 * @param bool   $rich_text Optional. Whether `$content` should be considered rich text,
 *                          in which case it would not be passed through esc_textarea().
 *                          Default false.
 * @return string The text after the filter (and possibly esc_textarea()) has been run.
 */
function format_to_edit( $content, $rich_text = false ) {
	/**
	 * Filters the text to be formatted for editing.
	 *
	 * @since 1.2.0
	 *
	 * @param string $content The text, prior to formatting for editing.
	 */
	$content = apply_filters( 'format_to_edit', $content );

	// Rich text is returned as filtered; everything else is escaped for a textarea.
	return $rich_text ? $content : esc_textarea( $content );
}
/**
 * Add leading zeros when necessary.
 *
 * If you set the threshold to '4' and the number is '10', then you will get
 * back '0010'. If you set the threshold to '4' and the number is '5000', then you
 * will get back '5000'.
 *
 * Uses sprintf() with a zero-padded string conversion whose width is `$threshold`,
 * so zeros are prepended only while the number is shorter than that width.
 *
 * @since 0.71
 *
 * @param int $number    Number to prepend zeros to if shorter than the threshold.
 * @param int $threshold Digit places number needs to be to not have zeros added.
 * @return string Adds leading zeros to number if needed.
 */
function zeroise( $number, $threshold ) {
	// E.g. a threshold of 4 yields the format '%04s'.
	$format = "%0{$threshold}s";

	return sprintf( $format, $number );
}
/**
 * Adds backslashes before letters and before a number at the start of a string.
 *
 * A string starting with a digit is prefixed with two backslashes; after that every
 * ASCII letter (A-Z, a-z) gets a single backslash in front of it.
 *
 * @since 0.71
 *
 * @param string $string Value to which backslashes will be added.
 * @return string String with backslashes inserted.
 */
function backslashit( $string ) {
	$starts_with_digit = isset( $string[0] ) && '0' <= $string[0] && '9' >= $string[0];

	if ( $starts_with_digit ) {
		$string = '\\\\' . $string;
	}

	return addcslashes( $string, 'A..Za..z' );
}
/**
 * Appends a trailing slash.
 *
 * Any forward slashes and backslashes already at the end are removed first, via
 * untrailingslashit(), and then a single forward slash is added. This prevents
 * double slashing a string or path.
 *
 * The primary use of this is for paths and thus should be used for paths. It is
 * not restricted to paths and offers no specific path support.
 *
 * @since 1.2.0
 *
 * @param string $string What to add the trailing slash to.
 * @return string String with trailing slash added.
 */
function trailingslashit( $string ) {
	$without_trailing_slashes = untrailingslashit( $string );

	return $without_trailing_slashes . '/';
}
/**
 * Removes trailing forward slashes and backslashes if they exist.
 *
 * The primary use of this is for paths and thus should be used for paths. It is
 * not restricted to paths and offers no specific path support.
 *
 * @since 2.2.0
 *
 * @param string $string What to remove the trailing slashes from.
 * @return string String without the trailing slashes.
 */
function untrailingslashit( $string ) {
	// Both slash flavours are stripped, in any mix, from the right-hand end.
	$slash_characters = '/\\';

	return rtrim( $string, $slash_characters );
}
/**
 * Adds slashes to a string or recursively adds slashes to strings within an array.
 *
 * Thin wrapper that forwards its argument to wp_slash().
 *
 * @since 0.71
 *
 * @param string|array $gpc String or array of data to slash.
 * @return string|array Slashed `$gpc`.
 */
function addslashes_gpc( $gpc ) {
	$slashed = wp_slash( $gpc );

	return $slashed;
}
/**
 * Navigates through an array, object, or scalar, and removes slashes from the values.
 *
 * Applies stripslashes_from_strings_only() to the value through map_deep().
 *
 * @since 2.0.0
 *
 * @param mixed $value The value to be stripped.
 * @return mixed Stripped value.
 */
function stripslashes_deep( $value ) {
	$callback = 'stripslashes_from_strings_only';

	return map_deep( $value, $callback );
}
/**
 * Callback function for `stripslashes_deep()` which strips slashes from strings.
 *
 * Values that are not strings are returned unchanged.
 *
 * @since 4.4.0
 *
 * @param mixed $value The array or string to be stripped.
 * @return mixed The stripped value.
 */
function stripslashes_from_strings_only( $value ) {
	if ( ! is_string( $value ) ) {
		return $value;
	}

	return stripslashes( $value );
}
/**
 * Navigates through an array, object, or scalar, and encodes the values to be used in a URL.
 *
 * Applies PHP's urlencode() to the value through map_deep().
 *
 * @since 2.2.0
 *
 * @param mixed $value The array or string to be encoded.
 * @return mixed The encoded value.
 */
function urlencode_deep( $value ) {
	$callback = 'urlencode';

	return map_deep( $value, $callback );
}
/**
 * Navigates through an array, object, or scalar, and raw-encodes the values to be used in a URL.
 *
 * Applies PHP's rawurlencode() to the value through map_deep().
 *
 * @since 3.4.0
 *
 * @param mixed $value The array or string to be encoded.
 * @return mixed The encoded value.
 */
function rawurlencode_deep( $value ) {
	$callback = 'rawurlencode';

	return map_deep( $value, $callback );
}
/**
 * Navigates through an array, object, or scalar, and decodes URL-encoded values
 *
 * Applies PHP's urldecode() to the value through map_deep().
 *
 * @since 4.4.0
 *
 * @param mixed $value The array or string to be decoded.
 * @return mixed The decoded value.
 */
function urldecode_deep( $value ) {
	$callback = 'urldecode';

	return map_deep( $value, $callback );
}
/**
* Converts email addresses characters to HTML entities to block spam bots.
*
* @since 0.71
*
* @param string $email_address Email address.
* @param int $hex_encoding Optional. Set to 1 to enable hex encoding.
* @return string Converted email address.