Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
<?php
/**
* Provides string functions for UTF-8 strings
*
* This class is implemented to provide a UTF-8 version of almost every built-in
* PHP string function. For more information about UTF-8, please visit
* http://flourishlib.com/docs/UTF-8.
*
* @copyright Copyright (c) 2008-2012 Will Bond
* @author Will Bond [wb] <will@flourishlib.com>
* @license http://flourishlib.com/license
*
* @package Flourish
* @link http://flourishlib.com/fUTF8
*
* @version 1.0.0b16
* @changes 1.0.0b16 Added code to ::clean() to use mbstring if available since recent versions of iconv and `//IGNORE` now return `FALSE` for bad encodings [wb, 2012-09-21]
* @changes 1.0.0b15 Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
* @changes 1.0.0b14 Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
* @changes 1.0.0b13 Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
* @changes 1.0.0b12 Fixed a variable name typo in ::sub() [wb, 2011-05-09]
* @changes 1.0.0b11 Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
* @changes 1.0.0b10 Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
* @changes 1.0.0b9 Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
* @changes 1.0.0b8 Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
* @changes 1.0.0b7 Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
* @changes 1.0.0b6 Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
* @changes 1.0.0b5 Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
* @changes 1.0.0b4 Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
* @changes 1.0.0b3 Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
* @changes 1.0.0b2 Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
* @changes 1.0.0b The initial implementation [wb, 2008-06-01]
*/
class fUTF8
{
// The following constants allow for nice looking callbacks to static methods
const ascii = 'fUTF8::ascii';
const chr = 'fUTF8::chr';
const clean = 'fUTF8::clean';
const cmp = 'fUTF8::cmp';
const explode = 'fUTF8::explode';
const icmp = 'fUTF8::icmp';
const inatcmp = 'fUTF8::inatcmp';
const ipos = 'fUTF8::ipos';
const ireplace = 'fUTF8::ireplace';
const irpos = 'fUTF8::irpos';
const istr = 'fUTF8::istr';
const len = 'fUTF8::len';
const lower = 'fUTF8::lower';
const ltrim = 'fUTF8::ltrim';
const natcmp = 'fUTF8::natcmp';
const ord = 'fUTF8::ord';
const pad = 'fUTF8::pad';
const pos = 'fUTF8::pos';
const replace = 'fUTF8::replace';
const reset = 'fUTF8::reset';
const rev = 'fUTF8::rev';
const rpos = 'fUTF8::rpos';
const rtrim = 'fUTF8::rtrim';
const str = 'fUTF8::str';
const sub = 'fUTF8::sub';
const trim = 'fUTF8::trim';
const ucfirst = 'fUTF8::ucfirst';
const ucwords = 'fUTF8::ucwords';
const upper = 'fUTF8::upper';
const wordwrap = 'fUTF8::wordwrap';
/**
* Depending on how things are compiled, NetBSD and Solaris don't support //IGNORE in iconv()
*
* If //IGNORE support is not provided, strings with invalid characters will be truncated
*
* @var boolean
*/
static private $can_ignore_invalid = NULL;
/**
* All lowercase UTF-8 characters mapped to uppercase characters
*
* @var array
*/
static private $lower_to_upper = array(
'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
'y' => 'Y', 'z' => 'Z', 'à' => 'À', 'á' => 'Á', 'â' => 'Â', 'ã' => 'Ã',
'ä' => 'Ä', 'å' => 'Å', 'æ' => 'Æ', 'ç' => 'Ç', 'è' => 'È', 'é' => 'É',
'ê' => 'Ê', 'ë' => 'Ë', 'ì' => 'Ì', 'í' => 'Í', 'î' => 'Î', 'ï' => 'Ï',
'ð' => 'Ð', 'ñ' => 'Ñ', 'ò' => 'Ò', 'ó' => 'Ó', 'ô' => 'Ô', 'õ' => 'Õ',
'ö' => 'Ö', 'ø' => 'Ø', 'ù' => 'Ù', 'ú' => 'Ú', 'û' => 'Û', 'ü' => 'Ü',
'ý' => 'Ý', 'þ' => 'Þ', 'ā' => 'Ā', 'ă' => 'Ă', 'ą' => 'Ą', 'ć' => 'Ć',
'ĉ' => 'Ĉ', 'ċ' => 'Ċ', 'č' => 'Č', 'ď' => 'Ď', 'đ' => 'Đ', 'ē' => 'Ē',
'ĕ' => 'Ĕ', 'ė' => 'Ė', 'ę' => 'Ę', 'ě' => 'Ě', 'ĝ' => 'Ĝ', 'ğ' => 'Ğ',
'ġ' => 'Ġ', 'ģ' => 'Ģ', 'ĥ' => 'Ĥ', 'ħ' => 'Ħ', 'ĩ' => 'Ĩ', 'ī' => 'Ī',
'ĭ' => 'Ĭ', 'į' => 'Į', 'ij' => 'IJ', 'ĵ' => 'Ĵ', 'ķ' => 'Ķ', 'ĺ' => 'Ĺ',
'ļ' => 'Ļ', 'ľ' => 'Ľ', 'ŀ' => 'Ŀ', 'ł' => 'Ł', 'ń' => 'Ń', 'ņ' => 'Ņ',
'ň' => 'Ň', 'ŋ' => 'Ŋ', 'ō' => 'Ō', 'ŏ' => 'Ŏ', 'ő' => 'Ő', 'œ' => 'Œ',
'ŕ' => 'Ŕ', 'ŗ' => 'Ŗ', 'ř' => 'Ř', 'ś' => 'Ś', 'ŝ' => 'Ŝ', 'ş' => 'Ş',
'š' => 'Š', 'ţ' => 'Ţ', 'ť' => 'Ť', 'ŧ' => 'Ŧ', 'ũ' => 'Ũ', 'ū' => 'Ū',
'ŭ' => 'Ŭ', 'ů' => 'Ů', 'ű' => 'Ű', 'ų' => 'Ų', 'ŵ' => 'Ŵ', 'ŷ' => 'Ŷ',
'ÿ' => 'Ÿ', 'ź' => 'Ź', 'ż' => 'Ż', 'ž' => 'Ž', 'ɓ' => 'Ɓ', 'ƃ' => 'Ƃ',
'ƅ' => 'Ƅ', 'ɔ' => 'Ɔ', 'ƈ' => 'Ƈ', 'ɗ' => 'Ɗ', 'ƌ' => 'Ƌ', 'ɘ' => 'Ǝ',
'ə' => 'Ə', 'ɛ' => 'Ɛ', 'ƒ' => 'Ƒ', 'ɠ' => 'Ɠ', 'ɣ' => 'Ɣ', 'ɩ' => 'Ɩ',
'ɨ' => 'Ɨ', 'ƙ' => 'Ƙ', 'ɯ' => 'Ɯ', 'ɲ' => 'Ɲ', 'ɵ' => 'Ɵ', 'ơ' => 'Ơ',
'ƣ' => 'Ƣ', 'ƥ' => 'Ƥ', 'ƨ' => 'Ƨ', 'ʃ' => 'Ʃ', 'ƭ' => 'Ƭ', 'ʈ' => 'Ʈ',
'ư' => 'Ư', 'ʊ' => 'Ʊ', 'ʋ' => 'Ʋ', 'ƴ' => 'Ƴ', 'ƶ' => 'Ƶ', 'ʒ' => 'Ʒ',
'ƹ' => 'Ƹ', 'ƽ' => 'Ƽ', 'dž' => 'DŽ', 'dž' => 'Dž', 'lj' => 'LJ', 'lj' => 'Lj',
'nj' => 'NJ', 'nj' => 'Nj', 'ǎ' => 'Ǎ', 'ǐ' => 'Ǐ', 'ǒ' => 'Ǒ', 'ǔ' => 'Ǔ',
'ǖ' => 'Ǖ', 'ǘ' => 'Ǘ', 'ǚ' => 'Ǚ', 'ǜ' => 'Ǜ', 'ǟ' => 'Ǟ', 'ǡ' => 'Ǡ',
'ǣ' => 'Ǣ', 'ǥ' => 'Ǥ', 'ǧ' => 'Ǧ', 'ǩ' => 'Ǩ', 'ǫ' => 'Ǫ', 'ǭ' => 'Ǭ',
'ǯ' => 'Ǯ', 'dz' => 'DZ', 'ǵ' => 'Ǵ', 'ǻ' => 'Ǻ', 'ǽ' => 'Ǽ', 'ǿ' => 'Ǿ',
'ȁ' => 'Ȁ', 'ȃ' => 'Ȃ', 'ȅ' => 'Ȅ', 'ȇ' => 'Ȇ', 'ȉ' => 'Ȉ', 'ȋ' => 'Ȋ',
'ȍ' => 'Ȍ', 'ȏ' => 'Ȏ', 'ȑ' => 'Ȑ', 'ȓ' => 'Ȓ', 'ȕ' => 'Ȕ', 'ȗ' => 'Ȗ',
'ά' => 'Ά', 'έ' => 'Έ', 'ή' => 'Ή', 'ί' => 'Ί', 'ό' => 'Ό', 'ύ' => 'Ύ',
'ώ' => 'Ώ', 'α' => 'Α', 'β' => 'Β', 'γ' => 'Γ', 'δ' => 'Δ', 'ε' => 'Ε',
'ζ' => 'Ζ', 'η' => 'Η', 'θ' => 'Θ', 'ι' => 'Ι', 'κ' => 'Κ', 'λ' => 'Λ',
'μ' => 'Μ', 'ν' => 'Ν', 'ξ' => 'Ξ', 'ο' => 'Ο', 'π' => 'Π', 'ρ' => 'Ρ',
'σ' => 'Σ', 'τ' => 'Τ', 'υ' => 'Υ', 'φ' => 'Φ', 'χ' => 'Χ', 'ψ' => 'Ψ',
'ω' => 'Ω', 'ϊ' => 'Ϊ', 'ϋ' => 'Ϋ', 'ϣ' => 'Ϣ', 'ϥ' => 'Ϥ', 'ϧ' => 'Ϧ',
'ϩ' => 'Ϩ', 'ϫ' => 'Ϫ', 'ϭ' => 'Ϭ', 'ϯ' => 'Ϯ', 'ё' => 'Ё', 'ђ' => 'Ђ',
'ѓ' => 'Ѓ', 'є' => 'Є', 'ѕ' => 'Ѕ', 'і' => 'І', 'ї' => 'Ї', 'ј' => 'Ј',
'љ' => 'Љ', 'њ' => 'Њ', 'ћ' => 'Ћ', 'ќ' => 'Ќ', 'ў' => 'Ў', 'џ' => 'Џ',
'а' => 'А', 'б' => 'Б', 'в' => 'В', 'г' => 'Г', 'д' => 'Д', 'е' => 'Е',
'ж' => 'Ж', 'з' => 'З', 'и' => 'И', 'й' => 'Й', 'к' => 'К', 'л' => 'Л',
'м' => 'М', 'н' => 'Н', 'о' => 'О', 'п' => 'П', 'р' => 'Р', 'с' => 'С',
'т' => 'Т', 'у' => 'У', 'ф' => 'Ф', 'х' => 'Х', 'ц' => 'Ц', 'ч' => 'Ч',
'ш' => 'Ш', 'щ' => 'Щ', 'ъ' => 'Ъ', 'ы' => 'Ы', 'ь' => 'Ь', 'э' => 'Э',
'ю' => 'Ю', 'я' => 'Я', 'ѡ' => 'Ѡ', 'ѣ' => 'Ѣ', 'ѥ' => 'Ѥ', 'ѧ' => 'Ѧ',
'ѩ' => 'Ѩ', 'ѫ' => 'Ѫ', 'ѭ' => 'Ѭ', 'ѯ' => 'Ѯ', 'ѱ' => 'Ѱ', 'ѳ' => 'Ѳ',
'ѵ' => 'Ѵ', 'ѷ' => 'Ѷ', 'ѹ' => 'Ѹ', 'ѻ' => 'Ѻ', 'ѽ' => 'Ѽ', 'ѿ' => 'Ѿ',
'ҁ' => 'Ҁ', 'ґ' => 'Ґ', 'ғ' => 'Ғ', 'ҕ' => 'Ҕ', 'җ' => 'Җ', 'ҙ' => 'Ҙ',
'қ' => 'Қ', 'ҝ' => 'Ҝ', 'ҟ' => 'Ҟ', 'ҡ' => 'Ҡ', 'ң' => 'Ң', 'ҥ' => 'Ҥ',
'ҧ' => 'Ҧ', 'ҩ' => 'Ҩ', 'ҫ' => 'Ҫ', 'ҭ' => 'Ҭ', 'ү' => 'Ү', 'ұ' => 'Ұ',
'ҳ' => 'Ҳ', 'ҵ' => 'Ҵ', 'ҷ' => 'Ҷ', 'ҹ' => 'Ҹ', 'һ' => 'Һ', 'ҽ' => 'Ҽ',
'ҿ' => 'Ҿ', 'ӂ' => 'Ӂ', 'ӄ' => 'Ӄ', 'ӈ' => 'Ӈ', 'ӌ' => 'Ӌ', 'ӑ' => 'Ӑ',
'ӓ' => 'Ӓ', 'ӕ' => 'Ӕ', 'ӗ' => 'Ӗ', 'ә' => 'Ә', 'ӛ' => 'Ӛ', 'ӝ' => 'Ӝ',
'ӟ' => 'Ӟ', 'ӡ' => 'Ӡ', 'ӣ' => 'Ӣ', 'ӥ' => 'Ӥ', 'ӧ' => 'Ӧ', 'ө' => 'Ө',
'ӫ' => 'Ӫ', 'ӯ' => 'Ӯ', 'ӱ' => 'Ӱ', 'ӳ' => 'Ӳ', 'ӵ' => 'Ӵ', 'ӹ' => 'Ӹ',
'ա' => 'Ա', 'բ' => 'Բ', 'գ' => 'Գ', 'դ' => 'Դ', 'ե' => 'Ե', 'զ' => 'Զ',
'է' => 'Է', 'ը' => 'Ը', 'թ' => 'Թ', 'ժ' => 'Ժ', 'ի' => 'Ի', 'լ' => 'Լ',
'խ' => 'Խ', 'ծ' => 'Ծ', 'կ' => 'Կ', 'հ' => 'Հ', 'ձ' => 'Ձ', 'ղ' => 'Ղ',
'ճ' => 'Ճ', 'մ' => 'Մ', 'յ' => 'Յ', 'ն' => 'Ն', 'շ' => 'Շ', 'ո' => 'Ո',
'չ' => 'Չ', 'պ' => 'Պ', 'ջ' => 'Ջ', 'ռ' => 'Ռ', 'ս' => 'Ս', 'վ' => 'Վ',
'տ' => 'Տ', 'ր' => 'Ր', 'ց' => 'Ց', 'ւ' => 'Ւ', 'փ' => 'Փ', 'ք' => 'Ք',
'օ' => 'Օ', 'ֆ' => 'Ֆ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ḁ' => 'Ḁ', 'ḃ' => 'Ḃ',
'ḅ' => 'Ḅ', 'ḇ' => 'Ḇ', 'ḉ' => 'Ḉ', 'ḋ' => 'Ḋ', 'ḍ' => 'Ḍ', 'ḏ' => 'Ḏ',
'ḑ' => 'Ḑ', 'ḓ' => 'Ḓ', 'ḕ' => 'Ḕ', 'ḗ' => 'Ḗ', 'ḙ' => 'Ḙ', 'ḛ' => 'Ḛ',
'ḝ' => 'Ḝ', 'ḟ' => 'Ḟ', 'ḡ' => 'Ḡ', 'ḣ' => 'Ḣ', 'ḥ' => 'Ḥ', 'ḧ' => 'Ḧ',
'ḩ' => 'Ḩ', 'ḫ' => 'Ḫ', 'ḭ' => 'Ḭ', 'ḯ' => 'Ḯ', 'ḱ' => 'Ḱ', 'ḳ' => 'Ḳ',
'ḵ' => 'Ḵ', 'ḷ' => 'Ḷ', 'ḹ' => 'Ḹ', 'ḻ' => 'Ḻ', 'ḽ' => 'Ḽ', 'ḿ' => 'Ḿ',
'ṁ' => 'Ṁ', 'ṃ' => 'Ṃ', 'ṅ' => 'Ṅ', 'ṇ' => 'Ṇ', 'ṉ' => 'Ṉ', 'ṋ' => 'Ṋ',
'ṍ' => 'Ṍ', 'ṏ' => 'Ṏ', 'ṑ' => 'Ṑ', 'ṓ' => 'Ṓ', 'ṕ' => 'Ṕ', 'ṗ' => 'Ṗ',
'ṙ' => 'Ṙ', 'ṛ' => 'Ṛ', 'ṝ' => 'Ṝ', 'ṟ' => 'Ṟ', 'ṡ' => 'Ṡ', 'ṣ' => 'Ṣ',
'ṥ' => 'Ṥ', 'ṧ' => 'Ṧ', 'ṩ' => 'Ṩ', 'ṫ' => 'Ṫ', 'ṭ' => 'Ṭ', 'ṯ' => 'Ṯ',
'ṱ' => 'Ṱ', 'ṳ' => 'Ṳ', 'ṵ' => 'Ṵ', 'ṷ' => 'Ṷ', 'ṹ' => 'Ṹ', 'ṻ' => 'Ṻ',
'ṽ' => 'Ṽ', 'ṿ' => 'Ṿ', 'ẁ' => 'Ẁ', 'ẃ' => 'Ẃ', 'ẅ' => 'Ẅ', 'ẇ' => 'Ẇ',
'ẉ' => 'Ẉ', 'ẋ' => 'Ẋ', 'ẍ' => 'Ẍ', 'ẏ' => 'Ẏ', 'ẑ' => 'Ẑ', 'ẓ' => 'Ẓ',
'ẕ' => 'Ẕ', 'ạ' => 'Ạ', 'ả' => 'Ả', 'ấ' => 'Ấ', 'ầ' => 'Ầ', 'ẩ' => 'Ẩ',
'ẫ' => 'Ẫ', 'ậ' => 'Ậ', 'ắ' => 'Ắ', 'ằ' => 'Ằ', 'ẳ' => 'Ẳ', 'ẵ' => 'Ẵ',
'ặ' => 'Ặ', 'ẹ' => 'Ẹ', 'ẻ' => 'Ẻ', 'ẽ' => 'Ẽ', 'ế' => 'Ế', 'ề' => 'Ề',
'ể' => 'Ể', 'ễ' => 'Ễ', 'ệ' => 'Ệ', 'ỉ' => 'Ỉ', 'ị' => 'Ị', 'ọ' => 'Ọ',
'ỏ' => 'Ỏ', 'ố' => 'Ố', 'ồ' => 'Ồ', 'ổ' => 'Ổ', 'ỗ' => 'Ỗ', 'ộ' => 'Ộ',
'ớ' => 'Ớ', 'ờ' => 'Ờ', 'ở' => 'Ở', 'ỡ' => 'Ỡ', 'ợ' => 'Ợ', 'ụ' => 'Ụ',
'ủ' => 'Ủ', 'ứ' => 'Ứ', 'ừ' => 'Ừ', 'ử' => 'Ử', 'ữ' => 'Ữ', 'ự' => 'Ự',
'ỳ' => 'Ỳ', 'ỵ' => 'Ỵ', 'ỷ' => 'Ỷ', 'ỹ' => 'Ỹ', 'ἀ' => 'Ἀ', 'ἁ' => 'Ἁ',
'ἂ' => 'Ἂ', 'ἃ' => 'Ἃ', 'ἄ' => 'Ἄ', 'ἅ' => 'Ἅ', 'ἆ' => 'Ἆ', 'ἇ' => 'Ἇ',
'ἐ' => 'Ἐ', 'ἑ' => 'Ἑ', 'ἒ' => 'Ἒ', 'ἓ' => 'Ἓ', 'ἔ' => 'Ἔ', 'ἕ' => 'Ἕ',
'ἠ' => 'Ἠ', 'ἡ' => 'Ἡ', 'ἢ' => 'Ἢ', 'ἣ' => 'Ἣ', 'ἤ' => 'Ἤ', 'ἥ' => 'Ἥ',
'ἦ' => 'Ἦ', 'ἧ' => 'Ἧ', 'ἰ' => 'Ἰ', 'ἱ' => 'Ἱ', 'ἲ' => 'Ἲ', 'ἳ' => 'Ἳ',
'ἴ' => 'Ἴ', 'ἵ' => 'Ἵ', 'ἶ' => 'Ἶ', 'ἷ' => 'Ἷ', 'ὀ' => 'Ὀ', 'ὁ' => 'Ὁ',
'ὂ' => 'Ὂ', 'ὃ' => 'Ὃ', 'ὄ' => 'Ὄ', 'ὅ' => 'Ὅ', 'ὑ' => 'Ὑ', 'ὓ' => 'Ὓ',
'ὕ' => 'Ὕ', 'ὗ' => 'Ὗ', 'ὠ' => 'Ὠ', 'ὡ' => 'Ὡ', 'ὢ' => 'Ὢ', 'ὣ' => 'Ὣ',
'ὤ' => 'Ὤ', 'ὥ' => 'Ὥ', 'ὦ' => 'Ὦ', 'ὧ' => 'Ὧ', 'ᾀ' => 'ᾈ', 'ᾁ' => 'ᾉ',
'ᾂ' => 'ᾊ', 'ᾃ' => 'ᾋ', 'ᾄ' => 'ᾌ', 'ᾅ' => 'ᾍ', 'ᾆ' => 'ᾎ', 'ᾇ' => 'ᾏ',
'ᾐ' => 'ᾘ', 'ᾑ' => 'ᾙ', 'ᾒ' => 'ᾚ', 'ᾓ' => 'ᾛ', 'ᾔ' => 'ᾜ', 'ᾕ' => 'ᾝ',
'ᾖ' => 'ᾞ', 'ᾗ' => 'ᾟ', 'ᾠ' => 'ᾨ', 'ᾡ' => 'ᾩ', 'ᾢ' => 'ᾪ', 'ᾣ' => 'ᾫ',
'ᾤ' => 'ᾬ', 'ᾥ' => 'ᾭ', 'ᾦ' => 'ᾮ', 'ᾧ' => 'ᾯ', 'ᾰ' => 'Ᾰ', 'ᾱ' => 'Ᾱ',
'ῐ' => 'Ῐ', 'ῑ' => 'Ῑ', 'ῠ' => 'Ῠ', 'ῡ' => 'Ῡ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ',
'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
'y' => 'Y', 'z' => 'Z'
);
/**
* All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
*
* @var array
*/
static private $mb_lower_to_upper_fix = array(
'ɘ' => 'Ǝ', 'Dz' => 'DZ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ'
);
/**
* All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
*
* @var array
*/
static private $mb_upper_to_lower_fix = array(
'ǝ' => 'ɘ', 'Dž' => 'dž', 'Lj' => 'lj', 'Nj' => 'nj', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ',
'Ⴂ' => 'გ', 'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ',
'Ⴈ' => 'ი', 'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო',
'Ⴎ' => 'პ', 'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ',
'Ⴔ' => 'ფ', 'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ',
'Ⴚ' => 'ც', 'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ',
'Ⴠ' => 'ჰ', 'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ',
'ᾈ' => 'ᾀ', 'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ',
'ᾎ' => 'ᾆ', 'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ',
'ᾜ' => 'ᾔ', 'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ',
'ᾪ' => 'ᾢ', 'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ',
'Ⓐ' => 'ⓐ', 'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ',
'Ⓖ' => 'ⓖ', 'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ',
'Ⓜ' => 'ⓜ', 'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ',
'Ⓢ' => 'ⓢ', 'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ',
'Ⓨ' => 'ⓨ', 'Ⓩ' => 'ⓩ'
);
/**
* All uppercase UTF-8 characters mapped to lowercase characters
*
* @var array
*/
static private $upper_to_lower = array(
'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
'Y' => 'y', 'Z' => 'z', 'À' => 'à', 'Á' => 'á', 'Â' => 'â', 'Ã' => 'ã',
'Ä' => 'ä', 'Å' => 'å', 'Æ' => 'æ', 'Ç' => 'ç', 'È' => 'è', 'É' => 'é',
'Ê' => 'ê', 'Ë' => 'ë', 'Ì' => 'ì', 'Í' => 'í', 'Î' => 'î', 'Ï' => 'ï',
'Ð' => 'ð', 'Ñ' => 'ñ', 'Ò' => 'ò', 'Ó' => 'ó', 'Ô' => 'ô', 'Õ' => 'õ',
'Ö' => 'ö', 'Ø' => 'ø', 'Ù' => 'ù', 'Ú' => 'ú', 'Û' => 'û', 'Ü' => 'ü',
'Ý' => 'ý', 'Þ' => 'þ', 'Ā' => 'ā', 'Ă' => 'ă', 'Ą' => 'ą', 'Ć' => 'ć',
'Ĉ' => 'ĉ', 'Ċ' => 'ċ', 'Č' => 'č', 'Ď' => 'ď', 'Đ' => 'đ', 'Ē' => 'ē',
'Ĕ' => 'ĕ', 'Ė' => 'ė', 'Ę' => 'ę', 'Ě' => 'ě', 'Ĝ' => 'ĝ', 'Ğ' => 'ğ',
'Ġ' => 'ġ', 'Ģ' => 'ģ', 'Ĥ' => 'ĥ', 'Ħ' => 'ħ', 'Ĩ' => 'ĩ', 'Ī' => 'ī',
'Ĭ' => 'ĭ', 'Į' => 'į', 'İ' => 'i', 'IJ' => 'ij', 'Ĵ' => 'ĵ', 'Ķ' => 'ķ',
'Ĺ' => 'ĺ', 'Ļ' => 'ļ', 'Ľ' => 'ľ', 'Ŀ' => 'ŀ', 'Ł' => 'ł', 'Ń' => 'ń',
'Ņ' => 'ņ', 'Ň' => 'ň', 'Ŋ' => 'ŋ', 'Ō' => 'ō', 'Ŏ' => 'ŏ', 'Ő' => 'ő',
'Œ' => 'œ', 'Ŕ' => 'ŕ', 'Ŗ' => 'ŗ', 'Ř' => 'ř', 'Ś' => 'ś', 'Ŝ' => 'ŝ',
'Ş' => 'ş', 'Š' => 'š', 'Ţ' => 'ţ', 'Ť' => 'ť', 'Ŧ' => 'ŧ', 'Ũ' => 'ũ',
'Ū' => 'ū', 'Ŭ' => 'ŭ', 'Ů' => 'ů', 'Ű' => 'ű', 'Ų' => 'ų', 'Ŵ' => 'ŵ',
'Ŷ' => 'ŷ', 'Ÿ' => 'ÿ', 'Ź' => 'ź', 'Ż' => 'ż', 'Ž' => 'ž', 'Ɓ' => 'ɓ',
'Ƃ' => 'ƃ', 'Ƅ' => 'ƅ', 'Ɔ' => 'ɔ', 'Ƈ' => 'ƈ', 'Ɗ' => 'ɗ', 'Ƌ' => 'ƌ',
'Ǝ' => 'ɘ', 'Ə' => 'ə', 'Ɛ' => 'ɛ', 'Ƒ' => 'ƒ', 'Ɠ' => 'ɠ', 'Ɣ' => 'ɣ',
'Ɩ' => 'ɩ', 'Ɨ' => 'ɨ', 'Ƙ' => 'ƙ', 'Ɯ' => 'ɯ', 'Ɲ' => 'ɲ', 'Ɵ' => 'ɵ',
'Ơ' => 'ơ', 'Ƣ' => 'ƣ', 'Ƥ' => 'ƥ', 'Ƨ' => 'ƨ', 'Ʃ' => 'ʃ', 'Ƭ' => 'ƭ',
'Ʈ' => 'ʈ', 'Ư' => 'ư', 'Ʊ' => 'ʊ', 'Ʋ' => 'ʋ', 'Ƴ' => 'ƴ', 'Ƶ' => 'ƶ',
'Ʒ' => 'ʒ', 'Ƹ' => 'ƹ', 'Ƽ' => 'ƽ', 'DŽ' => 'dž', 'Dž' => 'dž', 'LJ' => 'lj',
'Lj' => 'lj', 'NJ' => 'nj', 'Nj' => 'nj', 'Ǎ' => 'ǎ', 'Ǐ' => 'ǐ', 'Ǒ' => 'ǒ',
'Ǔ' => 'ǔ', 'Ǖ' => 'ǖ', 'Ǘ' => 'ǘ', 'Ǚ' => 'ǚ', 'Ǜ' => 'ǜ', 'Ǟ' => 'ǟ',
'Ǡ' => 'ǡ', 'Ǣ' => 'ǣ', 'Ǥ' => 'ǥ', 'Ǧ' => 'ǧ', 'Ǩ' => 'ǩ', 'Ǫ' => 'ǫ',
'Ǭ' => 'ǭ', 'Ǯ' => 'ǯ', 'DZ' => 'dz', 'Ǵ' => 'ǵ', 'Ǻ' => 'ǻ', 'Ǽ' => 'ǽ',
'Ǿ' => 'ǿ', 'Ȁ' => 'ȁ', 'Ȃ' => 'ȃ', 'Ȅ' => 'ȅ', 'Ȇ' => 'ȇ', 'Ȉ' => 'ȉ',
'Ȋ' => 'ȋ', 'Ȍ' => 'ȍ', 'Ȏ' => 'ȏ', 'Ȑ' => 'ȑ', 'Ȓ' => 'ȓ', 'Ȕ' => 'ȕ',
'Ȗ' => 'ȗ', 'Ά' => 'ά', 'Έ' => 'έ', 'Ή' => 'ή', 'Ί' => 'ί', 'Ό' => 'ό',
'Ύ' => 'ύ', 'Ώ' => 'ώ', 'Α' => 'α', 'Β' => 'β', 'Γ' => 'γ', 'Δ' => 'δ',
'Ε' => 'ε', 'Ζ' => 'ζ', 'Η' => 'η', 'Θ' => 'θ', 'Ι' => 'ι', 'Κ' => 'κ',
'Λ' => 'λ', 'Μ' => 'μ', 'Ν' => 'ν', 'Ξ' => 'ξ', 'Ο' => 'ο', 'Π' => 'π',
'Ρ' => 'ρ', 'Σ' => 'σ', 'Τ' => 'τ', 'Υ' => 'υ', 'Φ' => 'φ', 'Χ' => 'χ',
'Ψ' => 'ψ', 'Ω' => 'ω', 'Ϊ' => 'ϊ', 'Ϋ' => 'ϋ', 'Ϣ' => 'ϣ', 'Ϥ' => 'ϥ',
'Ϧ' => 'ϧ', 'Ϩ' => 'ϩ', 'Ϫ' => 'ϫ', 'Ϭ' => 'ϭ', 'Ϯ' => 'ϯ', 'Ё' => 'ё',
'Ђ' => 'ђ', 'Ѓ' => 'ѓ', 'Є' => 'є', 'Ѕ' => 'ѕ', 'І' => 'і', 'Ї' => 'ї',
'Ј' => 'ј', 'Љ' => 'љ', 'Њ' => 'њ', 'Ћ' => 'ћ', 'Ќ' => 'ќ', 'Ў' => 'ў',
'Џ' => 'џ', 'А' => 'а', 'Б' => 'б', 'В' => 'в', 'Г' => 'г', 'Д' => 'д',
'Е' => 'е', 'Ж' => 'ж', 'З' => 'з', 'И' => 'и', 'Й' => 'й', 'К' => 'к',
'Л' => 'л', 'М' => 'м', 'Н' => 'н', 'О' => 'о', 'П' => 'п', 'Р' => 'р',
'С' => 'с', 'Т' => 'т', 'У' => 'у', 'Ф' => 'ф', 'Х' => 'х', 'Ц' => 'ц',
'Ч' => 'ч', 'Ш' => 'ш', 'Щ' => 'щ', 'Ъ' => 'ъ', 'Ы' => 'ы', 'Ь' => 'ь',
'Э' => 'э', 'Ю' => 'ю', 'Я' => 'я', 'Ѡ' => 'ѡ', 'Ѣ' => 'ѣ', 'Ѥ' => 'ѥ',
'Ѧ' => 'ѧ', 'Ѩ' => 'ѩ', 'Ѫ' => 'ѫ', 'Ѭ' => 'ѭ', 'Ѯ' => 'ѯ', 'Ѱ' => 'ѱ',
'Ѳ' => 'ѳ', 'Ѵ' => 'ѵ', 'Ѷ' => 'ѷ', 'Ѹ' => 'ѹ', 'Ѻ' => 'ѻ', 'Ѽ' => 'ѽ',
'Ѿ' => 'ѿ', 'Ҁ' => 'ҁ', 'Ґ' => 'ґ', 'Ғ' => 'ғ', 'Ҕ' => 'ҕ', 'Җ' => 'җ',
'Ҙ' => 'ҙ', 'Қ' => 'қ', 'Ҝ' => 'ҝ', 'Ҟ' => 'ҟ', 'Ҡ' => 'ҡ', 'Ң' => 'ң',
'Ҥ' => 'ҥ', 'Ҧ' => 'ҧ', 'Ҩ' => 'ҩ', 'Ҫ' => 'ҫ', 'Ҭ' => 'ҭ', 'Ү' => 'ү',
'Ұ' => 'ұ', 'Ҳ' => 'ҳ', 'Ҵ' => 'ҵ', 'Ҷ' => 'ҷ', 'Ҹ' => 'ҹ', 'Һ' => 'һ',
'Ҽ' => 'ҽ', 'Ҿ' => 'ҿ', 'Ӂ' => 'ӂ', 'Ӄ' => 'ӄ', 'Ӈ' => 'ӈ', 'Ӌ' => 'ӌ',
'Ӑ' => 'ӑ', 'Ӓ' => 'ӓ', 'Ӕ' => 'ӕ', 'Ӗ' => 'ӗ', 'Ә' => 'ә', 'Ӛ' => 'ӛ',
'Ӝ' => 'ӝ', 'Ӟ' => 'ӟ', 'Ӡ' => 'ӡ', 'Ӣ' => 'ӣ', 'Ӥ' => 'ӥ', 'Ӧ' => 'ӧ',
'Ө' => 'ө', 'Ӫ' => 'ӫ', 'Ӯ' => 'ӯ', 'Ӱ' => 'ӱ', 'Ӳ' => 'ӳ', 'Ӵ' => 'ӵ',
'Ӹ' => 'ӹ', 'Ա' => 'ա', 'Բ' => 'բ', 'Գ' => 'գ', 'Դ' => 'դ', 'Ե' => 'ե',
'Զ' => 'զ', 'Է' => 'է', 'Ը' => 'ը', 'Թ' => 'թ', 'Ժ' => 'ժ', 'Ի' => 'ի',
'Լ' => 'լ', 'Խ' => 'խ', 'Ծ' => 'ծ', 'Կ' => 'կ', 'Հ' => 'հ', 'Ձ' => 'ձ',
'Ղ' => 'ղ', 'Ճ' => 'ճ', 'Մ' => 'մ', 'Յ' => 'յ', 'Ն' => 'ն', 'Շ' => 'շ',
'Ո' => 'ո', 'Չ' => 'չ', 'Պ' => 'պ', 'Ջ' => 'ջ', 'Ռ' => 'ռ', 'Ս' => 'ս',
'Վ' => 'վ', 'Տ' => 'տ', 'Ր' => 'ր', 'Ց' => 'ց', 'Ւ' => 'ւ', 'Փ' => 'փ',
'Ք' => 'ք', 'Օ' => 'օ', 'Ֆ' => 'ֆ', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ', 'Ⴂ' => 'გ',
'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ', 'Ⴈ' => 'ი',
'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო', 'Ⴎ' => 'პ',
'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ', 'Ⴔ' => 'ფ',
'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ', 'Ⴚ' => 'ც',
'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ', 'Ⴠ' => 'ჰ',
'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ', 'Ḁ' => 'ḁ',
'Ḃ' => 'ḃ', 'Ḅ' => 'ḅ', 'Ḇ' => 'ḇ', 'Ḉ' => 'ḉ', 'Ḋ' => 'ḋ', 'Ḍ' => 'ḍ',
'Ḏ' => 'ḏ', 'Ḑ' => 'ḑ', 'Ḓ' => 'ḓ', 'Ḕ' => 'ḕ', 'Ḗ' => 'ḗ', 'Ḙ' => 'ḙ',
'Ḛ' => 'ḛ', 'Ḝ' => 'ḝ', 'Ḟ' => 'ḟ', 'Ḡ' => 'ḡ', 'Ḣ' => 'ḣ', 'Ḥ' => 'ḥ',
'Ḧ' => 'ḧ', 'Ḩ' => 'ḩ', 'Ḫ' => 'ḫ', 'Ḭ' => 'ḭ', 'Ḯ' => 'ḯ', 'Ḱ' => 'ḱ',
'Ḳ' => 'ḳ', 'Ḵ' => 'ḵ', 'Ḷ' => 'ḷ', 'Ḹ' => 'ḹ', 'Ḻ' => 'ḻ', 'Ḽ' => 'ḽ',
'Ḿ' => 'ḿ', 'Ṁ' => 'ṁ', 'Ṃ' => 'ṃ', 'Ṅ' => 'ṅ', 'Ṇ' => 'ṇ', 'Ṉ' => 'ṉ',
'Ṋ' => 'ṋ', 'Ṍ' => 'ṍ', 'Ṏ' => 'ṏ', 'Ṑ' => 'ṑ', 'Ṓ' => 'ṓ', 'Ṕ' => 'ṕ',
'Ṗ' => 'ṗ', 'Ṙ' => 'ṙ', 'Ṛ' => 'ṛ', 'Ṝ' => 'ṝ', 'Ṟ' => 'ṟ', 'Ṡ' => 'ṡ',
'Ṣ' => 'ṣ', 'Ṥ' => 'ṥ', 'Ṧ' => 'ṧ', 'Ṩ' => 'ṩ', 'Ṫ' => 'ṫ', 'Ṭ' => 'ṭ',
'Ṯ' => 'ṯ', 'Ṱ' => 'ṱ', 'Ṳ' => 'ṳ', 'Ṵ' => 'ṵ', 'Ṷ' => 'ṷ', 'Ṹ' => 'ṹ',
'Ṻ' => 'ṻ', 'Ṽ' => 'ṽ', 'Ṿ' => 'ṿ', 'Ẁ' => 'ẁ', 'Ẃ' => 'ẃ', 'Ẅ' => 'ẅ',
'Ẇ' => 'ẇ', 'Ẉ' => 'ẉ', 'Ẋ' => 'ẋ', 'Ẍ' => 'ẍ', 'Ẏ' => 'ẏ', 'Ẑ' => 'ẑ',
'Ẓ' => 'ẓ', 'Ẕ' => 'ẕ', 'Ạ' => 'ạ', 'Ả' => 'ả', 'Ấ' => 'ấ', 'Ầ' => 'ầ',
'Ẩ' => 'ẩ', 'Ẫ' => 'ẫ', 'Ậ' => 'ậ', 'Ắ' => 'ắ', 'Ằ' => 'ằ', 'Ẳ' => 'ẳ',
'Ẵ' => 'ẵ', 'Ặ' => 'ặ', 'Ẹ' => 'ẹ', 'Ẻ' => 'ẻ', 'Ẽ' => 'ẽ', 'Ế' => 'ế',
'Ề' => 'ề', 'Ể' => 'ể', 'Ễ' => 'ễ', 'Ệ' => 'ệ', 'Ỉ' => 'ỉ', 'Ị' => 'ị',
'Ọ' => 'ọ', 'Ỏ' => 'ỏ', 'Ố' => 'ố', 'Ồ' => 'ồ', 'Ổ' => 'ổ', 'Ỗ' => 'ỗ',
'Ộ' => 'ộ', 'Ớ' => 'ớ', 'Ờ' => 'ờ', 'Ở' => 'ở', 'Ỡ' => 'ỡ', 'Ợ' => 'ợ',
'Ụ' => 'ụ', 'Ủ' => 'ủ', 'Ứ' => 'ứ', 'Ừ' => 'ừ', 'Ử' => 'ử', 'Ữ' => 'ữ',
'Ự' => 'ự', 'Ỳ' => 'ỳ', 'Ỵ' => 'ỵ', 'Ỷ' => 'ỷ', 'Ỹ' => 'ỹ', 'Ἀ' => 'ἀ',
'Ἁ' => 'ἁ', 'Ἂ' => 'ἂ', 'Ἃ' => 'ἃ', 'Ἄ' => 'ἄ', 'Ἅ' => 'ἅ', 'Ἆ' => 'ἆ',
'Ἇ' => 'ἇ', 'Ἐ' => 'ἐ', 'Ἑ' => 'ἑ', 'Ἒ' => 'ἒ', 'Ἓ' => 'ἓ', 'Ἔ' => 'ἔ',
'Ἕ' => 'ἕ', 'Ἠ' => 'ἠ', 'Ἡ' => 'ἡ', 'Ἢ' => 'ἢ', 'Ἣ' => 'ἣ', 'Ἤ' => 'ἤ',
'Ἥ' => 'ἥ', 'Ἦ' => 'ἦ', 'Ἧ' => 'ἧ', 'Ἰ' => 'ἰ', 'Ἱ' => 'ἱ', 'Ἲ' => 'ἲ',
'Ἳ' => 'ἳ', 'Ἴ' => 'ἴ', 'Ἵ' => 'ἵ', 'Ἶ' => 'ἶ', 'Ἷ' => 'ἷ', 'Ὀ' => 'ὀ',
'Ὁ' => 'ὁ', 'Ὂ' => 'ὂ', 'Ὃ' => 'ὃ', 'Ὄ' => 'ὄ', 'Ὅ' => 'ὅ', 'Ὑ' => 'ὑ',
'Ὓ' => 'ὓ', 'Ὕ' => 'ὕ', 'Ὗ' => 'ὗ', 'Ὠ' => 'ὠ', 'Ὡ' => 'ὡ', 'Ὢ' => 'ὢ',
'Ὣ' => 'ὣ', 'Ὤ' => 'ὤ', 'Ὥ' => 'ὥ', 'Ὦ' => 'ὦ', 'Ὧ' => 'ὧ', 'ᾈ' => 'ᾀ',
'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ', 'ᾎ' => 'ᾆ',
'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ', 'ᾜ' => 'ᾔ',
'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ', 'ᾪ' => 'ᾢ',
'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ', 'Ᾰ' => 'ᾰ',
'Ᾱ' => 'ᾱ', 'Ῐ' => 'ῐ', 'Ῑ' => 'ῑ', 'Ῠ' => 'ῠ', 'Ῡ' => 'ῡ', 'Ⓐ' => 'ⓐ',
'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ', 'Ⓖ' => 'ⓖ',
'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ', 'Ⓜ' => 'ⓜ',
'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ', 'Ⓢ' => 'ⓢ',
'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ', 'Ⓨ' => 'ⓨ',
'Ⓩ' => 'ⓩ', 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k',
'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q',
'R' => 'r', 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w',
'X' => 'x', 'Y' => 'y', 'Z' => 'z'
);
/**
* A mapping of all ASCII-based latin characters, punctuation, symbols and number forms to ASCII.
*
* Includes elements from the following unicode blocks:
*
* - Latin-1 Supplement
* - Latin Extended-A
* - Latin Extended-B
* - IPA Extensions
* - Latin Extended Additional
* - General Punctuation
* - Letterlike symbols
* - Number Forms
*
* @var array
*/
static private $utf8_to_ascii = array(
// Latin-1 Supplement
'©' => '(c)', '«' => '<<', '®' => '(R)', '»' => '>>', '¼' => '1/4',
'½' => '1/2', '¾' => '3/4', 'À' => 'A', 'Á' => 'A', 'Â' => 'A',
'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C',
'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I',
'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ñ' => 'N', 'Ò' => 'O',
'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y',
'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a',
'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i',
'ï' => 'i', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o',
'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u',
'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'ÿ' => 'y',
// Latin Extended-A
'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A',
'ą' => 'a', 'Ć' => 'C', 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c',
'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c', 'Ď' => 'D',
'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e',
'Ĕ' => 'E', 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E',
'ę' => 'e', 'Ě' => 'E', 'ě' => 'e', 'Ĝ' => 'G', 'ĝ' => 'g',
'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h',
'Ĩ' => 'I', 'ĩ' => 'i', 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I',
'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I', 'ı' => 'i',
'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K',
'ķ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l',
'Ľ' => 'L', 'ľ' => 'l', 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L',
'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N', 'ņ' => 'n',
'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'N', 'ŋ' => 'n',
'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O',
'ő' => 'o', 'Œ' => 'OE', 'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r',
'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r', 'Ś' => 'S',
'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's',
'Š' => 'S', 'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T',
'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't', 'Ũ' => 'U', 'ũ' => 'u',
'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u',
'Ŵ' => 'W', 'ŵ' => 'w', 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y',
'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z',
'ž' => 'z',
// Latin Extended-B
'ƀ' => 'b', 'Ɓ' => 'B', 'Ƃ' => 'B', 'ƃ' => 'b', 'Ɔ' => 'O',
'Ƈ' => 'C', 'ƈ' => 'c', 'Ɖ' => 'D', 'Ɗ' => 'D', 'Ƌ' => 'D',
'ƌ' => 'd', 'Ǝ' => 'E', 'Ɛ' => 'E', 'Ƒ' => 'F', 'ƒ' => 'f',
'Ɠ' => 'G', 'Ɨ' => 'I', 'Ƙ' => 'K', 'ƙ' => 'k', 'ƚ' => 'l',
'Ɯ' => 'M', 'Ɲ' => 'N', 'ƞ' => 'n', 'Ɵ' => 'O', 'Ơ' => 'O',
'ơ' => 'o', 'Ƣ' => 'OI', 'ƣ' => 'oi', 'Ƥ' => 'P', 'ƥ' => 'p',
'ƫ' => 't', 'Ƭ' => 'T', 'ƭ' => 't', 'Ʈ' => 'T', 'Ư' => 'U',
'ư' => 'u', 'Ʋ' => 'V', 'Ƴ' => 'Y', 'ƴ' => 'y', 'Ƶ' => 'Z',
'ƶ' => 'z', 'ƻ' => '2', 'DŽ' => 'DZ', 'Dž' => 'Dz', 'dž' => 'dz',
'LJ' => 'LJ', 'Lj' => 'Lj', 'lj' => 'lj', 'NJ' => 'Nj', 'Nj' => 'Nj',
'nj' => 'nj', 'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i',
'Ǒ' => 'O', 'ǒ' => 'o', 'Ǔ' => 'U', 'ǔ' => 'u', 'Ǖ' => 'U',
'ǖ' => 'u', 'Ǘ' => 'U', 'ǘ' => 'u', 'Ǚ' => 'U', 'ǚ' => 'u',
'Ǜ' => 'U', 'ǜ' => 'u', 'ǝ' => 'e', 'Ǟ' => 'A', 'ǟ' => 'a',
'Ǡ' => 'A', 'ǡ' => 'a', 'Ǣ' => 'AE', 'ǣ' => 'ae', 'Ǥ' => 'G',
'ǥ' => 'g', 'Ǧ' => 'G', 'ǧ' => 'g', 'Ǩ' => 'K', 'ǩ' => 'k',
'Ǫ' => 'O', 'ǫ' => 'o', 'Ǭ' => 'O', 'ǭ' => 'o', 'ǰ' => 'j',
'DZ' => 'DZ', 'Dz' => 'Dz', 'dz' => 'dz', 'Ǵ' => 'G', 'ǵ' => 'g',
'Ǹ' => 'N', 'ǹ' => 'n', 'Ǻ' => 'A', 'ǻ' => 'a', 'Ǽ' => 'AE',
'ǽ' => 'ae', 'Ǿ' => 'O', 'ǿ' => 'o', 'Ȁ' => 'A', 'ȁ' => 'a',
'Ȃ' => 'A', 'ȃ' => 'a', 'Ȅ' => 'E', 'ȅ' => 'e', 'Ȇ' => 'E',
'ȇ' => 'e', 'Ȉ' => 'I', 'ȉ' => 'i', 'Ȋ' => 'I', 'ȋ' => 'i',
'Ȍ' => 'O', 'ȍ' => 'o', 'Ȏ' => 'O', 'ȏ' => 'o', 'Ȑ' => 'R',
'ȑ' => 'r', 'Ȓ' => 'R', 'ȓ' => 'r', 'Ȕ' => 'U', 'ȕ' => 'u',
'Ȗ' => 'U', 'ȗ' => 'u', 'Ș' => 'S', 'ș' => 's', 'Ț' => 'T',
'ț' => 't', 'Ȟ' => 'H', 'ȟ' => 'h', 'Ƞ' => 'N', 'ȡ' => 'd',
'Ȥ' => 'Z', 'ȥ' => 'z', 'Ȧ' => 'A', 'ȧ' => 'a', 'Ȩ' => 'E',
'ȩ' => 'e', 'Ȫ' => 'O', 'ȫ' => 'o', 'Ȭ' => 'O', 'ȭ' => 'o',
'Ȯ' => 'O', 'ȯ' => 'o', 'Ȱ' => 'O', 'ȱ' => 'o', 'Ȳ' => 'Y',
'ȳ' => 'y', 'ȴ' => 'l', 'ȵ' => 'n', 'ȶ' => 't', 'ȷ' => 'j',
'ȸ' => 'db', 'ȹ' => 'qp', 'Ⱥ' => 'A', 'Ȼ' => 'C', 'ȼ' => 'c',
'Ƚ' => 'L', 'Ⱦ' => 'T', 'ȿ' => 's', 'ɀ' => 'z', 'Ƀ' => 'B',
'Ʉ' => 'U', 'Ʌ' => 'V', 'Ɇ' => 'E', 'ɇ' => 'e', 'Ɉ' => 'J',
'ɉ' => 'j', 'Ɋ' => 'Q', 'ɋ' => 'q', 'Ɍ' => 'R', 'ɍ' => 'r',
'Ɏ' => 'Y', 'ɏ' => 'y',
// IPA Extensions
'ɐ' => 'a', 'ɓ' => 'b', 'ɔ' => 'o', 'ɕ' => 'c', 'ɖ' => 'd',
'ɗ' => 'd', 'ɘ' => 'e', 'ɛ' => 'e', 'ɜ' => 'e', 'ɝ' => 'e',
'ɞ' => 'e', 'ɟ' => 'j', 'ɠ' => 'g', 'ɡ' => 'g', 'ɢ' => 'G',
'ɥ' => 'h', 'ɦ' => 'h', 'ɨ' => 'i', 'ɪ' => 'I', 'ɫ' => 'l',
'ɬ' => 'l', 'ɭ' => 'l', 'ɯ' => 'm', 'ɰ' => 'm', 'ɱ' => 'm',
'ɲ' => 'n', 'ɳ' => 'n', 'ɴ' => 'N', 'ɵ' => 'o', 'ɶ' => 'OE',
'ɹ' => 'r', 'ɺ' => 'r', 'ɻ' => 'r', 'ɼ' => 'r', 'ɽ' => 'r',
'ɾ' => 'r', 'ɿ' => 'r', 'ʀ' => 'R', 'ʁ' => 'R', 'ʂ' => 's',
'ʇ' => 't', 'ʈ' => 't', 'ʉ' => 'u', 'ʋ' => 'v', 'ʌ' => 'v',
'ʍ' => 'w', 'ʎ' => 'y', 'ʏ' => 'Y', 'ʐ' => 'z', 'ʑ' => 'z',
'ʗ' => 'C', 'ʙ' => 'B', 'ʚ' => 'e', 'ʛ' => 'G', 'ʜ' => 'H',
'ʝ' => 'j', 'ʞ' => 'k', 'ʟ' => 'L', 'ʠ' => 'q', 'ʣ' => 'dz',
'ʥ' => 'dz', 'ʦ' => 'ts', 'ʨ' => 'tc', 'ʪ' => 'ls', 'ʫ' => 'lz',
'ʮ' => 'h', 'ʯ' => 'h',
// Latin Extended Additional
'Ḁ' => 'A', 'ḁ' => 'a', 'Ḃ' => 'B', 'ḃ' => 'b', 'Ḅ' => 'B',
'ḅ' => 'b', 'Ḇ' => 'B', 'ḇ' => 'b', 'Ḉ' => 'C', 'ḉ' => 'c',
'Ḋ' => 'D', 'ḋ' => 'd', 'Ḍ' => 'D', 'ḍ' => 'd', 'Ḏ' => 'D',
'ḏ' => 'd', 'Ḑ' => 'D', 'ḑ' => 'd', 'Ḓ' => 'D', 'ḓ' => 'd',
'Ḕ' => 'E', 'ḕ' => 'e', 'Ḗ' => 'E', 'ḗ' => 'e', 'Ḙ' => 'E',
'ḙ' => 'e', 'Ḛ' => 'E', 'ḛ' => 'e', 'Ḝ' => 'E', 'ḝ' => 'e',
'Ḟ' => 'F', 'ḟ' => 'f', 'Ḡ' => 'G', 'ḡ' => 'g', 'Ḣ' => 'H',
'ḣ' => 'h', 'Ḥ' => 'H', 'ḥ' => 'h', 'Ḧ' => 'H', 'ḧ' => 'h',
'Ḩ' => 'H', 'ḩ' => 'h', 'Ḫ' => 'H', 'ḫ' => 'h', 'Ḭ' => 'I',
'ḭ' => 'i', 'Ḯ' => 'I', 'ḯ' => 'i', 'Ḱ' => 'K', 'ḱ' => 'k',
'Ḳ' => 'K', 'ḳ' => 'k', 'Ḵ' => 'K', 'ḵ' => 'k', 'Ḷ' => 'L',
'ḷ' => 'l', 'Ḹ' => 'L', 'ḹ' => 'l', 'Ḻ' => 'L', 'ḻ' => 'l',
'Ḽ' => 'L', 'ḽ' => 'l', 'Ḿ' => 'M', 'ḿ' => 'm', 'Ṁ' => 'M',
'ṁ' => 'm', 'Ṃ' => 'M', 'ṃ' => 'm', 'Ṅ' => 'N', 'ṅ' => 'n',
'Ṇ' => 'N', 'ṇ' => 'n', 'Ṉ' => 'N', 'ṉ' => 'n', 'Ṋ' => 'N',
'ṋ' => 'n', 'Ṍ' => 'O', 'ṍ' => 'o', 'Ṏ' => 'O', 'ṏ' => 'o',
'Ṑ' => 'O', 'ṑ' => 'o', 'Ṓ' => 'O', 'ṓ' => 'o', 'Ṕ' => 'P',
'ṕ' => 'p', 'Ṗ' => 'P', 'ṗ' => 'p', 'Ṙ' => 'R', 'ṙ' => 'r',
'Ṛ' => 'R', 'ṛ' => 'r', 'Ṝ' => 'R', 'ṝ' => 'r', 'Ṟ' => 'R',
'ṟ' => 'r', 'Ṡ' => 'S', 'ṡ' => 's', 'Ṣ' => 'S', 'ṣ' => 's',
'Ṥ' => 'S', 'ṥ' => 's', 'Ṧ' => 'S', 'ṧ' => 's', 'Ṩ' => 'S',
'ṩ' => 's', 'Ṫ' => 'T', 'ṫ' => 't', 'Ṭ' => 'T', 'ṭ' => 't',
'Ṯ' => 'T', 'ṯ' => 't', 'Ṱ' => 'T', 'ṱ' => 't', 'Ṳ' => 'U',
'ṳ' => 'u', 'Ṵ' => 'U', 'ṵ' => 'u', 'Ṷ' => 'U', 'ṷ' => 'u',
'Ṹ' => 'U', 'ṹ' => 'u', 'Ṻ' => 'U', 'ṻ' => 'u', 'Ṽ' => 'V',
'ṽ' => 'v', 'Ṿ' => 'V', 'ṿ' => 'v', 'Ẁ' => 'W', 'ẁ' => 'w',
'Ẃ' => 'W', 'ẃ' => 'w', 'Ẅ' => 'W', 'ẅ' => 'w', 'Ẇ' => 'W',
'ẇ' => 'w', 'Ẉ' => 'W', 'ẉ' => 'w', 'Ẋ' => 'X', 'ẋ' => 'x',
'Ẍ' => 'X', 'ẍ' => 'x', 'Ẏ' => 'Y', 'ẏ' => 'y', 'Ẑ' => 'Z',
'ẑ' => 'z', 'Ẓ' => 'Z', 'ẓ' => 'z', 'Ẕ' => 'Z', 'ẕ' => 'z',
'ẖ' => 'h', 'ẗ' => 't', 'ẘ' => 'w', 'ẙ' => 'y', 'ẚ' => 'a',
'Ạ' => 'A', 'ạ' => 'a', 'Ả' => 'A', 'ả' => 'a', 'Ấ' => 'A',
'ấ' => 'a', 'Ầ' => 'A', 'ầ' => 'a', 'Ẩ' => 'A', 'ẩ' => 'a',
'Ẫ' => 'A', 'ẫ' => 'a', 'Ậ' => 'A', 'ậ' => 'a', 'Ắ' => 'A',
'ắ' => 'a', 'Ằ' => 'A', 'ằ' => 'a', 'Ẳ' => 'A', 'ẳ' => 'a',
'Ẵ' => 'A', 'ẵ' => 'a', 'Ặ' => 'A', 'ặ' => 'a', 'Ẹ' => 'E',
'ẹ' => 'e', 'Ẻ' => 'E', 'ẻ' => 'e', 'Ẽ' => 'E', 'ẽ' => 'e',
'Ế' => 'E', 'ế' => 'e', 'Ề' => 'E', 'ề' => 'e', 'Ể' => 'E',
'ể' => 'e', 'Ễ' => 'E', 'ễ' => 'e', 'Ệ' => 'E', 'ệ' => 'e',
'Ỉ' => 'I', 'ỉ' => 'i', 'Ị' => 'I', 'ị' => 'i', 'Ọ' => 'O',
'ọ' => 'o', 'Ỏ' => 'O', 'ỏ' => 'o', 'Ố' => 'O', 'ố' => 'o',
'Ồ' => 'O', 'ồ' => 'o', 'Ổ' => 'O', 'ổ' => 'o', 'Ỗ' => 'O',
'ỗ' => 'o', 'Ộ' => 'O', 'ộ' => 'o', 'Ớ' => 'O', 'ớ' => 'o',
'Ờ' => 'O', 'ờ' => 'o', 'Ở' => 'O', 'ở' => 'o', 'Ỡ' => 'O',
'ỡ' => 'o', 'Ợ' => 'O', 'ợ' => 'o', 'Ụ' => 'U', 'ụ' => 'u',
'Ủ' => 'U', 'ủ' => 'u', 'Ứ' => 'U', 'ứ' => 'u', 'Ừ' => 'U',
'ừ' => 'u', 'Ử' => 'U', 'ử' => 'u', 'Ữ' => 'U', 'ữ' => 'u',
'Ự' => 'U', 'ự' => 'u', 'Ỳ' => 'Y', 'ỳ' => 'y', 'Ỵ' => 'Y',
'ỵ' => 'y', 'Ỷ' => 'Y', 'ỷ' => 'y', 'Ỹ' => 'Y', 'ỹ' => 'y',
// General Punctuation
' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ',
' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ',
' ' => ' ', '​' => '', '‌' => '', '‍' => '', '‐' => '-',
'‑' => '-', '‒' => '-', '–' => '-', '—' => '-', '―' => '-',
'‖' => '||', '‘' => "'", '’' => "'", '‚' => ',', '‛' => "'",
'“' => '"', '”' => '"', '‟' => '"', '․' => '.', '‥' => '..',
'…' => '...', ' ' => ' ', '′' => "'", '″' => '"', '‴' => '\'"',
'‵' => "'", '‶' => '"', '‷' => '"\'', '‹' => '<', '›' => '>',
'‼' => '!!', '‽' => '?!', '⁄' => '/', '⁇' => '?/', '⁈' => '?!',
'⁉' => '!?',
// Letterlike Symbols
'℠' => 'SM', '™' => 'TM',
// Number Forms
'⅓' => '1/3', '⅔' => '2/3', '⅕' => '1/5', '⅖' => '2/5', '⅗' => '3/5',
'⅘' => '4/5', '⅙' => '1/6', '⅚' => '5/6', '⅛' => '1/8', '⅜' => '3/8',
'⅝' => '5/8', '⅞' => '7/8', 'Ⅰ' => 'I', 'Ⅱ' => 'II', 'Ⅲ' => 'III',
'Ⅳ' => 'IV', 'Ⅴ' => 'V', 'Ⅵ' => 'Vi', 'Ⅶ' => 'VII', 'Ⅷ' => 'VIII',
'Ⅸ' => 'IX', 'Ⅹ' => 'X', 'Ⅺ' => 'XI', 'Ⅻ' => 'XII', 'Ⅼ' => 'L',
'Ⅽ' => 'C', 'Ⅾ' => 'D', 'Ⅿ' => 'M', 'ⅰ' => 'i', 'ⅱ' => 'ii',
'ⅲ' => 'iii', 'ⅳ' => 'iv', 'ⅴ' => 'v', 'ⅵ' => 'vi', 'ⅶ' => 'vii',
'ⅷ' => 'viii','ⅸ' => 'ix', 'ⅹ' => 'x', 'ⅺ' => 'xi', 'ⅻ' => 'xii',
'ⅼ' => 'l', 'ⅽ' => 'c', 'ⅾ' => 'd', 'ⅿ' => 'm'
);
/**
* Whether the [http://php.net/mbstring mbstring] extension is available
*
* Starts out as `NULL`, meaning the check has not been performed yet.
* ::checkMbString() stores the boolean result of the check here and
* ::reset() sets the value back to `NULL`.
*
* @var boolean|NULL
*/
static private $mbstring_available = NULL;
/**
* Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
*
* Any characters or symbols that can not be translated will be removed.
*
* This function is most useful for situations that only allow ASCII, such
* as in URLs.
*
* Translates elements from the following unicode blocks:
*
* - Latin-1 Supplement
* - Latin Extended-A
* - Latin Extended-B
* - IPA Extensions
* - Latin Extended Additional
* - General Punctuation
* - Letterlike symbols
* - Number Forms
*
* @internal
*
* @param string $string The string to convert
* @return string The input string in pure ASCII
*/
static public function ascii($string)
{
    // Only strings that contain at least one non-ASCII byte need any work
    $has_non_ascii = self::detect($string);
    if ($has_non_ascii) {
        // Swap every mapped character for its ASCII stand-in first, then
        // drop whatever bytes are still outside the 7-bit range
        $translated = strtr($string, self::$utf8_to_ascii);
        return preg_replace('#[^\x00-\x7F]#', '', $translated);
    }
    return $string;
}
/**
* Checks to see if the [http://php.net/mbstring mbstring] extension is available
*
* @return void
*/
static private function checkMbString()
{
    // Store the result in the static flag that the other methods test
    // against NULL before deciding which implementation to use
    $is_loaded = extension_loaded('mbstring');
    self::$mbstring_available = $is_loaded;
}
/**
* Converts a unicode value into a UTF-8 character
*
* @param mixed $unicode_code_point The character to create, either the `U+hex` or decimal code point
* @return string The UTF-8 character
*/
static public function chr($unicode_code_point)
{
    // Accept the `U+hex` notation by converting it to a decimal code point
    if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
        $unicode_code_point = hexdec(substr($unicode_code_point, 2));
    }

    // Unicode scalar values run from U+0000 to U+10FFFF, and the UTF-16
    // surrogate range U+D800-U+DFFF must never be encoded as UTF-8
    $is_valid = is_numeric($unicode_code_point)
        && $unicode_code_point >= 0
        && $unicode_code_point <= 0x10FFFF
        && ($unicode_code_point < 0xD800 || $unicode_code_point > 0xDFFF);

    if (!$is_valid) {
        throw new fProgrammerException(
            'The code point specified, %s, is invalid.',
            $unicode_code_point
        );
    }

    $code_point = (int) $unicode_code_point;

    // One byte characters: 0xxxxxxx
    if ($code_point < 0x80) {
        return chr($code_point);
    }

    // Two byte characters: 110xxxxx 10xxxxxx
    if ($code_point < 0x800) {
        return chr(0xC0 | ($code_point >> 6)) .
            chr(0x80 | ($code_point & 0x3F));
    }

    // Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code_point < 0x10000) {
        return chr(0xE0 | ($code_point >> 12)) .
            chr(0x80 | (($code_point >> 6) & 0x3F)) .
            chr(0x80 | ($code_point & 0x3F));
    }

    // Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code_point >> 18)) .
        chr(0x80 | (($code_point >> 12) & 0x3F)) .
        chr(0x80 | (($code_point >> 6) & 0x3F)) .
        chr(0x80 | ($code_point & 0x3F));
}
/**
* Removes any invalid UTF-8 characters from a string or array of strings
*
* @param array|string $value The string or array of strings to clean
* @return array|string The cleaned string or array of strings
*/
static public function clean($value)
{
    // Arrays are cleaned element by element, recursing into nested arrays
    if (is_array($value)) {
        foreach ($value as $key => $element) {
            $value[$key] = self::clean($element);
        }
        return $value;
    }

    // Only probe for mbstring once, the same way the other methods do
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available) {
        // With the substitute character set to `none`, converting UTF-8 to
        // UTF-8 drops every invalid sequence instead of replacing it
        $old_sub = ini_get('mbstring.substitute_character');
        ini_set('mbstring.substitute_character', 'none');
        // The cast mirrors the iconv branch below so both branches accept
        // the same non-string scalars and NULL
        $value = mb_convert_encoding((string) $value, 'UTF-8', 'UTF-8');
        ini_set('mbstring.substitute_character', $old_sub);
        return $value;
    }

    // Some iconv implementations do not support the //IGNORE suffix
    if (self::$can_ignore_invalid === NULL) {
        self::$can_ignore_invalid = !in_array(strtolower(ICONV_IMPL), array('unknown', 'ibm iconv'));
    }

    // iconv raises a notice for invalid input, so capture it rather than
    // letting it surface
    fCore::startErrorCapture(E_NOTICE);
    $value = self::iconv('UTF-8', 'UTF-8' . (self::$can_ignore_invalid ? '//IGNORE' : ''), (string) $value);
    fCore::stopErrorCapture();

    return $value;
}
/**
* Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
*
* Please note that this function sorts based on English language sorting
* rules only. Locale-specific sorting is done by
* [http://php.net/strcoll strcoll()], however there are technical
* limitations.
*
* @param string $str1 The first string to compare
* @param string $str2 The second string to compare
* @return integer < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
*/
static public function cmp($str1, $str2)
{
    // The primary ordering is decided by the ASCII transliterations
    $result = strcmp(
        strtr($str1, self::$utf8_to_ascii),
        strtr($str2, self::$utf8_to_ascii)
    );
    if ($result !== 0) {
        return $result;
    }

    // The transliterations tie, so the raw UTF-8 strings break the tie
    return strcmp($str1, $str2);
}
/**
* Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
*
* @param string $string The string to base the offset on
* @param integer $offset The character offset to convert to bytes
* @return integer The converted offset
*/
static private function convertOffsetToBytes($string, $offset)
{
    if ($offset == 0) {
        return 0;
    }

    $len = strlen($string);

    if ($offset > 0) {
        // Walk forward and stop at the first byte of the character that
        // follows the requested number of characters. Stopping there, and
        // not on the lead byte of the last counted character, keeps the
        // continuation bytes of a multi-byte character inside the offset.
        $characters = 0;
        for ($i=0; $i<$len; $i++) {
            // Bytes of the form 10xxxxxx continue the previous character
            if ((ord($string[$i]) & 0xC0) == 0x80) {
                continue;
            }
            if ($characters >= $offset) {
                return $i;
            }
            ++$characters;
        }
        // The string has no more characters than requested
        return $len;
    }

    // Negative offsets are measured from the end: walk backwards and stop on
    // the lead byte of the last character that has to be counted
    $remaining = abs($offset);
    for ($i=$len-1; $i>=0; $i--) {
        if ((ord($string[$i]) & 0xC0) == 0x80) {
            continue;
        }
        --$remaining;
        if ($remaining <= 0) {
            return $i - $len;
        }
    }

    // The string has no more characters than requested
    return -$len;
}
/**
* Detects if a UTF-8 string contains any non-ASCII characters
*
* @param string $string The string to check
* @return boolean If the string contains any non-ASCII characters
*/
static private function detect($string)
{
    // A single byte above 0x7F is enough to know the string is not pure ASCII
    $found = preg_match('#[^\x00-\x7F]#', $string);
    return (boolean) $found;
}
/**
* Explodes a string on a delimiter
*
* If no delimiter is provided, the string will be exploded with each
* characters being an element in the array.
*
* @param string $string The string to explode
* @param string $delimiter The string to explode on. If `NULL` or `''` this method will return one character per array index.
* @return array The exploded string
*/
static public function explode($string, $delimiter=NULL)
{
    // A numeric delimiter such as `0` or `'0'` is falsy, but it is still a
    // real delimiter and must not be mistaken for "no delimiter"
    $has_delimiter = $delimiter || is_numeric($delimiter);

    if (!$has_delimiter) {
        // No delimiter: every UTF-8 character becomes its own element. The
        // `^\z` alternative produces one empty element for an empty string.
        preg_match_all('#.|^\z#us', $string, $characters);
        return $characters[0];
    }

    return explode($delimiter, $string);
}
/**
* This works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv()
* does not seem to properly assign the return value to a variable, but
* does work when returning the value.
*
* @param string $in_charset The incoming character encoding
* @param string $out_charset The outgoing character encoding
* @param string $string The string to convert
* @return string The converted string
*/
static private function iconv($in_charset, $out_charset, $string)
{
// The result is returned directly and deliberately never stored in a local
// variable first: per the method description, assigning the return value of
// iconv() is what misbehaves on the affected builds.
return iconv($in_charset, $out_charset, $string);
}
/**
* Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
*
* Please note that this function sorts based on English language sorting
* rules only. Locale-specific sorting is done by
* [http://php.net/strcoll strcoll()], however there are technical
* limitations.
*
* @param string $str1 The first string to compare
* @param string $str2 The second string to compare
* @return integer < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
*/
static public function icmp($str1, $str2)
{
    // Fold both sides to lowercase and let ::cmp() do the actual ordering
    return self::cmp(self::lower($str1), self::lower($str2));
}
/**
* Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
*
* Please note that this function sorts based on English language sorting
* rules only. Locale-specific sorting is done by
* [http://php.net/strcoll strcoll()], however there are technical
* limitations.
*
* @param string $str1 The first string to compare
* @param string $str2 The second string to compare
* @return integer `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
*/
static public function inatcmp($str1, $str2)
{
    // Fold both sides to lowercase and let ::natcmp() do the actual ordering
    return self::natcmp(self::lower($str1), self::lower($str2));
}
/**
* Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
*
* @param string $haystack The string to search in
* @param string $needle The string to search for. This match will be done in a case-insensitive manner.
* @param integer $offset The character position to start searching from
* @return mixed The integer character position of the first occurrence of the needle or `FALSE` if no match
*/
static public function ipos($haystack, $needle, $offset=0)
{
    // For an ASCII haystack the native function is used for performance
    if (!self::detect($haystack)) {
        return stripos($haystack, $needle, $offset);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    $use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
    if (!$use_mbstring) {
        // Without mb_stripos() both strings are lowercased and the
        // case-sensitive search does the rest
        return self::pos(self::lower($haystack), self::lower($needle), $offset);
    }

    return mb_stripos($haystack, $needle, $offset, 'UTF-8');
}
/**
* Replaces matching parts of the string, with matches being done in a case-insensitive manner
*
* If `$search` and `$replace` are both arrays and `$replace` is shorter,
* the extra `$search` string will be replaced with an empty string. If
* `$search` is an array and `$replace` is a string, all `$search` values
* will be replaced with the string specified.
*
* @param string $string The string to perform the replacements on
* @param mixed $search The string (or array of strings) to search for - see method description for details
* @param mixed $replace The string (or array of strings) to replace with - see method description for details
* @return string The input string with the specified replacements
*/
static public function ireplace($string, $search, $replace)
{
    // Every search string becomes a literal, case-insensitive UTF-8 pattern
    if (is_array($search)) {
        foreach ($search as $key => $needle) {
            $search[$key] = '#' . preg_quote($needle, '#') . '#ui';
        }
    } else {
        $search = '#' . preg_quote($search, '#') . '#ui';
    }

    // Backslashes and dollar signs are escaped so preg_replace() inserts the
    // replacement text verbatim instead of reading back-references from it
    $escapes = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
        // strtr() only accepts strings, so an array of replacements has to
        // be escaped one element at a time
        foreach ($replace as $key => $replacement) {
            $replace[$key] = strtr($replacement, $escapes);
        }
    } else {
        $replace = strtr($replace, $escapes);
    }

    return preg_replace($search, $replace, $string);
}
/**
* Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
*
* @param string $haystack The string to search in
* @param string $needle The string to search for. This match will be done in a case-insensitive manner.
* @param integer $offset The character position to start searching from. A negative value will stop looking that many characters from the end of the string
* @return mixed The integer character position of the last occurrence of the needle or `FALSE` if no match
*/
static public function irpos($haystack, $needle, $offset=0)
{
    // For an ASCII haystack the native function is used for performance
    if (!self::detect($haystack)) {
        return strripos($haystack, $needle, $offset);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    $use_mbstring = self::$mbstring_available && function_exists('mb_strripos');
    if (!$use_mbstring) {
        // Without mb_strripos() both strings are lowercased and the
        // case-sensitive reverse search does the rest
        return self::rpos(self::lower($haystack), self::lower($needle), $offset);
    }

    return mb_strripos($haystack, $needle, $offset, 'UTF-8');
}
/**
* Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
*
* Can optionally return the part of the haystack before the needle. Matching
* is done in a case-insensitive manner.
*
* @param string $haystack The string to search in
* @param string $needle The string to search for. This match will be done in a case-insensitive manner.
* @param boolean $before_needle If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
* @return mixed The specified part of the haystack, or `FALSE` if the needle was not found
*/
static public function istr($haystack, $needle, $before_needle=FALSE)
{
    // We get better performance falling back for ASCII strings
    if ($before_needle == FALSE && !self::detect($haystack)) {
        return stristr($haystack, $needle);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available && function_exists('mb_stristr')) {
        return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
    }

    // Search in lowercased copies, then cut the original haystack at the
    // byte position that was found
    $lower_haystack = self::lower($haystack);
    $lower_needle   = self::lower($needle);

    $pos = strpos($lower_haystack, $lower_needle);

    // Without this check a miss would fall through to substr() with FALSE
    // as the position and return text instead of the documented FALSE
    if ($pos === FALSE) {
        return FALSE;
    }

    if ($before_needle) {
        return substr($haystack, 0, $pos);
    }

    return substr($haystack, $pos);
}
/**
* Determines the length (in characters) of a string
*
* @param string $string The string to measure
* @return integer The number of characters in the string
*/
static public function len($string)
{
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available) {
        return mb_strlen($string, 'UTF-8');
    }

    // Every UTF-8 character has exactly one byte that is not a continuation
    // byte (10xxxxxx), so removing the continuation bytes leaves one byte
    // per character. This replaces utf8_decode(), deprecated as of PHP 8.2.
    return strlen(preg_replace('#[\x80-\xBF]#', '', $string));
}
/**
* Converts all uppercase characters to lowercase
*
* @param string $string The string to convert
* @return string The input string with all uppercase characters in lowercase
*/
static public function lower($string)
{
    // For ASCII strings the native function is used for performance
    if (!self::detect($string)) {
        return strtolower($string);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (!self::$mbstring_available) {
        // No mbstring: rely entirely on the class' own translation table
        return strtr($string, self::$upper_to_lower);
    }

    // mb_strtolower() leaves some characters untouched, so a second pass
    // with the fix-up table converts the ones it missed
    $lowered = mb_strtolower($string, 'utf-8');
    return strtr($lowered, self::$mb_upper_to_lower_fix);
}
/**
* Trims whitespace, or any specified characters, from the beginning of a string
*
* @param string $string The string to trim
* @param string $charlist The characters to trim
* @return string The trimmed string
*/
static public function ltrim($string, $charlist=NULL)
{
    // The cast keeps a NULL charlist from being passed to strlen()
    if ((string) $charlist === '') {
        return ltrim($string);
    }

    $search = preg_quote($charlist, '#');

    // A literal `-` has to be escaped inside a character class. Current PHP
    // versions already do that in preg_quote(), and escaping it a second
    // time would produce an escaped backslash followed by a bare `-` that
    // forms a range, so the backslash is only added when it is missing.
    if (preg_quote('-') === '-') {
        $search = str_replace('-', '\-', $search);
    }

    // `..` in the charlist denotes a range, which is `-` in a character class
    $search = str_replace('\.\.', '-', $search);

    return preg_replace('#^[' . $search . ']+#Du', '', $string);
}
/**
* Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
*
* Please note that this function sorts based on English language sorting
* rules only. Locale-specific sorting is done by
* [http://php.net/strcoll strcoll()], however there are technical
* limitations.
*
* @param string $str1 The first string to compare
* @param string $str2 The second string to compare
* @return integer `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
*/
static public function natcmp($str1, $str2)
{
    // The primary ordering is decided by the ASCII transliterations
    $result = strnatcmp(
        strtr($str1, self::$utf8_to_ascii),
        strtr($str2, self::$utf8_to_ascii)
    );
    if ($result !== 0) {
        return $result;
    }

    // The transliterations tie, so the raw UTF-8 strings break the tie
    return strnatcmp($str1, $str2);
}
/**
* Converts a UTF-8 character to a unicode code point
*
* @param string $character The character to decode
* @return string The U+hex unicode code point for the character
*/
static public function ord($character)
{
    $length = strlen($character);
    $b      = ($length > 0) ? array_map('ord', str_split($character)) : array();

    // Stays NULL unless the bytes form exactly one well-formed character
    $code_point = NULL;

    switch ($length) {
        // 0xxxxxxx
        case 1:
            if ($b[0] <= 0x7F) {
                $code_point = $b[0];
            }
            break;

        // 110xxxxx 10xxxxxx - lead bytes 0xC0 and 0xC1 would be overlong
        case 2:
            if ($b[0] >= 0xC2 && $b[0] <= 0xDF &&
                ($b[1] & 0xC0) == 0x80) {
                $code_point = (($b[0] & 0x1F) << 6) | ($b[1] & 0x3F);
            }
            break;

        // 1110xxxx 10xxxxxx 10xxxxxx
        case 3:
            if ($b[0] >= 0xE0 && $b[0] <= 0xEF &&
                ($b[1] & 0xC0) == 0x80 &&
                ($b[2] & 0xC0) == 0x80) {
                $decoded = (($b[0] & 0x0F) << 12) | (($b[1] & 0x3F) << 6) | ($b[2] & 0x3F);
                // Reject overlong forms (below U+0800) and UTF-16 surrogates
                if ($decoded >= 0x800 && ($decoded < 0xD800 || $decoded > 0xDFFF)) {
                    $code_point = $decoded;
                }
            }
            break;

        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        case 4:
            if ($b[0] >= 0xF0 && $b[0] <= 0xF4 &&
                ($b[1] & 0xC0) == 0x80 &&
                ($b[2] & 0xC0) == 0x80 &&
                ($b[3] & 0xC0) == 0x80) {
                $decoded = (($b[0] & 0x07) << 18) | (($b[1] & 0x3F) << 12) | (($b[2] & 0x3F) << 6) | ($b[3] & 0x3F);
                // Reject overlong forms and anything past the last code point
                if ($decoded >= 0x10000 && $decoded <= 0x10FFFF) {
                    $code_point = $decoded;
                }
            }
            break;
    }

    if ($code_point === NULL) {
        throw new fProgrammerException(
            'The UTF-8 character specified is invalid'
        );
    }

    $hex = strtoupper(dechex($code_point));
    return 'U+' . str_pad($hex, 4, '0', STR_PAD_LEFT);
}
/**
* Pads a string to the number of characters specified
*
* @param string $string The string to pad
* @param integer $pad_length The character length to pad the string to
* @param string $pad_string The string to pad the source string with
* @param string $pad_type The type of padding to do: `'left'`, `'right'`, `'both'`
* @return string The input string padded to the specified character width
*/
static public function pad($string, $pad_length, $pad_string=' ', $pad_type='right')
{
    $valid_pad_types = array('right', 'left', 'both');
    if (!in_array($pad_type, $valid_pad_types)) {
        throw new fProgrammerException(
            'The pad type specified, %1$s, is not valid. Must be one of: %2$s.',
            $pad_type,
            join(', ', $valid_pad_types)
        );
    }

    // We get better performance falling back for ASCII strings
    if (!self::detect($string) && !self::detect($pad_string)) {
        static $type_map = array(
            'left'  => STR_PAD_LEFT,
            'right' => STR_PAD_RIGHT,
            'both'  => STR_PAD_BOTH
        );
        return str_pad($string, $pad_length, $pad_string, $type_map[$pad_type]);
    }

    // Number of characters that still have to be added
    $pad_to_length = $pad_length - self::len($string);
    if ($pad_to_length < 1) {
        return $string;
    }

    // An empty pad string can never fill the gap
    $pad_string_length = self::len($pad_string);
    if ($pad_string_length < 1) {
        throw new fProgrammerException(
            'The pad string specified is empty'
        );
    }

    // Split the padding between the sides the way str_pad() does, so both
    // code paths agree: with `both` the left side gets the smaller half
    $left_length  = 0;
    $right_length = 0;
    if ($pad_type == 'left') {
        $left_length = $pad_to_length;
    } elseif ($pad_type == 'right') {
        $right_length = $pad_to_length;
    } else {
        $left_length  = (int) floor($pad_to_length / 2);
        $right_length = $pad_to_length - $left_length;
    }

    // Each side repeats the pad string from its beginning and is cut back to
    // the exact character count when the pad string does not divide evenly
    $sides = array();
    foreach (array($left_length, $right_length) as $side_length) {
        $side = '';
        if ($side_length > 0) {
            $side = str_repeat($pad_string, (int) ceil($side_length / $pad_string_length));
            if ($side_length % $pad_string_length) {
                $side = self::sub($side, 0, $side_length);
            }
        }
        $sides[] = $side;
    }

    return $sides[0] . $string . $sides[1];
}
/**
* Finds the first position (in characters) of the search value in the string
*
* @param string $haystack The string to search in
* @param string $needle The string to search for
* @param integer $offset The character position to start searching from
* @return mixed The integer character position of the first occurrence of the needle or `FALSE` if no match
*/
static public function pos($haystack, $needle, $offset=0)
{
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available) {
        return mb_strpos($haystack, $needle, $offset, 'UTF-8');
    }

    // The built-in search works on bytes, so the character offset is
    // converted first and the byte position converted back afterwards
    $byte_offset = self::convertOffsetToBytes($haystack, $offset);
    $position    = strpos($haystack, $needle, $byte_offset);

    if ($position === FALSE) {
        return FALSE;
    }

    // The character position equals the number of non-continuation bytes in
    // front of the match. Counting them replaces utf8_decode(), which is
    // deprecated as of PHP 8.2.
    return strlen(preg_replace('#[\x80-\xBF]#', '', substr($haystack, 0, $position)));
}
/**
* Replaces matching parts of the string
*
* If `$search` and `$replace` are both arrays and `$replace` is shorter,
* the extra `$search` string will be replaced with an empty string. If
* `$search` is an array and `$replace` is a string, all `$search` values
* will be replaced with the string specified.
*
* @param string $string The string to perform the replacements on
* @param mixed $search The string (or array of strings) to search for - see method description for details
* @param mixed $replace The string (or array of strings) to replace with - see method description for details
* @return string The input string with the specified replacements
*/
static public function replace($string, $search, $replace)
{
    // Delegates to str_replace(), which takes the subject as its last
    // argument rather than its first
    $result = str_replace($search, $replace, $string);
    return $result;
}
/**
* Resets the configuration of the class
*
* @internal
*
* @return void
*/
static public function reset()
{
    // Back to "not checked yet": the next method that needs the flag will
    // find NULL and call ::checkMbString() again
    self::$mbstring_available = NULL;
}
/**
* Reverses a string
*
* @param string $string The string to reverse
* @return string The reversed string
*/
static public function rev($string)
{
    $len        = strlen($string);
    $characters = array();

    $i = 0;
    while ($i < $len) {
        $byte = ord($string[$i]);

        // The lead byte tells how many bytes make up the character
        if ($byte < 0x80) {
            $char_len = 1;
        } elseif ($byte >= 0xF0) {
            $char_len = 4;
        } elseif ($byte >= 0xE0) {
            $char_len = 3;
        } elseif ($byte >= 0xC0) {
            $char_len = 2;
        } else {
            // A continuation byte without a lead byte is not part of any
            // character and is left out of the result
            ++$i;
            continue;
        }

        // substr() stops at the end of the string, so a sequence that is cut
        // short is copied as far as it goes without reading invalid offsets
        $characters[] = substr($string, $i, $char_len);
        $i += $char_len;
    }

    // Collecting the characters and reversing once avoids rebuilding the
    // output string for every character
    return join('', array_reverse($characters));
}
/**
* Finds the last position (in characters) of the search value in the string
*
* @param string $haystack The string to search in
* @param string $needle The string to search for.
* @param integer $offset The character position to start searching from. A negative value will stop looking that many characters from the end of the string
* @return mixed The integer character position of the last occurrence of the needle or `FALSE` if no match
*/
static public function rpos($haystack, $needle, $offset=0)
{
    // We get better performance falling back for ASCII strings
    if (!self::detect($haystack)) {
        return strrpos($haystack, $needle, $offset);
    }

    // We don't even bother trying mb_strrpos() since this method is faster
    $byte_offset = self::convertOffsetToBytes($haystack, $offset);
    $position    = strrpos($haystack, $needle, $byte_offset);

    if ($position === FALSE) {
        return FALSE;
    }

    // The character position equals the number of non-continuation bytes in
    // front of the match. Counting them replaces utf8_decode(), which is
    // deprecated as of PHP 8.2.
    return strlen(preg_replace('#[\x80-\xBF]#', '', substr($haystack, 0, $position)));
}
/**
* Trims whitespace, or any specified characters, from the end of a string
*
* @param string $string The string to trim
* @param string $charlist The characters to trim
* @return string The trimmed string
*/
static public function rtrim($string, $charlist=NULL)
{
    // The cast keeps a NULL charlist from being passed to strlen()
    if ((string) $charlist === '') {
        return rtrim($string);
    }

    $search = preg_quote($charlist, '#');

    // A literal `-` has to be escaped inside a character class. Current PHP
    // versions already do that in preg_quote(), and escaping it a second
    // time would produce an escaped backslash followed by a bare `-` that
    // forms a range, so the backslash is only added when it is missing.
    if (preg_quote('-') === '-') {
        $search = str_replace('-', '\-', $search);
    }

    // `..` in the charlist denotes a range, which is `-` in a character class
    $search = str_replace('\.\.', '-', $search);

    return preg_replace('#[' . $search . ']+$#Du', '', $string);
}
/**
* Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
*
* Can optionally return the part of the haystack before the needle.
*
* @param string $haystack The string to search in
* @param string $needle The string to search for
* @param boolean $before_needle If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
* @return mixed The specified part of the haystack, or `FALSE` if the needle was not found
*/
static public function str($haystack, $needle, $before_needle=FALSE)
{
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    $use_mbstring = self::$mbstring_available && function_exists('mb_strstr');
    if ($use_mbstring) {
        return mb_strstr($haystack, $needle, $before_needle, 'UTF-8');
    }

    // Byte-based fallback: find the needle, then cut the haystack before or
    // at the position that was found
    $byte_position = strpos($haystack, $needle);
    if ($byte_position === FALSE) {
        return FALSE;
    }

    return $before_needle
        ? substr($haystack, 0, $byte_position)
        : substr($haystack, $byte_position);
}
/**
* Extracts part of a string
*
* @param string $string The string to extract from
* @param integer $start The zero-based starting index to extract from. Negative values will start the extraction that many characters from the end of the string.
* @param integer $length The length of the string to extract. If an empty value is provided, the remainder of the string will be returned.
* @return mixed The extracted substring or `FALSE` if the start is out of bounds
*/
static public function sub($string, $start, $length=NULL)
{
    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (self::$mbstring_available) {
        $str_len = mb_strlen($string, 'UTF-8');
        if (abs($start) > $str_len) {
            return FALSE;
        }
        // An omitted length means "everything up to the end of the string"
        if ($length === NULL) {
            $length = ($start >= 0) ? $str_len-$start : abs($start);
        }
        return mb_substr($string, $start, $length, 'UTF-8');
    }

    // Without mbstring the character count is the number of bytes that are
    // not continuation bytes (10xxxxxx); for ASCII that is simply strlen().
    // This replaces utf8_decode(), which is deprecated as of PHP 8.2.
    $is_ascii = !self::detect($string);
    $str_len  = $is_ascii
        ? strlen($string)
        : strlen(preg_replace('#[\x80-\xBF]#', '', $string));

    // Both fallbacks apply the same bounds rule as the mbstring branch
    if (abs($start) > $str_len) {
        return FALSE;
    }

    // Starting exactly at the end leaves nothing to extract. Without this
    // guard the second-half optimization below would turn the start into 0
    // and return the whole string.
    if ($start == $str_len) {
        return '';
    }

    // We get better performance falling back for ASCII strings
    if ($is_ascii) {
        if ($length === NULL) {
            return substr($string, $start);
        }
        return substr($string, $start, $length);
    }

    // This is the slowest version

    // Optimize looking by changing to negative start positions if the
    // start is in the second half of the string
    if ($start > $str_len/2) {
        $start = $start-$str_len;
    }

    // Substrings to the end of the string are pretty simple
    $start  = self::convertOffsetToBytes($string, $start);
    $string = substr($string, $start);

    if ($length === NULL) {
        return $string;
    }

    // The length is a character count too, measured on the remainder
    $length = self::convertOffsetToBytes($string, $length);
    return substr($string, 0, $length);
}
/**
* Trims whitespace, or any specified characters, from the beginning and end of a string
*
* @param string $string The string to trim
* @param string $charlist The characters to trim, .. indicates a range
* @return string The trimmed string
*/
static public function trim($string, $charlist=NULL)
{
    // The cast keeps a NULL charlist from being passed to strlen()
    if ((string) $charlist === '') {
        return trim($string);
    }

    $search = preg_quote($charlist, '#');

    // A literal `-` has to be escaped inside a character class. Current PHP
    // versions already do that in preg_quote(), and escaping it a second
    // time would produce an escaped backslash followed by a bare `-` that
    // forms a range, so the backslash is only added when it is missing.
    if (preg_quote('-') === '-') {
        $search = str_replace('-', '\-', $search);
    }

    // `..` in the charlist denotes a range, which is `-` in a character class
    $search = str_replace('\.\.', '-', $search);

    return preg_replace('#^[' . $search . ']+|[' . $search . ']+$#Du', '', $string);
}
/**
* Converts the first character of the string to uppercase.
*
* @param string $string The string to process
* @return string The processed string
*/
static public function ucfirst($string)
{
    // Split off the first character, uppercase it, and glue the untouched
    // remainder back on
    $first_character = self::sub($string, 0, 1);
    $remainder       = self::sub($string, 1);
    return self::upper($first_character) . $remainder;
}
/**
* Converts the first character of every word to uppercase
*
* Words are considered to start at the beginning of the string, or after any
* whitespace character.
*
* @param string $string The string to process
* @return string The processed string
*/
static public function ucwords($string)
{
    // The look-behind lists everything that may precede the first letter of
    // a word: the start of the string, whitespace, the U+2000-U+200A spaces,
    // several punctuation characters and opening quotes. The single
    // character captured after it is handed to the callback.
    return preg_replace_callback(
        '#(?<=^|\s|[\x{2000}-\x{200A}]|/|-|\(|\[|\{|\||"|^\'|\s\'|‘|“)(.)#u',
        // __CLASS__ names the same class as 'self' did, without relying on
        // the array('self', ...) callable form deprecated in PHP 8.2
        array(__CLASS__, 'ucwordsCallback'),
        $string
    );
}
/**
* Handles converting a character to uppercase for ::ucwords()
*
* @param array $match The regex match from ::ucwords()
* @return string The uppercase character
*/
static private function ucwordsCallback($match)
{
    // Index 1 is the first capture group of the match handed over by
    // preg_replace_callback()
    $first_letter = $match[1];
    return self::upper($first_letter);
}
/**
* Converts all lowercase characters to uppercase
*
* @param string $string The string to convert
* @return string The input string with all lowercase characters in uppercase
*/
static public function upper($string)
{
    // For ASCII strings the native function is used for performance
    if (!self::detect($string)) {
        return strtoupper($string);
    }

    if (self::$mbstring_available === NULL) {
        self::checkMbString();
    }

    if (!self::$mbstring_available) {
        // No mbstring: rely entirely on the class' own translation table
        return strtr($string, self::$lower_to_upper);
    }

    // mb_strtoupper() leaves some characters untouched, so a second pass
    // with the fix-up table converts the ones it missed
    $uppered = mb_strtoupper($string, 'utf-8');
    return strtr($uppered, self::$mb_lower_to_upper_fix);
}
/**
* Wraps a string to a specific character width
*
* @param string $string The string to wrap
* @param integer $width The character width to wrap to
* @param string $break The string to insert as a break
* @param boolean $cut If words longer than the character width should be split to fit
* @return string The input string wrapped to the specified character width
*/
static public function wordwrap($string, $width=75, $break="\n", $cut=FALSE)
{
    // We get better performance falling back for ASCII strings
    if (!self::detect($string)) {
        return wordwrap($string, $width, $break, $cut);
    }

    // Cutting words down to a width below one character can never finish
    if ($cut && $width < 1) {
        throw new fProgrammerException(
            'The width specified, %s, must be at least 1 when words are being cut',
            $width
        );
    }

    // Split right after every whitespace character so each word keeps its
    // trailing whitespace. The pattern must not carry the `e` modifier:
    // preg_split() rejects it on current PHP versions and returns FALSE.
    $words = preg_split('#(?<=\s|[\x{2000}-\x{200A}])#u', $string);
    if ($words === FALSE) {
        // The string could not be split (e.g. invalid UTF-8), so it is
        // treated as one single word
        $words = array($string);
    }

    $output   = '';
    $line_len = 0;

    foreach ($words as $word) {
        $word_len = self::len($word);

        // Shorten up words that are too long
        while ($cut && $word_len > $width) {
            // Only break when the current line already has content, so the
            // output never starts with a break
            if ($line_len) {
                $output .= $break;
            }
            $output  .= self::sub($word, 0, $width);
            $line_len = $width;
            $word     = self::sub($word, $width);
            $word_len = self::len($word);
        }

        // Start a new line when the word no longer fits on the current one
        if ($line_len && $line_len + $word_len > $width) {
            $output  .= $break;
            $line_len = 0;
        }

        $output   .= $word;
        $line_len += $word_len;
    }

    return $output;
}
/**
* Forces use as a static class
*
* @return fUTF8
*/
private function __construct()
{
    // Intentionally empty: being private, the constructor cannot be called
    // from outside the class
}
}
/**
* Copyright (c) 2008-2012 Will Bond <will@flourishlib.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/