Permalink
Browse files

Merge pull request #227 from jbergler/master

[DDC-2160] Smart Pluralize/Singularize support for Doctrine/Common/Util/Inflector
  • Loading branch information...
2 parents bc6bc47 + 4de779a commit 28b0e7fde8007d5eba23c8ca3fe741d2757cf70d @beberlei beberlei committed Jan 10, 2013
Showing with 505 additions and 2 deletions.
  1. +319 −2 lib/Doctrine/Common/Util/Inflector.php
  2. +186 −0 tests/Doctrine/Tests/Common/Util/InflectorTest.php
@@ -26,7 +26,9 @@
*
* The methods in these classes are from several different sources collected
* across several different php projects and several different authors. The
- * original author names and emails are not known
+ * original author names and emails are not known.
+ *
+ * The pluralize & singularize implementations are borrowed from CakePHP with some modifications.
*
* @license http://www.opensource.org/licenses/lgpl-license.php LGPL
* @link www.doctrine-project.org
@@ -38,6 +40,161 @@
class Inflector
{
/**
+ * Plural inflector rules
+ *
+ * @var array
+ */
+    private static $plural = array(
+        // Ordered regex => replacement pairs; pluralize() applies the first pattern that matches.
+        // Replacements only reference capture groups that exist in their pattern.
+        'rules' => array(
+            '/(s)tatus$/i' => '\1tatuses',
+            '/(quiz)$/i' => '\1zes',
+            '/^(ox)$/i' => '\1en',
+            // Character class lists the letters only; a '|' inside [] is a literal pipe.
+            '/([ml])ouse$/i' => '\1ice',
+            '/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
+            '/(x|ch|ss|sh)$/i' => '\1es',
+            '/([^aeiouy]|qu)y$/i' => '\1ies',
+            '/(hive)$/i' => '\1s',
+            // Only one of the two groups participates in a match, the other expands to ''.
+            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
+            '/sis$/i' => 'ses',
+            '/([ti])um$/i' => '\1a',
+            '/(p)erson$/i' => '\1eople',
+            '/(m)an$/i' => '\1en',
+            '/(c)hild$/i' => '\1hildren',
+            '/(buffal|tomat)o$/i' => '\1oes',
+            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
+            '/us$/i' => 'uses',
+            '/(alias)$/i' => '\1es',
+            '/(ax|cris|test)is$/i' => '\1es',
+            '/s$/' => 's',
+            '/^$/' => '',
+            // Catch-all: append "s".
+            '/$/' => 's',
+        ),
+        // Regex fragments for words returned unchanged; merged with self::$uninflected in pluralize().
+        'uninflected' => array(
+            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', 'people', 'cookie'
+        ),
+        // singular => plural; singularize() also uses the flipped map.
+        'irregular' => array(
+            'atlas' => 'atlases',
+            'beef' => 'beefs',
+            'brother' => 'brothers',
+            'cafe' => 'cafes',
+            'child' => 'children',
+            'cookie' => 'cookies',
+            'corpus' => 'corpuses',
+            'cow' => 'cows',
+            'ganglion' => 'ganglions',
+            'genie' => 'genies',
+            'genus' => 'genera',
+            'graffito' => 'graffiti',
+            'hoof' => 'hoofs',
+            'loaf' => 'loaves',
+            'man' => 'men',
+            'money' => 'monies',
+            'mongoose' => 'mongooses',
+            'move' => 'moves',
+            'mythos' => 'mythoi',
+            'niche' => 'niches',
+            'numen' => 'numina',
+            'occiput' => 'occiputs',
+            'octopus' => 'octopuses',
+            'opus' => 'opuses',
+            'ox' => 'oxen',
+            'penis' => 'penises',
+            'person' => 'people',
+            'sex' => 'sexes',
+            'soliloquy' => 'soliloquies',
+            'testis' => 'testes',
+            'trilby' => 'trilbys',
+            'turf' => 'turfs'
+        )
+    );
+
+ /**
+ * Singular inflector rules
+ *
+ * @var array
+ */
+    private static $singular = array(
+        // Ordered regex => replacement pairs; singularize() applies the first pattern that matches.
+        // Replacements only reference capture groups that exist in their pattern.
+        'rules' => array(
+            '/(s)tatuses$/i' => '\1tatus',
+            '/^(.*)(menu)s$/i' => '\1\2',
+            '/(quiz)zes$/i' => '\1',
+            '/(matr)ices$/i' => '\1ix',
+            '/(vert|ind)ices$/i' => '\1ex',
+            // Anchored at the end so only the word "oxen" itself is rewritten.
+            '/^(ox)en$/i' => '\1',
+            '/(alias)(es)*$/i' => '\1',
+            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
+            // Anchored at the end so "...axes" is only stripped as a suffix.
+            '/([ftw]ax)es$/i' => '\1',
+            '/(cris|ax|test)es$/i' => '\1is',
+            '/(shoe|slave)s$/i' => '\1',
+            '/(o)es$/i' => '\1',
+            '/ouses$/' => 'ouse',
+            '/([^a])uses$/' => '\1us',
+            // Character class lists the letters only; a '|' inside [] is a literal pipe.
+            '/([ml])ice$/i' => '\1ouse',
+            '/(x|ch|ss|sh)es$/i' => '\1',
+            '/(m)ovies$/i' => '\1ovie',
+            '/(s)eries$/i' => '\1eries',
+            '/([^aeiouy]|qu)ies$/i' => '\1y',
+            '/([lr])ves$/i' => '\1f',
+            '/(tive)s$/i' => '\1',
+            '/(hive)s$/i' => '\1',
+            '/(drive)s$/i' => '\1',
+            '/([^fo])ves$/i' => '\1fe',
+            '/(^analy)ses$/i' => '\1sis',
+            // Single capture group: nested groups plus '\1\2' turned "parentheses" into "parenthepsis".
+            '/(analy|diagno|^ba|parenthe|progno|synop|the)ses$/i' => '\1sis',
+            '/([ti])a$/i' => '\1um',
+            '/(p)eople$/i' => '\1erson',
+            '/(m)en$/i' => '\1an',
+            '/(c)hildren$/i' => '\1hild',
+            '/(n)ews$/i' => '\1ews',
+            '/eaus$/' => 'eau',
+            '/^(.*us)$/' => '\1',
+            // Catch-all: strip a trailing "s".
+            '/s$/i' => ''
+        ),
+        // Regex fragments for words returned unchanged; merged with self::$uninflected in singularize().
+        'uninflected' => array(
+            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', '.*ss'
+        ),
+        // plural => singular; singularize() merges this with the flipped plural irregular map.
+        'irregular' => array(
+            'foes' => 'foe',
+            'waves' => 'wave',
+            'curves' => 'curve'
+        )
+    );
+
+ /**
+ * Words that should not be inflected
+ *
+ * @var array
+ */
+ private static $uninflected = array(
+ // Entries are regex fragments, not plain strings (see '.*?media' and 'sea[- ]bass'):
+ // pluralize() and singularize() join this list with '|' into one case-insensitive
+ // pattern that must match the whole word.
+ 'Amoyese', 'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus',
+ 'carp', 'chassis', 'clippers', 'cod', 'coitus', 'Congoese', 'contretemps', 'corps',
+ 'debris', 'diabetes', 'djinn', 'eland', 'elk', 'equipment', 'Faroese', 'flounder',
+ 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
+ 'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings',
+ 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese', 'mackerel', 'Maltese', '.*?media',
+ 'mews', 'moose', 'mumps', 'Nankingese', 'news', 'nexus', 'Niasese',
+ 'Pekingese', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese',
+ 'proceedings', 'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors',
+ 'sea[- ]bass', 'series', 'Shavese', 'shears', 'siemens', 'species', 'swine', 'testes',
+ 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese', 'whiting', 'wildebeest',
+ 'Yengeese'
+ );
+
+ /**
+ * Method cache array.
+ *
+ * @var array
+ */
+ private static $cache = array(); // keyed by method name ('pluralize', 'singularize', ...), then by input word
+
+ /**
+ * The initial state of Inflector so reset() works.
+ *
+ * @var array
+ */
+ private static $initialState = array(); // empty until the first reset() call stores the get_class_vars() result here
+
+ /**
* Convert word in to the format for a Doctrine table name. Converts 'ModelName' to 'model_name'
*
* @param string $word Word to tableize
@@ -69,4 +226,164 @@ public static function camelize($word)
{
return lcfirst(self::classify($word));
}
-}
+
+ /**
+ * Clears the Inflector's cached inflected values and resets the inflection
+ * rules to their initial values.
+ *
+ * @return void
+ */
+    public static function reset()
+    {
+        if (empty(self::$initialState)) {
+            // First call: record the class properties as reported by get_class_vars()
+            // and return without touching anything else. __CLASS__ is used instead of
+            // the bare string 'Inflector' so the lookup resolves to this class even
+            // when it is declared inside a namespace.
+            self::$initialState = get_class_vars(__CLASS__);
+            return;
+        }
+
+        // Later calls: copy every recorded property back, except the snapshot itself.
+        foreach (self::$initialState as $key => $val) {
+            if ($key !== 'initialState') {
+                self::${$key} = $val;
+            }
+        }
+    }
+
+ /**
+ * Adds custom inflection $rules, of either 'plural' or 'singular' $type.
+ *
+ * ### Usage:
+ *
+ * {{{
+ * Inflector::rules('plural', array('/^(inflect)or$/i' => '\1ables'));
+ * Inflector::rules('plural', array(
+ * 'rules' => array('/^(inflect)ors$/i' => '\1ables'),
+ * 'uninflected' => array('dontinflectme'),
+ * 'irregular' => array('red' => 'redlings')
+ * ));
+ * }}}
+ *
+ * @param string $type The type of inflection, either 'plural' or 'singular'
+ * @param array $rules Array of rules to be added.
+ * @param boolean $reset If true, will unset default inflections for all
+ * new rules that are being defined in $rules.
+ * @return void
+ */
+    public static function rules($type, $rules, $reset = false)
+    {
+        // Flat "pattern => replacement" entries, kept in their original order; they are
+        // prepended to the 'rules' list of $type once the nested entries are processed.
+        $flatRules = array();
+
+        foreach ($rules as $rule => $pattern) {
+            if (!is_array($pattern)) {
+                $flatRules[$rule] = $pattern;
+                continue;
+            }
+
+            // Nested entry: $rule names a section ('rules', 'uninflected', 'irregular').
+            if ($reset) {
+                // Replace the section instead of extending it.
+                self::${$type}[$rule] = $pattern;
+            } elseif ($rule === 'uninflected') {
+                // Plain list: new entries go in front of the existing ones.
+                self::${$type}[$rule] = array_merge($pattern, self::${$type}[$rule]);
+            } else {
+                // Keyed map: new entries win over existing ones with the same key.
+                self::${$type}[$rule] = $pattern + self::${$type}[$rule];
+            }
+
+            // Drop the derived regex and merged table so they are rebuilt on next use.
+            unset(self::${$type}['cache' . ucfirst($rule)]);
+            if (isset(self::${$type}['merged'][$rule])) {
+                unset(self::${$type}['merged'][$rule]);
+            }
+        }
+
+        self::${$type}['rules'] = $flatRules + self::${$type}['rules'];
+
+        // Invalidate cached results for every kind of change. Flat rules alter the
+        // outcome of pluralize()/singularize() just like nested sections do, so the
+        // cache must not keep answers computed with the old rule set.
+        if ($type === 'plural') {
+            self::$cache['pluralize'] = self::$cache['tableize'] = array();
+        } elseif ($type === 'singular') {
+            self::$cache['singularize'] = array();
+        }
+    }
+
+ /**
+ * Return $word in plural form.
+ *
+ * @param string $word Word in singular
+ * @return string Word in plural
+ */
+    public static function pluralize($word)
+    {
+        if (isset(self::$cache['pluralize'][$word])) {
+            return self::$cache['pluralize'][$word];
+        }
+
+        // The merged tables and the regex fragments derived from them are built lazily;
+        // rules() unsets them whenever the corresponding section changes.
+        if (!isset(self::$plural['merged']['irregular'])) {
+            self::$plural['merged']['irregular'] = self::$plural['irregular'];
+        }
+
+        if (!isset(self::$plural['merged']['uninflected'])) {
+            self::$plural['merged']['uninflected'] = array_merge(self::$plural['uninflected'], self::$uninflected);
+        }
+
+        if (!isset(self::$plural['cacheUninflected']) || !isset(self::$plural['cacheIrregular'])) {
+            self::$plural['cacheUninflected'] = '(?:' . implode('|', self::$plural['merged']['uninflected']) . ')';
+            self::$plural['cacheIrregular'] = '(?:' . implode('|', array_keys(self::$plural['merged']['irregular'])) . ')';
+        }
+
+        // Irregular words: $regs[1] is whatever precedes the irregular word, $regs[2] the
+        // irregular word as written in the input.
+        if (preg_match('/(.*)\\b(' . self::$plural['cacheIrregular'] . ')$/i', $word, $regs)) {
+            $irregularPlural = self::$plural['merged']['irregular'][strtolower($regs[2])];
+            // Keep the input's casing for the first letter of the irregular word. The letter
+            // must come from $regs[2], not from $word: with a prefix ("fire man") the first
+            // letter of $word belongs to the prefix and would yield "fire fen".
+            self::$cache['pluralize'][$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregularPlural, 1);
+            return self::$cache['pluralize'][$word];
+        }
+
+        // Uninflected words are returned as they are.
+        if (preg_match('/^(' . self::$plural['cacheUninflected'] . ')$/i', $word, $regs)) {
+            self::$cache['pluralize'][$word] = $word;
+            return $word;
+        }
+
+        // Regular rules: the first matching pattern decides.
+        foreach (self::$plural['rules'] as $rule => $replacement) {
+            if (preg_match($rule, $word)) {
+                self::$cache['pluralize'][$word] = preg_replace($rule, $replacement, $word);
+                return self::$cache['pluralize'][$word];
+            }
+        }
+
+        // No rule matched (possible once the default rules were replaced through rules()):
+        // return the word unchanged, as singularize() does, instead of an implicit null.
+        self::$cache['pluralize'][$word] = $word;
+        return $word;
+    }
+
+ /**
+ * Return $word in singular form.
+ *
+ * @param string $word Word in plural
+ * @return string Word in singular
+ */
+    public static function singularize($word)
+    {
+        if (isset(self::$cache['singularize'][$word])) {
+            return self::$cache['singularize'][$word];
+        }
+
+        // The merged tables and the regex fragments derived from them are built lazily;
+        // rules() unsets them whenever the corresponding section changes.
+        if (!isset(self::$singular['merged']['uninflected'])) {
+            self::$singular['merged']['uninflected'] = array_merge(
+                self::$singular['uninflected'],
+                self::$uninflected
+            );
+        }
+
+        if (!isset(self::$singular['merged']['irregular'])) {
+            // The plural irregular map (singular => plural) is flipped to serve as
+            // plural => singular; its entries win over equal keys of the singular map.
+            self::$singular['merged']['irregular'] = array_merge(
+                self::$singular['irregular'],
+                array_flip(self::$plural['irregular'])
+            );
+        }
+
+        if (!isset(self::$singular['cacheUninflected']) || !isset(self::$singular['cacheIrregular'])) {
+            self::$singular['cacheUninflected'] = '(?:' . implode('|', self::$singular['merged']['uninflected']) . ')';
+            self::$singular['cacheIrregular'] = '(?:' . implode('|', array_keys(self::$singular['merged']['irregular'])) . ')';
+        }
+
+        // Irregular words: $regs[1] is whatever precedes the irregular word, $regs[2] the
+        // irregular word as written in the input.
+        if (preg_match('/(.*)\\b(' . self::$singular['cacheIrregular'] . ')$/i', $word, $regs)) {
+            $irregularSingular = self::$singular['merged']['irregular'][strtolower($regs[2])];
+            // Keep the input's casing for the first letter of the irregular word. The letter
+            // must come from $regs[2], not from $word: with a prefix ("fire men") the first
+            // letter of $word belongs to the prefix and would yield "fire fan".
+            self::$cache['singularize'][$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregularSingular, 1);
+            return self::$cache['singularize'][$word];
+        }
+
+        // Uninflected words are returned as they are.
+        if (preg_match('/^(' . self::$singular['cacheUninflected'] . ')$/i', $word, $regs)) {
+            self::$cache['singularize'][$word] = $word;
+            return $word;
+        }
+
+        // Regular rules: the first matching pattern decides.
+        foreach (self::$singular['rules'] as $rule => $replacement) {
+            if (preg_match($rule, $word)) {
+                self::$cache['singularize'][$word] = preg_replace($rule, $replacement, $word);
+                return self::$cache['singularize'][$word];
+            }
+        }
+
+        // Nothing matched: the word is returned unchanged.
+        self::$cache['singularize'][$word] = $word;
+        return $word;
+    }
+}
Oops, something went wrong.

0 comments on commit 28b0e7f

Please sign in to comment.