Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

unicode: Added unicode-aware slugify filter (in Python) and better non-ASCII
handling for the JavaScript slug creator in admin. Can never be perfect here,
but this is more tolerant in many cases. Fixed #4365. Thanks, Bill de hÓra,
Baptiste, orestis@orestis.gr, Ahmet and Jonas for contributions to this.


git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@5608 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 5664a678b29ab04cad425c15b2792f4519f43928 1 parent 216fae2
Malcolm Tredinnick authored July 04, 2007
95  django/contrib/admin/media/js/urlify.js
... ...
@@ -1,15 +1,110 @@
  1
// Transliteration tables used to "downcode" non-ASCII characters to an ASCII
// approximation before a slug is built. Each table maps one character to the
// string that replaces it.
//
// Note: no trailing commas after the last entry of a literal; older browsers
// reject them as syntax errors.

// Latin-1 letters with diacritics, ligatures, eth and thorn.
var LATIN_MAP = {
    'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE',
    'Ç': 'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I',
    'Î': 'I', 'Ï': 'I', 'Ð': 'D', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O',
    'Õ': 'O', 'Ö': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U',
    'Ý': 'Y', 'Þ': 'TH', 'ß': 'ss', 'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a',
    'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e',
    'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i',
    // Lowercase eth mirrors uppercase 'Ð' -> 'D' (it was mapped to 'o').
    'ð': 'd',
    'ñ': 'n', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ø': 'o',
    'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ý': 'y', 'þ': 'th', 'ÿ': 'y'
};

// Symbols that have a conventional ASCII spelling.
var LATIN_SYMBOLS_MAP = {
    '©': '(c)'
};

// Greek letters, with and without tonos/dialytika.
var GREEK_MAP = {
    'α': 'a', 'β': 'b', 'γ': 'g', 'δ': 'd', 'ε': 'e', 'ζ': 'z', 'η': 'h',
    'θ': '8', 'ι': 'i', 'κ': 'k', 'λ': 'l', 'μ': 'm', 'ν': 'n', 'ξ': '3',
    'ο': 'o', 'π': 'p', 'ρ': 'r', 'σ': 's', 'τ': 't', 'υ': 'y', 'φ': 'f',
    'χ': 'x', 'ψ': 'ps', 'ω': 'w', 'ά': 'a', 'έ': 'e', 'ί': 'i', 'ό': 'o',
    'ύ': 'y', 'ή': 'h', 'ώ': 'w', 'ς': 's', 'ϊ': 'i', 'ΰ': 'y', 'ϋ': 'y',
    'ΐ': 'i',
    'Α': 'A', 'Β': 'B', 'Γ': 'G', 'Δ': 'D', 'Ε': 'E', 'Ζ': 'Z', 'Η': 'H',
    'Θ': '8', 'Ι': 'I', 'Κ': 'K', 'Λ': 'L', 'Μ': 'M', 'Ν': 'N', 'Ξ': '3',
    'Ο': 'O', 'Π': 'P', 'Ρ': 'R', 'Σ': 'S', 'Τ': 'T', 'Υ': 'Y', 'Φ': 'F',
    'Χ': 'X', 'Ψ': 'PS', 'Ω': 'W', 'Ά': 'A', 'Έ': 'E', 'Ί': 'I', 'Ό': 'O',
    'Ύ': 'Y', 'Ή': 'H', 'Ώ': 'W', 'Ϊ': 'I', 'Ϋ': 'Y'
};

// Turkish letters. Some keys repeat LATIN_MAP entries with the same value.
var TURKISH_MAP = {
    'ş': 's', 'Ş': 'S', 'ı': 'i', 'İ': 'I', 'ç': 'c', 'Ç': 'C', 'ü': 'u',
    'Ü': 'U', 'ö': 'o', 'Ö': 'O', 'ğ': 'g', 'Ğ': 'G'
};

// TODO: add a RUSSIAN_MAP table and append it to ALL_DOWNCODE_MAPS.

// Every table, in the order they are merged; entries of a later table
// override an earlier table's entry for the same character.
var ALL_DOWNCODE_MAPS = [
    LATIN_MAP,
    LATIN_SYMBOLS_MAP,
    GREEK_MAP,
    TURKISH_MAP
];
  44
+
  45
/**
 * Holder for the lazily built transliteration state:
 *   Downcoder.map   - merged character -> replacement table
 *   Downcoder.chars - every mapped character, concatenated in merge order
 *   Downcoder.regex - global regex that splits a string into single mapped
 *                     characters and runs of unmapped characters
 */
var Downcoder = {};

/**
 * Builds Downcoder.map, Downcoder.chars and Downcoder.regex from
 * ALL_DOWNCODE_MAPS. Does nothing if the map has already been built.
 */
Downcoder.Initialize = function () {
    if (Downcoder.map) { // already made
        return;
    }
    var hasOwn = Object.prototype.hasOwnProperty;
    var merged = {};
    var chars = '';
    var charClass = '';
    // Index loop rather than for...in: for...in on an array also visits
    // enumerable properties that other scripts add to Array.prototype.
    for (var i = 0; i < ALL_DOWNCODE_MAPS.length; i++) {
        var lookup = ALL_DOWNCODE_MAPS[i];
        for (var c in lookup) {
            if (!hasOwn.call(lookup, c)) {
                continue; // skip anything inherited from Object.prototype
            }
            merged[c] = lookup[c]; // a later table overrides an earlier one
            chars += c;
            // Escape characters that are special inside [...] so that a key
            // such as ']' or '^' cannot corrupt the character class.
            charClass += c.replace(/[\\\]\[\^\-]/g, '\\$&');
        }
    }
    Downcoder.map = merged;
    Downcoder.chars = chars;
    Downcoder.regex = new RegExp('[' + charClass + ']|[^' + charClass + ']+', 'g');
};
  63
+
  64
/**
 * Replaces every character that has an entry in the Downcoder map with its
 * ASCII approximation and leaves all other text untouched.
 *
 * Declared as a function rather than assigned to an undeclared name, because
 * such an assignment throws a ReferenceError in strict mode.
 *
 * @param {string} slug  Text to transliterate. Other values are converted
 *                       with String(); null and undefined give ''.
 * @returns {string} The transliterated text.
 */
function downcode(slug) {
    if (slug == null) { // matches both null and undefined
        return '';
    }
    Downcoder.Initialize();
    var map = Downcoder.map;
    var hasOwn = Object.prototype.hasOwnProperty;
    // Downcoder.regex yields either one mapped character or a run of
    // unmapped characters; only pieces that are real keys get replaced.
    return String(slug).replace(Downcoder.regex, function (piece) {
        var mapped = hasOwn.call(map, piece) ? map[piece] : null;
        return mapped != null ? mapped : piece;
    });
}
  91
+
  92
+
1 93
 /**
  * Builds a URL slug from free text, e.g. "Petty theft" -> "petty-theft".
  *
  * @param {string} s          Text to convert.
  * @param {number} num_chars  Maximum length of the returned slug.
  * @returns {string} Lowercase slug, cut to at most num_chars characters.
  */
 function URLify(s, num_chars) {
     // words removed from the string before urlifying
     // (declared with var so they do not leak as implicit globals)
     var removelist = ["a", "an", "as", "at", "before", "but", "by", "for", "from",
                       "is", "in", "into", "like", "of", "off", "on", "onto", "per",
                       "since", "than", "the", "this", "that", "to", "up", "via",
                       "with"];
     var r = new RegExp('\\b(' + removelist.join('|') + ')\\b', 'gi');

     var slug = downcode(s);                // transliterate non-ASCII first
     slug = slug.replace(r, '');
     // if downcode doesn't hit, the char will be stripped here
     slug = slug.replace(/[^-\w\s]/g, '');  // remove unneeded chars
     slug = slug.replace(/^\s+|\s+$/g, ''); // trim leading/trailing spaces
     slug = slug.replace(/[-\s]+/g, '-');   // convert spaces to hyphens
     slug = slug.toLowerCase();             // convert to lowercase
     return slug.substring(0, num_chars);   // trim to first num_chars chars
 }
  110
+
11  django/template/defaultfilters.py
@@ -115,10 +115,13 @@ def make_list(value):
115 115
 make_list = stringfilter(make_list)
116 116
 
117 117
 def slugify(value):
118  
-    "Converts to lowercase, removes non-alpha chars and converts spaces to hyphens"
119  
-    # Don't compile patterns as unicode because \w then would mean any letter.
120  
-    # Slugify is effectively a conversion to ASCII.
121  
-    value = re.sub('[^\w\s-]', '', value).strip().lower()
  118
+    """
  119
+    Normalizes string, converts to lowercase, removes non-alpha chars and
  120
+    converts spaces to hyphens.
  121
+    """
  122
+    import unicodedata
  123
+    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
  124
+    value = unicode(re.sub('[^\w\s-]', '', value).strip().lower())
122 125
     return re.sub('[-\s]+', '-', value)
123 126
 slugify = stringfilter(slugify)
124 127
 
3  tests/regressiontests/defaultfilters/tests.py
@@ -67,6 +67,9 @@
67 67
 >>> slugify(' Jack & Jill like numbers 1,2,3 and 4 and silly characters ?%.$!/')
68 68
 u'jack-jill-like-numbers-123-and-4-and-silly-characters'
69 69
 
  70
+>>> slugify(u"Un \xe9l\xe9phant \xe0 l'or\xe9e du bois")
  71
+u'un-elephant-a-loree-du-bois'
  72
+
70 73
 >>> stringformat(1, u'03d')
71 74
 u'001'
72 75
 

0 notes on commit 5664a67

Please sign in to comment.
Something went wrong with that request. Please try again.