From 50f92addd3fb5faa3edce8492b7af15fffc03305 Mon Sep 17 00:00:00 2001
From: Chris Warrick
Date: Sat, 11 Jul 2015 12:07:02 +0200
Subject: [PATCH] Fix #1885 -- always return unicode in slugify

Signed-off-by: Chris Warrick
---
 CHANGES.txt           |  1 +
 nikola/utils.py       |  6 +++---
 tests/test_slugify.py | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_slugify.py

diff --git a/CHANGES.txt b/CHANGES.txt
index 1f1add10cf..b5e7d59d6a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,7 @@ New in master
 Features
 --------
 
+* Always return unicode in slugify (Issue #1885)
 * Remove logging handlers (Issue #1797)
 * Add ``-d``, ``--detach`` option to ``nikola serve`` (Issue #1871)
 * Use provided teaser format (``*_READ_MORE_LINK``) with custom teaser text
diff --git a/nikola/utils.py b/nikola/utils.py
index 8d2778bd38..71f8c4ab2e 100644
--- a/nikola/utils.py
+++ b/nikola/utils.py
@@ -728,9 +728,9 @@ def slugify(value, force=False):
     if USE_SLUGIFY or force:
         # This is the standard state of slugify, which actually does some work.
         # It is the preferred style, especially for Western languages.
-        value = unidecode(value)
-        value = str(_slugify_strip_re.sub('', value).strip().lower())
-        return _slugify_hyphenate_re.sub('-', value)
+        value = unicode_str(unidecode(value))
+        value = _slugify_strip_re.sub('', value).strip().lower()
+        return _slugify_hyphenate_re.sub('-', value)
     else:
         # This is the “disarmed” state of slugify, which lets the user
         # have any character they please (be it regular ASCII with spaces,
diff --git a/tests/test_slugify.py b/tests/test_slugify.py
new file mode 100644
index 0000000000..857c93a81f
--- /dev/null
+++ b/tests/test_slugify.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+import nikola.utils
+
+def test_ascii():
+    o = nikola.utils.slugify(u'abcdef')
+    assert o == u'abcdef'
+    assert isinstance(o, nikola.utils.unicode_str)
+
+def test_ascii_dash():
+    o = nikola.utils.slugify(u'abc-def')
+    assert o == u'abc-def'
+    assert isinstance(o, nikola.utils.unicode_str)
+
+def test_pl():
+    o = nikola.utils.slugify(u'ąbćdef')
+    assert o == u'abcdef'
+    assert isinstance(o, nikola.utils.unicode_str)
+
+def test_pl_dash():
+    o = nikola.utils.slugify(u'ąbć-def')
+    assert o == u'abc-def'
+    assert isinstance(o, nikola.utils.unicode_str)
+
+def test_disarmed():
+    nikola.utils.USE_SLUGIFY = False
+    o = nikola.utils.slugify(u'ąbć-def')
+    assert o == u'ąbć-def'
+    assert isinstance(o, nikola.utils.unicode_str)
+    nikola.utils.USE_SLUGIFY = True
+
+def test_disarmed_weird():
+    nikola.utils.USE_SLUGIFY = False
+    o = nikola.utils.slugify(u'ąbć-def "Hello World"?#H<e>l/l\\o:W\'o\rr*l\td|!\n')
+    assert o == u'ąbć-def -Hello World---H-e-l-l-o-W-o-r-l-d-!-'
+    assert isinstance(o, nikola.utils.unicode_str)
+    nikola.utils.USE_SLUGIFY = True