Skip to content
Permalink
Browse files
Fix #2606, fix #3121 — use Babel for locales
Signed-off-by: Chris Warrick <kwpolska@gmail.com>
  • Loading branch information
Kwpolska committed Jul 28, 2018
1 parent b7632e5 commit 08f34a6352327eda97353e2170dc3c60526fa8e3
@@ -10,6 +10,9 @@ Features
find a file (i.e. when a 404 error occurs).
* Better error messages for JSON download failures in ``nikola
plugin`` and ``nikola theme`` (Issue getnikola/plugins#282)
* Use Babel instead of the locale module to better handle
localizations (Issues #2606, #3121)
* Change ``DATE_FORMAT`` formats to CLDR formats (Issue #2606)

Bugfixes
--------
@@ -176,11 +176,12 @@ TIMEZONE = ${TIMEZONE}
# FORCE_ISO8601 = False

# Date format used to display post dates. (translatable)
# (str used by datetime.datetime.strftime)
# DATE_FORMAT = '%Y-%m-%d %H:%M'
# Used by babel.dates, CLDR style: http://cldr.unicode.org/translation/date-time
# You can also use 'full', 'long', 'medium', or 'short'
# DATE_FORMAT = 'YYYY-MM-dd HH:mm'

# Date format used to display post dates, if local dates are used. (translatable)
# (str used by moment.js)
# Used by moment.js: https://momentjs.com/docs/#/displaying/format/
# JS_DATE_FORMAT = 'YYYY-MM-DD HH:mm'

# Date fanciness.
@@ -192,20 +193,9 @@ TIMEZONE = ${TIMEZONE}
# Your theme must support it, Bootstrap already does.
# DATE_FANCINESS = 0

# While Nikola can select a sensible locale for each language,
# sometimes explicit control can come in handy.
# In this file we express locales in the string form that
# Python's locale module will accept on your OS, for example
# "en_US.utf8" on Unix-like systems, "English_United States" on Windows.
# LOCALES = dict mapping language --> explicit locale for the languages
# in TRANSLATIONS. You can omit one or more keys.
# LOCALE_FALLBACK = locale to use when an explicit locale is unavailable
# LOCALE_DEFAULT = locale to use for languages not mentioned in LOCALES; if
# not set the default Nikola mapping is used.

# Customize the locale/region used for a language.
# For example, to use British instead of US English: LOCALES = {'en': 'en_GB'}
# LOCALES = {}
# LOCALE_FALLBACK = None
# LOCALE_DEFAULT = None

# One or more folders containing files to be copied as-is into the output.
# The format is a dictionary of {source: relative destination}.
@@ -24,8 +24,8 @@
"Posts about %s": "Posts about %s",
"Posts by %s": "Posts by %s",
"Posts for year %s": "Posts for year %s",
"Posts for {month} {day}, {year}": "Posts for {month} {day}, {year}",
"Posts for {month} {year}": "Posts for {month} {year}",
"Posts for {month_day_year}": "Posts for {month_day_year}",
"Posts for {month_year}": "Posts for {month_year}",
"Previous post": "Previous post",
"Previous": "Previous",
"Publication date": "Publication date",
@@ -45,6 +45,4 @@
"Write your post here.": "Write your post here.",
"old posts, page %d": "old posts, page %d",
"page %d": "page %d",
"{month} {day}, {year}": "{month} {day}, {year}",
"{month} {year}": "{month} {year}",
}
@@ -6,7 +6,7 @@
"(active)": "(aktywne)",
"Also available in:": "Również dostępny w językach:",
"Archive": "Archiwum",
"Atom feed": "",
"Atom feed": "Kanał Atom",
"Authors": "Autorzy",
"Categories": "Kategorie",
"Comments": "Komentarze",
@@ -24,8 +24,8 @@
"Posts about %s": "Posty o %s",
"Posts by %s": "Posty autora %s",
"Posts for year %s": "Posty z roku %s",
"Posts for {month} {day}, {year}": "Posty z {day} {month} {year}",
"Posts for {month} {year}": "Posty z {month} {year}",
"Posts for {month_day_year}": "Posty z {month_day_year}",
"Posts for {month_year}": "Posty z {month_year:MMMM yyyy}",
"Previous post": "Poprzedni post",
"Previous": "Poprzednie",
"Publication date": "Data publikacji",
@@ -45,6 +45,4 @@
"Write your post here.": "Tu wpisz treść postu.",
"old posts, page %d": "stare posty, strona %d",
"page %d": "strona %d",
"{month} {day}, {year}": "{day} {month} {year}",
"{month} {year}": "{month} {year}",
}
@@ -32,7 +32,6 @@
from pkg_resources import resource_filename
import datetime
import functools
import locale
import operator
import os
import json
@@ -155,56 +154,6 @@
'zh_cn': 'Chinese (Simplified)',
'zh_tw': 'Chinese (Traditional)'
},
'_WINDOWS_LOCALE_GUESSES': {
# TODO incomplete
# some languages may need that the appropriate Microsoft Language Pack be installed.
"ar": "Arabic",
"az": "Azeri (Latin)",
"bg": "Bulgarian",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
# "eo": "Esperanto", # Not available
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian", # Persian
"fi": "Finnish",
"fr": "French",
"gl": "Galician",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
"ja": "Japanese",
"ko": "Korean",
"nb": "Norwegian", # Not Bokmål, as Windows doesn't find it for unknown reasons.
"nl": "Dutch",
"pa": "Punjabi",
"pl": "Polish",
"pt": "Portuguese_Portugal",
"pt_br": "Portuguese_Brazil",
"ru": "Russian",
"sk": "Slovak",
"sl": "Slovenian",
"sq": "Albanian",
"sr": "Serbian",
"sr_latin": "Serbian (Latin)",
"sv": "Swedish",
"te": "Telugu",
"th": "Thai",
"tr": "Turkish",
"uk": "Ukrainian",
"ur": "Urdu",
"zh_cn": "Chinese_China", # Chinese (Simplified)
"zh_tw": "Chinese_Taiwan", # Chinese (Traditional)
},
'_TRANSLATIONS_WITH_COUNTRY_SPECIFIERS': {
# This dict is used in `init` in case of locales that exist with a
# country specifier. If there is no other locale that has the same
@@ -460,7 +409,7 @@ def __init__(self, **config):
'CREATE_SINGLE_ARCHIVE': False,
'CREATE_FULL_ARCHIVES': False,
'CREATE_DAILY_ARCHIVE': False,
'DATE_FORMAT': '%Y-%m-%d %H:%M',
'DATE_FORMAT': 'YYYY-MM-dd HH:mm',
'DISABLE_INDEXES': False,
'DISABLE_MAIN_ATOM_FEED': False,
'DISABLE_MAIN_RSS_FEED': False,
@@ -749,6 +698,18 @@ def __init__(self, **config):
utils.LOGGER.warn('The DISABLE_INDEXES_PLUGIN_RSS_FEED setting was renamed to DISABLE_MAIN_RSS_FEED.')
self.config['DISABLE_MAIN_RSS_FEED'] = self.config['DISABLE_INDEXES_PLUGIN_RSS_FEED']

for val in self.config['DATE_FORMAT'].values.values():
if '%' in val:
utils.LOGGER.error('The DATE_FORMAT setting needs to be upgraded.')
utils.LOGGER.notice("Nikola now uses CLDR-style date strings. http://cldr.unicode.org/translation/date-time")
utils.LOGGER.notice("Example: %Y-%m-%d %H:%M ==> YYYY-MM-dd HH:mm")
utils.LOGGER.notice("(note it’s different to what moment.js uses!)")
sys.exit(1)

# Silently upgrade LOCALES (remove encoding)
for k, v in self.config['LOCALES'].items():
self.config['LOCALES'][k] = v.split('.')[0]

if self.config.get('POSTS_SECTIONS'):
utils.LOGGER.warn("The sections feature has been removed and its functionality has been merged into categories.")
utils.LOGGER.warn("For more information on how to migrate, please read: https://getnikola.com/blog/upgrading-to-nikola-v8.html#sections-were-replaced-by-categories")
@@ -843,11 +804,7 @@ def __init__(self, **config):
self.default_lang = self.config['DEFAULT_LANG']
self.translations = self.config['TRANSLATIONS']

locale_fallback, locale_default, locales = sanitized_locales(
self.config.get('LOCALE_FALLBACK', None),
self.config.get('LOCALE_DEFAULT', None),
self.config.get('LOCALES', {}), self.translations)
utils.LocaleBorg.initialize(locales, self.default_lang)
utils.LocaleBorg.initialize(self.config.get('LOCALES', {}), self.default_lang)

# BASE_URL defaults to SITE_URL
if 'BASE_URL' not in self.config:
@@ -1232,6 +1189,7 @@ def _set_all_page_deps_from_config(self):
self.ALL_PAGE_DEPS['atom_filename_base'] = self.config.get('ATOM_FILENAME_BASE')
self.ALL_PAGE_DEPS['slug_author_path'] = self.config.get('SLUG_AUTHOR_PATH')
self.ALL_PAGE_DEPS['slug_tag_path'] = self.config.get('SLUG_TAG_PATH')
self.ALL_PAGE_DEPS['locale'] = self.config.get('LOCALE')

def _activate_plugins_of_category(self, category):
"""Activate all the plugins of a given category and return them."""
@@ -2682,168 +2640,3 @@ def generic_atom_renderer(self, lang, posts, context_source, kw, basename, class
def __repr__(self):
"""Representation of a Nikola site."""
return '<Nikola Site: {0!r}>'.format(self.config['BLOG_TITLE'](self.config['DEFAULT_LANG']))


def sanitized_locales(locale_fallback, locale_default, locales, translations):
    """Sanitize all locales available in Nikola.

    The result holds exactly one locale for each language in
    ``translations``:

    * entries of ``locales`` for languages missing from ``translations``
      are dropped (with a warning);
    * explicit locales rejected by ``is_valid_locale`` are replaced with the
      sanitized ``locale_fallback``;
    * languages without an explicit locale get the sanitized
      ``locale_default`` when one was given, and otherwise a per-platform
      guess (falling back to ``locale_fallback`` when guessing fails).

    Input locales must be in the string style (like 'en', 'en.utf8'); every
    value is passed through ``str()`` before being probed.

    NOTE: ``locales`` is modified in place and is also the third element of
    the returned tuple.

    NOTE: never use locale.getlocale(), it can return values that
    locale.setlocale will not accept in Windows.
    Examples: "Spanish", "French" can't do the full circle set / get / set

    Returns a ``(locale_fallback, locale_default, locales)`` tuple.
    """
    on_windows = sys.platform == 'win32'
    if not on_windows:
        workaround_empty_LC_ALL_posix()

    # Entries for languages that are not being translated are discarded.
    unexpected = set(locales) - set(translations)
    if unexpected:
        utils.LOGGER.warn(
            'Unexpected languages in LOCALES, ignoring them: {0}'.format(', '.join(unexpected)))
        for code in unexpected:
            del locales[code]

    # The locale module only accepts the native string type, so coerce
    # everything before any probing happens.
    locale_fallback = str(locale_fallback) if locale_fallback else None
    locale_default = str(locale_default) if locale_default else None
    for code, value in list(locales.items()):
        locales[code] = str(value)

    locale_fallback = valid_locale_fallback(locale_fallback)

    # Explicit locales the platform rejects are swapped for the fallback.
    for code, value in list(locales.items()):
        if is_valid_locale(value):
            continue
        utils.LOGGER.warn(
            'Locale {0} for language {1} not accepted by python locale.'.format(value, code))
        locales[code] = locale_fallback

    # Languages that still have no locale assigned.
    unassigned = set(translations) - set(locales)

    if locale_default:
        # An explicit default wins, provided the platform accepts it.
        if not is_valid_locale(locale_default):
            utils.LOGGER.warn(
                'LOCALE_DEFAULT {0} could not be set, using {1}'.format(locale_default, locale_fallback))
            locale_default = locale_fallback
        for code in unassigned:
            locales[code] = locale_default
        return locale_fallback, locale_default, locales

    # No explicit default: use guesses compatible with v 6.0.4 in Linux-Mac
    # (was broken in Windows).
    if on_windows:
        guess_for = guess_locale_from_lang_windows
    else:
        guess_for = guess_locale_from_lang_posix
    for code in unassigned:
        guessed = guess_for(code)
        if guessed:
            locales[code] = guessed
            continue
        utils.LOGGER.warn(
            "Could not guess locale for language {0}, using locale {1}".format(code, locale_fallback))
        utils.LOGGER.warn("Please fix your OS locale configuration or use the LOCALES option in conf.py to specify your preferred locale.")
        if not on_windows:
            utils.LOGGER.warn("Make sure to use an UTF-8 locale to ensure Unicode support.")
        locales[code] = locale_fallback

    return locale_fallback, locale_default, locales


def is_valid_locale(locale_n):
    """Check if locale (type str) is valid.

    Validity is decided by asking ``locale.setlocale`` to apply ``locale_n``
    to ``LC_ALL``.  The process-wide locale in effect before the call is
    restored afterwards, so probing a candidate does not leak a changed
    locale into the rest of the program.

    Returns ``True`` when ``setlocale`` accepts ``locale_n`` and ``False``
    when it raises ``locale.Error``.
    """
    # setlocale() without a second argument only queries; the string it
    # returns is the form setlocale() accepts back for restoring.
    previous = locale.setlocale(locale.LC_ALL)
    try:
        locale.setlocale(locale.LC_ALL, locale_n)
    except locale.Error:
        return False
    else:
        return True
    finally:
        try:
            locale.setlocale(locale.LC_ALL, previous)
        except locale.Error:
            # Best effort: a failed restore must not mask the probe result.
            pass


def valid_locale_fallback(desired_locale=None):
    """Pick a fallback locale by probing candidates with ``is_valid_locale``.

    ``desired_locale``, when given, is tried first, followed by the
    hard-coded per-platform candidates.  The first accepted candidate is
    returned; a warning is logged if that is not ``desired_locale``.

    If no candidate is accepted, a warning is logged and the last candidate
    tried is returned anyway.
    """
    # Whenever fallbacks change, adjust test TestHarcodedFallbacksWork
    if sys.platform == 'win32':
        attempts = ['English', 'C']
    else:
        attempts = ['en_US.UTF-8', 'C']
    if desired_locale:
        attempts.insert(0, desired_locale)

    for chosen in attempts:
        if is_valid_locale(chosen):
            break
    else:
        # Nothing was accepted; ``chosen`` still holds the last candidate.
        utils.LOGGER.warn(
            'Could not find a valid fallback locale, tried: {0}'.format(attempts))
        return chosen

    if desired_locale and desired_locale != chosen:
        utils.LOGGER.warn(
            'Desired fallback locale {0} could not be set, using: {1}'.format(desired_locale, chosen))
    return chosen


def guess_locale_from_lang_windows(lang):
    """Guess a locale, basing on Windows language.

    Looks ``lang`` up in ``LEGAL_VALUES['_WINDOWS_LOCALE_GUESSES']`` and
    returns the mapped name when ``is_valid_locale`` accepts it.  Returns
    ``None`` when the language has no entry or the mapped name is rejected.
    """
    guess = LEGAL_VALUES['_WINDOWS_LOCALE_GUESSES'].get(lang)
    if guess is None:
        # No entry: return early instead of probing the literal string
        # 'None' (what str(None) yields) as if it were a locale name.
        return None
    guess = str(guess)
    return guess if is_valid_locale(guess) else None


def guess_locale_from_lang_posix(lang):
    """Guess a locale, basing on POSIX system language.

    Candidates are probed with ``is_valid_locale`` in this order and the
    first accepted one is returned:

    1. ``lang`` itself (compatibility with v6.0.4);
    2. the value built from ``locale.getdefaultlocale()``, used only when
       it starts with ``lang.lower()``;
    3. ``locale.normalize(lang)`` with its encoding part replaced by
       ``.UTF-8``;
    4. ``locale.normalize(lang)`` with its encoding part removed.

    Returns ``None`` when no candidate is accepted.
    """
    # compatibility v6.0.4
    as_given = str(lang)
    if is_valid_locale(as_given):
        return as_given

    # Guess using locale.getdefaultlocale()
    try:
        system_default = '.'.join(locale.getdefaultlocale())
    except (ValueError, TypeError):
        system_default = ''
    # Use guess only if it’s the same language
    if not system_default.startswith(lang.lower()):
        system_default = ''
    # An empty guess is never probed.
    if system_default and is_valid_locale(system_default):
        return system_default

    normalized = locale.normalize(lang).split('.')[0]

    # this works in Travis when locale support set by Travis suggestion
    with_utf8 = normalized + '.UTF-8'
    if is_valid_locale(with_utf8):
        return with_utf8

    # http://thread.gmane.org/gmane.comp.web.nikola/337/focus=343
    if is_valid_locale(normalized):
        return normalized

    return None


def workaround_empty_LC_ALL_posix():
    """Copy ``LC_TIME`` into ``LC_ALL`` in ``os.environ`` when ``LC_ALL`` is empty.

    Nothing is changed when ``LC_TIME`` is unset or empty, or when ``LC_ALL``
    already has a non-empty value.  Any exception raised along the way is
    swallowed: this is a best-effort tweak.
    """
    # clunky hack: we have seen some posix locales with all or most of LC_*
    # defined to the same value, but with LC_ALL empty.
    # Manually doing what we do here seems to work for nikola in that case.
    # It is unknown if it will work when the LC_* aren't homogeneous
    try:
        environ = os.environ
        time_locale = environ.get('LC_TIME')
        if time_locale and not environ.get('LC_ALL'):
            environ['LC_ALL'] = time_locale
    except Exception:
        pass
Loading

0 comments on commit 08f34a6

Please sign in to comment.