Skip to content

Commit

Permalink
Add global "locale" config option and make key-ordering rule locale-a…
Browse files Browse the repository at this point in the history
…ware

Support sorting by locale with strcoll(). Properly handle case and accents.
  • Loading branch information
wolfgangwalther committed Jul 15, 2020
1 parent a221898 commit 9e90c77
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 16 deletions.
19 changes: 19 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,22 @@ Here is a more complex example:
ignore: |
*.ignore-trailing-spaces.yaml
ascii-art/*
Setting the locale
------------------

It is possible to set the ``locale`` option globally. This is passed to Python's
`locale.setlocale
<https://docs.python.org/3/library/locale.html#locale.setlocale>`_,
so an empty string ``""`` will use the system default locale, while e.g.
``"en_US.UTF-8"`` will use that. If unset, the default is ``"C.UTF-8"``.

Currently this only affects the ``key-ordering`` rule. With the default locale,
keys are ordered by Unicode code point number; with other locales, case and
accents are sorted properly as well.

.. code-block:: yaml

 extends: default
 locale: en_US.UTF-8
42 changes: 38 additions & 4 deletions tests/rules/test_key_ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import locale

from tests.common import RuleTestCase


Expand Down Expand Up @@ -103,14 +105,46 @@ def test_accents(self):
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))
self.check('---\n'
'haïr: true\n'
'hais: true\n', conf,
problem=(3, 1))

def test_key_tokens_in_flow_sequences(self):
conf = 'key-ordering: enable'
self.check('---\n'
'[\n'
' key: value, mappings, in, flow: sequence\n'
']\n', conf)

def test_locale_case(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, 'C.UTF-8')
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
't-shirt: 1\n'
'T-shirt: 2\n'
't-shirts: 3\n'
'T-shirts: 4\n', conf)
self.check('---\n'
't-shirt: 1\n'
't-shirts: 2\n'
'T-shirt: 3\n'
'T-shirts: 4\n', conf,
problem=(4, 1))

def test_locale_accents(self):
self.addCleanup(locale.setlocale, locale.LC_ALL, 'C.UTF-8')
try:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
except locale.Error:
self.skipTest('locale en_US.UTF-8 not available')
conf = ('key-ordering: enable')
self.check('---\n'
'hair: true\n'
'haïr: true\n'
'hais: true\n'
'haïssable: true\n', conf)
self.check('---\n'
'hais: true\n'
'haïr: true\n', conf,
problem=(3, 1))
57 changes: 48 additions & 9 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ def setUpClass(cls):
# dos line endings yaml
'dos.yml': '---\r\n'
'dos: true',
# different key-ordering by locale
'c.yaml': '---\n'
'A: true\n'
'a: true',
'en.yaml': '---\n'
'a: true\n'
'A: true'
})

@classmethod
Expand All @@ -108,8 +115,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/directory.yaml/empty.yml'),
os.path.join(self.wd, 'sub/ok.yaml'),
Expand Down Expand Up @@ -146,6 +155,8 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
os.path.join(self.wd, 'sub/ok.yaml'),
os.path.join(self.wd, 'warn.yaml')]
Expand Down Expand Up @@ -175,8 +186,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
Expand All @@ -194,8 +207,10 @@ def test_find_files_recursively(self):
self.assertEqual(
sorted(cli.find_files_recursively([self.wd], conf)),
[os.path.join(self.wd, 'a.yaml'),
os.path.join(self.wd, 'c.yaml'),
os.path.join(self.wd, 'dos.yml'),
os.path.join(self.wd, 'empty.yml'),
os.path.join(self.wd, 'en.yaml'),
os.path.join(self.wd, 'no-yaml.json'),
os.path.join(self.wd, 'non-ascii/éçäγλνπ¥/utf-8'),
os.path.join(self.wd, 's/s/s/s/s/s/s/s/s/s/s/s/s/s/s/file.yaml'),
Expand Down Expand Up @@ -315,6 +330,39 @@ def test_run_with_user_yamllint_config_file_in_env(self):
cli.run((os.path.join(self.wd, 'a.yaml'), ))
self.assertEqual(ctx.returncode, 1)

def test_run_with_locale(self):
    # Run the CLI on c.yaml and en.yaml, with and without an explicit
    # ``locale: en_US.UTF-8`` setting, and compare the exit codes.
    #
    # Remember the locale that is active right now and restore exactly that
    # one afterwards. Restoring a hard-coded 'C.UTF-8' would raise
    # locale.Error on systems that do not ship that locale and would
    # overwrite whatever the test process was using before.
    previous_locale = locale.setlocale(locale.LC_ALL)
    self.addCleanup(locale.setlocale, locale.LC_ALL, previous_locale)
    # This call only probes that en_US.UTF-8 can be selected at all.
    try:
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    except locale.Error:
        self.skipTest('locale en_US.UTF-8 not available')

    default_conf = 'rules: { key-ordering: enable }'
    en_us_conf = ('locale: en_US.UTF-8\n'
                  'rules: { key-ordering: enable }')

    # (configuration, file name, expected exit code), run in this order.
    cases = (
        (default_conf, 'en.yaml', 1),  # C + en.yaml should fail
        (en_us_conf, 'en.yaml', 0),    # en_US + en.yaml should pass
        (en_us_conf, 'c.yaml', 1),     # en_US + c.yaml should fail
        (default_conf, 'c.yaml', 0),   # C + c.yaml should pass
    )
    for config, filename, expected in cases:
        with RunContext(self) as ctx:
            cli.run(('-d', config, os.path.join(self.wd, filename)))
        self.assertEqual(ctx.returncode, expected)

def test_run_version(self):
with RunContext(self) as ctx:
cli.run(('--version', ))
Expand Down Expand Up @@ -373,15 +421,6 @@ def test_run_empty_file(self):
def test_run_non_ascii_file(self):
path = os.path.join(self.wd, 'non-ascii', 'éçäγλνπ¥', 'utf-8')

# Make sure the default localization conditions on this "system"
# support UTF-8 encoding.
loc = locale.getlocale()
try:
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
except locale.Error:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
self.addCleanup(locale.setlocale, locale.LC_ALL, loc)

with RunContext(self) as ctx:
cli.run(('-f', 'parsable', path))
self.assertEqual((ctx.returncode, ctx.stdout, ctx.stderr), (0, '', ''))
Expand Down
3 changes: 3 additions & 0 deletions yamllint/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import argparse
import io
import locale
import os
import platform
import sys
Expand Down Expand Up @@ -175,6 +176,8 @@ def run(argv=None):
print(e, file=sys.stderr)
sys.exit(-1)

locale.setlocale(locale.LC_ALL, conf.locale)

max_level = 0

for file in find_files_recursively(args.files, conf):
Expand Down
8 changes: 8 additions & 0 deletions yamllint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __init__(self, content=None, file=None):
self.yaml_files = pathspec.PathSpec.from_lines(
'gitwildmatch', ['*.yaml', '*.yml', '.yamllint'])

self.locale = 'C.UTF-8'

if file is not None:
with open(file) as f:
content = f.read()
Expand Down Expand Up @@ -111,6 +113,12 @@ def parse(self, raw_content):
self.yaml_files = pathspec.PathSpec.from_lines('gitwildmatch',
conf['yaml-files'])

if 'locale' in conf:
if not isinstance(conf['locale'], str):
raise YamlLintConfigError(
'invalid config: locale should be a string')
self.locale = conf['locale']

def validate(self):
for id in self.rules:
try:
Expand Down
25 changes: 22 additions & 3 deletions yamllint/rules/key_ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

"""
Use this rule to enforce alphabetical ordering of keys in mappings. The sorting
order uses the Unicode code point number. As a result, the ordering is
case-sensitive and not accent-friendly (see examples below).
order uses the Unicode code point number as a default. As a result, the
ordering is case-sensitive and not accent-friendly (see examples below).
This can be changed by setting the global ``locale`` option, which allows
case and accents to be sorted properly.
.. rubric:: Examples
Expand Down Expand Up @@ -63,8 +65,24 @@
- haïr: true
hais: true
#. With global option ``locale: "en_US.UTF-8"`` and rule ``key-ordering: {}``
as opposed to before, the following code snippet would now **PASS**:
::
- t-shirt: 1
T-shirt: 2
t-shirts: 3
T-shirts: 4
- hair: true
haïr: true
hais: true
haïssable: true
"""

from locale import strcoll

import yaml

from yamllint.linter import LintProblem
Expand Down Expand Up @@ -101,7 +119,8 @@ def check(conf, token, prev, next, nextnext, context):
# This check is done because KeyTokens can be found inside flow
# sequences... strange, but allowed.
if len(context['stack']) > 0 and context['stack'][-1].type == MAP:
if any(next.value < key for key in context['stack'][-1].keys):
if any(strcoll(next.value, key) < 0
for key in context['stack'][-1].keys):
yield LintProblem(
next.start_mark.line + 1, next.start_mark.column + 1,
'wrong ordering of key "%s" in mapping' % next.value)
Expand Down

0 comments on commit 9e90c77

Please sign in to comment.