From 212f0d3c3f250817cd20ce0798b537954ba219d9 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Wed, 26 Jul 2023 19:28:58 -0500 Subject: [PATCH] Use pre-commit to sort the dictionaries This changes the sort order of specific characters (like `"_"`) but is cross-platform and can be fixed by pre-commit.ci. Closes #2689 --- .pre-commit-config.yaml | 3 +++ Makefile | 8 +------- codespell_lib/data/dictionary.txt | 14 +++++++------- codespell_lib/tests/test_dictionary.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 491371d902..08f82000d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,6 +46,9 @@ repos: - id: fix-byte-order-marker - id: check-case-conflict - id: check-toml + - id: file-contents-sorter + files: dictionary.*\.txt$ + args: [--ignore-case] - repo: https://github.com/adrienverge/yamllint.git rev: v1.32.0 hooks: diff --git a/Makefile b/Makefile index 0c239e5938..18cf94cf2b 100644 --- a/Makefile +++ b/Makefile @@ -14,10 +14,6 @@ codespell.1: codespell.1.include Makefile check-dictionaries: @for dictionary in ${DICTIONARIES}; do \ - if ! LC_ALL=C sort ${SORT_ARGS} -c $$dictionary; then \ - echo "Dictionary $$dictionary not sorted. Sort with 'make sort-dictionaries'"; \ - exit 1; \ - fi; \ if grep -E -n "^\s*$$|\s$$|^\s" $$dictionary; then \ echo "Dictionary $$dictionary contains leading/trailing whitespace and/or blank lines. Trim with 'make trim-dictionaries'"; \ exit 1; \ @@ -31,9 +27,7 @@ check-dictionaries: fi sort-dictionaries: - @for dictionary in ${DICTIONARIES}; do \ - LC_ALL=C sort ${SORT_ARGS} -u -o $$dictionary $$dictionary; \ - done + pre-commit run --all-files file-contents-sorter trim-dictionaries: @for dictionary in ${DICTIONARIES}; do \ diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt index f2bad8f511..a54e70d18c 100644 --- a/codespell_lib/data/dictionary.txt +++ b/codespell_lib/data/dictionary.txt @@ -5,6 +5,9 @@ 3rt->3rd 3st->3rd 4rd->4th +__attribyte__->__attribute__ +__cpluspus->__cplusplus +__cpusplus->__cplusplus a-diaerers->a-diaereses aaccess->access aaccessibility->accessibility @@ -8230,9 +8233,9 @@ cllouds->clouds cloack->cloak cloacks->cloaks cloberring->clobbering +clock_getttime->clock_gettime clocksourc->clocksource clockwíse->clockwise -clock_getttime->clock_gettime clodes->closed, clothes, cloding->closing cloes->close @@ -11247,8 +11250,8 @@ cought->caught, cough, fought, coul->could could'nt->couldn't could't->couldn't -couldent->couldn't coulden`t->couldn't +couldent->couldn't couldn->could, couldn't, couldn;t->couldn't couldnt'->couldn't @@ -31326,6 +31329,7 @@ phsyically->physically phsyics->physics phtread->pthread phtreads->pthreads +phy_interace->phy_interface phyiscal->physical phyiscally->physically phyiscs->physics @@ -31352,7 +31356,6 @@ physisions->physicians physisist->physicist phython->python phyton->python -phy_interace->phy_interface piar->pair, pier, pliers, piars->pairs, piers, pliers, piblisher->publisher @@ -47125,8 +47128,8 @@ woudl->would woudn't->wouldn't would'nt->wouldn't would't->wouldn't -wouldent->wouldn't woulden`t->wouldn't +wouldent->wouldn't wouldn;t->wouldn't wouldnt'->wouldn't wouldnt->wouldn't @@ -47370,9 +47373,6 @@ zukeeni->zucchini zuser->user zylophone->xylophone zylophones->xylophones -__attribyte__->__attribute__ -__cpluspus->__cplusplus -__cpusplus->__cplusplus évaluate->evaluate сontain->contain сontained->contained diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index 059b7f46a4..8b41ad52ab 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -77,6 +77,21 @@ def test_dictionary_formatting( raise AssertionError("\n" + "\n".join(errors)) +@fname_params +def test_dictionary_sorting( + fname: str, + in_aspell: Tuple[bool, bool], + in_dictionary: Tuple[Iterable[str], Iterable[str]], +) -> None: + previous_line = None + with open(fname, encoding="utf-8") as file: + for current_line in file: + current_line = current_line.strip().lower() + if previous_line is not None: + assert previous_line < current_line, f"{fname} is not sorted" + previous_line = current_line + + def _check_aspell( phrase: str, msg: str,