diff --git a/README.rst b/README.rst index 36ae2fd..5900612 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,11 @@ Topy ==== .. image:: https://badge.fury.io/py/topy.svg - :target: http://badge.fury.io/py/topy + :target: https://badge.fury.io/py/topy .. image:: https://travis-ci.org/intgr/topy.svg?branch=master :alt: Travis CI - :target: http://travis-ci.org/intgr/topy + :target: https://travis-ci.org/intgr/topy Topy (anagram of "typo") is a Python script to fix typos in text, using rulesets developed by the RegExTypoFix_ project from Wikipedia. The English ruleset is included with Topy and is used by default. Other rulesets can be manually diff --git a/setup.py b/setup.py index 8f692ba..2218bea 100755 --- a/setup.py +++ b/setup.py @@ -23,10 +23,10 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', # Until we have a test suite we're conservative about Python version compatibility claims - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Topic :: Documentation', 'Topic :: Software Development :: Quality Assurance', 'Topic :: Text Processing :: Filters', diff --git a/tests/test_cmd.py b/tests/test_cmd.py index 9471ea0..c10ff63 100644 --- a/tests/test_cmd.py +++ b/tests/test_cmd.py @@ -1,7 +1,5 @@ """Functional tests using the command line interface""" -from __future__ import unicode_literals - import os import shutil import tempfile diff --git a/tests/test_unit.py b/tests/test_unit.py index b490ddc..1347ad3 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1,8 +1,5 @@ -# -*- coding: utf-8 -*- """Unit tests for internal functions""" -from __future__ import unicode_literals - import unittest try: @@ -49,7 +46,7 @@ def test_print_diff(self): # Unicode filename filename = 'ünicöde.txt' self.diff_inner( - filename.encode('utf8') if topy.PY2 else filename, + filename, "Foobar\n", "Foobaz\n", """\ @@ -63,7 +60,7 @@ def test_print_diff(self): # Filename with invalid characters filename = b'foo\xffbar.txt' self.diff_inner( - filename if topy.PY2 else filename.decode('utf8', 'surrogateescape'), + filename.decode(errors='surrogateescape'), "Foobar\n", "Foobaz\n", """\ @@ -75,9 +72,6 @@ def test_print_diff(self): """) def diff_inner(self, filename, old, new, expected): - if topy.PY2: - expected = expected.encode('utf8') - out = StringIO() topy.print_diff(topy.sanitize_filename(filename), old, new, out) diff = out.getvalue() diff --git a/topy/topy.py b/topy/topy.py index b6b85bd..1215dca 100755 --- a/topy/topy.py +++ b/topy/topy.py @@ -3,19 +3,15 @@ Topy (anagram of "typo") is a Python script to fix typos in text, based on the RegExTypoFix project from Wikipedia and AutoWikiBrowser. -Topy requires BeautifulSoup version 4 and runs with either Python 2 and 3. +Topy requires BeautifulSoup version 4 and runs with Python 3.5+ Usage: ./topy.py /path/to/files -NB! Files will be changed in place (overwritten) See: * https://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos * https://github.com/intgr/topy """ -# TODO: clean this crappy code up! - -from __future__ import unicode_literals import sys import logging import os @@ -27,7 +23,6 @@ RETF_FILENAME = 'retf.txt' -ENCODING = 'utf8' # some rules are not working with regex or are not useful disabled = { @@ -48,7 +43,6 @@ } log = logging.getLogger('topy') -PY2 = sys.version_info[0] <= 2 def parse_replacement(replace): @@ -105,8 +99,8 @@ def read_text_file(filename): """Reads file `filename` and returns contents as Unicode string. On failure, returns None and logs error.""" try: - with open(filename, 'rb') as f: - return f.read().decode(ENCODING) + with open(filename, 'r') as f: + return f.read() except (IOError, OSError) as err: log.error("Cannot open %r: %s", filename, err) except UnicodeDecodeError: @@ -120,29 +114,15 @@ def read_text_file(filename): def sanitize_filename(filename): """Converts `filename` to unicode, replaces invalid (un-encodable) characters.""" - if PY2: - # This may break on Windows with Unicode filenames? Please tell me how to fix it if anyone out there cares. - if isinstance(filename, str): - # noinspection PyUnresolvedReferences - filename = filename.decode(sys.getfilesystemencoding() or ENCODING, 'replace') - return filename - else: - # Input filename is always unicode with surrogate escapes. - return filename.encode('utf8', 'surrogateescape').decode('utf8', 'replace') + # Input filename is always unicode with surrogate escapes. + return filename.encode(errors='surrogateescape').decode(errors='replace') def print_diff(filename, old, new, stream=sys.stdout): """Diffs the `old` and `new` strings and prints as unified diff to file-like object `stream`.""" # TODO: color output for terminals - if PY2: - # On Python 2, unified_diff() requires non-Unicode str - filename = filename.encode(ENCODING) lines = unified_diff(old.splitlines(True), new.splitlines(True), filename, filename) - if PY2: - # Encode lines that aren't already str - lines = (line if isinstance(line, str) else line.encode(ENCODING) - for line in lines) stream.writelines(lines) @@ -172,8 +152,8 @@ def handle_file(regs, filename): if replaced > 0: if opts.apply: log.info("Writing %s", safe_name) - with open(filename, 'wb') as f: - f.write(text.encode(ENCODING)) + with open(filename, 'w') as f: + f.write(text) else: print_diff(safe_name, oldtext, text) @@ -183,10 +163,9 @@ def walk_dir_tree(dirpath): for root, dirs, files in os.walk(dirpath): # Modify 'dirs' list in place, so walk() doesn't recurse into them - # str(".") fixes issue #14: Python 2 has non-Unicode str pathnames, Python 3 uses Unicode - dirs[:] = (d for d in dirs if not d.startswith(str("."))) + dirs[:] = (d for d in dirs if not d.startswith(".")) for f in files: - if not f.startswith(str(".")): + if not f.startswith("."): yield os.path.join(root, f) @@ -195,9 +174,7 @@ def flatten_files(paths): for path in paths: if os.path.isdir(path): - # Once we can drop Python < 3.3 support, this should use 'yield from' - for filename in walk_dir_tree(path): - yield filename + yield from walk_dir_tree(path) else: # Filename, or the path cannot be accessed (privilege errors, file not found, etc) yield path