intgr · intgr · Sep 8, 2020 · Sep 8, 2020
diff --git a/README.rst b/README.rst
@@ -1,11 +1,11 @@
 Topy
 ====
 .. image:: https://badge.fury.io/py/topy.svg
-   :target: http://badge.fury.io/py/topy
+   :target: https://badge.fury.io/py/topy
 
 .. image:: https://travis-ci.org/intgr/topy.svg?branch=master
    :alt: Travis CI
-   :target: http://travis-ci.org/intgr/topy
+   :target: https://travis-ci.org/intgr/topy
 
 Topy (anagram of "typo") is a Python script to fix typos in text, using rulesets developed by the RegExTypoFix_ project
 from Wikipedia. The English ruleset is included with Topy and is used by default. Other rulesets can be manually

diff --git a/setup.py b/setup.py
@@ -23,10 +23,10 @@
         'Intended Audience :: Developers',
         'License :: OSI Approved :: MIT License',
         # Until we have a test suite we're conservative about Python version compatibility claims
-        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
         'Topic :: Documentation',
         'Topic :: Software Development :: Quality Assurance',
         'Topic :: Text Processing :: Filters',

diff --git a/tests/test_cmd.py b/tests/test_cmd.py
@@ -1,7 +1,5 @@
 """Functional tests using the command line interface"""
 
-from __future__ import unicode_literals
-
 import os
 import shutil
 import tempfile

diff --git a/tests/test_unit.py b/tests/test_unit.py
@@ -1,8 +1,5 @@
-# -*- coding: utf-8 -*-
 """Unit tests for internal functions"""
 
-from __future__ import unicode_literals
-
 import unittest
 
 try:
@@ -49,7 +46,7 @@ def test_print_diff(self):
         # Unicode filename
         filename = 'ünicöde.txt'
         self.diff_inner(
-            filename.encode('utf8') if topy.PY2 else filename,
+            filename,
             "Foobar\n",
             "Foobaz\n",
             """\
@@ -63,7 +60,7 @@ def test_print_diff(self):
         # Filename with invalid characters
         filename = b'foo\xffbar.txt'
         self.diff_inner(
-            filename if topy.PY2 else filename.decode('utf8', 'surrogateescape'),
+            filename.decode(errors='surrogateescape'),
             "Foobar\n",
             "Foobaz\n",
             """\
@@ -75,9 +72,6 @@ def test_print_diff(self):
 """)
 
     def diff_inner(self, filename, old, new, expected):
-        if topy.PY2:
-            expected = expected.encode('utf8')
-
         out = StringIO()
         topy.print_diff(topy.sanitize_filename(filename), old, new, out)
         diff = out.getvalue()

diff --git a/topy/topy.py b/topy/topy.py
@@ -3,19 +3,15 @@
 Topy (anagram of "typo") is a Python script to fix typos in text, based on
 the RegExTypoFix project from Wikipedia and AutoWikiBrowser.
 
-Topy requires BeautifulSoup version 4 and runs with either Python 2 and 3.
+Topy requires BeautifulSoup version 4 and runs with Python 3.5+
 
 Usage: ./topy.py /path/to/files
-NB! Files will be changed in place (overwritten)
 
 See:
 * https://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos
 * https://github.com/intgr/topy
 """
 
-# TODO: clean this crappy code up!
-
-from __future__ import unicode_literals
 import sys
 import logging
 import os
@@ -27,7 +23,6 @@
 
 
 RETF_FILENAME = 'retf.txt'
-ENCODING = 'utf8'
 
 # some rules are not working with regex or are not useful
 disabled = {
@@ -48,7 +43,6 @@
 }
 
 log = logging.getLogger('topy')
-PY2 = sys.version_info[0] <= 2
 
 
 def parse_replacement(replace):
@@ -105,8 +99,8 @@ def read_text_file(filename):
     """Reads file `filename` and returns contents as Unicode string. On failure, returns None and logs error."""
 
     try:
-        with open(filename, 'rb') as f:
-            return f.read().decode(ENCODING)
+        with open(filename, 'r', encoding='utf-8', newline='') as f:
+            return f.read()
     except (IOError, OSError) as err:
         log.error("Cannot open %r: %s", filename, err)
     except UnicodeDecodeError:
@@ -120,29 +114,15 @@ def read_text_file(filename):
 def sanitize_filename(filename):
     """Converts `filename` to unicode, replaces invalid (un-encodable) characters."""
 
-    if PY2:
-        # This may break on Windows with Unicode filenames? Please tell me how to fix it if anyone out there cares.
-        if isinstance(filename, str):
-            # noinspection PyUnresolvedReferences
-            filename = filename.decode(sys.getfilesystemencoding() or ENCODING, 'replace')
-        return filename
-    else:
-        # Input filename is always unicode with surrogate escapes.
-        return filename.encode('utf8', 'surrogateescape').decode('utf8', 'replace')
+    # Input filename is always unicode with surrogate escapes.
+    return filename.encode(errors='surrogateescape').decode(errors='replace')
 
 
 def print_diff(filename, old, new, stream=sys.stdout):
     """Diffs the `old` and `new` strings and prints as unified diff to file-like object `stream`."""
 
     # TODO: color output for terminals
-    if PY2:
-        # On Python 2, unified_diff() requires non-Unicode str
-        filename = filename.encode(ENCODING)
     lines = unified_diff(old.splitlines(True), new.splitlines(True), filename, filename)
-    if PY2:
-        # Encode lines that aren't already str
-        lines = (line if isinstance(line, str) else line.encode(ENCODING)
-                 for line in lines)
     stream.writelines(lines)
 
 
@@ -172,8 +152,8 @@ def handle_file(regs, filename):
     if replaced > 0:
         if opts.apply:
             log.info("Writing %s", safe_name)
-            with open(filename, 'wb') as f:
-                f.write(text.encode(ENCODING))
+            with open(filename, 'w', encoding='utf-8', newline='') as f:
+                f.write(text)
         else:
             print_diff(safe_name, oldtext, text)
 
@@ -183,10 +163,9 @@ def walk_dir_tree(dirpath):
 
     for root, dirs, files in os.walk(dirpath):
         # Modify 'dirs' list in place, so walk() doesn't recurse into them
-        # str(".") fixes issue #14: Python 2 has non-Unicode str pathnames, Python 3 uses Unicode
-        dirs[:] = (d for d in dirs if not d.startswith(str(".")))
+        dirs[:] = (d for d in dirs if not d.startswith("."))
         for f in files:
-            if not f.startswith(str(".")):
+            if not f.startswith("."):
                 yield os.path.join(root, f)
 
 
@@ -195,9 +174,7 @@ def flatten_files(paths):
 
     for path in paths:
         if os.path.isdir(path):
-            # Once we can drop Python < 3.3 support, this should use 'yield from'
-            for filename in walk_dir_tree(path):
-                yield filename
+            yield from walk_dir_tree(path)
         else:
             # Filename, or the path cannot be accessed (privilege errors, file not found, etc)
             yield path