Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Python 2.x and <3.5 compatibility code #22

Merged
merged 1 commit into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Topy
====
.. image:: https://badge.fury.io/py/topy.svg
:target: http://badge.fury.io/py/topy
:target: https://badge.fury.io/py/topy

.. image:: https://travis-ci.org/intgr/topy.svg?branch=master
:alt: Travis CI
:target: http://travis-ci.org/intgr/topy
:target: https://travis-ci.org/intgr/topy

Topy (anagram of "typo") is a Python script to fix typos in text, using rulesets developed by the RegExTypoFix_ project
from Wikipedia. The English ruleset is included with Topy and is used by default. Other rulesets can be manually
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
# Until we have a test suite we're conservative about Python version compatibility claims
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Documentation',
'Topic :: Software Development :: Quality Assurance',
'Topic :: Text Processing :: Filters',
Expand Down
2 changes: 0 additions & 2 deletions tests/test_cmd.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Functional tests using the command line interface"""

from __future__ import unicode_literals

import os
import shutil
import tempfile
Expand Down
10 changes: 2 additions & 8 deletions tests/test_unit.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
"""Unit tests for internal functions"""

from __future__ import unicode_literals

import unittest

try:
Expand Down Expand Up @@ -49,7 +46,7 @@ def test_print_diff(self):
# Unicode filename
filename = 'ünicöde.txt'
self.diff_inner(
filename.encode('utf8') if topy.PY2 else filename,
filename,
"Foobar\n",
"Foobaz\n",
"""\
Expand All @@ -63,7 +60,7 @@ def test_print_diff(self):
# Filename with invalid characters
filename = b'foo\xffbar.txt'
self.diff_inner(
filename if topy.PY2 else filename.decode('utf8', 'surrogateescape'),
filename.decode(errors='surrogateescape'),
"Foobar\n",
"Foobaz\n",
"""\
Expand All @@ -75,9 +72,6 @@ def test_print_diff(self):
""")

def diff_inner(self, filename, old, new, expected):
if topy.PY2:
expected = expected.encode('utf8')

out = StringIO()
topy.print_diff(topy.sanitize_filename(filename), old, new, out)
diff = out.getvalue()
Expand Down
43 changes: 10 additions & 33 deletions topy/topy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,15 @@
Topy (anagram of "typo") is a Python script to fix typos in text, based on
the RegExTypoFix project from Wikipedia and AutoWikiBrowser.

Topy requires BeautifulSoup version 4 and runs with either Python 2 and 3.
Topy requires BeautifulSoup version 4 and runs on Python 3.5+.

Usage: ./topy.py /path/to/files
NB! Files will be changed in place (overwritten)

See:
* https://en.wikipedia.org/wiki/Wikipedia:AutoWikiBrowser/Typos
* https://github.com/intgr/topy
"""

# TODO: clean this crappy code up!

from __future__ import unicode_literals
import sys
import logging
import os
Expand All @@ -27,7 +23,6 @@


RETF_FILENAME = 'retf.txt'
ENCODING = 'utf8'

# some rules are not working with regex or are not useful
disabled = {
Expand All @@ -48,7 +43,6 @@
}

log = logging.getLogger('topy')
PY2 = sys.version_info[0] <= 2


def parse_replacement(replace):
Expand Down Expand Up @@ -105,8 +99,8 @@ def read_text_file(filename):
"""Reads file `filename` and returns its contents as a Unicode string. On failure, logs an error and returns None."""

try:
with open(filename, 'rb') as f:
return f.read().decode(ENCODING)
with open(filename, 'r') as f:
return f.read()
except (IOError, OSError) as err:
log.error("Cannot open %r: %s", filename, err)
except UnicodeDecodeError:
Expand All @@ -120,29 +114,15 @@ def read_text_file(filename):
def sanitize_filename(filename):
"""Converts `filename` to Unicode, replacing invalid (un-encodable) characters."""

if PY2:
# This may break on Windows with Unicode filenames? Please tell me how to fix it if anyone out there cares.
if isinstance(filename, str):
# noinspection PyUnresolvedReferences
filename = filename.decode(sys.getfilesystemencoding() or ENCODING, 'replace')
return filename
else:
# Input filename is always unicode with surrogate escapes.
return filename.encode('utf8', 'surrogateescape').decode('utf8', 'replace')
# Input filename is always unicode with surrogate escapes.
return filename.encode(errors='surrogateescape').decode(errors='replace')


def print_diff(filename, old, new, stream=sys.stdout):
"""Diffs the `old` and `new` strings and prints as unified diff to file-like object `stream`."""

# TODO: color output for terminals
if PY2:
# On Python 2, unified_diff() requires non-Unicode str
filename = filename.encode(ENCODING)
lines = unified_diff(old.splitlines(True), new.splitlines(True), filename, filename)
if PY2:
# Encode lines that aren't already str
lines = (line if isinstance(line, str) else line.encode(ENCODING)
for line in lines)
stream.writelines(lines)


Expand Down Expand Up @@ -172,8 +152,8 @@ def handle_file(regs, filename):
if replaced > 0:
if opts.apply:
log.info("Writing %s", safe_name)
with open(filename, 'wb') as f:
f.write(text.encode(ENCODING))
with open(filename, 'w') as f:
f.write(text)
else:
print_diff(safe_name, oldtext, text)

Expand All @@ -183,10 +163,9 @@ def walk_dir_tree(dirpath):

for root, dirs, files in os.walk(dirpath):
# Modify 'dirs' list in place, so walk() doesn't recurse into them
# str(".") fixes issue #14: Python 2 has non-Unicode str pathnames, Python 3 uses Unicode
dirs[:] = (d for d in dirs if not d.startswith(str(".")))
dirs[:] = (d for d in dirs if not d.startswith("."))
for f in files:
if not f.startswith(str(".")):
if not f.startswith("."):
yield os.path.join(root, f)


Expand All @@ -195,9 +174,7 @@ def flatten_files(paths):

for path in paths:
if os.path.isdir(path):
# Once we can drop Python < 3.3 support, this should use 'yield from'
for filename in walk_dir_tree(path):
yield filename
yield from walk_dir_tree(path)
else:
# Filename, or the path cannot be accessed (privilege errors, file not found, etc)
yield path
Expand Down