Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 137 additions & 76 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,14 @@ def init_chardet(self) -> None:

self.encdetector = UniversalDetector()

def open(self, filename: str) -> tuple[list[tuple[bool, int, list[str]]], str]:
    """Open *filename* and return its line fragments and encoding.

    Delegates to ``open_with_chardet`` when ``self.use_chardet`` is set and
    to ``open_with_internal`` otherwise; the result of the chosen opener is
    returned unchanged.
    """
    # Only the selected opener attribute is looked up, as in the if/return form.
    opener = self.open_with_chardet if self.use_chardet else self.open_with_internal
    return opener(filename)

def open_with_chardet(self, filename: str) -> tuple[list[str], str]:
def open_with_chardet(
self, filename: str
) -> tuple[list[tuple[bool, int, list[str]]], str]:
self.encdetector.reset()
with open(filename, "rb") as fb:
for line in fb:
Expand All @@ -259,7 +261,9 @@ def open_with_chardet(self, filename: str) -> tuple[list[str], str]:

return lines, f.encoding

def open_with_internal(self, filename: str) -> tuple[list[str], str]:
def open_with_internal(
self, filename: str
) -> tuple[list[tuple[bool, int, list[str]]], str]:
encoding = None
first_try = True
for encoding in ("utf-8", "iso-8859-1"):
Expand All @@ -286,21 +290,25 @@ def open_with_internal(self, filename: str) -> tuple[list[str], str]:

return lines, encoding

def get_lines(self, f: TextIO) -> list[tuple[bool, int, list[str]]]:
    """Read *f* and return its content as a list of line fragments.

    Each fragment is an ``(ignore, line_number, lines)`` tuple:

    * ``ignore`` is True when the fragment is text matched by
      ``self.ignore_multiline_regex``.
    * ``line_number`` is the zero-based line on which the first piece of
      the fragment sits.
    * ``lines`` holds the pieces of the fragment with line endings kept, so
      concatenating every piece of every fragment reproduces the input.

    Without a multiline regex the whole file is one non-ignored fragment.
    """
    if not self.ignore_multiline_regex:
        return [(False, 0, f.readlines())]

    text = f.read()

    # Cut the text into alternating kept / ignored chunks.
    chunks: list[tuple[bool, str]] = []
    pos = 0
    for m in re.finditer(self.ignore_multiline_regex, text):
        chunks.append((False, text[pos : m.start()]))
        chunks.append((True, m.group()))
        pos = m.end()
    chunks.append((False, text[pos:]))

    fragments: list[tuple[bool, int, list[str]]] = []
    line_number = 0
    for ignore, chunk in chunks:
        pieces = chunk.splitlines(True)
        fragments.append((ignore, line_number, pieces))
        # A match can start or stop in the middle of a line. A trailing piece
        # without a line ending shares its line with the next fragment, so
        # only completed lines advance the counter; otherwise every finding
        # after an ignored region is reported on the wrong line.
        if pieces and pieces[-1] == pieces[-1].splitlines()[0]:
            line_number += len(pieces) - 1
        else:
            line_number += len(pieces)
    return fragments


# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
Expand Down Expand Up @@ -869,86 +877,31 @@ def apply_uri_ignore_words(
return check_matches


def parse_file(
def parse_lines(
fragment: tuple[bool, int, list[str]],
filename: str,
colors: TermColors,
summary: Optional[Summary],
misspellings: dict[str, Misspelling],
ignore_words_cased: set[str],
exclude_lines: set[str],
file_opener: FileOpener,
word_regex: Pattern[str],
ignore_word_regex: Optional[Pattern[str]],
uri_regex: Pattern[str],
uri_ignore_words: set[str],
context: Optional[tuple[int, int]],
options: argparse.Namespace,
) -> int:
) -> tuple[int, bool]:
bad_count = 0
lines = None
changed = False

if filename == "-":
f = sys.stdin
encoding = "utf-8"
lines = f.readlines()
else:
if options.check_filenames:
for word in extract_words(filename, word_regex, ignore_word_regex):
if word in ignore_words_cased:
continue
lword = word.lower()
if lword not in misspellings:
continue
fix = misspellings[lword].fix
fixword = fix_case(word, misspellings[lword].data)

if summary and fix:
summary.update(lword)

cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

reason = misspellings[lword].reason
if reason:
if options.quiet_level & QuietLevels.DISABLED_FIXES:
continue
creason = f" | {colors.FILE}{reason}{colors.DISABLE}"
else:
if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
continue
creason = ""

bad_count += 1

print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")

# ignore irregular files
if not os.path.isfile(filename):
return bad_count

try:
text = is_text_file(filename)
except PermissionError as e:
print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr)
return bad_count
except OSError:
return bad_count

if not text:
if not options.quiet_level & QuietLevels.BINARY_FILE:
print(f"WARNING: Binary file: {filename}", file=sys.stderr)
return bad_count
try:
lines, encoding = file_opener.open(filename)
except OSError:
return bad_count
_, fragment_line_number, lines = fragment

for i, line in enumerate(lines):
line = line.rstrip()
if not line or line in exclude_lines:
continue
line_number = fragment_line_number + i

extra_words_to_ignore = set()
match = inline_ignore_regex.search(line)
Expand Down Expand Up @@ -1035,7 +988,7 @@ def parse_file(
continue

cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
cline = f"{colors.FILE}{i + 1}{colors.DISABLE}"
cline = f"{colors.FILE}{line_number + 1}{colors.DISABLE}"
cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

Expand Down Expand Up @@ -1067,19 +1020,127 @@ def parse_file(
f"==> {crightword}{creason}"
)

return bad_count, changed


def parse_file(
    filename: str,
    colors: TermColors,
    summary: Optional[Summary],
    misspellings: dict[str, Misspelling],
    ignore_words_cased: set[str],
    exclude_lines: set[str],
    file_opener: FileOpener,
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
    uri_regex: Pattern[str],
    uri_ignore_words: set[str],
    context: Optional[tuple[int, int]],
    options: argparse.Namespace,
) -> int:
    """Check one file (or stdin when *filename* is ``"-"``) for misspellings.

    The content is read as a list of ``(ignore, line_number, lines)``
    fragments. Every fragment not flagged as ignored is passed to
    ``parse_lines``; if any call reports a change, all fragments (ignored
    ones included) are written back so the output is complete.

    Returns the running ``bad_count``: one per misspelled word reported in
    the file name (when ``options.check_filenames`` is set) plus the counts
    returned by ``parse_lines``. Files that are not regular files, cannot
    be read, or are detected as binary return the count gathered so far.
    """
    bad_count = 0

    # Read lines.
    if filename == "-":
        f = sys.stdin
        encoding = "utf-8"
        fragments = file_opener.get_lines(f)
    else:
        if options.check_filenames:
            for word in extract_words(filename, word_regex, ignore_word_regex):
                if word in ignore_words_cased:
                    continue
                lword = word.lower()
                if lword not in misspellings:
                    continue
                fix = misspellings[lword].fix
                fixword = fix_case(word, misspellings[lword].data)

                if summary and fix:
                    summary.update(lword)

                cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
                cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
                crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

                reason = misspellings[lword].reason
                if reason:
                    if options.quiet_level & QuietLevels.DISABLED_FIXES:
                        continue
                    creason = f" | {colors.FILE}{reason}{colors.DISABLE}"
                else:
                    if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                        continue
                    creason = ""

                bad_count += 1

                print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")

        # ignore irregular files
        if not os.path.isfile(filename):
            return bad_count

        try:
            text = is_text_file(filename)
        except PermissionError as e:
            print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr)
            return bad_count
        except OSError:
            return bad_count

        if not text:
            if not options.quiet_level & QuietLevels.BINARY_FILE:
                print(f"WARNING: Binary file: {filename}", file=sys.stderr)
            return bad_count
        try:
            fragments, encoding = file_opener.open(filename)
        except OSError:
            return bad_count

    # Parse lines.
    changed = False
    for fragment in fragments:
        ignore, _, _ = fragment
        if ignore:
            # Ignored fragments are skipped here but still written back below.
            continue

        bad_count_update, changed_update = parse_lines(
            fragment,
            filename,
            colors,
            summary,
            misspellings,
            ignore_words_cased,
            exclude_lines,
            word_regex,
            ignore_word_regex,
            uri_regex,
            uri_ignore_words,
            context,
            options,
        )
        bad_count += bad_count_update
        changed = changed or changed_update

    # Write out lines, if changed.
    if changed:
        if filename == "-":
            print("---")
            for _, _, lines in fragments:
                for line in lines:
                    print(line, end="")
        else:
            if not options.quiet_level & QuietLevels.FIXES:
                print(
                    f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}",
                    file=sys.stderr,
                )
            # newline="" keeps the line endings stored in the fragments as-is.
            with open(filename, "w", encoding=encoding, newline="") as f:
                for _, _, lines in fragments:
                    f.writelines(lines)

    return bad_count


Expand Down
62 changes: 50 additions & 12 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,19 +952,19 @@ def test_ignore_multiline_regex_option(
assert code == EX_USAGE
assert "usage:" in stdout

text = """
Please see http://example.com/abandonned for info
# codespell:ignore-begin
'''
abandonned
abandonned
'''
# codespell:ignore-end
abandonned
"""

fname = tmp_path / "flag.txt"
fname.write_text(
"""
Please see http://example.com/abandonned for info
# codespell:ignore-begin
'''
abandonned
abandonned
'''
# codespell:ignore-end
abandonned
"""
)
fname.write_text(text)
assert cs.main(fname) == 4
assert (
cs.main(
Expand All @@ -975,6 +975,44 @@ def test_ignore_multiline_regex_option(
== 2
)

with FakeStdin(text):
assert (
cs.main(
"-",
"--ignore-multiline-regex",
"codespell:ignore-begin.*codespell:ignore-end",
)
== 2
)

fname.write_text("This\nThsi")
cs.main(
fname,
"-w",
"--ignore-multiline-regex",
"codespell:ignore-begin.*codespell:ignore-end",
)
assert fname.read_text() == "This\nThis"

fname.write_text(text)
cs.main(
fname,
"-w",
"--ignore-multiline-regex",
"codespell:ignore-begin.*codespell:ignore-end",
)
fixed_text = """
Please see http://example.com/abandoned for info
# codespell:ignore-begin
'''
abandonned
abandonned
'''
# codespell:ignore-end
abandoned
"""
assert fname.read_text() == fixed_text


def test_uri_regex_option(
tmp_path: Path,
Expand Down
Loading