Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion changetext/changetext.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ def change_text(text):
@log_exceptions()
def outer_change_text(text):
    """Translate *text* with ``change_text``, log the call, and return the result.

    One log entry is written per call, in the form ``<name>(<input repr>) -> <output repr>``,
    where ``<name>`` is ``outer_change_text.__name__`` looked up at call time.

    :param text: the string passed through to ``change_text``.
    :return: whatever ``change_text`` returned.

    NOTE(review): exception handling is delegated to the ``log_exceptions`` decorator;
    confirm there what the caller receives when ``change_text`` raises.
    """
    result = change_text(text)
    # Write exactly one entry: the diff rendering carried both the old ``str.format``
    # call and its f-string replacement, which would have logged every call twice.
    get_logger().write(f"{outer_change_text.__name__}({text!r}) -> {result!r}")
    return result
2 changes: 1 addition & 1 deletion changetext/common_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ def init():
pass


@functools.lru_cache
def get_state(object_id=None):
    """Return a ``ChangeTextState`` built for *object_id*, memoised per argument.

    The function is wrapped in a single ``functools.lru_cache``, so while an entry
    stays in the cache, repeated calls with an equal ``object_id`` return the same
    instance instead of constructing a new one.

    :param object_id: cache key forwarded to ``ChangeTextState``; must be hashable
        because ``lru_cache`` uses it as a dictionary key. Defaults to ``None``.
    :return: the cached or newly created ``ChangeTextState``.
    """
    # Only one cache layer: stacking ``lru_cache()`` on top of ``lru_cache`` would
    # add a second, redundant cache around the first.
    return ChangeTextState(object_id)
12 changes: 7 additions & 5 deletions changetext/corrector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class CorrectorRegistry:
def __init__(self):
self.changes = list()
self.changes = []

def register(self, regex=None, predicate=None):
def decorator(func):
Expand All @@ -16,10 +16,12 @@ def decorator(func):
if isinstance(regex, str):
regex = re.compile(regex)

predicate = lambda text: regex.search(text)
def predicate(text):
return regex.search(text)

if predicate is None:
predicate = lambda _text: True
def predicate(_text):
return True

self.changes.append((predicate, func))

Expand All @@ -40,7 +42,7 @@ def incremental_changes(self, text):
if predicate_result:
result = func(text, predicate_result)
if result:
get_logger().write("{}({!r}) -> {!r}".format(func.__name__, text, result))
get_logger().write(f"{func.__name__}({text!r}) -> {result!r}")
text = result or text

return text
Expand All @@ -51,7 +53,7 @@ def exclusive_changes(self, text):
if predicate_result:
result = func(text, predicate_result)
if result:
get_logger().write("{}({!r}) -> {!r}".format(func.__name__, text, result))
get_logger().write(f"{func.__name__}({text!r}) -> {result!r}")
return result or text

return text
48 changes: 24 additions & 24 deletions changetext/final_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

@final_changes.register(regex=r"Histories of (\w+) and (\w+)")
def corr_histories_of(_, search_result):
    """Build the Russian title for a "Histories of X and Y" string.

    :param _: the full input text (unused; the result is built from the match only).
    :param search_result: match object for the registered regex; groups 1 and 2 are
        the two English words following "Histories of".
    :return: ``"Истории о<adj1> и <adj2>"`` where each adjective is looked up in
        ``histories_adjs``.
    :raises KeyError: if either captured word has no entry in ``histories_adjs``.
    """
    first, second = search_result.group(1, 2)
    return f"Истории о{histories_adjs[first]} и {histories_adjs[second]}"


possessive_adjectives = {"жаба": "жабий", "корова": "коровий", "медведь": "медвежий"}
Expand Down Expand Up @@ -117,7 +117,7 @@ def corr_container(text, _):
adjective = None
gender = get_gender(container, {"nomn"})
adjective = inflect_adjective(adjective, gender)
replacement_string = "{} {} ({})".format(container, containment, adjective)
replacement_string = f"{container} {containment} ({adjective})"
else:
words = of_material.split()
material = None
Expand All @@ -144,7 +144,7 @@ def corr_container(text, _):
material = "из " + " ".join(gen_case)
else:
material = of_material
replacement_string = "{} {} ({}".format(container, containment, material)
replacement_string = f"{container} {containment} ({material}"
if initial_string[-1] == ")":
replacement_string += ")"
text = text.replace(initial_string, replacement_string.capitalize())
Expand Down Expand Up @@ -245,7 +245,7 @@ def corr_of_material_item(text, _):
else:
replacement_string = " ".join(words) + " " + of_material
else:
raise ValueError("Unknown case: {!r}".format(text))
raise ValueError(f"Unknown case: {text!r}")

text = text.replace(initial_string, replacement_string)
return text
Expand Down Expand Up @@ -282,7 +282,7 @@ def corr_clothes(text, _):
search_result = re_clothes.search(text)
text = text.replace(
search_result.group(1),
"{} {} {}".format(search_result.group(4), search_result.group(3), to_genitive_case(search_result.group(2))),
f"{search_result.group(4)} {search_result.group(3)} {to_genitive_case(search_result.group(2))}",
)
text = text.replace("левый", "левая")
text = text.replace("правый", "правая")
Expand All @@ -307,7 +307,7 @@ def corr_prepared(text, search_result):
# >>> corr_prepared(" рубленная гигантский крот лёгкие")
# ' рубленные лёгкие гигантского крота'
groups = search_result.groups()
result = text.replace(groups[0], "{} {} {}".format(groups[1], groups[3], to_genitive_case(groups[2])))
result = text.replace(groups[0], f"{groups[1]} {groups[3]} {to_genitive_case(groups[2])}")
return result


Expand Down Expand Up @@ -414,14 +414,14 @@ def corr_weapon_trap_parts(text, search_result):
gender = get_main_word_gender(obj)
new_adj = inflect_as_adjective(adj, gender)
new_word_2 = inflect_adjective(make_adjective[material], gender)
text = text.replace(search_result.group(0), "{} {} {}".format(new_adj, new_word_2, obj))
text = text.replace(search_result.group(0), f"{new_adj} {new_word_2} {obj}")
else:
material = " ".join(words[:3])
obj = " ".join(words[3:])
gender = get_main_word_gender(obj)
assert gender is not None
new_adj = inflect_as_adjective(adj, gender)
text = text.replace(search_result.group(0), "{} {} {}".format(new_adj, obj, material))
text = text.replace(search_result.group(0), f"{new_adj} {obj} {material}")
return text


Expand Down Expand Up @@ -632,7 +632,7 @@ def corr_stopped_construction(_, search_result):
else:
gen_case_obj = to_genitive_case(obj)

return ("{} приостановили строительство {}.".format(subj, gen_case_obj)).capitalize()
return f"{subj} приостановили строительство {gen_case_obj}.".capitalize()


@final_changes.register(
Expand All @@ -655,7 +655,7 @@ def corr_relief(_, search_result):
if group1.split(" ")[0] == "Мёртвый":
text = "Мёртвое деревце ({})".format("".join(search_result.group(0).split(" ")[1:-1]))
else:
text = "Деревце ({})".format(group1)
text = f"Деревце ({group1})"
return text.capitalize()

if " " in group1:
Expand Down Expand Up @@ -688,7 +688,7 @@ def corr_relief(_, search_result):
text = "{} {} из {}".format(" ".join(first_words), obj, " ".join(words))
else:
material = group1
text = "{} из {}".format(obj, to_genitive_case(material))
text = f"{obj} из {to_genitive_case(material)}"

if "иза" in text:
text = text.replace(" иза", "")
Expand Down Expand Up @@ -722,7 +722,7 @@ def corr_adjective_relief(text, search_result):
gender = get_gender(obj)
new_word = inflect_adjective(adjective, gender, "nomn")
if new_word:
text = "{} {}".format(new_word, obj)
text = f"{new_word} {obj}"

return text.capitalize()

Expand Down Expand Up @@ -787,7 +787,7 @@ def corr_settlement(_, search_result):
name = search_result.group(3)

if len(adjective) == 0:
return "{} {}".format(settlement.capitalize(), name.capitalize())
return f"{settlement.capitalize()} {name.capitalize()}"

if adjective in {"Покинуть", "Разрушить"}:
return
Expand All @@ -802,7 +802,7 @@ def corr_settlement(_, search_result):
if adjective_2 is None:
adjective_2 = adjective

return "{} {} {}".format(adjective_2.capitalize(), settlement, name.capitalize())
return f"{adjective_2.capitalize()} {settlement} {name.capitalize()}"


# Clothier's shop
Expand Down Expand Up @@ -845,7 +845,7 @@ def corr_clothiers_shop(_, search_result):
preposition = "на"
material = inflect_noun(material, case="loct", orig_form={"nomn"})

return "{} {} {} {}".format(verb, product, preposition, material)
return f"{verb} {product} {preposition} {material}"
else:
if product in {"щит", "баклер"}:
_, of_material = cloth_subst[material] # Don't change the verb, leave 'Делать'/'Изготовить'
Expand All @@ -860,9 +860,9 @@ def corr_clothiers_shop(_, search_result):
if material in make_adjective: # "шёлк" -> "шёлковый"
gender = get_gender(product, {"nomn"})
material_adj = inflect_adjective(make_adjective[material], gender, "accs", animated=False)
return "{} {} {}".format(verb, material_adj, product_accus) # {Шить} {шёлковую} {робу}
return f"{verb} {material_adj} {product_accus}" # {Шить} {шёлковую} {робу}
else:
return "{} {} {}".format(verb, product_accus, of_material) # {Шить} {робу} {из ткани}
return f"{verb} {product_accus} {of_material}" # {Шить} {робу} {из ткани}


@final_changes.register(regex=r"(Делать|Изготовить|Украшать)([\w\s/]+)$")
Expand Down Expand Up @@ -903,7 +903,7 @@ def corr_craft_general(text, search_result):
if len(words) == 1 and words[0] not in make_adjective and not is_adjective(words[0]):
material = inflect_noun(words[0], "gent", orig_form={"nomn", "inan"}) # рог -> (из) рога
assert material is not None
result = "{} {} из {}".format(verb, product, material)
result = f"{verb} {product} из {material}"
else:
adjectives = [
make_adjective[word] if word in make_adjective else word if is_adjective(word) else None
Expand All @@ -913,7 +913,7 @@ def corr_craft_general(text, search_result):
adjectives = [inflect_adjective(adj, product_gender, "accs", animated=False) for adj in adjectives]
result = "{} {} {}".format(verb, " ".join(adjectives), product)
else:
result = "{} {}".format(verb, product)
result = f"{verb} {product}"

return text.replace(search_result.group(0), result).capitalize()

Expand Down Expand Up @@ -962,7 +962,7 @@ def corr_animal_material(_, search_result):
def corr_rings(text, search_result):
    """Rewrite a "<description> <object>" match as "<object> из <description in genitive>".

    :param text: the full input string.
    :param search_result: match object; group 1 is the description (material),
        group 2 is the object, group 0 is the whole span to be replaced.
    :return: *text* with every occurrence of the matched span replaced.
    """
    obj = search_result.group(2)
    description = search_result.group(1)
    # A single replacement expression; the pre-change ``str.format`` variant is dropped.
    replacement = f"{obj} из {to_genitive_case(description)}"
    return text.replace(search_result.group(0), replacement)


@final_changes.register(predicate=lambda text: text.startswith("Вы нашли из "))
Expand All @@ -983,7 +983,7 @@ def corr_you_struck(text, _):
else:
result = inflect_collocation(material, {"accs"})

return "{} {}!".format(you_struck, result)
return f"{you_struck} {result}!"


@final_changes.register(regex=r"(.+)\s(стал)\s(.+)\.")
Expand All @@ -1005,6 +1005,6 @@ def corr_become(_, search_result):
words = search_result.group(3)
words = inflect_collocation(words, {"ablt"})
if subj.startswith("Животное"):
return "Животное выросло и стало {}.".format(words)
else:
return "{} {} {}.".format(subj, verb, words)
return f"Животное выросло и стало {words}."

return f"{subj} {verb} {words}."
4 changes: 2 additions & 2 deletions changetext/logging_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def write(self, text):
self.logged.add(text)


@functools.lru_cache
def get_logger(stream=None):
    """Return a ``LoggerWrapper`` for *stream*, memoised per argument.

    Wrapped in a single ``functools.lru_cache`` so that, while the entry is cached,
    calls with an equal ``stream`` share one wrapper instance.

    :param stream: forwarded to ``LoggerWrapper``; must be hashable because it is
        used as the cache key. Defaults to ``None``.
    :return: the cached or newly created ``LoggerWrapper``.
    """
    # One decorator only; the diff rendering listed both the old call form
    # ``lru_cache()`` and the new bare form, which would nest two caches.
    return LoggerWrapper(stream)

Expand All @@ -36,7 +36,7 @@ def wrapper(text):
try:
return func(text)
except Exception:
get_logger(stream).logger.exception("An exception occurred. Initial string: {!r}".format(text))
get_logger(stream).logger.exception(f"An exception occurred. Initial string: {text!r}")

return wrapper

Expand Down
8 changes: 4 additions & 4 deletions changetext/preliminary_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def corr_ending_s(text):
number = int(number)
parse = [x for x in custom_parse(group2) if {"NOUN", "nomn", "sing"} in x.tag]
assert len(parse) == 1
replacement_string = "{:d} {}".format(number, parse[0].make_agree_with_number(number).word)
replacement_string = f"{number:d} {parse[0].make_agree_with_number(number).word}"
elif group2 in dict_ending_s:
replacement_string = dict_ending_s[group2]
elif " " not in group2:
Expand Down Expand Up @@ -236,8 +236,8 @@ def corr_animal_gender(text, search_result):
animal = search_result.group(1)
if animal not in animal_genders:
return None
else:
return text.replace(search_result.group(0), animal_genders[animal][gender] + ", " + search_result.group(2))

return text.replace(search_result.group(0), animal_genders[animal][gender] + ", " + search_result.group(2))


@preliminary_changes.register(regex=re.compile(r"(он|она|вы)\s+(не\s+)?(имеете?)", flags=re.IGNORECASE))
Expand Down Expand Up @@ -320,7 +320,7 @@ def corr_tags_outer(text, _):
try:
result = corr_tags(text)
except (AssertionError, ValueError) as err:
print("corr_tags() raises exception {!r}:".format(err))
print(f"corr_tags() raises exception {err!r}:")
print(traceback.format_exc())
result = " ".join(
part.strip(" ") if not part.startswith("<") else part.strip("<>").partition(":")[2]
Expand Down
2 changes: 1 addition & 1 deletion changetext/tag_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def corr_tags(text, state=None):

if "get-form" in tags:
if get_index is not None:
raise ValueError("Duplicate <get-form> tag in {!r}".format(text))
raise ValueError(f"Duplicate <get-form> tag in {text!r}")
get_index = len(text_parts)
tags.remove("get-form")
elif "set-form" in tags:
Expand Down
4 changes: 2 additions & 2 deletions changetext/utf16_codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def wrapper(data):
data = data.decode("utf-16-le")
output = func(data)
return output if output is None else output.encode("utf-16-le") + b"\0\0"
else:
return func(data)

return func(data)

return wrapper
24 changes: 12 additions & 12 deletions changetext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
def custom_parse(word: str) -> List[Parse]:
    """Parse *word* with ``morph`` and drop parses carrying any unwanted tag.

    :param word: the word to analyse.
    :return: for words starting with "адамантин" (case-insensitive) the unfiltered
        ``morph.parse(word)`` result; otherwise only those parses whose tag contains
        none of ``unwanted_tags``.
    """
    if word.lower().startswith("адамантин"):
        return morph.parse(word)  # Pymorphy2 thinks that adamantine is a surname and treats it properly

    # Guard-clause form: the filtered result is the single fall-through return.
    return [p for p in morph.parse(word) if all(tag not in p.tag for tag in unwanted_tags)]


def tag_to_set(tag: OpencorporaTag) -> Set[str]:
    """Split the string form of *tag* into a set of individual grammemes.

    ``str(tag)`` is split on commas and each piece is split again on whitespace,
    so a value such as ``"NOUN,inan,masc sing,nomn"`` yields
    ``{"NOUN", "inan", "masc", "sing", "nomn"}``.

    :param tag: object whose ``str()`` is a comma/space separated grammeme list.
    :return: the set of grammeme strings.
    """
    # A set comprehension replaces ``set(sum(lists, []))``: same result, without
    # building quadratically growing intermediate lists.
    return {grammeme for chunk in str(tag).split(",") for grammeme in chunk.split()}


def common_tags(parse: List[Parse]) -> Set[str]:
Expand Down Expand Up @@ -65,7 +65,7 @@ def inflect_collocation(text: str, tags: Set[str]) -> str:
for i, word in enumerate(words[:j]):
parse = custom_parse(word)
if not is_adjective(word, parse):
raise ValueError("{} is not an adjective".format(word))
raise ValueError(f"{word} is not an adjective")
p = next(p for p in parse if {"ADJF"} in p.tag)
p1 = p.inflect(tags)
assert p1 is not None, (p, tags)
Expand All @@ -90,8 +90,8 @@ def split_sentence(text: str) -> Optional[Tuple[str, str]]:
sentence = re_sentence.search(text)
if sentence:
return cast(Tuple[str, str], sentence.groups())
else:
return text, ""

return text, ""


def is_enumeration_delimiter(text: str) -> bool:
Expand Down Expand Up @@ -410,7 +410,7 @@ def get_gender(obj: str, known_tags: Union[None, str, Set[str]] = None) -> Optio
if obj.lower() in gender_exceptions:
return gender_exceptions[obj.lower()]
else:
if len(parse) > 0:
if parse:
gender = pm_gender(parse[0])
for p in parse:
if pm_gender(p) != gender:
Expand All @@ -433,7 +433,7 @@ def get_main_word_gender(text: str) -> Optional[str]:

def parse_as_adjective(adjective: str) -> List[Parse]:
    """Return the parses of *adjective* tagged as full adjective or full participle.

    :param adjective: the word to analyse via ``custom_parse``.
    :return: non-empty list of parses whose tag contains ``"ADJF"`` or ``"PRTF"``.
    :raises AssertionError: if no such parse exists. The exception type is kept as-is
        because callers visible in this project catch ``AssertionError``.
    """
    parse = [p for p in custom_parse(adjective) if "ADJF" in p.tag or "PRTF" in p.tag]
    # When this fails ``parse`` is always ``[]``, so report the offending word as
    # well; the old message could only ever print "parse: []".
    assert parse, f"parse: {parse!r} (no ADJF/PRTF parse for {adjective!r})"
    return parse


Expand Down Expand Up @@ -466,7 +466,7 @@ def inflect_noun(word: str, case: str, orig_form: Union[None, str, Set[str]] = N
if orig_form:
parse = [p for p in parse if orig_form in p.tag]

assert len(parse) > 0
assert parse

p = parse[0]
new_form = p.inflect({case, p.tag.number})
Expand All @@ -477,8 +477,8 @@ def inflect_noun(word: str, case: str, orig_form: Union[None, str, Set[str]] = N
def to_genitive_case_single_noun(word: str) -> str:
    """Return the genitive form of a single noun.

    Words listed in ``gent_case_except`` (looked up in lower case) use the stored
    exception form; everything else is delegated to ``inflect_noun(word, case="gent")``.

    :param word: the noun to put into the genitive case.
    :return: the exception entry or the result of ``inflect_noun``.
    """
    lowered = word.lower()  # lower-case once for both the membership test and the lookup
    if lowered in gent_case_except:
        return gent_case_except[lowered]

    return inflect_noun(word, case="gent")


def to_genitive_case_list(words: List[str]) -> Iterator[str]:
Expand Down Expand Up @@ -516,7 +516,7 @@ def inflect_as_adjective(adj: str, gender: str) -> str:
elif is_adjective(adj):
new_adj = inflect_adjective(adj, gender)
else:
raise ValueError("Cannot inflect {} as adjective".format(adj))
raise ValueError(f"Cannot inflect {adj} as adjective")

return new_adj

Expand Down
Loading