Skip to content

Commit

Permalink
Handle more exceptions in title tag normalization code - 62
Browse files Browse the repository at this point in the history
  • Loading branch information
desbma committed Apr 9, 2018
1 parent dd8d3de commit e39345e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 5 deletions.
5 changes: 3 additions & 2 deletions amg/sanitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ def normalize_tag_case(s):
new_words = []
prev_word = None
roman_letters = frozenset("IVXLCDM")
punct_followed_all_uppercase = set(".-")
punct_followed_uppercase = set(string.punctuation)
punct_followed_uppercase.remove("'")
for i, old_word in enumerate(old_words):
if (((prev_word is not None) and
((prev_word[-1] in punct_followed_uppercase) and old_word[0].isupper())) or
((prev_word[-1] in punct_followed_all_uppercase) and old_word[0].isupper())) or
("." in old_word)):
new_word = old_word
elif old_word[0] in "(-":
Expand All @@ -40,7 +41,7 @@ def normalize_tag_case(s):
new_word = "'".join((old_word[0].lower(), old_word[2:].capitalize()))
else:
new_word = old_word
elif (i != 0) and (old_word.lower() in TAG_LOWERCASE_WORDS):
elif (i != 0) and (old_word.lower() in TAG_LOWERCASE_WORDS) and (prev_word[-1] not in punct_followed_uppercase):
new_word = old_word.lower()
elif all(map(roman_letters.__contains__,
old_word.strip(string.punctuation))):
Expand Down
6 changes: 5 additions & 1 deletion amg/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def __init__(self, artist, album):
expressions.extend(("pre-orders available", "preorders available", "hd",
"official", "pre-listening", "prelistening", "trollzorn",
"uncensored", "s/t", "sw exclusive",
"transcending obscurity india")) # crappy label suffixes TODO how to handle that?
"transcending obscurity india", # crappy label suffixes TODO how to handle that?
"trailer for the upcoming album"))
year = datetime.datetime.today().year
for y in range(year - 5, year + 1):
expressions.append(str(y))
Expand Down Expand Up @@ -123,6 +124,9 @@ def __init__(self, artist, album):
# normalize case
self.registerCleaner(FunctionCleaner(sanitize.normalize_tag_case, execute_once=True))

# post normalize case fix
self.registerCleaner(FunctionCleaner(lambda x: x.replace("PT.", "pt."), execute_once=True))

def registerCleaner(self, cleaner, args=()):
assert(isinstance(cleaner, TitleCleanerBase))
self.cleaners.append((cleaner, args))
Expand Down
4 changes: 3 additions & 1 deletion tests/test_sanitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def test_normalize_tag_case(self):
"VI VI VI": "VI VI VI",
"Episode VI: name": "Episode VI: Name",
"Matsya - The Fish": "Matsya - The Fish",
"I'M ALIVE!": "I'm Alive!"}
"I'M ALIVE!": "I'm Alive!",
"MARK OF THE BEAST PT. 2: SCION OF DARKNESS": "Mark of the Beast PT. 2: Scion of Darkness",
"BZZ: THE": "Bzz: The"}
for before, after in references.items():
self.assertEqual(sanitize.normalize_tag_case(before), after)
10 changes: 9 additions & 1 deletion tests/test_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,15 @@ def test_normalize_title_tag(self):
("Altars of Grief - Desolation [From album; Iris; 2018]",
"Altars of Grief",
"Iris",
"Desolation"))
"Desolation"),
("MONOTHEIST - MARK OF THE BEAST PT. 2: SCION OF DARKNESS",
"Monotheist",
"Scourge",
"Mark of the Beast pt. 2: Scion of Darkness"),
("SHADOWKEEP - Trailer for the upcoming album \"ShadowKeep\" (PURE STEEL RECORDS)",
"Shadowkeep",
"Shadowkeep",
"Shadowkeep"))

for source, artist, album, expected_result in references:
with self.subTest(source=source, expected_result=expected_result, artist=artist, album=album):
Expand Down

0 comments on commit e39345e

Please sign in to comment.