Skip to content

Commit

Permalink
Handle more exceptions in title tag normalization code - 90
Browse files Browse the repository at this point in the history
  • Loading branch information
desbma committed Jan 13, 2021
1 parent 18cc2cf commit cdd433e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
27 changes: 14 additions & 13 deletions amg/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ def __init__(self, artist: str, album: str):
self.registerCleaner(RegexSuffixCleaner(r" \| .*$", suffixes=" | ", execute_once=True))

# build list of common useless expressions
expressions = []
words1 = ("", "explicit", "full", "new", "official", "stop motion", "the new")
expressions = set()
words1 = ("", "explicit", "full", "including", "new", "official", "stop motion", "the new")
words2 = (
"",
"360",
Expand Down Expand Up @@ -167,6 +167,7 @@ def __init__(self, artist: str, album: str):
"hq",
"in 4k",
"lyric",
"lyrics",
"only",
"premier",
"premiere",
Expand All @@ -191,10 +192,10 @@ def __init__(self, artist: str, album: str):
if w1 or w2:
for rsep in (" ", "-", ""):
rpart = rsep.join((w2, w3)).strip()
expressions.append(" ".join((w1, rpart)).strip())
expressions.add(" ".join((w1, rpart)).strip())
else:
expressions.append(w3)
expressions.extend(
expressions.add(w3)
expressions.update(
(
"full ep",
"full-length",
Expand All @@ -216,11 +217,11 @@ def __init__(self, artist: str, album: str):
)
year = datetime.datetime.today().year
for y in range(year - 5, year + 1):
expressions.append(str(y))
expressions.add(str(y))
for month_name, month_abbr in zip(MONTH_NAMES, MONTH_NAMES_ABBR):
expressions.append(f"{month_name} {y}")
expressions.append(f"{month_abbr} {y}")
expressions.sort(key=len, reverse=True)
expressions.add(f"{month_name} {y}")
expressions.add(f"{month_abbr} {y}")
expressions = list(sorted(expressions, key=len, reverse=True))
expressions.remove("song")
suffix_cleaner = SimpleSuffixCleaner()
for expression in expressions:
Expand Down Expand Up @@ -273,7 +274,7 @@ def cleanup(self, title: str) -> str:
else:
new_title = cleaner.cleanup(cur_title, *args)
if new_title and (new_title != cur_title):
logging.getLogger().debug(
print(
f"{cleaner.__class__.__name__} changed title tag: "
f"{repr(cur_title)} -> {repr(new_title)}"
)
Expand Down Expand Up @@ -340,18 +341,18 @@ def rclean(self, s: str) -> str:
def lclean(self, s: str) -> str:
""" Remove garbage at left of string. """
r = s.lstrip(__class__.LCLEAN_CHARS)
c = unidecode.unidecode_expect_ascii(r).lstrip(__class__.LCLEAN_CHARS)
c = unidecode.unidecode_expect_ascii(r.lstrip(__class__.LCLEAN_CHARS)).lstrip(__class__.LCLEAN_CHARS)
if c != r:
r = c
return r

@functools.lru_cache(maxsize=32768)
def rnorm(self, s: str) -> str:
return unidecode.unidecode_expect_ascii(s).rstrip(string.punctuation).lower()
return unidecode.unidecode_expect_ascii(s.rstrip(string.punctuation)).rstrip(string.punctuation).lower()

@functools.lru_cache(maxsize=32768)
def lnorm(self, s: str) -> str:
return unidecode.unidecode_expect_ascii(s).lstrip(string.punctuation).lower()
return unidecode.unidecode_expect_ascii(s.lstrip(string.punctuation)).lstrip(string.punctuation).lower()

def startslike(self, s: str, l: str, *, sep: Optional[str] = None) -> bool:
""" Return True if start of string s is similar to l. """
Expand Down
6 changes: 6 additions & 0 deletions tests/normalize_title_tag.json
Original file line number Diff line number Diff line change
Expand Up @@ -784,5 +784,11 @@
"artist": "AthanaTheos",
"album": "Prophetic Era (Or How Yahveh Became the One)",
"result": "(R)evolution, Revelation"
},
{
"source": "NICARUS - Are You Afraid To Die Alone **including lyrics**",
"artist": "Nicarus",
"album": "Coal People Coal Puppets",
"result": "Are You Afraid to Die Alone"
}
]

0 comments on commit cdd433e

Please sign in to comment.