In [17]:
import re

In [4]:
# Sample inputs for the query generators below.
# Each entry is a (channel_name, video_title) pair.
sample_titles = [
    ('Juice WRLD', 'Juice WRLD & The Weeknd - Smile (Official Video)'),
    ('Drake', 'Laugh Now Cry Later'),
    ('Stone Music Entertainment', "ATEEZ (에이티즈) - 'INCEPTION' MV"),
    ('MrSuicideSheep', 'Stonefox - The Garden'),
]

#### Objective

Given a channel name and a video title, this class should produce the search query (or queries) that our `SpotifySearch` uses. The goal is to maximise result accuracy.


An accurate result is defined as a perfect match for the song on Spotify **or** no match if the exact song is not on Spotify.

In [13]:
def split_title(song_tuple):
    """
    Break the video title into its hyphen-separated segments.

    `song_tuple` is indexed as (channel_name, video_title); only the title
    (index 1) is read. Every segment is stripped of surrounding whitespace
    and lower-cased. A title without a hyphen yields a one-element list.
    """
    video_title = song_tuple[1]
    segments = []
    for raw_segment in video_title.split('-'):
        segments.append(raw_segment.strip().lower())
    return segments

In [14]:
def simple_split(song_tuple):
    """
    Build a query from the video title alone.

    Assumes the title carries both the artist name and the song name,
    separated by a hyphen. The hyphen-separated segments produced by
    `split_title` are joined back together with single spaces.
    """
    title_segments = split_title(song_tuple)
    query = ' '.join(title_segments)
    return query

In [26]:
def simple_only_words_split(song_tuple):
    """
    Simple split with a fixed set of special characters removed.

    The title is split into segments with `split_title`, every character in
    the set ``@ _ ! # $ % ^ & * ( ) < > ? / | } { ~ : ' "`` is deleted from
    each segment, and the remaining words are joined with single spaces.
    Characters outside that set (for example square brackets, commas and
    dots) are kept.

    Whitespace is normalised: deleting a character never leaves a double
    space behind, and a segment that becomes empty contributes nothing to
    the result.
    """
    regex = r'[@_!#$%^&*()<>?/\|}{~:\'\"]'
    words = []
    for segment in split_title(song_tuple):
        cleaned = re.sub(regex, '', segment)
        # Deleting a character that stood between two spaces (e.g. " & ")
        # would leave "a  b"; splitting on whitespace collapses that and also
        # drops segments that consisted only of removed characters.
        words.extend(cleaned.split())
    return ' '.join(words)

----

In [36]:
# Tokens that `stopword_removal` drops by default. They are compared by exact
# equality against whitespace-separated tokens, so entries are lower-case.
STOPWORDS = ['official', 'video', 'mv', 'ft.', 'exclusive']

In [37]:
def stopword_removal(title_string, stopwords=STOPWORDS):
    """
    Drop every stopword token from a string.

    The string is split on whitespace, tokens found in `stopwords` are
    discarded, and the remaining tokens are re-joined with single spaces.
    The comparison is exact and case-sensitive, so a token with attached
    punctuation such as "(official" is kept.
    """
    kept_tokens = [
        token for token in title_string.split() if token not in stopwords
    ]
    return ' '.join(kept_tokens)

----

In [47]:
def add_channel_title(song_tuple, base_generator=simple_only_words_split):
    """
    Prefix the generated title query with the lower-cased channel name.

    Useful for cases where the artist posts a song without their name in
    the title. `base_generator` is called with `song_tuple` to build the
    title part of the query; the channel name is `song_tuple[0]`.
    """
    channel_name = song_tuple[0].lower()
    title_query = base_generator(song_tuple)
    return ' '.join((channel_name, title_query))

----

In [36]:
# Tokens that `stopword_removal` drops by default. They are compared by exact
# equality against whitespace-separated tokens, so entries are lower-case.
STOPWORDS = ['official', 'video', 'mv', 'ft.', 'exclusive']

In [37]:
def stopword_removal(title_string, stopwords=STOPWORDS):
    """
    Drop every stopword token from a string.

    The string is split on whitespace, tokens found in `stopwords` are
    discarded, and the remaining tokens are re-joined with single spaces.
    The comparison is exact and case-sensitive, so a token with attached
    punctuation such as "(official" is kept.
    """
    kept_tokens = [
        token for token in title_string.split() if token not in stopwords
    ]
    return ' '.join(kept_tokens)

----

In [50]:
def generate_queries(song_tuple):
    """
    Generates queries for a song tuple of (artist, video title).

    Every generator is applied to `song_tuple` and its output is passed
    through every modifier. The returned list is ordered generator-first:
    all modifier results for the first generator, then the second, and so on.
    Identical queries are not de-duplicated.
    """
    generators = (
        add_channel_title,
        simple_only_words_split,
        simple_split,
    )
    modifiers = (
        stopword_removal,
    )

    return [
        modifier(generator(song_tuple))
        for generator in generators
        for modifier in modifiers
    ]

In [51]:
# Materialise the results as a list: a bare `map` object is a one-shot
# iterator, so `res` would be empty for any later use once it was displayed.
res = [generate_queries(song_tuple) for song_tuple in sample_titles]
res

[['juice wrld juice wrld the weeknd smile',
  'juice wrld the weeknd smile',
  'juice wrld & the weeknd smile (official video)'],
 ['drake laugh now cry later', 'laugh now cry later', 'laugh now cry later'],
 ['stone music entertainment ateez 에이티즈 inception',
  'ateez 에이티즈 inception',
  "ateez (에이티즈) 'inception'"],
 ['mrsuicidesheep stonefox the garden',
  'stonefox the garden',
  'stonefox the garden']]