In [17]:
import re

In [4]:
sample_titles = [
    #
    # (channel_name, video_title)
    #
    
    ('Juice WRLD', 'Juice WRLD & The Weeknd - Smile (Official Video)'),
    ('Drake', 'Laugh Now Cry Later'),
    ('Stone Music Entertainment', "ATEEZ (에이티즈) - 'INCEPTION' MV"),
    ('MrSuicideSheep', 'Stonefox - The Garden')
]

#### Objective

Providing channel name & video title to this class should produce search query(ies) that our `SpotifySearch` uses. Maximise result accuracy.


Accurate result is defined as a perfect match for the song on Spotify **or** no match if the exact song is not on Spotify.

In [13]:
def split_title(song_tuple):
    """
    Split by hyphen
    """
    return [i.strip().lower() for i in song_tuple[1].split('-')]

In [14]:
def simple_split(song_tuple):
    """
    Simple split - assume artist name and song name in video title and split by hyphen
    """
    return ' '.join(split_title(song_tuple))

In [22]:
def simple_only_words_split(song_tuple):
    """
    Simple split with no special characters (brackets, colons, etc.)
    """
    regex = r'[@_!#$%^&*()<>?/\|}{~:]'
    res = []
    for word in split_title(song_tuple):
        word = re.sub(regex, '', word)
        res.append(word.strip())
    return ' '.join(res)

In [23]:
def generate_queries(song_tuple):
    """
    Genereates queries for a song tuple of (artist, video title)
    """
    queries = []
    
    generators = [
        simple_split,
        simple_only_words_split,
    ] 
    for generator in generators:
        queries.append(generator(song_tuple))
    
    return queries

In [24]:
res = map(generate_queries, sample_titles)
list(res)

[['juice wrld & the weeknd smile (official video)',
  'juice wrld  the weeknd smile official video'],
 ['laugh now cry later', 'laugh now cry later'],
 ["ateez (에이티즈) 'inception' mv", "ateez 에이티즈 'inception' mv"],
 ['stonefox the garden', 'stonefox the garden']]