# "Discover your next favorite" Interface

Authors: Liang-Yun Cheng and Federico Cimini

### Before running!

- Make sure Cython is installed (required dependency for py_stringsimjoin library)
- Place “tracks_feature_df.csv” and “logo.png” in the same folder as “Interface.ipynb” 
- Re-running the Spotify extraction might take a while; we recommend running only the “Interface.ipynb” file.
- Install the following packages:

In [None]:
!pip install sv-ttk
!pip install pillow
!pip3 install py_stringsimjoin

### Import Dataframe

In [1]:
import pandas as pd

# Load the song catalogue (the CSV obtained from the Spotify_DataExtraction step).
# The interface code below reads these columns from it: track_id, track_name,
# artist_name, cluster, genres, release_decade, danceability, energy,
# instrumentalness, mode, track_popularity and uri.
df = pd.read_csv("tracks_feature_df.csv")

### Interface

Run the cell below to launch the interface.

If the window is closed, you'll need to restart the kernel before running it again.

Enjoy!

In [None]:
import py_stringsimjoin as ssj
import py_stringmatching as sm
from PIL import ImageTk, Image
import tkinter as tk
from tkinter import ttk
from tkinter.messagebox import showinfo
from tkinter.messagebox import showerror
import sv_ttk

def features_confirmed(genre_clicked, result_genre_label,
                       period_selected, result_period_label,
                       dance_selected_min, dance_selected_max, result_dance_label,
                       energy_selected_min, energy_selected_max, result_energy_label,
                       vocal_clicked, result_vocal_label,
                       mode_clicked, result_mode_label
                      ):
    """Callback for the "Confirm My Tastes" button.

    Reads the current value of every feature control, stores it in the
    matching module-level global (genre, period, dance_min, dance_max,
    energy_min, energy_max, vocal, mode) and writes a confirmation message
    into that feature's result label.

    Parameters:
        *_clicked / *_selected*: objects exposing ``get()`` that hold the
            value chosen in the corresponding menu or slider.
        result_*_label: labels whose text is replaced with the confirmation
            (or range-error) message for that feature.

    Slider values are rounded to two decimals. For "Major"/"Minor" the global
    ``mode`` is stored as 1/0; any other choice is stored unchanged.
    Returns nothing. A ValueError raised along the way is shown in an error
    dialog instead of propagating.
    """
    global genre, period, vocal, mode
    global dance_min, dance_max, energy_min, energy_max

    bad_range_message = 'Invalid input, min value must be less than max value.'

    try:
        # genre
        genre = genre_clicked.get()
        result_genre_label.config(text=f' Your selected genre is: {genre}')

        # time period
        period = period_selected.get()
        result_period_label.config(text=f' Your selected time period is: {period}')

        # danceability range
        dance_min = round(dance_selected_min.get(), 2)
        dance_max = round(dance_selected_max.get(), 2)
        dance_message = (
            bad_range_message
            if dance_min > dance_max
            else f' Your selected danceability is between {dance_min} and {dance_max}'
        )
        result_dance_label.config(text=dance_message)

        # energy range
        energy_min = round(energy_selected_min.get(), 2)
        energy_max = round(energy_selected_max.get(), 2)
        energy_message = (
            bad_range_message
            if energy_min > energy_max
            else f' Your selected energy is between {energy_min} and {energy_max}'
        )
        result_energy_label.configure(text=energy_message)

        # vocals: any unlisted choice means "no preference"
        vocal = vocal_clicked.get()
        vocal_messages = {
            "Songs with vocals": 'You selected songs with vocals.',
            "Songs without vocals": 'You selected songs without vocals.',
        }
        result_vocal_label.config(
            text=vocal_messages.get(vocal, 'You selected songs with any amount of vocals.')
        )

        # mode: Major/Minor are stored numerically, anything else is kept as chosen
        mode_choice = mode_clicked.get()
        mode_table = {
            "Major": (1, 'You selected songs in major scale.'),
            "Minor": (0, 'You selected songs in minor scale.'),
        }
        mode, mode_message = mode_table.get(
            mode_choice, (mode_choice, 'You selected songs in any scale.')
        )
        result_mode_label.config(text=mode_message)

    except ValueError as error:
        showerror(title='Error', message=error)


def button_song_clicked(song_box, result_song_label):
    """Callback for the song "Submit" button.

    Stores the lower-cased contents of ``song_box`` in the global ``song`` and
    looks it up (case-insensitively) in the ``track_name`` column of ``df``.

    Parameters:
        song_box: entry widget holding the title typed by the user.
        result_song_label: label that receives the outcome text.

    When at least one row matches, the label confirms the song. Otherwise the
    label lists the titles suggested by SongTitleAutoCorrect. Returns nothing;
    a ValueError is reported through an error dialog.
    """
    global song
    try:
        song = song_box.get().lower()

        # exact (case-insensitive) title match against the catalogue
        matches = df[df["track_name"].str.lower() == song]
        if len(matches) >= 1:
            result_song_label.config(text=f' Your favorite song is: {song}')
            return

        # no match: offer the closest titles instead, one per line
        suggestions = SongTitleAutoCorrect(song, df)
        heading = f' Ooops...Couldn\'t find [{song}] in our database, Are you looking for ... \n\n'
        suggestion_lines = [f' {title} \n' for title in suggestions["Track Name"]]
        result_song_label.config(text=heading + "".join(suggestion_lines))

    except ValueError as error:
        showerror(title='Error', message=error)


def SongTitleAutoCorrect(song, tracks_feature_df):
    """Suggest catalogue titles that resemble the title typed by the user.

    The typed title and every catalogue title are lower-cased, tokenized into
    3-gram sets and compared with a Jaccard similarity join (threshold 0.2).

    Parameters:
        song: title entered by the user.
        tracks_feature_df: catalogue DataFrame; the ``track_id``,
            ``track_name`` and ``artist_name`` columns are used. It is not
            modified.

    Returns:
        DataFrame with columns ``Track Name``, ``Artist Name`` and
        ``Similarity Score`` (similarity as a rounded percentage), ordered by
        descending score and limited to the first five rows.
    """
    # 3-gram tokenizer that returns token sets
    tokenizer = sm.QgramTokenizer(qval=3, return_set=True)

    # single-row table holding the user's (lower-cased) title
    song_df = pd.DataFrame({'Target': [song.lower()]})

    # Build the lower-cased join column on a narrow copy so the caller's
    # DataFrame does not silently gain a 'track_name_lower' column.
    candidates = tracks_feature_df[['track_id', 'track_name', 'artist_name']].assign(
        track_name_lower=tracks_feature_df['track_name'].str.lower()
    )

    # join the target title with every catalogue title
    output_pairs = ssj.jaccard_join(ltable=candidates,
                                    rtable=song_df,
                                    l_key_attr='track_id',
                                    r_key_attr='Target',
                                    l_join_attr='track_name_lower',
                                    r_join_attr='Target',
                                    tokenizer=tokenizer,
                                    threshold=0.2,
                                    l_out_attrs=['track_name', 'artist_name'],
                                    r_out_attrs=['Target'])

    # best matches first, one row per distinct (title, artist, score)
    ranked = output_pairs.sort_values(by=['_sim_score'], ascending=False)
    result = ranked[['l_track_name', 'l_artist_name', '_sim_score']].drop_duplicates()
    result = result.rename(columns={'l_track_name': 'Track Name',
                                    'l_artist_name': 'Artist Name',
                                    '_sim_score': 'Similarity Score'})
    # express the similarity as a whole-number percentage
    result['Similarity Score'] = (result['Similarity Score'] * 100).round()
    return result.head()

def _valid_range(low, high):
    """Return True when both bounds are numbers and low does not exceed high."""
    return (
        isinstance(low, (int, float))
        and isinstance(high, (int, float))
        and low <= high
    )


def submit_button_clicked():
    """Callback for the "GET RECOMMENDED SONGS" button.

    Filters the catalogue ``df`` using the module-level selections (song,
    genre, period, dance_min/dance_max, energy_min/energy_max, vocal, mode),
    sorts the matches by ``track_popularity`` (descending), stores the first
    ten rows in the global ``result_df`` and writes the total count plus those
    ten songs into ``final_result_label``.

    A selection of "Any", "None" or "" (the value left by Reset) applies no
    filter; a slider range that is unset or has min > max is ignored.
    Returns nothing. If the selections have never been set, or a ValueError
    occurs while filtering, an error dialog is shown instead.
    """
    global result_df

    # The selections only exist after the song "Submit" and "Confirm My
    # Tastes" callbacks have run; report that clearly instead of letting a
    # NameError escape from the Tk callback.
    try:
        _ = (song, genre, period, dance_min, dance_max,
             energy_min, energy_max, vocal, mode)
    except NameError:
        showerror(title='Error',
                  message='Please press "Submit" for your song (it may be left blank) '
                          'and "Confirm My Tastes" before requesting recommendations.')
        return

    unfiltered_choices = ("Any", "None", "")

    try:
        # cluster of the favourite song; a negative value means "no song given"
        if song == "":
            cluster_filter = -1
        else:
            cluster_filter = df[df["track_name"].str.lower() == song]["cluster"].max()

        # Keep only songs from the same cluster. When no title matched,
        # max() of the empty selection is NaN, the comparison is False and
        # the whole catalogue is used.
        if cluster_filter >= 0:
            matches = df[df["cluster"] == cluster_filter]
        else:
            matches = df

        # genre: literal substring match against the lower-cased genre label
        if genre not in unfiltered_choices:
            matches = matches[matches["genres"].str.contains(genre.lower(), regex=False, na=False)]

        # period: "1990s" -> 1990
        if period not in unfiltered_choices:
            matches = matches[matches["release_decade"] == int(period[:-1])]

        # danceability: sliders run 0-100, the column is compared on a 0-1 scale
        if _valid_range(dance_min, dance_max):
            matches = matches[(matches["danceability"] >= float(dance_min) / 100)
                              & (matches["danceability"] <= float(dance_max) / 100)]

        # energy: same scaling as danceability
        if _valid_range(energy_min, energy_max):
            matches = matches[(matches["energy"] >= float(energy_min) / 100)
                              & (matches["energy"] <= float(energy_max) / 100)]

        # vocals: split on an instrumentalness of 0.8
        if vocal == "Songs with vocals":
            matches = matches[matches["instrumentalness"] <= 0.8]
        elif vocal == "Songs without vocals":
            matches = matches[matches["instrumentalness"] > 0.8]

        # mode: 1 = major, 0 = minor; any other value applies no filter
        if mode in (0, 1):
            matches = matches[matches["mode"] == int(mode)]

        # Sort into a new frame: an in-place sort would act on a filtered
        # slice, or on the global catalogue itself when no filter applied.
        matches = matches.sort_values("track_popularity", ascending=False)

        total_number_songs = len(matches)
        header = f'There are {total_number_songs} recommended songs. Here is Top 10 ordered by popularity:\n\n'

        # keep and display the ten most popular songs
        result_df = matches.head(10)
        song_lines = [
            f'{rank} "{name}" by {artist} --> {uri} \n'
            for rank, (name, artist, uri) in enumerate(
                zip(result_df["track_name"], result_df["artist_name"], result_df["uri"]),
                start=1,
            )
        ]
        final_result_label.config(text=header + "".join(song_lines))

    except ValueError as error:
        showerror(title='Error', message=error)
        
def reset_variables():
    """Callback for the "Reset" button.

    Sets every module-level selection (song, genre, period, dance_min,
    dance_max, energy_min, energy_max, vocal, mode) and ``result_df`` to the
    empty string, then blanks the text of every result label. Returns nothing.
    """
    global song, genre, period, vocal, mode
    global dance_min, dance_max, energy_min, energy_max
    global result_df

    # clear the stored selections
    song = genre = period = ""
    dance_min = dance_max = ""
    energy_min = energy_max = ""
    result_df = ""
    vocal = mode = ""

    # clear everything currently displayed
    labels_to_clear = (
        result_song_label,
        result_genre_label,
        result_period_label,
        result_dance_label,
        result_energy_label,
        result_vocal_label,
        result_mode_label,
        final_result_label,
    )
    for label in labels_to_clear:
        label.config(text = "")
    

    
    
######## Interface initial configuration ########
# Creates the main window, centres it on the screen, and builds the single
# frame (`frm`) that every widget below is gridded into.

root = tk.Tk()
root.title('Spotify Recommender')
sv_ttk.set_theme("dark")

# size of the window, in pixels
window_width = 1200
window_height = 600

# get the screen dimensions
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()

# top-left corner that puts the window in the middle of the screen
center_x = int(screen_width/2 - window_width / 2)
center_y = int(screen_height/2 - window_height / 2)

# apply size and position using the "WIDTHxHEIGHT+X+Y" geometry string
root.geometry(f'{window_width}x{window_height}+{center_x}+{center_y}')

# define the frame that holds all widgets
frm = ttk.Frame(root, padding=10)
frm.grid()

## Logo ##
# 'logo.png' is opened relative to the working directory.
# NOTE(review): image_for_label is kept in a module-level name; Tk image
# labels presumably do not keep the PhotoImage alive themselves - confirm
# before moving this code into a function.
image_logo = Image.open('logo.png')
image_for_label = ImageTk.PhotoImage(image_logo, master=root)
image_label = ttk.Label(frm, image = image_for_label)
image_label.grid(column=3, row= 10, columnspan = 2, rowspan = 5, sticky = tk.NW)




######## Widgets and Variables ########


##### Song ######
# Row 0: prompt, text entry and "Submit" button; row 1: lookup result.

# label with instructions
ttk.Label(frm, text='Enter a song you like:', font=("Helvetica", 14)).grid(column=0, row=0, sticky = tk.W, pady = 2)

# text variable (initially empty) and the entry box bound to it
fav_song = tk.StringVar()
fav_song.set("")
song_box = ttk.Entry(frm, textvariable=fav_song)
song_box.grid(column=1, row=0, sticky = tk.W, pady = 2)

# submit button: the lambda looks up result_song_label when clicked, so the
# label can be created after the button
ttk.Button(frm, text="Submit", command=lambda: button_song_clicked(song_box, result_song_label))\
    .grid(column=3, row=0, sticky = tk.W, pady = 2)

# result label, filled in by button_song_clicked
result_song_label = ttk.Label(frm, font=("Helvetica", 14))
result_song_label.grid(row=1, column=1, columnspan=3, sticky = tk.W, pady = 2)


##### Genre ######
# Row 2: prompt, drop-down menu and confirmation label.

# list of genres passed to the menu
# NOTE(review): ttk.OptionMenu's first argument after the variable is its
# `default`, so the leading "None" presumably becomes the initially shown
# value rather than a menu entry (and would replace the "Any" set below) -
# confirm against the tkinter.ttk documentation.
genres = ["None", "Any", "Pop", "Rock", "Classic", "Mellow", "Rap", "Soul", "Hip hop", "Disco", "Jazz", "Blues", "Folk", "Romantic", "New Wave", "Dance"]

# label with instructions
ttk.Label(frm, text="What genre do you want?", font=("Helvetica", 14)).grid(column=0, row=2, sticky = tk.W, pady = 2)

# variable holding the chosen genre, and the drop-down menu bound to it
genre_clicked = tk.StringVar()
genre_clicked.set("Any")
genre_drop = ttk.OptionMenu(frm, genre_clicked, *genres)
genre_drop.grid(column=1, row=2,  pady = 2) # center, do not need sticky

# result label, filled in by features_confirmed
result_genre_label = ttk.Label(frm, font=("Helvetica", 14))
result_genre_label.grid(column=3, row=2, columnspan=3, sticky = tk.W, pady = 2)


##### Period ######
# Row 3: prompt, drop-down menu and confirmation label.

# list of periods passed to the menu; submit_button_clicked strips the
# trailing "s" and compares the remaining number with release_decade
# NOTE(review): as with any ttk.OptionMenu, the leading "None" is presumably
# consumed as the menu's `default` - confirm against the tkinter.ttk docs.
periods = ["None","Any","1960s", "1970s", "1980s", "1990s", "2000s", "2010s", "2020s"]

# label with instructions
ttk.Label(frm, text="What period do you want?", font=("Helvetica", 14)).grid(column=0, row=3, sticky = tk.W, pady = 2)

# variable holding the chosen period, and the drop-down menu bound to it
period_selected = tk.StringVar()
period_selected.set("Any")
period_drop = ttk.OptionMenu(frm, period_selected, *periods)
period_drop.grid(column=1, row=3, pady = 2) #center middle, do not need sticky

# result label, filled in by features_confirmed
result_period_label = ttk.Label(frm, font=("Helvetica", 14))
result_period_label.grid(column=3, row=3, columnspan=3, sticky = tk.W, pady = 2)


##### Danceability ######
# Rows 4-5: a min and a max slider (0-100) plus the confirmation label.

# variables tracking the current value of each slider
dance_selected_min = tk.DoubleVar()
dance_selected_max = tk.DoubleVar()

# Section heading and one caption per slider. Each widget is created first
# and gridded in a separate call: grid() returns None, so chaining it onto
# the constructor would bind the name to None instead of the widget.
dance_slider_label = ttk.Label(frm, text="How much do you want to dance? ", font=("Helvetica", 14))
dance_slider_label.grid(column=0, row=4, sticky = tk.W, pady = 2)
ttk.Label(frm, text="Min Danceability: ", font=("Helvetica", 12)) \
    .grid(column=1, row=4, columnspan = 2, sticky = tk.W, pady = 2)
ttk.Label(frm, text="Max Danceability: ", font=("Helvetica", 12)) \
    .grid(column=1, row=5, columnspan = 2, sticky = tk.W, pady = 2)

# min slider: starts at the DoubleVar's initial value
dance_slider_min = ttk.Scale(
    frm,
    from_ = 0,
    to = 100,
    orient = 'horizontal',
    variable = dance_selected_min
)
dance_slider_min.grid(column = 1, row = 4, sticky = tk.E, pady = 2)

# max slider: starts at 100 so the default range covers everything
dance_slider_max = ttk.Scale(
    frm,
    from_ = 0,
    to = 100,
    orient = 'horizontal',
    variable = dance_selected_max
)
dance_slider_max.set(100)
dance_slider_max.grid(column = 1, row = 5, sticky = tk.E, pady = 2)

# result label, filled in by features_confirmed
result_dance_label = ttk.Label(frm,
                                font=("Helvetica", 14))
result_dance_label.grid(column=3, row=5, columnspan=3, sticky = tk.W, pady = 2)


##### Energy ######
# Rows 6-7: a min and a max slider (0-100) plus the confirmation label.

# variables tracking the current value of each slider
energy_selected_min = tk.DoubleVar()
energy_selected_max = tk.DoubleVar()

# Section heading and one caption per slider. Each widget is created first
# and gridded in a separate call: grid() returns None, so chaining it onto
# the constructor would bind the name to None instead of the widget.
energy_slider_label = ttk.Label(frm, text="What's your energy level? ", font=("Helvetica", 14))
energy_slider_label.grid(column=0, row=6, sticky = tk.W, pady = 2)
ttk.Label(frm, text="Min Energy: ", font=("Helvetica", 12)) \
    .grid(column=1, row=6, columnspan = 2, sticky = tk.W, pady = 2)
ttk.Label(frm, text="Max Energy: ", font=("Helvetica", 12)) \
    .grid(column=1, row=7, columnspan = 2, sticky = tk.W, pady = 2)

# min slider: starts at the DoubleVar's initial value
energy_slider_min = ttk.Scale(
    frm,
    from_ = 0,
    to = 100,
    orient = 'horizontal',
    variable = energy_selected_min
)
energy_slider_min.grid(column = 1, row = 6, sticky = tk.E, pady = 2)

# max slider: starts at 100 so the default range covers everything
energy_slider_max = ttk.Scale(
    frm,
    from_ = 0,
    to = 100,
    orient = 'horizontal',
    variable = energy_selected_max
)
energy_slider_max.set(100)
energy_slider_max.grid(column = 1, row = 7, sticky = tk.E, pady = 2)

# result label, filled in by features_confirmed
result_energy_label = ttk.Label(frm,
                                font=("Helvetica", 14))
result_energy_label.grid(column=3, row=7, columnspan=3, sticky = tk.W, pady = 2)


##### Vocals ######
# Row 8: prompt, drop-down menu and confirmation label.

# list of options passed to the menu; the two "Songs ..." strings are the
# exact values compared in features_confirmed and submit_button_clicked
# NOTE(review): the leading "None" is presumably consumed as the
# ttk.OptionMenu `default` - confirm against the tkinter.ttk docs.
vocals = ["None", "Any", "Songs with vocals", "Songs without vocals"]

# variable holding the chosen option
vocal_clicked = tk.StringVar()
vocal_clicked.set("Any")

# label with instructions
ttk.Label(frm, text="Do you want songs with vocals?", font=("Helvetica", 14)).grid(column=0, row=8, sticky = tk.W, pady = 2)

# drop down menu bound to vocal_clicked
vocal_drop = ttk.OptionMenu(frm, vocal_clicked, *vocals)
vocal_drop.grid(column=1, row=8, pady = 2) #center, no sticky

# result label, filled in by features_confirmed
result_vocal_label = ttk.Label(frm, font=("Helvetica", 14))
result_vocal_label.grid(column=3, row=8, columnspan=3, sticky = tk.W, pady = 2)


##### Mode ######
# Row 9: prompt, drop-down menu and confirmation label.

# list of options passed to the menu; features_confirmed maps "Major" to 1
# and "Minor" to 0 and keeps any other choice as is
# NOTE(review): the leading "None" is presumably consumed as the
# ttk.OptionMenu `default` - confirm against the tkinter.ttk docs.
modes = ["None", "Any", "Major", "Minor"]

# variable holding the chosen option
mode_clicked = tk.StringVar()
mode_clicked.set("Any")

# label with instructions
ttk.Label(frm, text="Do you want songs to be in major or minor scale?", font=("Helvetica", 14)).grid(column=0, row=9, sticky = tk.W, pady = 2)

# drop down menu bound to mode_clicked
mode_drop = ttk.OptionMenu(frm, mode_clicked, *modes)
mode_drop.grid(column=1, row=9, pady = 2)

# result label, filled in by features_confirmed
result_mode_label = ttk.Label(frm, font=("Helvetica", 14))
result_mode_label.grid(column=3, row=9, columnspan=3, sticky = tk.W, pady = 2)


##### Confirm Features ######

# Button (row 10) that confirms all feature selections at once: on click it
# hands every feature variable and its result label to features_confirmed,
# which stores the choices in module-level globals and updates the labels.
ttk.Button(frm, text="Confirm My Tastes", command=lambda:\
                       features_confirmed(genre_clicked, result_genre_label,
                       period_selected, result_period_label,
                       dance_selected_min, dance_selected_max, result_dance_label,
                       energy_selected_min, energy_selected_max, result_energy_label,
                       vocal_clicked, result_vocal_label,
                       mode_clicked, result_mode_label))\
    .grid(column=1, row=10, pady = 2)


##### RESULTS ######

# initialize text list
# NOTE(review): `text` is not referenced anywhere else in the visible code -
# confirm whether it is still needed.
text = []

# button (row 11) that filters the catalogue and displays the results
ttk.Button(frm, text="GET RECOMMENDED SONGS", command=lambda: submit_button_clicked()) \
    .grid(column=1, row=11, pady = 2)

# label (from row 12 down) that submit_button_clicked fills with the
# recommendation list and reset_variables clears
final_result_label = ttk.Label(frm, font=("Helvetica", 14))
final_result_label.grid(column=0, row=12, columnspan=3, rowspan=11, sticky = tk.NW)


##### Reset and Quit #####

# "Reset" clears the stored selections and result labels via reset_variables;
# "Quit" destroys the root window, which ends the main loop below.
ttk.Button(frm, text="Reset", command=lambda: reset_variables())\
   .grid(column=6, row=0, sticky = tk.E, padx = 5, pady = 2)
ttk.Button(frm, text="Quit", command=root.destroy)\
   .grid(column=7, row=0, sticky = tk.E, pady = 2)


##### Close loop #####

# start the Tk event loop; this call blocks until the window is closed
root.mainloop()


  projected_dataframe = dataframe[proj_attrs].dropna(0,
  projected_dataframe = dataframe[proj_attrs].dropna(0,
0% [#] 100% | ETA: 00:00:00
Total time elapsed: 00:00:00
