In [None]:
pip install spotipy

In [None]:
# import all necessary packages
import os

import IPython
import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import spotipy
import spotipy.util as util
from IPython.display import display
from IPython.display import HTML
from IPython.display import Javascript, display
from IPython.display import Javascript
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from spotipy.oauth2 import SpotifyClientCredentials

# Ask the notebook front end to run the cells at indices 1 through 5
run_next_cells = Javascript('IPython.notebook.execute_cells([1,2,3,4,5])')
display(run_next_cells)

In [None]:
# Keep the UI clean: the script below hides every code input once the page is
# ready and defines code_toggle(), which the link in the text calls to flip
# the inputs between hidden and shown.
CODE_TOGGLE_MARKUP = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.'''
HTML(CODE_TOGGLE_MARKUP)

In [None]:
# Get access to the Spotify library
# The app credentials are read from the environment instead of being written
# into the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before
# starting the kernel.
# NOTE(review): a client secret was previously committed in this cell; it
# should be rotated in the Spotify developer dashboard.
try:
    CLIENT_ID = os.environ["SPOTIPY_CLIENT_ID"]
    CLIENT_SECRET = os.environ["SPOTIPY_CLIENT_SECRET"]
except KeyError as missing:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables before running this notebook."
    ) from missing
username = '1255041959'
scope = 'user-library-read'

# App-only client, used for search and audio features
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(auth_manager=auth_manager)

# User-authorised client, used to read the user's saved tracks
token = util.prompt_for_user_token(username,scope,client_id=CLIENT_ID,
                                   client_secret=CLIENT_SECRET,
                                   redirect_uri='https://localhost.com/callback/')
userSP = spotipy.Spotify(auth=token)

In [None]:
class Song:
    """A track with its id, name and (first) artist name.

    Ordering (``<``, ``<=``, ``>``, ``>=``) compares song names only, which is
    what ``list.sort`` relies on. Equality and hashing use the
    ``(id, name, artist)`` triple, so two separately constructed objects that
    describe the same track are treated as duplicates by ``in`` and
    ``list.remove``.
    """

    def __init__(self, id, name, artist):
        self.id = id
        self.name = name
        self.artist = artist

    def _identity(self):
        """Return the tuple that equality and hashing are based on."""
        return (self.id, self.name, self.artist)

    def __eq__(self, other):
        if not isinstance(other, Song):
            return NotImplemented
        return self._identity() == other._identity()

    def __hash__(self):
        # Defined alongside __eq__ so instances stay hashable.
        return hash(self._identity())

    def __lt__(self, song):
        return self.name < song.name

    def __le__(self, song):
        return self.name <= song.name

    def __gt__(self, song):
        return self.name > song.name

    def __ge__(self, song):
        return self.name >= song.name

    def __str__(self):
        return self.name + " - " + self.artist

In [None]:
# Notebook-wide state shared by the widget callbacks and the analysis cells
k = 3  # number of playlists; passed to KMeans as n_clusters further down
track_id, track_name, artist_name = [], [], []
songs = []
ratings = dict(rating=[], numSongs=[], numPlaylists=[])

In [None]:
# Build the user interface: title, error banner, song picker, selection list and controls
title = widgets.HTML(value="<h1 align='center'>Playlist Generator</h1>")
display(title)
errorMessage = widgets.Label(
    value="Something went wrong with the connection, hit the import button again.",
    layout=widgets.Layout(visibility='hidden'),
)
display(errorMessage)

# Left column: import button, search box and the list of search results
leftLabel = widgets.Label(value="Choose songs below or import my songs:  ")
importSongs = widgets.Button(description="Import Songs")
search = widgets.Text(placeholder="Search for a song")
add = widgets.Button(description="Add")
options = widgets.Select(layout=widgets.Layout(width="485px", height="200px"))
hBox0 = widgets.HBox([leftLabel, importSongs])
hBox1 = widgets.HBox([search, add])
vBox1 = widgets.VBox([hBox0, hBox1, options])

# Right column: remove button, import progress bar and the selected songs
rightLabel = widgets.Label(value="Selected Songs:")
remove = widgets.Button(description="Remove")
progress = widgets.FloatProgress(
    description='importing',
    orientation='horizontal',
    min=0,
    max=50,
    layout=widgets.Layout(visibility='hidden'),
)
selectedSongs = widgets.Select(layout=widgets.Layout(width="485px", height="200px"))
selectedSongs.options = songs
hBox4 = widgets.HBox([remove, progress])
vBox2 = widgets.VBox([rightLabel, hBox4, selectedSongs])

hBox2 = widgets.HBox([vBox1, vBox2])
display(hBox2)

# Bottom row: playlist-count slider, Done button and the loading bar
chooseKLabel = widgets.Label(value="Number of playlists to generate: ")
chooseK = widgets.IntSlider(min=2, max=6, step=1, value=k)
done = widgets.Button(description="Done")
loading = widgets.FloatProgress(
    description='loading',
    orientation='horizontal',
    min=0,
    max=50,
    layout=widgets.Layout(visibility='hidden'),
)
hBox3 = widgets.HBox([chooseKLabel, chooseK, done, loading])
display(hBox3)

# Create event handlers for the widgets
def importSongsHandler(obj):
    """Import the authorised user's saved tracks into the selected-songs list.

    Pages through ``userSP.current_user_saved_tracks`` 50 tracks at a time
    (up to offset 7000) and stops as soon as a page comes back short, so no
    requests are made past the end of the library. Tracks whose id is already
    in ``songs`` are skipped.

    New songs are collected in a local list and only merged into ``songs``
    when every request succeeded. On failure the error label is shown and
    ``songs`` keeps its previous content, so it stays in step with what the
    ``selectedSongs`` widget displays.

    Args:
        obj: the clicked button (unused).
    """
    track_id.clear()
    track_name.clear()
    artist_name.clear()
    errorMessage.layout.visibility = 'hidden'
    progress.layout.visibility = 'visible'

    page_size = 50
    imported = []
    known_ids = {existing.id for existing in songs}
    try:
        for offset in range(0, 7000, page_size):
            results = userSP.current_user_saved_tracks(limit=page_size, offset=offset)
            items = results['items']
            for position, entry in enumerate(items):
                track = entry['track']
                if track['id'] not in known_ids:
                    known_ids.add(track['id'])
                    imported.append(Song(track['id'], track['name'], track['artists'][0]['name']))
                progress.value = position
            if len(items) < page_size:
                # A short (or empty) page is the last one.
                break
    except Exception:
        errorMessage.layout.visibility = 'visible'
    else:
        songs.extend(imported)
        songs.sort()
        selectedSongs.options = songs
    finally:
        progress.layout.visibility = 'hidden'

def searchHandler(obj):
    """Search Spotify for the text in the search box and list the matches.

    Runs a track search (US market, up to 50 results) and fills the
    ``options`` widget with one ``Song`` per hit. A blank query is ignored
    so that no request is sent without a search term.

    Args:
        obj: the widget that triggered the search (unused).
    """
    query = search.value.strip()
    if not query:
        return
    tracks = sp.search(q=query, type="track", market='US', limit=50)
    options.options = [
        Song(item['id'], item['name'], item['artists'][0]['name'])
        for item in tracks['tracks']['items']
    ]
    
def addButtonHandler(obj):
    """Add the song highlighted in the search results to the selected songs.

    Does nothing when no search result is highlighted. A song is appended
    only if no song with the same id is already in ``songs``; comparing ids
    catches duplicates even when the same track comes from a different
    search and is therefore a different object.

    Args:
        obj: the clicked button (unused).
    """
    song = options.value
    if song is None:
        return
    if all(existing.id != song.id for existing in songs):
        songs.append(song)
    songs.sort()
    selectedSongs.options = songs
    
def removeButtonHandler(obj):
    """Remove the highlighted song from the selected songs.

    Does nothing when no song is highlighted or when the highlighted song is
    no longer in ``songs``, instead of letting ``list.remove`` raise
    ``ValueError``.

    Args:
        obj: the clicked button (unused).
    """
    song = selectedSongs.value
    if song is None or song not in songs:
        return
    songs.remove(song)
    selectedSongs.options = songs
    
def slider(obj):
    """Copy the current value of the ``chooseK`` slider into the global ``k``.

    Args:
        obj: the change notification passed by the widget (unused).
    """
    global k
    k = chooseK.value
    
def doneButtonHandler(ev):
    """Snapshot the selected songs and run every cell below this one.

    Shows the loading bar, refills the shared ``track_id``, ``track_name``
    and ``artist_name`` lists from ``songs``, then asks the notebook front
    end to execute the cells below.

    Args:
        ev: the clicked button (unused).
    """
    loading.layout.visibility = 'visible'
    # Slice assignment replaces the contents of the shared lists in place.
    track_id[:] = [entry.id for entry in songs]
    track_name[:] = [entry.name for entry in songs]
    artist_name[:] = [entry.artist for entry in songs]
    run_below = Javascript('IPython.notebook.execute_cells_below()')
    display(run_below)
    
# Connect every widget to its event handler
search.on_submit(searchHandler)
chooseK.observe(slider)
for clicked_button, callback in (
    (importSongs, importSongsHandler),
    (add, addButtonHandler),
    (remove, removeButtonHandler),
    (done, doneButtonHandler),
):
    clicked_button.on_click(callback)

In [None]:
# Collect the basic track info (ids, names and artist names) into one dataframe
df_tracks = pd.DataFrame(dict(track_id=track_id, track_name=track_name, artist_name=artist_name))

In [None]:
# Look up the audio features for the track ids, one batch at a time, and
# gather every non-empty result into a dataframe
rows = []
batchsize = 100
for start in range(0, len(df_tracks['track_id']), batchsize):
    batch = df_tracks['track_id'][start:start + batchsize]
    feature_results = sp.audio_features(batch)
    for position, features in enumerate(feature_results):
        if features is None:
            # No features came back for this track; leave it out.
            continue
        rows.append(features)
        loading.value = position
df_audio_features = pd.DataFrame.from_dict(rows, orient='columns')

In [None]:
# Clean the data by getting rid of some of the features that we don't want.
# The result is rebound rather than changed in place, and columns that are
# already gone are ignored, so this cell can be re-run without a KeyError.
columns_to_drop = ['analysis_url','track_href','type','uri','key','liveness','mode','duration_ms','time_signature']
df_audio_features = (
    df_audio_features
    .drop(columns=columns_to_drop, errors='ignore')
    .rename(columns={'id': 'track_id'})
)

In [None]:
# Join the basic track info with the audio features on the shared track id
df = df_tracks.merge(df_audio_features, on='track_id', how='inner')

In [None]:
# Display histograms of all of the audio features
%matplotlib inline
pd.options.display.max_columns = None
out1 = widgets.Output()
out2 = widgets.Output()
out3 = widgets.Output()
out4 = widgets.Output()
out5 = widgets.Output()
out6 = widgets.Output()
out7 = widgets.Output()
out8 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3, out4, out5, out6, out7, out8])
tab.set_title(0, 'Acousticness')
tab.set_title(1, 'Danceability')
tab.set_title(2, 'Energy')
tab.set_title(3, 'Instrumentalness')
tab.set_title(4, 'Loudness')
tab.set_title(5, 'Speechiness')
tab.set_title(6, 'Tempo')
tab.set_title(7, 'Valence')
display(tab)

with out1:
    print("A confidence measure from 0.0 to 1.0 of whether the track is acoustic.") 
    print("1.0 represents high confidence that the track is acoustic.")
    df['acousticness'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()
with out2:
    print("Danceability describes how suitable a track is for dancing based on a combination")
    print("of musical elements including tempo, rhythm stability, beat strength, and overall regularity.")
    print("A value of 0.0 is least danceable and 1.0 is most danceable.")
    df['danceability'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()
with out3:
    print("Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity.")
    print("Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy,") 
    print("while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include")
    print("dynamic range, perceived loudness, timbre, onset rate, and general entropy.")
    df['energy'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()
with out4:
    print('Predicts whether a track contains no vocals. "Ooh" and "aah" sounds are treated as instrumental in this context.') 
    print('Rap or spoken word tracks are clearly "vocal". The closer the instrumentalness value is to 1.0, the greater likelihood') 
    print('the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks,')
    print('but confidence is higher as the value approaches 1.0.')
    df['instrumentalness'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()
with out5:
    print('The overall loudness of a track in decibels (dB). Loudness values are averaged across the entire track and are useful for') 
    print('comparing relative loudness of tracks. Loudness is the quality of a sound that is the primary psychological correlate')
    print('of physical strength (amplitude). Values typically range between -60 and 0 db.')
    df['loudness'].plot.hist(figsize = [15,8])
    plt.show()
with out6:
    print('Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording')
    print('(e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks')
    print('that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain') 
    print('both music and speech, either in sections or layered, including such cases as rap music.') 
    print('Values below 0.33 most likely represent music and other non-speech-like tracks.')
    df['speechiness'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()
with out7:
    print('The overall estimated tempo of a track in beats per minute (BPM). In musical terminology,')
    print('tempo is the speed or pace of a given piece and derives directly from the average beat duration.')
    df['tempo'].plot.hist(figsize = [15,8])
    plt.show()
with out8:
    print('A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track.')
    print('Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric),') 
    print('while tracks with low valence sound more negative (e.g. sad, depressed, angry).')
    df['valence'].plot.hist(xlim = [0,1],figsize = [15,8])
    plt.show()

In [None]:
# Work on an independent copy so the merged dataframe stays untouched
df_cluster = df.copy(deep=True)

In [None]:
# Prepare the data for KMeans clustering by scaling the values to be between 0 and 1.
# The feature columns are selected by name rather than by position, so the
# selection no longer depends on the column order the API happens to return.
# The order below matches the labels that avgDF assigns to columns 0-7.
feature_columns = ['danceability', 'energy', 'loudness', 'speechiness',
                   'acousticness', 'instrumentalness', 'valence', 'tempo']
X = df_cluster[feature_columns].values
scaler = MinMaxScaler()
scaled = scaler.fit_transform(X)

In [None]:
# Cluster the scaled features into k groups; y_kmeans holds one cluster number per song
kmeans_settings = dict(n_clusters=k, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans = KMeans(**kmeans_settings)
y_kmeans = kmeans.fit_predict(scaled)

In [None]:
# Put the cluster numbers into a one-column dataframe named 'playlist'
kmeans_df = pd.DataFrame(y_kmeans, columns=['playlist'], dtype=int)

In [None]:
# Append the 'playlist' column to the tracks dataframe
df_cluster = pd.concat(objs=[df_cluster, kmeans_df], axis='columns')

In [None]:
# Make a 3D plot to visualize the KMeans clustering process.
# The figure is created without a default 2D axes (the earlier version drew a
# stray 2D axes underneath the 3D one), and all three coordinates are passed
# to scatter so the points no longer all sit on the z=0 plane.
fig = plt.figure(figsize=(15,13))
ax = fig.add_subplot(111, projection='3d')

cluster_colors = ['blue', 'red', 'green', 'cyan', 'magenta', 'orange']
for cluster, color in enumerate(cluster_colors[:k]):
    members = scaled[y_kmeans == cluster]
    ax.scatter(members[:, 0], members[:, 1], members[:, 2],
               s=50, c=color, label='Playlist ' + str(cluster + 1))

centers = kmeans.cluster_centers_
ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2],
           s=300, c='yellow', label='Centroids')

# Columns 0-2 of `scaled` are the ones avgDF labels danceability, energy and loudness.
ax.set_xlabel('danceability (scaled)')
ax.set_ylabel('energy (scaled)')
ax.set_zlabel('loudness (scaled)')
ax.set_title('Visual Representation of Grouping Songs Into Playlists')
ax.legend()
plt.show()

In [None]:
# Wrap the scaled audio features of all songs in a dataframe (columns 0-7)
df_scaled = pd.DataFrame(data=scaled)

In [None]:
# Put the 'playlist' column next to the scaled audio features
df_radar = pd.concat(objs=[df_scaled, kmeans_df], axis='columns')

In [None]:
# A function that returns a dataframe with average scaled audio features for a playlist
def avgDF(df):
    """Return the mean of columns 0-7 of ``df`` in radar-chart form.

    Args:
        df: dataframe whose columns labelled 0 through 7 hold the values to
            average.

    Returns:
        A dataframe with eight rows: column ``r`` holds the mean of each of
        the columns 0-7 and column ``theta`` the feature name assigned to it.
    """
    feature_names = ['danceability', 'energy', 'loudness', 'speechiness',
                     'acousticness', 'instrumentalness', 'valence', 'tempo']
    means = [df[column].mean() for column in range(len(feature_names))]
    return pd.DataFrame({'r': means, 'theta': feature_names})

In [None]:
# For each of the six possible playlists (cluster numbers 0-5), average the
# scaled audio features of its songs for use in the radar charts
df_r1, df_r2, df_r3, df_r4, df_r5, df_r6 = (
    avgDF(df_radar.loc[df_radar['playlist'] == cluster]) for cluster in range(6)
)

In [None]:
# Build, for each playlist, the list of Song objects that will be displayed
def _songs_of(frame):
    """Turn every row of a playlist dataframe into a Song."""
    return [
        Song(row['track_id'], row['track_name'], row['artist_name'])
        for row_label, row in frame.iterrows()
    ]

# Playlists beyond the first two stay empty unless k asks for them.
p3, p4, p5, p6 = [], [], [], []

df_1 = df_cluster.loc[df_cluster['playlist'] == 0]
p1 = _songs_of(df_1)

df_2 = df_cluster.loc[df_cluster['playlist'] == 1]
p2 = _songs_of(df_2)

if k > 2:
    df_3 = df_cluster.loc[df_cluster['playlist'] == 2]
    p3 = _songs_of(df_3)

if k > 3:
    df_4 = df_cluster.loc[df_cluster['playlist'] == 3]
    p4 = _songs_of(df_4)

if k > 4:
    df_5 = df_cluster.loc[df_cluster['playlist'] == 4]
    p5 = _songs_of(df_5)

if k > 5:
    df_6 = df_cluster.loc[df_cluster['playlist'] == 5]
    p6 = _songs_of(df_6)

In [None]:
# One tab per playlist, each holding an output widget that is filled later.
# The first two playlists always exist; the rest depend on k.
c1 = widgets.Output()
c2 = widgets.Output()
kids = [c1, c2]
if k > 2:
    c3 = widgets.Output()
    kids.append(c3)
if k > 3:
    c4 = widgets.Output()
    kids.append(c4)
if k > 4:
    c5 = widgets.Output()
    kids.append(c5)
if k > 5:
    c6 = widgets.Output()
    kids.append(c6)

tabs = widgets.Tab(children = kids)
for position in range(len(kids)):
    tabs.set_title(position, 'Playlist ' + str(position + 1))
display(tabs)

In [None]:
# Display a list of songs in the playlist with a radar chart of average audio features
def displayPlaylist(p, df):
    """Show a playlist's songs beside a radar chart of its audio features.

    Args:
        p: the entries to list in the song selector.
        df: dataframe with an ``r`` column (values) and a ``theta`` column
            (labels) to draw as a closed, filled polar line.
    """
    song_list = widgets.Select(
        options = p,
        layout = widgets.Layout(width='350px', height='400px'),
    )
    heading = widgets.HTML(value = "<h2 align='center'>Songs</h2>")
    radar = widgets.Output()
    chart_tab = widgets.Tab(children = [radar])
    chart_tab.set_title(0,'Audio Features')
    display(widgets.HBox([widgets.VBox([heading, song_list]), chart_tab]))
    with radar:
        fig = px.line_polar(df, r='r', theta='theta', line_close=True,width=450)
        fig.update_traces(fill='toself')
        # Fixed 0-1 radial range, with tick marks and radial labels switched off
        radial_axis = dict(range=[0,1], showticklabels=False, ticks='')
        angular_axis = dict(ticks='')
        fig.update_layout(polar = dict(radialaxis = radial_axis, angularaxis = angular_axis))
        fig.show()

# Fill each playlist tab. `kids` holds exactly the output widgets that were
# created for the current k, so zip stops after the last existing playlist.
playlist_songs = [p1, p2, p3, p4, p5, p6]
playlist_averages = [df_r1, df_r2, df_r3, df_r4, df_r5, df_r6]
for tab_output, tab_songs, tab_average in zip(kids, playlist_songs, playlist_averages):
    with tab_output:
        displayPlaylist(tab_songs, tab_average)

In [None]:
# Stop displaying the loading bar
# (doneButtonHandler makes it visible before it triggers the cells below)
loading.layout.visibility = 'hidden'

In [None]:
# Get feedback from the user
rateLabel = widgets.Label(value="Please rate the playlist generator on a scale from 1 to 10:")
rateUs = widgets.BoundedIntText(
    value=5,
    min=1,  # the prompt asks for 1 to 10, so 0 must not be accepted
    max=10,
    step=1,
    disabled=False
)
submit = widgets.Button(description="Submit")
ratingHbox = widgets.HBox([rateLabel,rateUs,submit])
display(ratingHbox)
# Shown by submitHandler once a rating has been recorded
goodbye = widgets.Label(value = "Thank you for your feedback")
     
def submitHandler(ev):
    """Record the submitted rating and show the thank-you label.

    Appends the chosen rating, the number of selected songs and the current
    number of playlists to the matching lists in ``ratings``.

    Args:
        ev: the clicked button (unused).
    """
    feedback = {
        'rating': rateUs.value,
        'numSongs': len(songs),
        'numPlaylists': k,
    }
    for field, value in feedback.items():
        ratings[field].append(value)
    display(goodbye)
    
# Run submitHandler whenever the Submit button is clicked
submit.on_click(submitHandler)