# Recommending Songs
   * Use Top Spotify Tracks 2017 as dataset
   * Look at preferences based on top songs on chart
   * Use clustering to classify songs
   * Recommend songs based on clusters

# Loading the Data
Load the standard modules and dataset

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [2]:
#Location of the dataset csv file, relative to the working directory
DATA_PATH = 'songs.csv'
song_df = pd.read_csv(DATA_PATH) #Load the csv file into a dataframe
song_df.head(6) #Show the first 6 rows as a sample

FileNotFoundError: File b'songs.csv' does not exist

# Top 5 Songs Analysis
Using a bar graph, compare different audio features of the top five songs of 2017 and identify if there are any correlations or trends between them. 

In [None]:
top_5 = song_df.head(5) #Top 5 songs of 2017 and their related data

barWidth = 0.12 #Width of each individual bar

#Audio features shown on the x-axis, in the same order as the values in each song list below
feature_names = ['Danceability','Energy','Acousticness','Valence', 'Speechiness']

#Values of the 5 features for each of the top 5 songs (each between 0 and 1)
#NOTE(review): these are hard-coded rather than read from top_5 - presumably copied from the dataset; verify they still match
song1 = [0.825, 0.652, 0.581, 0.931, 0.0802]
song2 = [0.694, 0.815, 0.229, 0.813, 0.12]
song3 = [0.660, 0.786, 0.209, 0.846, 0.17]
song4 = [0.617, 0.635, 0.0498, 0.446, 0.0317]
song5 = [0.609, 0.668, 0.0552, 0.811, 0.0367]

#One (values, bar colour, legend label) entry per song, in chart order
top_songs = [
    (song1, 'red', 'Shape of You - Ed Sheeran'),
    (song2, 'blue', 'Despacito Remix - Luis Fonsi'),
    (song3, 'pink', 'Despacito - Luis Fonsi'),
    (song4, 'yellow', 'Something Just Like This - The Chainsmokers'),
    (song5, 'cyan', "I'm the one - DJ Khaled"),
]

#x position of the first bar in every feature group; each following song is shifted right by one bar width
group_start = np.arange(len(feature_names))

fig, ax = plt.subplots()
for offset, (values, color, label) in enumerate(top_songs):
    ax.bar(group_start + offset * barWidth, values, color=color, width=barWidth,
           edgecolor='white', label=label)

#Put each tick under the middle of its group: the bars span offsets 0..(n-1)*barWidth,
#so the centre is half of that span (the previous fixed offset of one barWidth was off-centre for 5 bars)
group_centre = (len(top_songs) - 1) * barWidth / 2
ax.set_xticks(group_start + group_centre)
ax.set_xticklabels(feature_names)

#Title and axis label so the figure can be read on its own
ax.set_ylabel('Feature value (0-1)')
ax.set_title('Audio features of the top 5 songs of 2017')

ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) #Legend to the right of the plot area
plt.show() #Render the plot


Based on the chart, we can determine that the top songs demonstrated high levels of danceability, energy, and valence, implying people liked songs that were upbeat, intense, positive, and invigorating. But what about listeners who prefer music that is less hyper and more soothing?

# Clustering
* Use appropriate attributes that are of importance in determining recommendations

In [None]:
#Names of the audio-feature columns used as clustering inputs
audio_feature_cols = [
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'liveness', 'valence', 'tempo',
]
song = song_df[audio_feature_cols] #Dataframe containing just the audio features
song.head(6) #Show the first 6 rows of the audio-feature dataframe as a sample

## Run k-Means

In [None]:
X = song.values #Raw feature matrix, one row per song

#Standardise every feature column to zero mean and unit variance before clustering.
#k-means uses Euclidean distance, so a column with a larger numeric range would otherwise dominate the clusters
#(presumably the case for columns such as tempo and loudness versus the 0-1 features - verify against the data)
feature_std = X.std(axis=0)
feature_std[feature_std == 0] = 1 #A constant column has zero spread; leave it unscaled instead of dividing by zero
X_scaled = (X - X.mean(axis=0)) / feature_std

#Cluster data in 15 groups(0-14). random_state is fixed so the clusters - and therefore the
#recommendations - are the same on every run; n_init is set explicitly so the number of restarts
#does not depend on the installed scikit-learn version's default
kmeans = KMeans(n_clusters=15, n_init=10, random_state=0)
kmeans.fit(X_scaled)

In [None]:
cluster_labels = kmeans.labels_ #Cluster id assigned to each song that was passed to fit
print(cluster_labels) #Print the group label of every song

In [None]:
song_names = song_df['name'] #Series of song names
curr_song = 'Shape of You' #Sample of current track being played to recommend off of

#Row positions (0-based, independent of the dataframe index labels) of every song whose name matches
match_positions = np.flatnonzero((song_names == curr_song).values)

#Fail loudly when the title is unknown: silently falling back to row 0 would
#produce recommendations for a different song than the one requested
if match_positions.size == 0:
    raise ValueError('Song not found in dataset: {!r}'.format(curr_song))

#If the name occurs more than once the last occurrence is used, as the earlier loop-based lookup did
curr_song_ind = int(match_positions[-1])

In [None]:
#Pull songs' index #s of the same cluster as the current track out
list = []
for j in range(0, len(kmeans.labels_)):
    if ((j != curr_song_ind) and (kmeans.labels_[j] == kmeans.labels_[curr_song_ind])): 
        list.append(j)

In [None]:
#Transfer song names to the index's pulled from the cluster to create recommended song list for current track
recommended_songs = []
for k in list:
    recommended_songs.append(song_names[k])
print (recommended_songs)