# Let's first gather our data and do last cleaning (lower case, etc.)

In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
top100 = pd.read_csv('top100songs.csv')
playlist_clustered = pd.read_csv('playlist_clustered.csv')
kmeans = pickle.load(open('kmeans_6.pkl', 'rb'))
scaler = pickle.load(open('scaler_spoti.pkl', 'rb'))

In [3]:
top100['title'] = list(map(lambda x: x.lower(), top100['title']))
top100 = top100.drop('Unnamed: 0', axis=1)

In [4]:
playlist_clustered = playlist_clustered.drop('Unnamed: 0',axis=1)

# Prepare connection to the API

In [5]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [6]:
secrets_file = open("SpotifySecret.txt","r")

In [7]:
string = secrets_file.read()

In [8]:
string.split('\n')

['cid:9d470e7ce3a646e5a0399dc806f176f7', 'cs:65069de4155e4a2186f95a95b93bd83f']

In [9]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1]

In [10]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['cs']))

# Get user input

In [11]:
song = input("Enter the name of a song: ")

Enter the name of a song: Shivers


# Check if song is in top 100 and recommend if yes

In [12]:
song = song.lower()
top100_title = pd.Series(top100.title)
check = song in top100_title.unique()
if check == True:
    chosen_idx = np.random.choice(100, replace = False, size = 1)
    reco_100 = top100.iloc[chosen_idx]
    print("Your recommendation is:", reco_100['title'])
else:
    print(False)

Your recommendation is: 38    heat waves
Name: title, dtype: object


In [13]:
#I first check if the song title belongs to the top100 list. If true, then I randomly return a title from the top100 list, otherwise return false for now (spoiler alert: will work on what happens when "false" in next chapter). 

# If song is not in top100 - get features of the song from Spotify

In [14]:
#had to dive into the spotipy documentation but managed eventually

In [15]:
results = sp.search(q='track:' + song, type = 'track')
output = pd.DataFrame(results['tracks']['items'])
output['name'] = output['name'].apply(lambda x: x.lower())

if len(output) > 0:
    id = output['id'][0]
    track_info = sp.track(id)
    features_info = sp.audio_features(id)
    
    name = track_info['name']
    album = track_info['album']['name']
    artist = track_info['album']['artists'][0]['name']
    release_date = track_info['album']['release_date']
    length = track_info['duration_ms']
    popularity = track_info['popularity']
    
    acousticness = features_info[0]['acousticness']
    danceability = features_info[0]['danceability']
    energy = features_info[0]['energy']
    instrumentalness = features_info[0]['instrumentalness']
    liveness = features_info[0]['liveness']
    loudness = features_info[0]['loudness']
    speechiness = features_info[0]['speechiness']
    tempo = features_info[0]['tempo']
    time_signature = features_info[0]['time_signature']
    
    track_data = [name, album, artist, release_date, length, popularity, acousticness, danceability, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]

    track_list = pd.DataFrame(track_data)
    total_features = track_list.T
    total_features.columns=['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature']
    

# Scale the features and get a cluster number for the song

In [16]:
# X_list = total_features.drop(['name', 'album','artist','release_date'],axis=1)

In [17]:
# X_prep = scaler.transform(X_list)
# X_prep = pd.DataFrame(X_prep,columns=X_list.columns)

In [18]:
# prediction = kmeans.predict(X_prep)

In [19]:
# total_features['cluster_number'] = pd.Series(prediction, index=total_features.index)

In [20]:
X_list = total_features.drop(['name', 'album','artist','release_date'],axis=1)
X_prep = scaler.transform(X_list)
X_prep_def = pd.DataFrame(X_prep,columns=X_list.columns)
prediction = kmeans.predict(X_prep_def)
total_features['cluster_number'] = pd.Series(prediction, index=total_features.index)



# Recommend another song from the same cluster

In [32]:
reco_cluster = playlist_clustered.loc[playlist_clustered['cluster_number'] == prediction[0]][['name','artist','cluster_number']].reset_index(drop=True)

In [54]:
chosen_idx_cluster = np.random.choice(len(reco_cluster), replace = False, size = 1)
final_reco = reco_cluster.iloc[chosen_idx_cluster]
reco_title = final_reco['name']
reco_artist = final_reco['artist']
print("Your recommendation is " + reco_title + ' by ' + reco_artist)

2319    Your recommendation is Crimewave by Crystal Ca...
dtype: object
