## Song Recommendation

In [1]:
import pickle
import random
import string

import numpy as np
import pandas as pd
from pandas import json_normalize

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# Ask for a song title. Surrounding whitespace is stripped and the text is
# lower-cased so the exact-match lookups against the lower-cased title columns
# are not defeated by a stray space or by capitalisation.
name=input('Please enter a song name:').strip().lower()

Please enter a song name:come monday


### 1st Prototype

Recommends from the top songs we scraped from the PopVortex site, plus all Eurovision winners.

In [4]:
# Hot-song list (title + artist) that the recommendation code checks the user's song against.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, so the CSV was
# presumably written with its index; nothing in this notebook reads that column.
top_songs=pd.read_csv('top_songs.csv')

In [5]:
# Lower-case the titles so they can be compared with the lower-cased user input.
top_songs=top_songs.assign(song=top_songs['song'].str.lower())
top_songs.head()

Unnamed: 0,song,artist
0,margaritaville,Jimmy Buffett
1,come monday,Jimmy Buffett
2,rich men north of richmond,Oliver Anthony Music
3,cheeseburger in paradise,Jimmy Buffett
4,all star,Smash Mouth


In [6]:
# 1st prototype: if the entered song is a current hit, suggest a different hit.
if name in top_songs['song'].values:
    print("That's hot!")
    # Draw only from the *other* hits so the user's own song is never echoed back.
    other_hits=top_songs[top_songs['song']!=name]
    if other_hits.empty:
        print("No Recommendation!")
    else:
        # Take title and artist from the same randomly chosen row; looking the
        # artist up by title afterwards returns the wrong artist when two hits
        # share a title.
        hot_pick=other_hits.iloc[random.randrange(len(other_hits))]
        # string.capwords only capitalises after whitespace, so "don't" becomes
        # "Don't" (str.title() would give "Don'T").
        random_song=string.capwords(hot_pick['song'])
        artist=hot_pick['artist']
        print("Here's another recommendation: "+random_song+ " by " +artist)
else:
    print("No Recommendation!")

That's hot!
Here's another recommendation: Don'T Keep It In Your Heart, There'S Nothing Happy About It by Vy Napi


### 2nd Prototype

Recommends from a large playlist on Spotify.

#### Loading the dataset with clusters

In [7]:
# Load the playlist tracks (audio features plus the k5/k8 cluster labels) and
# lower-case the titles so they match the lower-cased user input.
# NOTE(review): unpickling runs arbitrary code; only load a file you produced yourself.
songs_list=(
    pd.read_pickle('songs_wt_clusters.pkl')
    .assign(**{'track.name': lambda frame: frame['track.name'].str.lower()})
)
songs_list.head(3)

Unnamed: 0,track.name,name,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,...,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature,k5_clusters,k8_clusters
0,like a rolling stone,Bob Dylan,3AhXZa8sUQht0UEdBJgpGc,0.482,0.721,0,-6.839,1,0.0321,0.731,...,0.557,95.263,audio_features,spotify:track:3AhXZa8sUQht0UEdBJgpGc,https://api.spotify.com/v1/tracks/3AhXZa8sUQht...,https://api.spotify.com/v1/audio-analysis/3AhX...,369600,4,0,5
1,smells like teen spirit,Nirvana,3oTlkzk1OtrhH8wBAduVEi,0.485,0.863,1,-9.027,1,0.0495,1.2e-05,...,0.767,116.835,audio_features,spotify:track:3oTlkzk1OtrhH8wBAduVEi,https://api.spotify.com/v1/tracks/3oTlkzk1Otrh...,https://api.spotify.com/v1/audio-analysis/3oTl...,300977,4,0,5
2,a day in the life - remastered,The Beatles,3ZFBeIyP41HhnALjxWy1pR,0.364,0.457,4,-14.162,0,0.0675,0.29,...,0.175,163.219,audio_features,spotify:track:3ZFBeIyP41HhnALjxWy1pR,https://api.spotify.com/v1/tracks/3ZFBeIyP41Hh...,https://api.spotify.com/v1/audio-analysis/3ZFB...,337413,4,4,7


#### Spotify API Connection and Authentication

In [8]:
def authenticate():
    """Build a Spotify client from the credentials stored in ``secrets.txt``.

    The file is read from the current working directory and is expected to hold
    one ``key: value`` pair per line. The ``clientid`` and ``clientsecret``
    entries are handed to ``SpotifyClientCredentials``.

    Returns:
        The ``spotipy.Spotify`` client built with that auth manager.

    Raises:
        FileNotFoundError: if ``secrets.txt`` does not exist.
        KeyError: if ``clientid`` or ``clientsecret`` is missing from the file.
    """
    secrets_dict={}
    # `with` closes the file even when parsing fails.
    with open("secrets.txt","r") as secrets_file:
        for line in secrets_file:
            # Split on the first ':' only, so a value that itself contains ':'
            # is kept whole. Blank lines and lines without a separator are skipped.
            key,separator,value=line.partition(':')
            if separator:
                secrets_dict[key.strip()]=value.strip()

    # Fail with a message that names the missing entries instead of a bare KeyError.
    missing=[key for key in ('clientid','clientsecret') if key not in secrets_dict]
    if missing:
        raise KeyError("secrets.txt is missing: "+", ".join(missing))

    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))
    return sp

In [9]:
# Create the module-level Spotify client; get_audio_features() reads this `sp` directly.
sp=authenticate()

In [10]:
# Bare expression so the notebook echoes the client object's repr.
sp

<spotipy.client.Spotify at 0x1de355f4590>

- I built two models: one with k=5 and another with k=8. I will use both of them to make predictions.

In [11]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    # NOTE(review): pickle can execute arbitrary code on load; only use trusted files.
    with open(path,'rb') as handle:
        return pickle.load(handle)


def get_audio_features(track):
    """Predict the k=5 and k=8 cluster labels for a track looked up on Spotify.

    The first Spotify search hit for `track` is used. Its numeric audio features
    are scaled with the scaler stored in ``scaler.pkl`` and passed to the models
    stored in ``kmeans_5.sav`` and ``kmeans_8.sav``. Uses the module-level
    Spotify client ``sp``.

    Args:
        track: search text for the song (the notebook passes a lower-cased title).

    Returns:
        tuple: ``(pred_1, pred_2)``, the first prediction of the k=5 model and
        of the k=8 model respectively.

    Raises:
        LookupError: if the search returns no track, or Spotify has no audio
            features for the track that was found.
    """
    #searches track info -- with the `track` argument, not the notebook-level `name`
    search= sp.search(q=track,type='track',limit=1)
    items=search['tracks']['items']
    if not items:
        raise LookupError("Spotify returned no track for {!r}".format(track))
    track_id=items[0]['id']

    #getting the audio features and selecting only numeric
    #scaler will be used to transform numeric data
    features=sp.audio_features(track_id)
    if not features or features[0] is None:
        raise LookupError("No audio features available for {!r}".format(track))
    audio_ft=json_normalize(features)
    audio_ft=audio_ft.select_dtypes(np.number)

    #loading scaler that was used to fit the X_prep
    scaler=_load_pickle('scaler.pkl')
    audio_ft_scaled=scaler.transform(audio_ft)

    #loading the models built to make predictions
    model_1=_load_pickle('kmeans_5.sav')
    model_2=_load_pickle('kmeans_8.sav')

    #making predictions of the cluster based on audio features
    pred_1=model_1.predict(audio_ft_scaled)
    pred_2=model_2.predict(audio_ft_scaled)

    #returns the scalar labels instead of one-element arrays
    return pred_1[0],pred_2[0]

#cluster_1,cluster_2= get_audio_features(name)

**Note:** I don't want the recommendation to stop at the hot-songs check and simply suggest another of the top 170 songs. So if the song exists in the top hot songs, I will recommend another hot song and also recommend songs based on audio features. Since I have two models, there will be two audio-feature-based recommendations.

In [12]:
def _pick_random_row(candidates):
    """Return one uniformly random row of the DataFrame `candidates`, or None if it is empty."""
    if len(candidates) == 0:
        return None
    # Positional access: filtering leaves gaps in the index labels, so drawing a
    # label at random (what random.choice does on a Series) can raise KeyError.
    return candidates.iloc[random.randrange(len(candidates))]


def recommend(track):
    """Print recommendations for `track` from the hot list and from its audio clusters.

    If `track` is in ``top_songs``, another hot song is printed. Then the k=5 and
    k=8 cluster labels of `track` are taken from ``songs_list`` when the title is
    in the playlist, otherwise predicted through ``get_audio_features``. One
    random other song from each matching cluster is printed.

    Args:
        track: lower-cased song title, matching the lower-cased title columns.

    Returns:
        None after printing the recommendations, or the string
        ``'No Recommendation!'`` when the track cannot be looked up or a cluster
        holds no other song.
    """
    # string.capwords only capitalises after whitespace, so "don't" becomes
    # "Don't" (str.title() would give "Don'T").
    print("You like:",string.capwords(track))
    print("Amazing Choice! \n")

    #recommend songs from top hot songs
    if track in top_songs['song'].values:
        print("That's hot!")
        # Exclude the user's own song; title and artist come from the same row so
        # duplicate titles cannot be paired with the wrong artist.
        hot_pick=_pick_random_row(top_songs[top_songs['song']!=track])
        if hot_pick is not None:
            print("Here's another recommendation from Hot Right Now: "+string.capwords(hot_pick['song'])+ " by " +hot_pick['artist'],'\n')

    #recommend songs based on audio features:
    #1. get the cluster value (compare with the `track` argument, not the notebook-level `name`)
    in_playlist=songs_list['track.name']==track
    if in_playlist.any():   #from the predictions we made using model on our playlist
        cluster_1=songs_list.loc[in_playlist,'k5_clusters'].iloc[0]
        cluster_2=songs_list.loc[in_playlist,'k8_clusters'].iloc[0]
    else:                   #using directly spotify api
        try:
            cluster_1,cluster_2= get_audio_features(track)
        except LookupError:
            # Raised (IndexError included) when the Spotify search finds nothing.
            return 'No Recommendation!'

    #2 use the cluster value to get the songs which are clustered together,
    #  leaving out the song given by the user
    not_requested=songs_list['track.name']!=track
    pick_1=_pick_random_row(songs_list[(songs_list['k5_clusters']==cluster_1) & not_requested])
    pick_2=_pick_random_row(songs_list[(songs_list['k8_clusters']==cluster_2) & not_requested])
    if pick_1 is None or pick_2 is None:
        return 'No Recommendation!'

    #3 title and artist are read from the randomly selected rows
    random_1=string.capwords(pick_1['track.name'])
    random_2=string.capwords(pick_2['track.name'])
    artist_1=pick_1['name']
    artist_2=pick_2['name']

    print(r'We would also recommend: {} by {} and {} by {}'.format(random_1,artist_1,random_2,artist_2))

In [13]:
# Ask for a song title. Surrounding whitespace is stripped and the text is
# lower-cased so the exact-match lookups against the lower-cased title columns
# are not defeated by a stray space or by capitalisation.
name=input('Please enter a song name:').strip().lower()

Please enter a song name:Paradise


In [14]:
# Run the recommender on the title stored in `name`.
recommend(name)

You like: Paradise
Amazing Choice! 

We would also recommend: Lucille by Little Richard and Rockin' All Over The World by John Fogerty


In [15]:
# Possible simplification: the if/else branch that looks up the cluster could be dropped;
# one could call cluster_1,cluster_2= get_audio_features(track) directly and run the rest of the code.