In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from sklearn.datasets import make_classification # download a dataset 
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt

from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

from config import *

In [2]:
# Spotify Web API client used by every search / audio-features call in this notebook.
# client_id and client_secret are not defined in this notebook; presumably they are
# provided by the star-import of the local config module (confirm against config.py).
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= client_id,
                                                           client_secret= client_secret))

In [3]:
# Function to restore a pickled object from disk
def load(filename = "filename.pickle"):
    """
    Read a pickled object back from a file.

    Inputs:
      filename -> str, path of the pickle file to read

    Outputs:
      the unpickled object, or None (after printing a message) when the
      file does not exist

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files you created yourself.
    """
    try:
        # "rb": pickle data is binary
        with open(filename, "rb") as stream:
            unpickled = pickle.load(stream)
    except FileNotFoundError:
        print("File not found!")
        return None
    return unpickled

In [4]:
# Load the song database and discard the 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was saved).
music_recommender_df = (
    pd.read_csv('data/music_recommender_db.csv')
    .drop(columns=['Unnamed: 0'])
)

### Retrieve the audio features of the user-provided song

In [5]:
def get_user_song():
    """
    Ask the user, via two input() prompts, for an artist and a song.

    Outputs:
      (artist_name, song_name) -> tuple of str, exactly as typed by the user
    """
    questions = (
        "Please enter the full name of the artist: ",
        "Please enter the full name of the song: ",
    )
    # The list is built left to right, so the artist is always asked first.
    artist_name, song_name = [input(question) for question in questions]
    return artist_name, song_name


In [6]:
# Ask for a song and keep asking until Spotify returns a match, so the code
# below always has a track ID and a search result to work with. Only an empty
# search result triggers a new prompt; API or network errors are no longer
# swallowed and surface with their real traceback.
artist_name, song_name = get_user_song()
user_song_id = []
while not user_song_id:
    results = sp.search(q="track:"+song_name+" artist:"+artist_name, limit=1)
    matches = results['tracks']['items']
    if matches:
        song_id = matches[0]['id']
        user_song_id.append(song_id)
        print("The song ID is {}".format(user_song_id))
    else:
        print("The song {} of the artist {} has not been found on Spotify".format(song_name,artist_name))
        print("Plese enter a new artist name and song:")
        artist_name, song_name = get_user_song()

# One entry is expected per requested ID. Guard against a missing (None) entry
# so that the failure is explicit instead of an AttributeError further down.
featuredic = sp.audio_features(user_song_id)[0]
if featuredic is None:
    raise ValueError("Spotify returned no audio features for the song ID {}".format(song_id))

# One-row DataFrame: every feature becomes a column holding a single value.
songs_features = {key: [value] for key, value in featuredic.items()}
user_song_features = pd.DataFrame(songs_features)

for item in results['tracks']['items']:
    print("You can listen to: '{}', by the artist:'{}' at: {}".format(item['name'],item['artists'][0]['name'],item['external_urls']['spotify']))

Please enter the full name of the artist: Glass Animals
Please enter the full name of the song: Heat Waves
The song ID is ['3USxtqRwSYz57Ewm6wWRMp']
You can listen to: 'Heat Waves', by the artist:'Glass Animals' at: https://open.spotify.com/track/3USxtqRwSYz57Ewm6wWRMp


In [7]:
# Keep only the numeric columns of the user's song, then remove the five features
# listed below (presumably the ones the scaler and model were not fitted on —
# confirm against the training notebook).
user_song_features_num = (
    user_song_features
    .select_dtypes(include=np.number)
    .drop(columns=['key', 'mode', 'time_signature', 'duration_ms', 'liveness'])
)

### Recommend a song to the user based on their perceived preference

In [8]:
# Transform the user's song with the pickled scaler, then put the result back
# into a DataFrame that carries the original feature names.
scaler = load('transformation/scaler.pickle')
user_song_features_scaled = scaler.transform(user_song_features_num)
user_scaled_df = pd.DataFrame(
    data=user_song_features_scaled,
    columns=user_song_features_num.columns,
)

In [9]:
# Restore the pickled clustering model. The bare name on the last line makes the
# notebook display the loaded object (None would indicate the file was not found).
kmeans_30=load('models/kmeans_30.pickle')
kmeans_30

In [10]:
# Assign the scaled user song to one of the clusters of the loaded model.
# The result is indexed as user_cluster[0] further down to get the single label.
user_cluster = kmeans_30.predict(user_scaled_df)

In [11]:
def isin_df(df, artist_name, song_name):
    """
    This function checks whether an input song is in the input dataframe.

    A song counts as present only when a single row has both the given
    artist and the given title. Matching is exact (case- and
    whitespace-sensitive).

    Inputs:
      df -> dataframe with the columns 'artist_name' and 'song_title'
      artist_name -> str
      song_name -> str

    Outputs:
      bool
    """
    # Both conditions must hold on the SAME row; testing the two columns
    # independently would report a hit when the artist appears with some
    # other song and the title belongs to some other artist.
    same_row = (df['artist_name'] == artist_name) & (df['song_title'] == song_name)
    return bool(same_row.any())

In [12]:
def recommend(df):
    """
    Recommend one random song from the same cluster as the user's song.

    If the user's song is among the 'hot100' rows of df, the recommendation
    is drawn from the 'hot100' rows, otherwise from the 'not_hot' rows. In
    both cases only rows in the user's cluster are considered, and rows that
    share the user's artist name or the user's song title are excluded.
    The chosen row is displayed, then looked up on Spotify to print a link.

    Relies on the notebook globals artist_name, song_name, user_cluster and sp.

    Inputs:
      df -> dataframe with the columns 'artist_name', 'song_title',
            'identifier' and 'cluster_k30'

    Outputs:
      None (results are displayed / printed)
    """
    is_hot = isin_df(df[df['identifier'] == 'hot100'], artist_name, song_name)
    if is_hot:
        print("Hot song!")
        identifier = "hot100"
    else:
        print("Not hot song!")
        identifier = "not_hot"

    candidates = df[(df['identifier'] == identifier)
                    & (df['cluster_k30'] == user_cluster[0])
                    & (df['artist_name'] != artist_name)
                    & (df['song_title'] != song_name)]

    # sample() raises on an empty frame, so report the situation instead.
    if candidates.empty:
        print("No '{}' song is available in cluster {} to recommend".format(identifier, user_cluster[0]))
        return

    # Random pick; no seed is set, so the result changes between runs.
    recommendation = candidates.sample()
    display(recommendation[['artist_name','song_title','identifier']])

    # Pull the scalar values out of the one-row frame so that the query is a
    # plain string rather than a pandas Series.
    rec_title = recommendation['song_title'].iloc[0]
    rec_artist = recommendation['artist_name'].iloc[0]
    results = sp.search(q="track:{} artist:{}".format(rec_title, rec_artist), limit=1)
    for item in results['tracks']['items']:
        print("The recommended song name: '{}', the name of the artist is:'{}'.Listen to the song at: {}".format(item['name'],item['artists'][0]['name'],item['external_urls']['spotify']))



In [13]:
# Run the recommender on the full song database; the result is printed/displayed, nothing is returned.
recommend(music_recommender_df)

Hot song!


Unnamed: 0,artist_name,song_title,identifier
58,Harry Styles,Daydreaming,hot100


The recommended song name: 'Daydreaming', the name of the artist is:'Harry Styles'.Listen to the song at: https://open.spotify.com/track/69w5X6uTrOaWM32IetSzvO


THE END