In [1]:
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import random

from IPython.display import Markdown, display

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import cluster, datasets
from sklearn.cluster import KMeans

import pickle

In [2]:
#### Authentification

secrets_file = open("secrets.txt","r")
string = secrets_file.read()
#string.split('\n')

# Dictionary
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0: # excluding empty lines
        #           [first element:key]  [ second element:value  ]
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [3]:
#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['csecret']))


In [4]:
topsongs = pd.read_csv('topsongs_data.csv', index_col=False)
spotify_data = pd.read_csv('spotify_data.csv', index_col=False)

display(topsongs.head(3), spotify_data.head(3))

Unnamed: 0,title,artist
0,FLOWER,JISOO
1,Last Night,Morgan Wallen
2,Flowers,Miley Cyrus


Unnamed: 0,title,artist,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,bohemian rhapsody - remastered 2011,Queen,spotify:track:7tFiyTwD0nx5a1eklYtX2J,0.392,0.402,0,-9.961,0,0.0536,0.288,0.0,0.243,0.228,143.883,audio_features,7tFiyTwD0nx5a1eklYtX2J,https://api.spotify.com/v1/tracks/7tFiyTwD0nx5...,https://api.spotify.com/v1/audio-analysis/7tFi...,354320,4
1,roller coaster,Danny Vera,spotify:track:5B5YKjgne3TZzNpMsN9aj1,0.401,0.383,9,-10.048,1,0.0279,0.51,0.0078,0.121,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,hotel california - 2013 remaster,Eagles,spotify:track:40riOy7x9W7GXjyGp4pjAv,0.579,0.508,2,-9.484,1,0.027,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4


In [5]:
scaler = pickle.load(open('scaler_model.pkl','rb'))

In [6]:
kmeans = pickle.load(open('kmeans_model.pkl','rb'))

In [7]:
track = input().lower()
track_id = sp.search(q=' track' + track, type= 'track')

l'empordà


In [8]:
track_id['tracks']['items'][0]['album']['artists'][0]['name'] # artist band

'Sopa De Cabra'

In [9]:
track_name = track_id['tracks']['items'][0]['name'] # title
track_name

"L'Empordà"

In [10]:
song_uri = track_id['tracks']['items'][0]['uri'] # uri
features = sp.audio_features(song_uri)[0]
features

{'danceability': 0.681,
 'energy': 0.837,
 'key': 4,
 'loudness': -8.907,
 'mode': 1,
 'speechiness': 0.041,
 'acousticness': 0.0362,
 'instrumentalness': 1.57e-06,
 'liveness': 0.0986,
 'valence': 0.959,
 'tempo': 143.472,
 'type': 'audio_features',
 'id': '1vzugDTXDgZCF0QQrzB5hX',
 'uri': 'spotify:track:1vzugDTXDgZCF0QQrzB5hX',
 'track_href': 'https://api.spotify.com/v1/tracks/1vzugDTXDgZCF0QQrzB5hX',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1vzugDTXDgZCF0QQrzB5hX',
 'duration_ms': 159333,
 'time_signature': 4}

In [11]:
spotify_data.columns

Index(['title', 'artist', 'uri', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'track_href', 'analysis_url',
       'duration_ms', 'time_signature'],
      dtype='object')

In [12]:
spotify_data.shape

(6795, 20)

In [13]:
def get_song_from_spotify(song_searched, spotify_data):
    
    # The function get_song_from_spotify takes in a song name and a dataframe spotify_data
    # and searches for the song in the Spotify database. If the song is already present in 
    # spotify_data, it just returns the dataframe. If the song is not present, it searches for
    # the song in the Spotify API using the song name, and if there are multiple versions of 
    #the song played by different artists, it asks the user to select the desired version.
    # Then, it extracts information such as URI and features of the selected song and creates
    # a dictionary of song information, which is then converted to a DataFrame and appended to
    # spotify_data. Finally, the function returns the updated spotify_data dataframe.
    # If the song is not found, it returns None.
    
    
    # If song is NOT in spotify data:
    if song_searched not in spotify_data['title'].values:
        
        # Search song
        track_id = sp.search(q='track:' + song_searched, type='track')  # song info

        # If multiple versions of song played by different artists are found:
        if len(track_id['tracks']['items']) > 1:
            # Print list of artists who played the song
            print("Multiple versions of the song '{}' were found:".format(song_searched))
            for i, item in enumerate(track_id['tracks']['items']):
                print("{}: {}".format(i+1, item['album']['artists'][0]['name']))
                
            # Ask user to select the desired version of the song
            while True:
                try:
                    selection = int(input("Please enter the NUMBER of the version you are looking for: "))
                    if selection < 1 or selection > len(track_id['tracks']['items']):
                        print("Invalid selection. Please enter a NUMBER between 1 and {}.".format(len(track_id['tracks']['items'])))
                    else:
                        break
                except ValueError:
                    print("Invalid selection. Please enter a NUMBER between 1 and {}.".format(len(track_id['tracks']['items'])))
            
            # Use the selected version of the song
            item = track_id['tracks']['items'][selection-1]
        
        else:
            # Use the only version of the song found
            item = track_id['tracks']['items'][0]
        
        # Extracting info of new song
        song_uri = item['uri']  # uri
        features = sp.audio_features(song_uri)[0]

        # New dict retrieving values of 'title', 'artist' and 'uri'
        track = {
            'title': item['name'],
            'artist': item['album']['artists'][0]['name'],
            'uri': song_uri,
        }

        # For each feature (song uri) add in track dict
        # new key (column name) and the value
        for feature_key, feature_value in features.items():
            track[feature_key] = feature_value

        # Append new songs to spotify_data
        new_song_df = pd.DataFrame([track])
        spotify_data = pd.concat([spotify_data, new_song_df], axis=0)#, ignore_index=True)
        spotify_data = spotify_data.drop_duplicates()
        #spotify_data = spotify_data.drop(['Unnamed: 0'], axis=1)
        spotify_data = spotify_data.reset_index(drop=True)
        #spotify_data.to_csv('spotify_data.csv')
        
        # Return the results as a DataFrame
        return spotify_data
    
    else:
        
        print("{} was already in spotify_data database".format(song_searched))
        return spotify_data

    
        # track_name = track_id['tracks']['items'][0]['name'] # title
       # track_id['tracks']['items'][0]['album']['artists'][0]['name'] # artist band
       # song_uri = track_id['tracks']['items'][0]['uri'] # uri
       # features = sp.audio_features(song_uri)[0]
    

In [14]:
song_searched = input()
spotify_data = get_song_from_spotify(song_searched, spotify_data).reset_index(drop=True)
spotify_data.to_csv('spotify_data.csv', index=False)
spotify_data

sta guai
Multiple versions of the song 'sta guai' were found:
1: Oques Grasses
2: Oques Grasses
3: Adelyne
4: Verbo
5: Sicario
6: Arpioni
7: Sergio Cammariere
8: Tavola 28
9: Pentesilea Road
10: Pentesilea Road
Please enter the NUMBER of the version you are looking for: 1


Unnamed: 0,title,artist,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,bohemian rhapsody - remastered 2011,Queen,spotify:track:7tFiyTwD0nx5a1eklYtX2J,0.392,0.402,0,-9.961,0,0.0536,0.28800,0.000000,0.2430,0.228,143.883,audio_features,7tFiyTwD0nx5a1eklYtX2J,https://api.spotify.com/v1/tracks/7tFiyTwD0nx5...,https://api.spotify.com/v1/audio-analysis/7tFi...,354320,4
1,roller coaster,Danny Vera,spotify:track:5B5YKjgne3TZzNpMsN9aj1,0.401,0.383,9,-10.048,1,0.0279,0.51000,0.007800,0.1210,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,hotel california - 2013 remaster,Eagles,spotify:track:40riOy7x9W7GXjyGp4pjAv,0.579,0.508,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,piano man,Billy Joel,spotify:track:3FCto7hnn1shUyZL42YgfO,0.334,0.472,0,-8.791,1,0.0277,0.60000,0.000004,0.3170,0.431,179.173,audio_features,3FCto7hnn1shUyZL42YgfO,https://api.spotify.com/v1/tracks/3FCto7hnn1sh...,https://api.spotify.com/v1/audio-analysis/3FCt...,336093,3
4,fix you,Coldplay,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,0.209,0.417,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6791,Cara de cul,Oques Grasses,spotify:track:2FFrYTZjyby3j0xaEn5RNZ,0.652,0.825,11,-5.705,1,0.0584,0.00853,0.000094,0.1140,0.330,133.009,audio_features,2FFrYTZjyby3j0xaEn5RNZ,https://api.spotify.com/v1/tracks/2FFrYTZjyby3...,https://api.spotify.com/v1/audio-analysis/2FFr...,246213,4
6792,Escopinya,Oques Grasses,spotify:track:7apIPSn5fCcb8QbH5fv3dd,0.754,0.765,0,-5.692,1,0.0837,0.03600,0.000002,0.0969,0.822,141.007,audio_features,7apIPSn5fCcb8QbH5fv3dd,https://api.spotify.com/v1/tracks/7apIPSn5fCcb...,https://api.spotify.com/v1/audio-analysis/7apI...,175467,4
6793,Lakilove,Oques Grasses,spotify:track:6dHdYZI2KDrq6FJEqCPs0k,0.825,0.818,7,-5.626,1,0.1530,0.18300,0.000056,0.0544,0.783,102.003,audio_features,6dHdYZI2KDrq6FJEqCPs0k,https://api.spotify.com/v1/tracks/6dHdYZI2KDrq...,https://api.spotify.com/v1/audio-analysis/6dHd...,171320,4
6794,La Gent Que Estimo,Oques Grasses,spotify:track:2aTvlIMGoo2L9x5YzJNjj5,0.348,0.239,10,-13.241,1,0.0428,0.88700,0.000048,0.2260,0.145,104.716,audio_features,2aTvlIMGoo2L9x5YzJNjj5,https://api.spotify.com/v1/tracks/2aTvlIMGoo2L...,https://api.spotify.com/v1/audio-analysis/2aTv...,239760,4


In [15]:
spotify_data.shape

(6796, 20)

In [16]:
spotify_data.columns

Index(['title', 'artist', 'uri', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'type', 'id', 'track_href', 'analysis_url',
       'duration_ms', 'time_signature'],
      dtype='object')

In [17]:
# CLUSTERS AND PICKING FROM CLUSTER

In [None]:
def recommend_song(song_searched, spotify_data):
    """
    This function takes in a song name and a dataframe spotify_data, searches for the song in the
    Spotify database, creates clusters of the data, and recommends another song from the same cluster
    as the selected song.
    """
    # Nested get_song_from_spotify function:
    spotify_data = get_song_from_spotify(song_searched, spotify_data)

    X_features = spotify_data.select_dtypes(np.number)

    # Scaling Data
    scaler = StandardScaler()
    X_prep = scaler.fit_transform(X_features)

    # KMeans : 8 clusters
    kmeans = KMeans(n_clusters=8, random_state=1234)
    kmeans.fit(X_prep)

    # Preprocessing the song name for matching
    song_searched = song_searched.lower().strip()

    # Finding the index of the selected song:
    spotify_data['title'] = spotify_data['title'].str.lower().str.strip()
    song_index = spotify_data.index[spotify_data['title'] == song_searched]
    
    if len(song_index) == 0:
        print(f"No matching song found for {song_searched}")
        return
    
    song_index = song_index[0]

    # Predicting cluster for the selected song:
    song_cluster = kmeans.predict(X_prep[song_index].reshape(1, -1))[0]

    # Selecting a random song from the same cluster:
    cluster_songs = spotify_data[kmeans.labels_ == song_cluster]
    recommended_song = cluster_songs.sample(n=1)

    # Print recommendation:    
    display(Markdown(f"Based on your selection of '**{song_searched.capitalize()}**', we recommend the song **'{recommended_song['title'].iloc[0].upper()}'** by **'{recommended_song['artist'].iloc[0].upper()}'**."))

    return recommended_song


In [19]:
song_searched = input()
get_song_from_spotify(song_searched, spotify_data)

# Saving updated dataframe
spotify_data.to_csv('spotify_data.csv', index=False)
spotify_data

bancals
Multiple versions of the song 'bancals' were found:
1: Oques Grasses
2: Garlou
3: Klimperei
4: Fromage En Feu
Please enter the NUMBER of the version you are looking for: 1


Unnamed: 0,title,artist,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,bohemian rhapsody - remastered 2011,Queen,spotify:track:7tFiyTwD0nx5a1eklYtX2J,0.392,0.402,0,-9.961,0,0.0536,0.28800,0.000000,0.2430,0.228,143.883,audio_features,7tFiyTwD0nx5a1eklYtX2J,https://api.spotify.com/v1/tracks/7tFiyTwD0nx5...,https://api.spotify.com/v1/audio-analysis/7tFi...,354320,4
1,roller coaster,Danny Vera,spotify:track:5B5YKjgne3TZzNpMsN9aj1,0.401,0.383,9,-10.048,1,0.0279,0.51000,0.007800,0.1210,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,hotel california - 2013 remaster,Eagles,spotify:track:40riOy7x9W7GXjyGp4pjAv,0.579,0.508,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,piano man,Billy Joel,spotify:track:3FCto7hnn1shUyZL42YgfO,0.334,0.472,0,-8.791,1,0.0277,0.60000,0.000004,0.3170,0.431,179.173,audio_features,3FCto7hnn1shUyZL42YgfO,https://api.spotify.com/v1/tracks/3FCto7hnn1sh...,https://api.spotify.com/v1/audio-analysis/3FCt...,336093,3
4,fix you,Coldplay,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,0.209,0.417,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6791,Cara de cul,Oques Grasses,spotify:track:2FFrYTZjyby3j0xaEn5RNZ,0.652,0.825,11,-5.705,1,0.0584,0.00853,0.000094,0.1140,0.330,133.009,audio_features,2FFrYTZjyby3j0xaEn5RNZ,https://api.spotify.com/v1/tracks/2FFrYTZjyby3...,https://api.spotify.com/v1/audio-analysis/2FFr...,246213,4
6792,Escopinya,Oques Grasses,spotify:track:7apIPSn5fCcb8QbH5fv3dd,0.754,0.765,0,-5.692,1,0.0837,0.03600,0.000002,0.0969,0.822,141.007,audio_features,7apIPSn5fCcb8QbH5fv3dd,https://api.spotify.com/v1/tracks/7apIPSn5fCcb...,https://api.spotify.com/v1/audio-analysis/7apI...,175467,4
6793,Lakilove,Oques Grasses,spotify:track:6dHdYZI2KDrq6FJEqCPs0k,0.825,0.818,7,-5.626,1,0.1530,0.18300,0.000056,0.0544,0.783,102.003,audio_features,6dHdYZI2KDrq6FJEqCPs0k,https://api.spotify.com/v1/tracks/6dHdYZI2KDrq...,https://api.spotify.com/v1/audio-analysis/6dHd...,171320,4
6794,La Gent Que Estimo,Oques Grasses,spotify:track:2aTvlIMGoo2L9x5YzJNjj5,0.348,0.239,10,-13.241,1,0.0428,0.88700,0.000048,0.2260,0.145,104.716,audio_features,2aTvlIMGoo2L9x5YzJNjj5,https://api.spotify.com/v1/tracks/2aTvlIMGoo2L...,https://api.spotify.com/v1/audio-analysis/2aTv...,239760,4


In [20]:
spotify_data.shape

(6796, 20)

In [None]:
def recommend_song(song_searched, spotify_data, topsongs):
    """
    This function takes in a song name and a dataframe spotify_data, searches for the song in the
    Spotify database, creates clusters of the data, and recommends another song from the same cluster
    as the selected song.
    """
    song_searched = song_searched.lower().strip()

    # Check if the song is in top songs:
    if song_searched in topsongs['title'].str.lower().str.strip().values:
        recommended_song = topsongs.loc[topsongs['title'].str.lower().str.strip() != song_searched].sample(n=1)
        print(f"Based on your selection of '{song_searched.capitalize()}', we recommend the song '{recommended_song['title'].iloc[0]}' by {recommended_song['artist'].iloc[0]}.")
        return recommended_song
    
    # Nested get_song_from_spotify function:
    spotify_data = get_song_from_spotify(song_searched, spotify_data)

    X_features = spotify_data.select_dtypes(np.number)

    # Scaling Data
    scaler = StandardScaler()
    X_prep = scaler.fit_transform(X_features)

    # KMeans : 8 clusters
    kmeans = KMeans(n_clusters=8, random_state=1234)
    kmeans.fit(X_prep)

    # Finding the index of the selected song:
    spotify_data['title'] = spotify_data['title'].str.lower().str.strip()
    song_index = spotify_data.index[spotify_data['title'] == song_searched]
    
    if len(song_index) == 0:
        print(f"No matching song found for {song_searched}")
        return
    
    song_index = song_index[0]

    # Predicting cluster for the selected song:
    song_cluster = kmeans.predict(X_prep[song_index].reshape(1, -1))[0]

    # Selecting a random song from the same cluster:
    cluster_songs = spotify_data[kmeans.labels_ == song_cluster]
    recommended_song = cluster_songs.sample(n=1)

    # Print recommendation:    
    display(Markdown(f"Based on your selection of '**{song_searched.capitalize()}**', we recommend the song **'{recommended_song['title'].iloc[0].upper()}'** by **'{recommended_song['artist'].iloc[0].upper()}'**."))

    return recommended_song



In [None]:
song_searched = input('Enter a song: ')
recommend_song(song_searched, spotify_data, topsongs)

In [None]:
def recommend_song(song_searched, spotify_data, topsongs):
    """
    This function takes in a song name and a dataframe spotify_data, searches for the song in the
    Spotify database, creates clusters of the data, and recommends another song from the same cluster
    as the selected song.
    """
    song_searched = song_searched.lower().strip()

    # Check if the song is in top songs:
    if song_searched in topsongs['title'].str.lower().str.strip().values:
        recommended_song = topsongs.loc[topsongs['title'].str.lower().str.strip() != song_searched].sample(n=1)
        print(f"Based on your selection of '{song_searched.capitalize()}', we recommend the song '{recommended_song['title'].iloc[0]}' by {recommended_song['artist'].iloc[0]}.")
        return recommended_song
    
    # Nested get_song_from_spotify function:
    spotify_data = get_song_from_spotify(song_searched, spotify_data)

    X_features = spotify_data.select_dtypes(np.number)

    # Scaling Data
    scaler = StandardScaler()
    X_prep = scaler.fit_transform(X_features)

    # KMeans : 8 clusters
    kmeans = KMeans(n_clusters=8, random_state=1234)
    kmeans.fit(X_prep)

    # Finding the index of the selected song:
    spotify_data['title'] = spotify_data['title'].str.lower().str.strip()
    song_index = spotify_data.index[spotify_data['title'] == song_searched]
    
    if len(song_index) == 0:
        print(f"No matching song found for {song_searched}")
        return
    
    song_index = song_index[0]

    # Predicting cluster for the selected song:
    song_cluster = kmeans.predict(X_prep[song_index].reshape(1, -1))[0]

    # Selecting a random song from the same cluster:
    #cluster_songs = spotify_data[kmeans.labels_ == song_cluster]
    #recommended_song = cluster_songs.sample(n=1)
    
    # Selecting a random song from the same cluster:
    cluster_songs = spotify_data[kmeans.labels_ == song_cluster]
    recommended_song = cluster_songs.sample(n=1)

    # Finding also the most similar song in the same cluster:
    cosine_similarities = cosine_similarity(X_prep[kmeans.labels_ == song_cluster])
    similarity_scores = pd.Series(cosine_similarities[song_index])
    most_similar_song_index = similarity_scores.sort_values(ascending=False).index[1]
    most_similar_song = spotify_data.iloc[most_similar_song_index]

    # Print recommendations:    
    display(Markdown(f"Based on your selection of '**{song_searched.capitalize()}**', we recommend the song **'{recommended_song['title'].iloc[0].upper()}'** by **'{recommended_song['artist'].iloc[0].upper()}'**."))
    display(Markdown(f"We also recommend the most similar song in the same cluster, which is **'{most_similar_song['title'].upper()}'** by **'{most_similar_song['artist'].upper()}'**."))
    return recommended_song, most_similar_song



In [26]:
song_searched = input()
get_song_from_spotify(song_searched, spotify_data)

# Saving updated dataframe
spotify_data.to_csv('spotify_data.csv', index=False)
spotify_data

canço de laire
Multiple versions of the song 'canço de laire' were found:
1: Oques Grasses
2: Juli Garreta
3: Coses
4: Coses
5: Coses
Please enter the NUMBER of the version you are looking for: 1


Unnamed: 0,title,artist,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,bohemian rhapsody - remastered 2011,Queen,spotify:track:7tFiyTwD0nx5a1eklYtX2J,0.392,0.402,0,-9.961,0,0.0536,0.28800,0.000000,0.2430,0.228,143.883,audio_features,7tFiyTwD0nx5a1eklYtX2J,https://api.spotify.com/v1/tracks/7tFiyTwD0nx5...,https://api.spotify.com/v1/audio-analysis/7tFi...,354320,4
1,roller coaster,Danny Vera,spotify:track:5B5YKjgne3TZzNpMsN9aj1,0.401,0.383,9,-10.048,1,0.0279,0.51000,0.007800,0.1210,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,hotel california - 2013 remaster,Eagles,spotify:track:40riOy7x9W7GXjyGp4pjAv,0.579,0.508,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,piano man,Billy Joel,spotify:track:3FCto7hnn1shUyZL42YgfO,0.334,0.472,0,-8.791,1,0.0277,0.60000,0.000004,0.3170,0.431,179.173,audio_features,3FCto7hnn1shUyZL42YgfO,https://api.spotify.com/v1/tracks/3FCto7hnn1sh...,https://api.spotify.com/v1/audio-analysis/3FCt...,336093,3
4,fix you,Coldplay,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,0.209,0.417,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6791,Cara de cul,Oques Grasses,spotify:track:2FFrYTZjyby3j0xaEn5RNZ,0.652,0.825,11,-5.705,1,0.0584,0.00853,0.000094,0.1140,0.330,133.009,audio_features,2FFrYTZjyby3j0xaEn5RNZ,https://api.spotify.com/v1/tracks/2FFrYTZjyby3...,https://api.spotify.com/v1/audio-analysis/2FFr...,246213,4
6792,Escopinya,Oques Grasses,spotify:track:7apIPSn5fCcb8QbH5fv3dd,0.754,0.765,0,-5.692,1,0.0837,0.03600,0.000002,0.0969,0.822,141.007,audio_features,7apIPSn5fCcb8QbH5fv3dd,https://api.spotify.com/v1/tracks/7apIPSn5fCcb...,https://api.spotify.com/v1/audio-analysis/7apI...,175467,4
6793,Lakilove,Oques Grasses,spotify:track:6dHdYZI2KDrq6FJEqCPs0k,0.825,0.818,7,-5.626,1,0.1530,0.18300,0.000056,0.0544,0.783,102.003,audio_features,6dHdYZI2KDrq6FJEqCPs0k,https://api.spotify.com/v1/tracks/6dHdYZI2KDrq...,https://api.spotify.com/v1/audio-analysis/6dHd...,171320,4
6794,La Gent Que Estimo,Oques Grasses,spotify:track:2aTvlIMGoo2L9x5YzJNjj5,0.348,0.239,10,-13.241,1,0.0428,0.88700,0.000048,0.2260,0.145,104.716,audio_features,2aTvlIMGoo2L9x5YzJNjj5,https://api.spotify.com/v1/tracks/2aTvlIMGoo2L...,https://api.spotify.com/v1/audio-analysis/2aTv...,239760,4


In [21]:
spotify_data.shape

(6796, 20)

In [28]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_similar_song(song_searched, spotify_data, topsongs):
    """
    This function takes in a song name and a dataframe spotify_data, searches for the song in the
    Spotify database, creates clusters of the data, and recommends another song from the same cluster
    as the selected song. If the song is in topsongs, a random song from topsongs with the same artist is recommended.
    """
    
    # ------------------IF song in the input IS IN TOPSONGS ---------------------------
    if song_searched in topsongs['title'].values:
        # While song in input IS different from the recommended (output)
        while True:
            # Generating a random number to select another song from the list
            random_num = random.randint(0, len(topsongs)-1)
            # Picking the song 'title' with random_num index
            song_recommended_title = topsongs.iloc[random_num]['title']
            # Getting also the artist name 
            song_recommended_artist = topsongs.iloc[random_num]['artist']
            # If recommended song IS NOT the same as the searched one, stop the while
            if song_recommended_title != song_searched:
                break
        # Capitalizing first letters of 'title' (.title())
        song_recommended_title = song_recommended_title.title()
        # Joinning song and artist to display in the output
        song_recommended = f"{song_recommended_title} by {song_recommended_artist}"
        # Displaying output with both 'title' and 'artist' bigger and bolt style.
        display(Markdown(f"You should listen to: **{song_recommended}**!"))
    
    # ------------------If song in the input IS NOT IN TOPSONGS--------------------------
    else:
    
        # Song is not in topsongs, proceed with recommend_song function
        spotify_data = get_song_from_spotify(song_searched, spotify_data)

        X_features = spotify_data.select_dtypes(np.number)

        # Scaling Data
        scaler = StandardScaler()
        X_prep = scaler.fit_transform(X_features)

        # KMeans : 8 clusters
        kmeans = KMeans(n_clusters=8, random_state=1234)
        kmeans.fit(X_prep)

        # Preprocessing the song name for matching
        song_searched = song_searched.lower().strip()

        # Finding the index of the selected song:
        spotify_data['title'] = spotify_data['title'].str.lower().str.strip()
        song_index = spotify_data.index[spotify_data['title'] == song_searched]
    
        if len(song_index) == 0:
            print(f"No matching song found for {song_searched}.")
            return
    
        song_index = song_index[0]

        # Predicting cluster for the selected song:
        song_cluster = kmeans.predict(X_prep[song_index].reshape(1, -1))[0]

        # Selecting all songs from the same cluster:
        cluster_songs = spotify_data[kmeans.labels_ == song_cluster]

        # Calculating cosine similarity between selected song and all songs in cluster:
        similarity_scores = cosine_similarity(X_prep[song_index].reshape(1, -1), X_prep[kmeans.labels_ == song_cluster])

        # Finding index of the most similar song in the cluster:
        recommended_index = np.argmax(similarity_scores)
        recommended_song = cluster_songs.iloc[recommended_index]

        # Selecting a random song from the cluster if the most similar song is the searched song:
        if recommended_song['title'] == song_searched:
            cluster_size = len(cluster_songs)
            while True:
                random_num = random.randint(0, cluster_size-1)
                if random_num != recommended_index:
                    recommended_song = cluster_songs.iloc[random_num]
                    break

    # Capitalizing first letters of 'title' (.title())
    song_searched_title = song_searched.title()
    recommended_song_title = recommended_song['title'].title()

    # Joinning song and artist to display in the output
    song_searched_artist = spotify_data.iloc[song_index]['artist']
    recommended_song_artist = recommended_song['artist']
    song_searched_formatted = f"**{song_searched_title} by {song_searched_artist}**"
    recommended_song_formatted = f"**{recommended_song_title} by {recommended_song_artist}**"

    # Displaying output with both 'title' and 'artist' bigger and bolt style.
    display(Markdown(f"If you like {song_searched_formatted}, you should listen to {recommended_song_formatted}!"))



In [30]:
song_searched = input()
recommend_similar_song(song_searched, spotify_data,topsongs)

# Saving updated dataframe
spotify_data.to_csv('spotify_data.csv', index=False)
spotify_data

canço de laire
Multiple versions of the song 'canço de laire' were found:
1: Oques Grasses
2: Juli Garreta
3: Coses
4: Coses
5: Coses
Please enter the NUMBER of the version you are looking for: 1
No matching song found for canço de laire.


Unnamed: 0,title,artist,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,bohemian rhapsody - remastered 2011,Queen,spotify:track:7tFiyTwD0nx5a1eklYtX2J,0.392,0.402,0,-9.961,0,0.0536,0.28800,0.000000,0.2430,0.228,143.883,audio_features,7tFiyTwD0nx5a1eklYtX2J,https://api.spotify.com/v1/tracks/7tFiyTwD0nx5...,https://api.spotify.com/v1/audio-analysis/7tFi...,354320,4
1,roller coaster,Danny Vera,spotify:track:5B5YKjgne3TZzNpMsN9aj1,0.401,0.383,9,-10.048,1,0.0279,0.51000,0.007800,0.1210,0.285,96.957,audio_features,5B5YKjgne3TZzNpMsN9aj1,https://api.spotify.com/v1/tracks/5B5YKjgne3TZ...,https://api.spotify.com/v1/audio-analysis/5B5Y...,269986,4
2,hotel california - 2013 remaster,Eagles,spotify:track:40riOy7x9W7GXjyGp4pjAv,0.579,0.508,2,-9.484,1,0.0270,0.00574,0.000494,0.0575,0.609,147.125,audio_features,40riOy7x9W7GXjyGp4pjAv,https://api.spotify.com/v1/tracks/40riOy7x9W7G...,https://api.spotify.com/v1/audio-analysis/40ri...,391376,4
3,piano man,Billy Joel,spotify:track:3FCto7hnn1shUyZL42YgfO,0.334,0.472,0,-8.791,1,0.0277,0.60000,0.000004,0.3170,0.431,179.173,audio_features,3FCto7hnn1shUyZL42YgfO,https://api.spotify.com/v1/tracks/3FCto7hnn1sh...,https://api.spotify.com/v1/audio-analysis/3FCt...,336093,3
4,fix you,Coldplay,spotify:track:7LVHVU3tWfcxj5aiPFEW4Q,0.209,0.417,3,-8.740,1,0.0338,0.16400,0.001960,0.1130,0.124,138.178,audio_features,7LVHVU3tWfcxj5aiPFEW4Q,https://api.spotify.com/v1/tracks/7LVHVU3tWfcx...,https://api.spotify.com/v1/audio-analysis/7LVH...,295533,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6791,Cara de cul,Oques Grasses,spotify:track:2FFrYTZjyby3j0xaEn5RNZ,0.652,0.825,11,-5.705,1,0.0584,0.00853,0.000094,0.1140,0.330,133.009,audio_features,2FFrYTZjyby3j0xaEn5RNZ,https://api.spotify.com/v1/tracks/2FFrYTZjyby3...,https://api.spotify.com/v1/audio-analysis/2FFr...,246213,4
6792,Escopinya,Oques Grasses,spotify:track:7apIPSn5fCcb8QbH5fv3dd,0.754,0.765,0,-5.692,1,0.0837,0.03600,0.000002,0.0969,0.822,141.007,audio_features,7apIPSn5fCcb8QbH5fv3dd,https://api.spotify.com/v1/tracks/7apIPSn5fCcb...,https://api.spotify.com/v1/audio-analysis/7apI...,175467,4
6793,Lakilove,Oques Grasses,spotify:track:6dHdYZI2KDrq6FJEqCPs0k,0.825,0.818,7,-5.626,1,0.1530,0.18300,0.000056,0.0544,0.783,102.003,audio_features,6dHdYZI2KDrq6FJEqCPs0k,https://api.spotify.com/v1/tracks/6dHdYZI2KDrq...,https://api.spotify.com/v1/audio-analysis/6dHd...,171320,4
6794,La Gent Que Estimo,Oques Grasses,spotify:track:2aTvlIMGoo2L9x5YzJNjj5,0.348,0.239,10,-13.241,1,0.0428,0.88700,0.000048,0.2260,0.145,104.716,audio_features,2aTvlIMGoo2L9x5YzJNjj5,https://api.spotify.com/v1/tracks/2aTvlIMGoo2L...,https://api.spotify.com/v1/audio-analysis/2aTv...,239760,4


In [27]:
spotify_data.shape

(6796, 20)

In [None]:
def get_song_cluster(song_searched, spotify_data):
    
    # All NUMERICAL columns AFTER 'uri' are the features to be taken
    features = spotify_data.select_dtypes(np.number)
    #features = features.drop(['Unnamed: 0'], axis = 1)
    
    # Select X, but no target yet
    X = features

    # Scaling Data
    X_prep = StandardScaler().fit_transform(X)

    # Creating 8 clusters/segments (Kmeans)
    kmeans = KMeans(n_clusters=8, random_state=1234)
    kmeans.fit(X_prep)
    
    # Predicting / assigning the clusters:
    clusters = kmeans.predict(X_prep)
    
    # Converting both searched song and 'title' in spotify_data to lower case
    song_searched = song_searched.lower() 
    spotify_data['title'] = spotify_data['title'].str.lower()

    # Checking if the input song is in the Spotify data
    if song_searched in spotify_data['title'].values:
        
        # Extracting the features for the input song 
        # (filtering through boolean mask and selecting first row of filtered)
        song_features = features.loc[spotify_data['title']==song_searched].iloc[0]
    
        # Predicting/assigning a cluster for song_searched:
    
        # Solving dimensional issue and need for array:   
        # Convert target_song to a numpy array and reshape to be a 2-dimensional array with one row
        song_features_arr = np.array(song_features).reshape(1, -1)
    
        # Using the previously fitted scaler to transform new data
        song_features_prep = scaler.transform(song_features_arr)

        # Predicting / assigning the cluster using the same Kmeans fitted before:
        song_cluster = kmeans.predict(song_features_prep)
    
        
        # Recommending the most similar song from that cluster:
        
        # Selecting all songs from the same cluster as song_searched (song_cluster)
        songs_in_cluster = spotify_data[kmeans.labels_ == song_cluster[0]]
        
                ## Note(I)
    
        # Comparing song_searched with songs in same cluster using cosine_similarity
        from sklearn.metrics.pairwise import cosine_similarity
        
                ## Note (II)
    
        # Calculating cosine similarity between searched song and songs in cluster
        similarities = cosine_similarity(song_features_prep, songs_in_cluster.select_dtypes(np.number))
    
        # Finding the most similar song in the cluster
        most_similar_song_idx = np.argmax(similarities)
        
                ## Note (III)
    
        # Retrieving the most similar song from the cluster
        most_similar_song = songs_in_cluster.iloc[most_similar_song_idx]['title']
    
    
        # Recommending another random song from that cluster:
        
        # Selecting a random song from that cluster
        random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

        # Check random_song and most_similar_sonog are not the same. If so, repeat random choice.
        while spotify_data.iloc[random_song_idx]['title'] == most_similar_song:
            random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

        # Getting title of random song
        random_song = spotify_data.iloc[random_song_idx]['title']

    
        display(Markdown(f"The song '**{song_searched.capitalize()}**' belongs to cluster '**{song_cluster[0]}**'.\n\n"
                  f"The most similar song in the cluster is '**{most_similar_song.capitalize()}**'.\n\n"
                  f"Here's also a random song from the same cluster: '**{random_song.capitalize()}**'."))

 
    
    else:
    
        return f"The song '**{song_searched}**' is not in our data."


In [None]:
song_searched = input()
get_song_from_spotify(song_searched, spotify_data)

In [None]:
 # List of items in track
    tracks = []
    
    # While there is a next page of results in the playlist
    while playlist:
        # For each item in playlist:
        for item in playlist['items']:
            # Look for uri and save id and get values in features
            song_uri = item['track']['uri']
            features = sp.audio_features(song_uri)[0]
            
            # New dict retrieving values of 'title', 'artist' and 'uri' (id)
            track = {
                'title': item['track']['name'],
                'artist': item['track']['artists'][0]['name'],
                'uri': song_uri,
            }
            
            # For each feature (song uri) add in track dict
            # new key (column name) and the value
            for feature_key, feature_value in features.items():
                track[feature_key] = feature_value
                
            # Append each item in track
            tracks.append(track)
        
        # Get the next page of results
        playlist = sp.next(playlist)
        
        # Sleep for a random time between 1 and 3 seconds to avoid triggering rate limits
        sleep_time = random.randint(1, 3)
        time.sleep(sleep_time)
    
    # Return the results as a DataFrame
    return pd.DataFrame(tracks)

In [None]:
topsongs.columns

In [None]:
def features(track,artist):
    track_id = sp.search(q='artist' + artist + ' track' + track, type= 'track')
    uri = track_id['tracks']['items'][0]['i']
    features = sp.audio_features[uri]
    return features

In [None]:
new_song = input('Enter song: ').lower()
new_artist = input("Enter artist: ").lower()
print('\n')

#check if in topsongs
if new_song in list(topsongs['title']):
    recommend_hot = random.choice(list(topsongs['title']))
    while recommend_hot == new_song:
        recommend_hot = random.choice(list(topsongs['title']))
        print('Listen to: \n', recommendation_hot)
        
# Get spotify features
else:
    feature = features(new_song,new_artist)
    column = list(feature[0].keys())
    values = [list(feature[0].values())]
    df_new_song = pd.DataFrame(data=feature, columns = column)
    df_new_song = df_new_song.drop(['type','id','uri','track_href', 'analysis_url','time_signature'],axis=1)
    std_new_song = scaler.transform(df_new_song)
    new_cluster = kmeans.predict(std_new_song)
    df_cluster = list_clusters[list_cluster['cluster']==list(new_cluster)[0]]
    
    print('Listen to: \n', random.choice(list(df_cluster['song_and_artist'])))

In [None]:
# Not in spotify function

In [None]:
def check_song_in_spotify (song_searched):

    # Search for the song using the Spotify API
    results = sp.search(q='track:' + song_searched, type='track')

    # Check if there are multiple versions of the same song
    if results['tracks']['total'] > 1:
        print('Multiple versions of the song were found. Please specify the artist:')
        # Print all versions and the artist who played them
        for track in results['tracks']['items']:
            print(f"Track name: {track['name']}\nArtist name: {track['artists'][0]['name']}\n")

        # Ask to input just one of the artists
        artist_name = input('Enter the name of the artist: ')
        for track in results['tracks']['items']:
            if track['artists'][0]['name'].lower() == artist_name.lower():
                song_id = track['id']
                break
        else:
            print('No matching artist found. Exiting.')
            exit()
    elif results['tracks']['total'] == 1:
        song_id = results['tracks']['items'][0]['id']
    else:
        print('No results found for', song_title)
        exit()

    # Get audio features for the selected song
    song_features = sp.audio_features(song_id)[0]

    return song_features

In [None]:
song_searched = input()

song_features =check_song_in_spotify (song_searched)

In [None]:
type(song_features)

In [None]:
song_features

In [None]:
tracks = []
track = {}

for key, value in song_features.items():
    track[key] = value

tracks.append(track)

tracks = pd.DataFrame(tracks)



tracks

tracks = tracks.select_dtypes(np.number)

tracks.shape

scaled_tracks = scaler.transform(tracks)
scaled_tracks

track_cluster = kmeans.predict(scaled_tracks)
track_cluster

In [None]:
def check_song_in_spotify (song_searched):

    # Search for the song using the Spotify API
    results = sp.search(q='track:' + song_searched, type='track')

    # Check if there are multiple versions of the same song
    if results['tracks']['total'] > 1:
        print('Multiple versions of the song were found. Please specify the artist:')
        # Print all versions and the artist who played them
        for track in results['tracks']['items']:
            print(f"Track name: {track['name']}\nArtist name: {track['artists'][0]['name']}\n")

        # Ask to input just one of the artists
        artist_name = input('Enter the name of the artist: ')
        for track in results['tracks']['items']:
            if track['artists'][0]['name'].lower() == artist_name.lower():
                song_id = track['id']
                break
        else:
            print('No matching artist found. Exiting.')
            exit()
    elif results['tracks']['total'] == 1:
        song_id = results['tracks']['items'][0]['id']
    else:
        print('No results found for', song_title)
        exit()

    # Get audio features for the selected song
    song_features = sp.audio_features(song_id)[0]

    return song_features

In [None]:
song_searched = input()

check_song_in_spotify (song_searched)

In [None]:
def song_recommender(topsongs, spotify_data, song_searched):
    
    # Converting both input value and 'title' in lower case
    song_searched = song_searched.lower() 
    topsongs['title'] = topsongs['title'].str.lower()
    
    
    # ------------------ IF song in the input IS IN TOPSONGS -------------------------
    # ---------------- recommend random'hot' song fro topsong -----------------------
    
    if song_searched in topsongs['title'].values:
        # While song in input IS different from the recommended (output)
        while True:
            # Generating a random number to select another song from the list
            random_num = random.randint(0, len(topsongs)-1)
            # Picking the song 'title' with random_num index
            song_recommended_title = topsongs.iloc[random_num]['title']
            # Getting also the artist name 
            song_recommended_artist = topsongs.iloc[random_num]['artist']
            # If recommended song IS NOT the same as the searched one, stop the while
            if song_recommended_title != song_searched:
                break
        # Capitalizing first letters of 'title' (.title())
        song_recommended_title = song_recommended_title.title()
        # Joinning song and artist to display in the output
        song_recommended = f"{song_recommended_title} by {song_recommended_artist}"
        # Displaying output with both 'title' and 'artist' bigger and bolt style.
        display(Markdown(f"You should listen to: **{song_recommended}**!"))
    
    # ---------------- IF song in the input IS NOT IN TOPSONGS -----------------------
    else:
    
    # ------------------- Create CLUSTERS from SPOTIFY_DATA --------------------------
        
        # All numerical columns after 'uri' are the features to be taken
        features = spotify_data.select_dtypes(np.number)
        # Select X, but no target yet
        X = features
        # Scaling Data
        X_prep = StandardScaler().fit_transform(X)
        # Creating 8 clusters/segments (Kmeans)
        kmeans = KMeans(n_clusters=8, random_state=1234)
        kmeans.fit(X_prep)
        # Defining clusters:
        clusters = kmeans.predict(X_prep)
    
    # ---------------- IF song in the input IS IN SPOTIFY_DATA -----------------------
    
        # Converting both searched song and 'title' in spotify_data to lower case
        #song_searched = song_searched.lower()
        spotify_data['title'] = spotify_data['title'].str.lower()

        # Checking if the input song IS NOT IN the Spotify data
        if song_searched not in spotify_data['title'].values:
             # Selecting the features for the input song 
            # (filtering through boolean mask and selecting first row of filtered)
            song_features = features.loc[spotify_data['title']==song_searched].iloc[0]
            
            # Convert target_song to a numpy array and reshape to be a 2-dimensional array with one row
            song_features_arr = np.array(song_features).reshape(1, -1)
    
            # Scaling target_song values using the same X_prep scaler used for X data
            song_features_prep = StandardScaler().fit(X_prep).transform(song_features_arr)
    
            # Assigning the cluster using the same Kmeans fitted before:
            song_cluster = kmeans.predict(song_features_prep)
            
            
            # Recommending the most similar song from that cluster: 
        
            # Selecting all songs from the same cluster as song_searched (song_cluster)
            songs_in_cluster = spotify_data[kmeans.labels_ == song_cluster[0]]       ## Note(I)
    
            # Comparing song_searched with songs in same cluster using cosine_similarity
            from sklearn.metrics.pairwise import cosine_similarity                 ## Note (II)
    
            # Calculating cosine similarity between searched song and songs in cluster
            similarities = cosine_similarity(song_features_prep, songs_in_cluster.select_dtypes(np.number))
    
            # Finding the most similar song in the cluster
            most_similar_song_idx = np.argmax(similarities)                      ## Note (III)
    
            # Retrieving the most similar song from the cluster
            most_similar_song = songs_in_cluster.iloc[most_similar_song_idx]['title']
    
    
            # Recommending a random song from that cluster: 
        
            # Selecting a random song from that cluster
            random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

            # Check random_song and most_similar_sonog are not the same. If so, repeat random choice.
            while spotify_data.iloc[random_song_idx]['title'] == most_similar_song:
                random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

                # Getting title of random song
            random_song = spotify_data.iloc[random_song_idx]['title']


            display(Markdown(f"The song '**{song_searched.capitalize()}**' belongs to cluster '**{song_cluster[0]}**'.\n\n"
          f"The most similar song in the cluster is '**{most_similar_song.capitalize()}**'.\n\n"
          f"Here's a random song from the same cluster: '**{random_song.capitalize()}**'.")

        #else:
        #    return f"The song '**{song_searched}**' is not in our data."


In [None]:
song_searched = input("Enter a song: ")
song_recommender(topsongs, spotify_data, song_searched)

In [None]:
def song_recommender(topsongs, spotify_data, song_searched):
    
    # Converting both input value and 'title' in lower case
    song_searched = song_searched.lower() 
    topsongs['title'] = topsongs['title'].str.lower()
    
    # ------------------ IF song in the input IS IN TOPSONGS -------------------------
    
    # ------------------ If song in the input IS NOT IN TOPSONGS----------------------------
    
    else:
        
        # Defining clusters for audio features of spotify_data
        
        # ------------------ And song IS NOT IN SPOTIFY_DATA ------------------------------
        
        # Converting both searched song and 'title' in spotify_data to lower case
        song_searched = song_searched.lower()
        spotify_data['title'] = spotify_data['title'].str.lower()

        # Checking if the input song IS NOT IN the Spotify data
        if song_searched not in spotify_data['title'].values:
            
            # Searching specific song in Spotify
            new_song = sp.search(q='track'+ song_searched, type='track')
            
            # Checking if there are multiple versions of the same song, if so --> artist
            if new_song['tracks']['total'] > 1:
                print('Multiple versions of the song were found. Please specify the artist: \n')
                # Print list of versions and artist who played them
                for track in new_song['tracks']['items']:
                    print(f"Track name: {track['name']}\nArtist name: {track['artists'][0]['name']}\n")
                    
                # Ask to input just one of the artists/bands
                artist_name = input('Enter the name of the artist: ')
                
                # Checking which artist was chosen
                for track in new_song['tracks']['items']:
                    if track['artists'][0]['name'].lower() == artist_name.lower():
                        # ID of the song played by the artist selected
                        song_id = track['id']
                        break
                    else:
                        print('No matching artist found. Exiting.')
                        exit()
           
            # If there is just one version of that song
            elif new_song['tracks']['total'] == 1:
                song_id = new_song['tracks']['items'][0]['id']
            
            # If there is no coincidence at all
            else:
                print("No results found for ", song_title)
                exit()
                
            #Getting audio features for that song_id
            song_features = sp.audio_features(song_id)[0]
            
            # Selecting just numerical
            #song_features = audio_features.select_dtypes(np.number)
            
        
        # ------------------ but if song IS IN SPOTIFY_DATA ------------------------------ 
        
        else:
            # Extracting the features for the input song 
            # (filtering through boolean mask and selecting first row of filtered)
            song_features = features.loc[spotify_data['title']==song_searched].iloc[0]
           
    
    
        # Predicting/assigning a cluster for song_searched: --------------------------------
    
        # Solving dimensional issue and need for array:   
        
        # Exclude 'audio_features' key from song_features dictionary
        song_features_clean = {k: v for k, v in song_features.items() if k != 'audio_features'}

        # Convert target_song to a numpy array and reshape to be a 2-dimensional array with one row
        song_features_arr = np.array(list(song_features_clean.values())).reshape(1, -1)

        # Transforming target_song values using the same X_prep scaler used for X data
        song_features_prep = scaler.transform(song_features_arr)

        # Predicting / assigning the cluster using the same Kmeans fitted before:
        song_cluster = kmeans.predict(song_features_prep)
        
        
        
        
        # Convert target_song to a numpy array and reshape to be a 2-dimensional array with one row
        #song_features_arr = np.array(song_features).reshape(1, -1)
    
        # Scaling target_song values using the same X_prep scaler used for X data
        #song_features_prep = StandardScaler().fit(X_prep).transform(song_features_arr)
    
        # Predicting / assigning the cluster using the same Kmeans fitted before:
        #song_cluster = kmeans.predict(song_features_prep)
    
        
        # Recommending the most similar song from that cluster: -----------------------------
        
        # Selecting all songs from the same cluster as song_searched (song_cluster)
        songs_in_cluster = spotify_data[kmeans.labels_ == song_cluster[0]]
        
                    ## Note(I)
    
        # Comparing song_searched with songs in same cluster using cosine_similarity
        from sklearn.metrics.pairwise import cosine_similarity
        
                    ## Note (II)
    
        # Calculating cosine similarity between searched song and songs in cluster
        similarities = cosine_similarity(song_features_prep, songs_in_cluster.select_dtypes(np.number))
    
        # Finding the most similar song in the cluster
        most_similar_song_idx = np.argmax(similarities)
        
                    ## Note (III)
    
        # Retrieving the most similar song from the cluster
        most_similar_song = songs_in_cluster.iloc[most_similar_song_idx]['title']
    
    
        # Recommending another random song from that cluster: -------------------------------
        
        # Selecting a random song from that cluster
        random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

        # Check random_song and most_similar_sonog are not the same. If so, repeat random choice.
        while spotify_data.iloc[random_song_idx]['title'] == most_similar_song:
            random_song_idx = np.random.choice(np.where(kmeans.labels_ == song_cluster)[0])

            # Getting title of random song
            random_song = spotify_data.iloc[random_song_idx]['title']

    
        display(Markdown(f"The song '**{song_searched.capitalize()}**' belongs to cluster '**{song_cluster[0]}**'.\n\n"
                  f"The most similar song in the cluster is '**{most_similar_song.capitalize()}**'.\n\n"))
                 # f"Here's a random song from the same cluster: '**{random_song.capitalize()}**'."))

 
    
        #else:
    
            #return f"The song '**{song_searched}**' is not in our data."

In [None]:
song_searched = input("Enter a song: ")
song_recommender(topsongs, spotify_data, song_searched)