In [None]:
# 1. import libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from IPython.display import IFrame
# Initialise the spotipy client with the client id/secret read from the local `config` module.
credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(auth_manager=credentials_manager)

In [181]:
# Load the Billboard Hot 100 data and lowercase the text columns so that
# later title comparisons are case-insensitive.
df = pd.read_csv("billboard100_data.csv")

# The vectorised .str accessor passes missing values through as NaN, whereas
# .map(str.lower) raises TypeError on the first NaN it meets.
df['title'] = df['title'].str.lower()
df['author'] = df['author'].str.lower()

# Ask for a song title. Surrounding whitespace is stripped because it would
# otherwise defeat the exact title comparisons against the Billboard data.
x = input('Enter a song you like: ').strip()
if not x:
    raise ValueError("Please enter a song title.")

# Billboard artist(s) listed for the entered title; this is an empty Series
# when the title is not in billboard100_data.csv.
x_artist = df.loc[df['title'].str.lower() == x.lower(), 'author']

# Look the song up on Spotify and keep the id of the top hit.
results = sp.search(q=x, limit=3)
matches = results["tracks"]["items"]
if not matches:
    # Fail with a readable message instead of a bare IndexError on [0].
    raise ValueError(f"Spotify returned no track for {x!r}.")
track_id = matches[0]["id"]

# Embedded player for the entered song.
# IFrame url-encodes unknown keyword arguments into the query string of `src`;
# real <iframe> attributes have to be passed through `extras`. The IFrame
# template already sets frameborder="0", so it is not repeated here.
display(IFrame(src="https://open.spotify.com/embed/track/" + track_id,
               width="320",
               height="80",
               extras=['allowtransparency="true"', 'allow="encrypted-media"']))

### Recommend a song based on the entered title `x`.
### - Title found in the Billboard data (df): suggest another random chart song.
### - Otherwise: suggest a song from the same audio-feature cluster as `x`.


def show_player(spotify_track_id):
    """Render the Spotify embed player for one track id."""
    # IFrame url-encodes unknown keyword arguments into the query string of
    # `src`; real <iframe> attributes have to be passed through `extras`.
    # The IFrame template already sets frameborder="0".
    display(IFrame(src="https://open.spotify.com/embed/track/" + spotify_track_id,
                   width="320",
                   height="80",
                   extras=['allowtransparency="true"', 'allow="encrypted-media"']))


def first_track_id(query):
    """Return the id of the top Spotify search hit for `query`, or None when there is no hit."""
    hits = sp.search(q=query, limit=3)["tracks"]["items"]
    return hits[0]["id"] if hits else None


song_key = x.strip().lower()
# Compare against the title column only: `x.lower() in df.values` would also
# be True when the input happens to equal an artist name or any other cell.
is_entered_song = df['title'].str.lower().eq(song_key)

if is_entered_song.any():
    # Never recommend the song the user just entered, and skip incomplete rows.
    candidates = df.loc[~is_entered_song].dropna(subset=['title', 'author'])
    if candidates.empty:
        raise ValueError("The Billboard data holds no other song to recommend.")

    # Read title and artist from the same sampled row rather than
    # round-tripping through to_string() and a second title lookup.
    rec_row = candidates.sample().iloc[0]
    rec_song = str(rec_row['title'])
    rec_artist = str(rec_row['author'])
    # .title() capitalises the first letter of every word.
    print("You might like this song: " + rec_song.title() + " by " + rec_artist.title())

    # Search with the artist first so the player shows the charted version
    # and not another song that shares the title; fall back to title only.
    track_id_rec = first_track_id(rec_song + " " + rec_artist) or first_track_id(rec_song)
    if track_id_rec is not None:
        show_player(track_id_rec)
else:
    # Fit the scaler and the clustering on the collected Spotify tracks.
    df_spotify = pd.read_csv("spotify_data.csv")
    X = df_spotify.drop(['id', 'duration_ms'], axis=1)  # remaining columns must be numeric
    scaler = StandardScaler()
    scaler_fitted = scaler.fit(X)  # reused below to scale the entered song
    X_scaled = scaler_fitted.transform(X)
    X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)
    kmeans = KMeans(n_clusters=4, random_state=1234)
    kmeans.fit(X_scaled_df.values)

    # Audio features (danceability, tempo, ...) of the entered song.
    x_af = sp.audio_features(track_id)
    if not x_af or x_af[0] is None:
        raise ValueError("Spotify has no audio features for the entered song.")
    X_af = pd.DataFrame(x_af)
    # Select exactly the columns the scaler was fitted on, in the same order,
    # so the transform cannot be misaligned with the training features.
    X_af_num = X_af[list(X.columns)]
    X_af_num_scaled = scaler_fitted.transform(X_af_num)

    # predict() returns a one-element array; keep the label as a plain int.
    song_cluster = int(kmeans.predict(X_af_num_scaled)[0])

    # NOTE(review): assumes the labels in clustered_data.csv come from an
    # identical fit (same data, n_clusters=4, random_state=1234) - TODO confirm.
    df_cluster = pd.read_csv("clustered_data.csv")
    selected_rows = df_cluster[(df_cluster['cluster'] == song_cluster)
                               & (df_cluster['id'] != track_id)]
    if selected_rows.empty:
        raise ValueError(f"clustered_data.csv has no other song in cluster {song_cluster}.")

    random_row = selected_rows.sample()
    track_id_rand = str(random_row['id'].iloc[0])
    track = sp.track(track_id_rand)
    # Use the track's own artist; the album artist can differ from the performer.
    print('You might like this song: ' + track["name"] + ' by ' + track["artists"][0]["name"])
    show_player(track_id_rand)

Enter a song you like: flowerS


You might like this song: Do It Again by Nle Choppa & 2Rare
