In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time
import json
from pathlib import Path
from flatten_json import flatten
from urllib.error import HTTPError
import time, sys
from IPython.display import clear_output
from sklearn.neighbors import NearestNeighbors

In [2]:
client_id = '#################'
client_secret = '####################'

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
def getTrackIDs(user, playlist_id):
    ids = []
    results = sp.user_playlist(user, playlist_id, fields="tracks,next")
    tracks = results['tracks']
    for i in range(0, len(tracks['items'])):
        ids.append(tracks['items'][i]['track']['id'])

    while tracks['next']:
        tracks = sp.next(tracks)
    
    for i in range(0, len(tracks['items'])):
        ids.append(tracks['items'][i]['track']['id'])
        
    return ids
        
user_ids = getTrackIDs('ethan8chang', '0mbobnEpYbg1ONLafaWWR3')

In [4]:
def getTrackFeatures(id):
  meta = sp.track(id)
  features = sp.audio_features(id)

  # meta
  name = meta['name']
  album = meta['album']['name']
  artist = meta['album']['artists'][0]['name']
  release_date = meta['album']['release_date']
  length = meta['duration_ms']
  popularity = meta['popularity']

  # features
  acousticness = features[0]['acousticness']
  danceability = features[0]['danceability']
  energy = features[0]['energy']
  instrumentalness = features[0]['instrumentalness']
  liveness = features[0]['liveness']
  loudness = features[0]['loudness']
  speechiness = features[0]['speechiness']
  tempo = features[0]['tempo']
  time_signature = features[0]['time_signature']
  key = features[0]['key']
  mode = features[0]['mode']
  valence = features[0]['valence']

  track = [name, album, artist, release_date, length, popularity, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, time_signature]
  return track

In [5]:
def getGenres(song_features): 
    genres = []
    for i in range (0, len(song_features)):
        result = sp.search(song_features['artist'][i])
        track = result['tracks']['items'][0]

        artist = sp.artist(track["artists"][0]["external_urls"]["spotify"])
        genres.append(artist['genres'])
        
    return genres

In [6]:
def createFullPlaylistDf(track_features):
    all_tracks = []
    for i in range(len(track_features)):
      track = getTrackFeatures(track_features[i])
      all_tracks.append(track)
    
    # create dataset
    df = pd.DataFrame(all_tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature'])
    
    genres = getGenres(df)
    
    df['genres'] = genres
    
    return df

In [7]:
user_playlist = createFullPlaylistDf(user_ids)
user_playlist

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
0,Syphilis,Death Race For Love,Juice WRLD,2019-03-08,131761,65,0.782,0.659,1,-3.103,1,0.2480,0.06960,0.000000,0.1680,0.336,148.030,4,"[chicago rap, melodic rap]"
1,God's Warrior,God's Warrior,Tee Grizzley,2019-04-18,167704,42,0.803,0.689,1,-5.318,0,0.4030,0.21500,0.000000,0.2830,0.684,157.517,4,"[detroit hip hop, detroit trap, hip hop, rap, ..."
2,YuNg BrAtZ,Revenge,XXXTENTACION,2017-05-16,101851,74,0.755,0.695,9,-5.963,1,0.2600,0.01790,0.000000,0.1070,0.493,128.063,4,"[emo rap, miami hip hop]"
3,Shots,We Love You Tecca,Lil Tecca,2019-08-30,109373,0,0.829,0.582,7,-5.721,0,0.0415,0.52500,0.000000,0.0759,0.510,108.028,4,"[melodic rap, rap, trap]"
4,Carbonated Water,Carbonated Water,Ski Mask The Slump God,2019-10-31,141040,63,0.938,0.559,10,-6.734,1,0.3120,0.05460,0.000000,0.1570,0.760,119.901,4,"[miami hip hop, underground hip hop]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,No Chains,No Chains,KB,2018-04-18,222651,0,0.676,0.815,1,-3.427,1,0.2780,0.01520,0.000004,0.1070,0.591,162.054,4,"[christian hip hop, christian trap]"
112,Chanel Bag,Chanel Bag,KillBunk,2020-10-31,158667,72,0.773,0.614,11,-5.490,0,0.0776,0.06780,0.000000,0.0873,0.700,115.015,4,[sad rap]
113,Do You Feel Me,DC Fridays,DC The Don,2019-12-20,190824,30,0.732,0.543,11,-10.747,1,0.2400,0.00849,0.000194,0.1080,0.523,155.924,4,"[melodic rap, meme rap, vapor trap]"
114,Daij Is Drunk,Daij Is Drunk,DC The Don,2021-05-07,134011,49,0.767,0.508,9,-7.850,1,0.3480,0.13500,0.000000,0.1190,0.452,159.806,4,"[melodic rap, meme rap, vapor trap]"


In [8]:
def update_progress(progress):
    bar_length = 20
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1

    block = int(round(bar_length * progress))

    clear_output(wait = True)
    text = "Progress: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
    print(text)

In [9]:
def getTrackDF(ids):
    # loop over track ids 
    tracks = []
    for i in range(len(ids)):
        try:
            track = getTrackFeatures(ids[i])
            tracks.append(track)
        except HTTPError as err:
            print(err.code)
        update_progress(i/len(ids))

    # create dataset
    df = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature'])
    return df

In [10]:
def getDatasetIDs(playlist_df):
    ids = []
    for i in range (0, playlist_df['track_uri'].count()-1):
        uri = playlist_df['track_uri'][i]
        shortened = uri[14:]
        ids.append(shortened)
    return ids

In [11]:
def import_json_playlist_x(local_path, x):
    p = Path(local_path)
    with p.open('r', encoding='utf-8') as f:
        data = json.loads(f.read())
    tracks_data = data['playlists']
    
    #make loop so that it loops through all playlists in the one instance
    first_playlist_tracks = tracks_data[0]['tracks']
    total_tracks_df = pd.DataFrame(first_playlist_tracks)
    

    for i in range (1, x):
        a_playlist_tracks = tracks_data[i]['tracks']
        a_playlist_df  = pd.DataFrame(a_playlist_tracks)
    
        
        total_tracks_df = total_tracks_df.append(a_playlist_df, ignore_index = True)

        
    total_tracks_df_ids = getDatasetIDs(total_tracks_df)
    total_tracks_df_ids_features = getTrackDF(total_tracks_df_ids)
    
    
    
    return total_tracks_df_ids_features


In [12]:
def add_genres(total_tracks_df_ids_features):
    genres = []
    for i in range (0, len(total_tracks_df_ids_features)):
        result = sp.search(total_tracks_df_ids_features['artist'][i])
        track = result['tracks']['items'][0]

        artist = sp.artist(track["artists"][0]["external_urls"]["spotify"])
        
        genres.append(artist['genres'])
        
        update_progress(i/len(total_tracks_df_ids_features))
        
    return genres


In [13]:
song_dataset = import_json_playlist_x(r'C:\Users\ecusa\OneDrive\Desktop\MPSD\mpd.slice.3000-3999.json', 5)

Progress: [####################] 99.7%


In [14]:
genres = add_genres(song_dataset)
genres

Progress: [####################] 99.7%


[['belgian edm', 'edm', 'pop', 'pop dance', 'tropical house'],
 ['neo soul', 'pop', 'pop soul', 'r&b', 'urban contemporary'],
 ['deep tropical house', 'gauze pop'],
 ['big room',
  'brostep',
  'dance pop',
  'dutch edm',
  'edm',
  'house',
  'pop',
  'pop dance',
  'slap house',
  'trance',
  'tropical house'],
 ['alt z', 'australian pop', 'nz pop'],
 ['neo soul', 'r&b'],
 ['australian dance', 'melbourne bounce'],
 ['brazilian edm', 'complextro'],
 ['edm', 'pop', 'pop dance', 'tropical house'],
 ['contemporary country', 'country road', 'modern country rock'],
 ['german dance', 'slap house'],
 ['classic rock',
  'folk',
  'folk rock',
  'mellow gold',
  'rock',
  'roots rock',
  'singer-songwriter',
  'soft rock'],
 ['dance pop', 'edm', 'electropop', 'pop', 'pop dance', 'tropical house'],
 ['deep house',
  'deep tropical house',
  'electra',
  'house',
  'pop dance',
  'progressive house',
  'swiss pop',
  'tropical house'],
 ['alt z',
  'electropop',
  'indie cafe pop',
  'indie elec

In [15]:
print(song_dataset.shape)
print(len(genres))

(351, 18)
351


In [16]:
song_dataset['genres'] = genres

In [17]:
#filters out songs from dataset does not match genre of playlist
def getRelevantData(df1, df2):
    
    df1_genre_list = []
    for i in range(0, len(df1)):
        for j in range(0, len(df1['genres'][i])):
            specific_genre = df1['genres'][i][j]
            if (specific_genre in df1_genre_list):
                pass
            else:
                df1_genre_list.append(specific_genre)
                
    irrelevant_songs_ids = []
    for k in range(0, len(df2)):
        song_genres = []
        for m in range(0, len(df2['genres'][k])):
            specific_genre = df2['genres'][k][m]
            song_genres.append(specific_genre)
        
        appears = False
        for n in range(0, len(song_genres)):
            if(song_genres[n] in df1_genre_list):
                appears = True
            
        if appears == False:
            irrelevant_songs_ids.append(k)

    relevant_df2 = df2.drop(irrelevant_songs_ids)
    
    return relevant_df2

In [18]:
getRelevantData(user_playlist, song_dataset)

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
0,What Is Love 2016 - Mike Mago Remix,What Is Love 2016 (Remixes),Lost Frequencies,2016-12-09,209882,2,0.665,0.751,1,-5.687,0,0.0596,0.03840,0.000033,0.0573,0.445,124.052,4,"[belgian edm, edm, pop, pop dance, tropical ho..."
1,Love Me Now - Dave Audé Remix Radio Edit,Love Me Now (Remixes),John Legend,2016-12-01,219106,37,0.595,0.841,8,-4.240,1,0.0796,0.03220,0.000000,0.0847,0.761,124.973,4,"[neo soul, pop, pop soul, r&b, urban contempor..."
3,I Will Be Here (feat. Sneaky Sound System) - (...,Kaleidoscope Remixed,Tiësto,2010,237066,0,0.722,0.723,6,-5.188,0,0.0644,0.01620,0.000017,0.0709,0.522,127.831,4,"[big room, brostep, dance pop, dutch edm, edm,..."
5,I'll Take You - Solidisco Remix,I'll Take You (Remixes),MSTR ROGERS,2016-01-22,196742,16,0.725,0.806,9,-3.482,1,0.0331,0.00285,0.011900,0.0241,0.669,123.965,4,"[neo soul, r&b]"
8,Hold Me Up - AM2PM Remix,Hold Me Up (AM2PM Remix),Conrad Sewell,2015-08-21,327029,0,0.785,0.598,4,-9.151,0,0.0456,0.00698,0.044700,0.0533,0.859,123.016,4,"[edm, pop, pop dance, tropical house]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,Dem My Dogs,Subluxe,Healy,2017-06-09,248480,0,0.871,0.387,10,-9.535,1,0.5450,0.73000,0.000004,0.1110,0.368,59.964,4,"[alternative r&b, indie r&b, memphis hip hop, ..."
326,Too Bad,Too Bad - Single,IshDARR,2015-08-17,193071,40,0.625,0.726,2,-5.617,1,0.1460,0.09330,0.000000,0.2340,0.337,139.951,4,"[deep underground hip hop, milwaukee hip hop, ..."
343,Solid Wall of Sound,We got it from Here... Thank You 4 Your service,A Tribe Called Quest,2016-11-11,223346,45,0.670,0.684,9,-6.053,0,0.4010,0.31100,0.000274,0.1810,0.473,146.054,4,"[alternative hip hop, conscious hip hop, east ..."
344,Get Stupid,Ronald Dregan,Mac Dre,2004-07-20,242626,49,0.844,0.540,10,-6.100,0,0.3060,0.27900,0.000000,0.0823,0.654,91.053,4,"[cali rap, gangster rap, hyphy, oakland hip ho..."


In [19]:
#machine learning model
knn = NearestNeighbors(n_neighbors=10, metric='cosine')

In [20]:
ml_features_for_user_playlist = createFullPlaylistDf(user_ids)


In [21]:
ml_features_for_song_dataset = getRelevantData(user_playlist, song_dataset)

In [22]:
ml_features_for_user_playlist.drop(['name',  'album',  'artist', 'release_date', 'popularity', 'genres'], axis=1, inplace=True)

In [23]:
ml_features_for_user_playlist

Unnamed: 0,length,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,131761,0.782,0.659,1,-3.103,1,0.2480,0.06960,0.000000,0.1680,0.336,148.030,4
1,167704,0.803,0.689,1,-5.318,0,0.4030,0.21500,0.000000,0.2830,0.684,157.517,4
2,101851,0.755,0.695,9,-5.963,1,0.2600,0.01790,0.000000,0.1070,0.493,128.063,4
3,109373,0.829,0.582,7,-5.721,0,0.0415,0.52500,0.000000,0.0759,0.510,108.028,4
4,141040,0.938,0.559,10,-6.734,1,0.3120,0.05460,0.000000,0.1570,0.760,119.901,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,222651,0.676,0.815,1,-3.427,1,0.2780,0.01520,0.000004,0.1070,0.591,162.054,4
112,158667,0.773,0.614,11,-5.490,0,0.0776,0.06780,0.000000,0.0873,0.700,115.015,4
113,190824,0.732,0.543,11,-10.747,1,0.2400,0.00849,0.000194,0.1080,0.523,155.924,4
114,134011,0.767,0.508,9,-7.850,1,0.3480,0.13500,0.000000,0.1190,0.452,159.806,4


In [24]:
ml_features_for_user_playlist.shape

(116, 13)

In [25]:
ml_features_for_song_dataset.drop(['name',  'album',  'artist', 'release_date', 'popularity', 'genres'], axis=1, inplace=True)

In [26]:
ml_features_for_song_dataset

Unnamed: 0,length,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,209882,0.665,0.751,1,-5.687,0,0.0596,0.03840,0.000033,0.0573,0.445,124.052,4
1,219106,0.595,0.841,8,-4.240,1,0.0796,0.03220,0.000000,0.0847,0.761,124.973,4
3,237066,0.722,0.723,6,-5.188,0,0.0644,0.01620,0.000017,0.0709,0.522,127.831,4
5,196742,0.725,0.806,9,-3.482,1,0.0331,0.00285,0.011900,0.0241,0.669,123.965,4
8,327029,0.785,0.598,4,-9.151,0,0.0456,0.00698,0.044700,0.0533,0.859,123.016,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,248480,0.871,0.387,10,-9.535,1,0.5450,0.73000,0.000004,0.1110,0.368,59.964,4
326,193071,0.625,0.726,2,-5.617,1,0.1460,0.09330,0.000000,0.2340,0.337,139.951,4
343,223346,0.670,0.684,9,-6.053,0,0.4010,0.31100,0.000274,0.1810,0.473,146.054,4
344,242626,0.844,0.540,10,-6.100,0,0.3060,0.27900,0.000000,0.0823,0.654,91.053,4


In [94]:
def recommendationList(user_playlist, song_dataset):
    knn_model = knn.fit(song_dataset)
    
    recommendation_frequency = []
    for i in range(0, len(song_dataset)):
        recommendation_frequency.append(0)
        
    for j in range(0, len(user_playlist)):
        distances, indices = knn_model.kneighbors([user_playlist.iloc[j]])
        for k in range(0, len(indices[0])):
            recommendation_frequency[indices[0][k]] += 1

    return recommendation_frequency

In [28]:
#example of indices
knn_model = knn.fit(ml_features_for_song_dataset)
distances, indices = knn_model.kneighbors([ml_features_for_user_playlist.iloc[2]])
distances, indices

(array([[1.54029495e-08, 1.78547714e-08, 2.39049378e-08, 2.39512953e-08,
         3.35899232e-08, 3.81700629e-08, 3.93364227e-08, 8.08306906e-08,
         8.21051127e-08, 9.05314598e-08]]),
 array([[ 19, 131,  39,  76,  18,  14, 128,  54,  38,  93]], dtype=int64))

In [29]:
test = recommendationList(ml_features_for_user_playlist, ml_features_for_song_dataset)

In [30]:
frequencies_of_recs = recommendationList(ml_features_for_user_playlist, ml_features_for_song_dataset)

def indicesOfMax(frequency_list):

    max_indices = []
    
    for i in range(0, 10):  
        frequency_max = frequency_list[0]
        frequency_max_index = 0

        for j in range(0, len(frequency_list)):
            if (frequency_list[j] > frequency_max):
                frequency_max = frequency_list[j]
                frequency_max_index = j
        
        print([frequency_max_index, frequency_max])
    
        max_indices.append(frequency_max_index)
        frequency_list.pop(frequency_max_index)    
    
    return max_indices


In [31]:
sorted_indices = indicesOfMax(test)

[93, 47]
[54, 46]
[38, 45]
[125, 31]
[138, 30]
[14, 29]
[41, 29]
[48, 29]
[100, 29]
[100, 29]


In [32]:
song_dataset.iloc[sorted_indices]

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
93,Better Slow Down,Sketches,Conner Youngblood,2012-04-12,239294,9,0.786,0.364,0,-12.075,1,0.0413,0.377,0.0272,0.109,0.498,113.405,5,"[shimmer pop, shiver pop]"
54,Ends of the Earth,Lonesome Dreams,Lord Huron,2013-01-21,284146,0,0.555,0.584,4,-8.667,1,0.0301,0.274,0.0228,0.254,0.41,120.968,4,"[indie folk, indie pop, stomp and holler]"
38,Be Ok,I Am,Chrisette Michele,2007-01-01,222960,26,0.798,0.595,7,-6.39,1,0.167,0.692,0.0,0.121,0.306,99.984,4,"[hip pop, neo soul, quiet storm, r&b, urban co..."
125,Marvins Room,Take Care (Deluxe),Drake,2011-11-15,347226,73,0.492,0.26,9,-17.341,0,0.0921,0.646,0.00178,0.0705,0.312,111.519,5,"[canadian hip hop, canadian pop, hip hop, pop ..."
138,The Man Who Lives Forever,Lonesome Dreams,Lord Huron,2013-01-21,318813,0,0.629,0.726,4,-8.721,1,0.0277,0.468,0.00477,0.103,0.67,125.004,4,"[indie folk, indie pop, stomp and holler]"
14,I Love You Always Forever,The Valley,Betty Who,2017-03-24,223160,55,0.747,0.687,0,-6.102,1,0.0449,0.336,0.0,0.302,0.558,106.045,4,"[alt z, electropop, indie cafe pop, indie elec..."
41,Hope She Cheats On You (With A Basketball Player),Late Nights & Early Mornings,Marsha Ambrosius,2011-02-25,243653,35,0.501,0.666,6,-7.312,0,0.414,0.0596,0.0,0.123,0.392,163.925,4,"[conscious hip hop, gangster rap, hip hop, pop..."
48,Something Just Like This,Memories...Do Not Open,The Chainsmokers,2017-04-07,247160,83,0.617,0.635,11,-6.769,0,0.0317,0.0498,1.4e-05,0.164,0.446,103.019,4,"[dance pop, edm, electropop, pop, pop dance, t..."
100,Hungry Face,Les Revenants Soundtrack,Mogwai,2013-02-25,144693,0,0.579,0.607,7,-10.548,1,0.0282,0.794,0.926,0.0717,0.36,108.965,4,"[british post-rock, chamber psych, cosmic post..."
100,Hungry Face,Les Revenants Soundtrack,Mogwai,2013-02-25,144693,0,0.579,0.607,7,-10.548,1,0.0282,0.794,0.926,0.0717,0.36,108.965,4,"[british post-rock, chamber psych, cosmic post..."


In [33]:
#example of algorithm run on one song
song_dataset.iloc[indices[0]]

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
19,Tyrone - Live,Live,Erykah Badu,1997-01-01,221866,53,0.464,0.598,1,-10.051,0,0.112,0.0241,2.6e-05,0.977,0.353,130.373,3,"[afrofuturism, hip hop, indie r&b, neo soul, r..."
131,Flax,Acid and Everything,Gem Club,2010-07-06,218254,14,0.237,0.191,7,-13.151,1,0.0416,0.972,0.0373,0.125,0.12,169.171,4,"[boston indie, chamber pop]"
39,Alone Together,Days & Nights,Daley,2014-01-01,239960,30,0.51,0.504,0,-8.357,0,0.118,0.157,0.0,0.123,0.286,89.192,4,[alternative hip hop]
76,The Stairs,Loma Vista,Family of the Year,2012,231493,0,0.48,0.837,3,-4.252,1,0.033,0.0455,0.00459,0.374,0.314,124.866,4,[stomp and holler]
18,Honey,New Amerykah Part One (4th World War) [UK-AU-N...,Erykah Badu,2008-03-03,320986,14,0.398,0.801,6,-7.858,0,0.34,0.0308,0.00372,0.221,0.491,87.942,4,"[afrofuturism, hip hop, indie r&b, neo soul, r..."
14,I Love You Always Forever,The Valley,Betty Who,2017-03-24,223160,55,0.747,0.687,0,-6.102,1,0.0449,0.336,0.0,0.302,0.558,106.045,4,"[alt z, electropop, indie cafe pop, indie elec..."
128,Stubborn Love,The Lumineers,The Lumineers,2012-01-01,277333,0,0.596,0.555,0,-11.689,1,0.0374,0.617,0.0395,0.275,0.235,115.019,4,"[folk-pop, modern rock, pop, pop rock, stomp a..."
54,Ends of the Earth,Lonesome Dreams,Lord Huron,2013-01-21,284146,0,0.555,0.584,4,-8.667,1,0.0301,0.274,0.0228,0.254,0.41,120.968,4,"[indie folk, indie pop, stomp and holler]"
38,Be Ok,I Am,Chrisette Michele,2007-01-01,222960,26,0.798,0.595,7,-6.39,1,0.167,0.692,0.0,0.121,0.306,99.984,4,"[hip pop, neo soul, quiet storm, r&b, urban co..."
93,Better Slow Down,Sketches,Conner Youngblood,2012-04-12,239294,9,0.786,0.364,0,-12.075,1,0.0413,0.377,0.0272,0.109,0.498,113.405,5,"[shimmer pop, shiver pop]"


In [34]:
long_set_of_songs = import_json_playlist_x(r'C:\Users\ecusa\OneDrive\Desktop\MPSD\999.json', 100)

Progress: [####################] 100.0%


In [35]:
long_set_of_songs

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Lose Control (feat. Ciara & Fat Man Scoop),The Cookbook,Missy Elliott,2005-07-04,226863,67,0.904,0.813,4,-7.105,0,0.1210,0.03110,0.006970,0.0471,0.810,125.461,4
1,Toxic,In The Zone,Britney Spears,2003-11-13,198800,82,0.774,0.838,5,-3.914,0,0.1140,0.02490,0.025000,0.2420,0.924,143.040,4
2,Crazy In Love (feat. Jay-Z),Dangerously In Love (Alben für die Ewigkeit),Beyoncé,2003-06-23,235933,23,0.664,0.758,2,-6.583,0,0.2100,0.00238,0.000000,0.0598,0.701,99.259,4
3,Rock Your Body,Justified,Justin Timberlake,2002-11-04,267266,76,0.892,0.714,4,-6.055,0,0.1410,0.20100,0.000234,0.0521,0.817,100.972,4
4,It Wasn't Me,Hot Shot (International Version #2),Shaggy,2000,227600,0,0.853,0.606,0,-4.596,1,0.0713,0.05610,0.000000,0.3130,0.654,94.759,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6094,Midnight Train to Georgia - Single Version,Imagination (Expanded Edition),Gladys Knight & The Pips,1973,237120,37,0.570,0.438,10,-9.928,0,0.0404,0.18900,0.000006,0.1010,0.425,89.246,4
6095,My Thang,Hell,James Brown,1974-01-01,259760,0,0.523,0.743,1,-10.760,1,0.2400,0.04370,0.000000,0.3220,0.648,202.607,4
6096,Pass The Peas,James Brown's Funky People,Various Artists,1986-01-01,192160,0,0.561,0.311,2,-14.981,1,0.2650,0.26000,0.000237,0.3530,0.702,195.996,4
6097,Laisse tomber les filles,Paris in April,April March,1996,127280,0,0.606,0.949,4,-4.324,1,0.0461,0.47300,0.000000,0.3840,0.906,139.121,4


In [36]:
def add_genres_w_try(total_tracks_df_ids_features):
    genres = []
    for i in range (0, len(total_tracks_df_ids_features)):   
        result = sp.search(total_tracks_df_ids_features['artist'][i]) 
        
        print(i)
        
        try:
            track = result['tracks']['items'][0]
            artist = sp.artist(track["artists"][0]["external_urls"]["spotify"])
            genres.append(artist['genres'])
        
        except IndexError:
            print("Index Error occurred")
            genres.append([])
        
        finally:
            update_progress(i/len(total_tracks_df_ids_features))
        
    return genres

In [37]:
long_genres = add_genres_w_try(long_set_of_songs)
long_genres

Progress: [####################] 100.0%


[['dance pop', 'hip pop', 'pop', 'post-teen pop', 'r&b', 'urban contemporary'],
 ['dance pop', 'pop', 'post-teen pop'],
 ['houston rap', 'pop', 'pop rap', 'trap queen'],
 ['dance pop', 'pop'],
 ['dance pop', 'pop rap', 'reggae fusion'],
 ['atl hip hop',
  'dance pop',
  'pop',
  'r&b',
  'south carolina hip hop',
  'urban contemporary'],
 ['atl hip hop',
  'dance pop',
  'pop',
  'r&b',
  'south carolina hip hop',
  'urban contemporary'],
 ['dance pop',
  'girl group',
  'pop',
  'pop rap',
  'post-teen pop',
  'urban contemporary'],
 ['dance pop',
  'girl group',
  'hip pop',
  'neo soul',
  'pop',
  'r&b',
  'urban contemporary'],
 ['atl hip hop', 'dirty south rap', 'hip hop', 'rap', 'southern hip hop'],
 ['canadian latin', 'canadian pop', 'dance pop', 'pop', 'pop rap'],
 ['dance pop',
  'neo mellow',
  'pop',
  'pop rock',
  'post-teen pop',
  'urban contemporary',
  'viral pop'],
 ['dance pop',
  'neo mellow',
  'pop',
  'pop rock',
  'post-teen pop',
  'urban contemporary',
  'vir

In [38]:
long_set_of_songs['genres'] = long_genres

In [39]:
long_set_of_songs

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
0,Lose Control (feat. Ciara & Fat Man Scoop),The Cookbook,Missy Elliott,2005-07-04,226863,67,0.904,0.813,4,-7.105,0,0.1210,0.03110,0.006970,0.0471,0.810,125.461,4,"[dance pop, hip pop, pop, post-teen pop, r&b, ..."
1,Toxic,In The Zone,Britney Spears,2003-11-13,198800,82,0.774,0.838,5,-3.914,0,0.1140,0.02490,0.025000,0.2420,0.924,143.040,4,"[dance pop, pop, post-teen pop]"
2,Crazy In Love (feat. Jay-Z),Dangerously In Love (Alben für die Ewigkeit),Beyoncé,2003-06-23,235933,23,0.664,0.758,2,-6.583,0,0.2100,0.00238,0.000000,0.0598,0.701,99.259,4,"[houston rap, pop, pop rap, trap queen]"
3,Rock Your Body,Justified,Justin Timberlake,2002-11-04,267266,76,0.892,0.714,4,-6.055,0,0.1410,0.20100,0.000234,0.0521,0.817,100.972,4,"[dance pop, pop]"
4,It Wasn't Me,Hot Shot (International Version #2),Shaggy,2000,227600,0,0.853,0.606,0,-4.596,1,0.0713,0.05610,0.000000,0.3130,0.654,94.759,4,"[dance pop, pop rap, reggae fusion]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6094,Midnight Train to Georgia - Single Version,Imagination (Expanded Edition),Gladys Knight & The Pips,1973,237120,37,0.570,0.438,10,-9.928,0,0.0404,0.18900,0.000006,0.1010,0.425,89.246,4,"[brill building pop, classic soul, disco, funk..."
6095,My Thang,Hell,James Brown,1974-01-01,259760,0,0.523,0.743,1,-10.760,1,0.2400,0.04370,0.000000,0.3220,0.648,202.607,4,"[bass house, edm, electro house, house, pop da..."
6096,Pass The Peas,James Brown's Funky People,Various Artists,1986-01-01,192160,0,0.561,0.311,2,-14.981,1,0.2650,0.26000,0.000237,0.3530,0.702,195.996,4,"[classic rock, folk, folk rock, mellow gold, r..."
6097,Laisse tomber les filles,Paris in April,April March,1996,127280,0,0.606,0.949,4,-4.324,1,0.0461,0.47300,0.000000,0.3840,0.906,139.121,4,[]


In [41]:
long_set_of_songs.to_csv(r'C:\Users\ecusa\OneDrive\Desktop\MPSD\long_set_of_songs.csv')

In [88]:
#drop irrelevant features, run knn model
ml_features_for_long_set_of_songs = getRelevantData(user_playlist, long_set_of_songs)

In [89]:
ml_features_for_long_set_of_songs

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
0,Lose Control (feat. Ciara & Fat Man Scoop),The Cookbook,Missy Elliott,2005-07-04,226863,67,0.904,0.813,4,-7.105,0,0.1210,0.03110,0.006970,0.0471,0.810,125.461,4,"[dance pop, hip pop, pop, post-teen pop, r&b, ..."
1,Toxic,In The Zone,Britney Spears,2003-11-13,198800,82,0.774,0.838,5,-3.914,0,0.1140,0.02490,0.025000,0.2420,0.924,143.040,4,"[dance pop, pop, post-teen pop]"
2,Crazy In Love (feat. Jay-Z),Dangerously In Love (Alben für die Ewigkeit),Beyoncé,2003-06-23,235933,23,0.664,0.758,2,-6.583,0,0.2100,0.00238,0.000000,0.0598,0.701,99.259,4,"[houston rap, pop, pop rap, trap queen]"
3,Rock Your Body,Justified,Justin Timberlake,2002-11-04,267266,76,0.892,0.714,4,-6.055,0,0.1410,0.20100,0.000234,0.0521,0.817,100.972,4,"[dance pop, pop]"
4,It Wasn't Me,Hot Shot (International Version #2),Shaggy,2000,227600,0,0.853,0.606,0,-4.596,1,0.0713,0.05610,0.000000,0.3130,0.654,94.759,4,"[dance pop, pop rap, reggae fusion]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6054,Big Eyed Fish,Busted Stuff,Dave Matthews Band,2002-07-16,304493,43,0.516,0.584,7,-6.452,1,0.0264,0.31900,0.000167,0.1840,0.437,81.485,4,"[jam band, neo mellow, pop rock]"
6055,Captain,Busted Stuff,Dave Matthews Band,2002-07-16,225506,32,0.594,0.797,9,-8.711,1,0.0316,0.51400,0.014900,0.1310,0.578,99.613,4,"[jam band, neo mellow, pop rock]"
6056,Busted Stuff,Busted Stuff,Dave Matthews Band,2002-07-16,227106,41,0.556,0.657,7,-5.319,1,0.0422,0.40300,0.009470,0.5850,0.655,171.514,4,"[jam band, neo mellow, pop rock]"
6058,The Love You Save (May Be Your Own),Greatest Hits,Joe Tex,2000-11-06,174779,38,0.502,0.300,10,-13.698,1,0.0372,0.77100,0.000000,0.1330,0.453,94.582,3,"[alternative americana, chamber pop, freak fol..."


In [90]:
ml_features_for_long_set_of_songs.drop(['name',  'album',  'artist', 'release_date', 'popularity', 'genres'], axis=1, inplace=True)

In [91]:
long_frequencies_of_recs = recommendationList(ml_features_for_user_playlist, ml_features_for_long_set_of_songs)

In [92]:
long_sorted_indices = indicesOfMax(long_frequencies_of_recs)

[2644, 51]
[86, 49]
[2975, 40]
[2260, 39]
[541, 38]
[2209, 38]
[1847, 37]
[1809, 33]
[2147, 31]
[750, 29]


In [93]:
long_set_of_songs.iloc[long_sorted_indices]

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,genres
2644,In the Name of Love,The Martin Garrix Collection,Martin Garrix,2017-04-21,195840,21,0.49,0.485,4,-6.237,0,0.0406,0.0592,0.0,0.337,0.196,133.889,4,"[dance pop, dutch edm, edm, electro house, pop..."
86,YYZ,Moving Pictures (2011 Remaster),Rush,1981-02-12,265840,57,0.51,0.919,7,-8.244,1,0.0593,0.00205,0.809,0.068,0.386,141.053,4,"[australian dance, australian electropop, aust..."
2975,We Found Love (feat. Jess Moskaluke) - Bonus,Honestly (Deluxe Version),Tyler Ward,2013-10-18,187333,35,0.419,0.173,2,-14.715,0,0.0391,0.866,0.0,0.157,0.336,99.149,4,"[channel pop, indie cafe pop, neo mellow, vira..."
2260,"Sit Still, Look Pretty","Sit Still, Look Pretty",Daya,2017-01-06,202226,62,0.655,0.787,2,-3.031,1,0.264,0.122,3e-06,0.178,0.554,181.799,4,"[dance pop, edm, electropop, pop, pop dance, t..."
541,Yellow,Good For You,Aminé,2017-07-28,180000,56,0.869,0.526,0,-5.766,1,0.111,0.139,0.0,0.119,0.217,128.996,4,"[hip hop, pop rap, portland hip hop, rap, unde..."
2209,Starboy,Starboy,The Weeknd,2016-11-25,230453,1,0.681,0.594,7,-7.028,1,0.282,0.165,3e-06,0.134,0.535,186.054,4,"[canadian contemporary r&b, canadian pop, pop]"
1847,Chan Chan,Buena Vista Social Club,Buena Vista Social Club,1997-09-16,257737,0,0.506,0.433,2,-14.183,0,0.0345,0.842,0.0311,0.112,0.641,83.222,4,"[latin jazz, musica tradicional cubana, world]"
1809,Twenty Miles,The Black Dirt Sessions,Deer Tick,2010-06-09,223933,47,0.501,0.784,0,-7.089,1,0.0334,0.529,0.0766,0.131,0.34,113.643,4,"[alternative country, deep new americana, indi..."
2147,I'm Yours,We Sing. We Dance. We Steal Things.,Jason Mraz,2008-05-01,242186,72,0.686,0.457,11,-8.322,1,0.0468,0.595,0.0,0.105,0.718,150.953,4,"[acoustic pop, neo mellow, pop, pop rock, vira..."
750,Hard To Handle,Shake Your Money Maker,The Black Crowes,1990-01-01,188066,0,0.591,0.79,4,-3.597,1,0.0432,0.00224,0.000886,0.292,0.961,104.171,4,"[album rock, alternative rock, blues rock, cla..."
