In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

In [2]:
import json
import re
import os

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

In [3]:
FILE_PATH = "C:/Users/user/OneDrive - National University of Singapore/Y3S2/IS3107 Data Engineering/Project/songs"

os.chdir(FILE_PATH)
mdp_files = os.listdir()

tracks_df = pd.DataFrame()

for f in mdp_files:
    json_file = json.loads(open(f).read())
    playlists = json_file["playlists"]
    sub_df = pd.json_normalize(playlists, record_path = "tracks", meta = ["name"])
    tracks_df = pd.concat([tracks_df, sub_df])

tracks_df.drop_duplicates(subset = "track_uri", inplace = True)
tracks_df["id"] = tracks_df["track_uri"].apply(lambda x: re.findall(r'\w+$', x)[0])

tracks_df.head()

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,name,id
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,0UaMYEvWZi0ZqiDOoHU3YI
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,0WqIKmW4BTrj3eJFmnCKMv
3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified,Throwbacks,1AWQoqb9bSvzTjaLralEkT
4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot,Throwbacks,1lzr43nnXAijIGYnCT8M8H


In [4]:
# Testing

# auth_manager = SpotifyClientCredentials(
#         client_id = "048ca22982da402e81d73f56c5b62c8f",
#         client_secret = "b710d3163f0747908258356f7f4324eb"
#     )
# sp = spotipy.Spotify(auth_manager = auth_manager)

# track_uri = "spotify:track:0UaMYEvWZi0ZqiDOoHU3YI"

# track_features = []

# audio_features = sp.audio_features(track_uri)[0]
# artist = sp.track(track_uri)["artists"][0]["id"]
# artist_genres = sp.artist(artist)["genres"] # List

# # Artist and track popularity
# artist_popularity = sp.artist(artist)["popularity"]
# track_popularity = sp.track(track_uri)["popularity"]

# track_features.append(audio_features)
# track_features[0]["artist_genres"] = artist_genres
# track_features[0]["artist_popularity"] = artist_popularity
# track_features[0]["track_popularity"] = track_popularity

# track_features = pd.DataFrame(track_features)
# track_features

In [5]:
def get_track_features(track_uri):
    
    auth_manager = SpotifyClientCredentials(
        client_id = "048ca22982da402e81d73f56c5b62c8f",
        client_secret = "b710d3163f0747908258356f7f4324eb"
    )
    sp = spotipy.Spotify(auth_manager = auth_manager)

    track_features = []
    
    # Audio features
    audio_features = sp.audio_features(track_uri)[0]
    
    # Year of release
    release_date_precision = sp.track(track_uri)["album"]["release_date_precision"]
    release_date = sp.track(track_uri)["album"]["release_date"]
    year = 0
    if release_date_precision == "year":
        rd = datetime.strptime(release_date, "%Y")
        year = rd.year
    elif release_date_precision == "month":
        rd = datetime.strptime(release_date, "%Y-%m")
        year = rd.year
    else:
        rd = datetime.strptime(release_date, "%Y-%m-%d")
        year = rd.year

    # Artist genre
    artist = sp.track(track_uri)["artists"][0]["id"]
    artist_genres = sp.artist(artist)["genres"] # List

    # Artist and track popularity
    artist_popularity = sp.artist(artist)["popularity"]
    track_popularity = sp.track(track_uri)["popularity"]

    track_features.append(audio_features)
    track_features[0]["year"] = year
    track_features[0]["artist_genres"] = artist_genres
    track_features[0]["artist_popularity"] = artist_popularity
    track_features[0]["track_popularity"] = track_popularity

    return track_features[0]

In [6]:
# Testing
get_track_features("spotify:track:0UaMYEvWZi0ZqiDOoHU3YI")

{'danceability': 0.904,
 'energy': 0.813,
 'key': 4,
 'loudness': -7.105,
 'mode': 0,
 'speechiness': 0.121,
 'acousticness': 0.0311,
 'instrumentalness': 0.00697,
 'liveness': 0.0471,
 'valence': 0.81,
 'tempo': 125.461,
 'type': 'audio_features',
 'id': '0UaMYEvWZi0ZqiDOoHU3YI',
 'uri': 'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI',
 'track_href': 'https://api.spotify.com/v1/tracks/0UaMYEvWZi0ZqiDOoHU3YI',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0UaMYEvWZi0ZqiDOoHU3YI',
 'duration_ms': 226864,
 'time_signature': 4,
 'year': 2005,
 'artist_genres': ['dance pop',
  'hip hop',
  'hip pop',
  'neo soul',
  'pop rap',
  'r&b',
  'rap',
  'urban contemporary',
  'virginia hip hop'],
 'artist_popularity': 73,
 'track_popularity': 69}

In [7]:
from tqdm import tqdm

In [8]:
# TEMP CODE TO SUBSET DF
# SMALLER DATASET FOR FASTER TESTING

tracks_df = tracks_df[:100]
tracks_df

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,name,id
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,0UaMYEvWZi0ZqiDOoHU3YI
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,0WqIKmW4BTrj3eJFmnCKMv
3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified,Throwbacks,1AWQoqb9bSvzTjaLralEkT
4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot,Throwbacks,1lzr43nnXAijIGYnCT8M8H
...,...,...,...,...,...,...,...,...,...,...
96,5,Lovelyz,spotify:track:24psBRmEw3kHjBGZfl1dmb,spotify:artist:3g34PW5oNmDBxMVUTzx2XK,Ah-Choo,spotify:album:5ZJuawNI3RvxURIBtsDHs0,218474,Lovelyz8,korean,24psBRmEw3kHjBGZfl1dmb
97,6,LEE HI,spotify:track:06L1apH8kLF47dbhZ4Zg9A,spotify:artist:7cVZApDoQZpS447nHTsNqu,BREATHE,spotify:album:1xnXVzinhfO4I9CzTocPfh,288992,SEOULITE,korean,06L1apH8kLF47dbhZ4Zg9A
98,7,LEE HI,spotify:track:2qWgqPdW1OiAP8KSBH1b93,spotify:artist:7cVZApDoQZpS447nHTsNqu,FXXK WIT US,spotify:album:1xnXVzinhfO4I9CzTocPfh,217861,SEOULITE,korean,2qWgqPdW1OiAP8KSBH1b93
99,8,Ailee,spotify:track:2PTf3zh9UUsgdxQ5b0eXg8,spotify:artist:3uGFTJ7JMllvhgGpumieHF,I Will Show You,spotify:album:7IyU3Bqm8ERDh7i4wq4OuN,234106,U&I,korean,2PTf3zh9UUsgdxQ5b0eXg8


In [9]:
tracks_feature_df = []

for track in tqdm(tracks_df["track_uri"]):
    track_features = get_track_features(track)
    tracks_feature_df.append(track_features)

tracks_feature_df = pd.DataFrame(tracks_feature_df)
tracks_feature_df.head()

100%|██████████| 100/100 [01:05<00:00,  1.52it/s]


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,id,uri,track_href,analysis_url,duration_ms,time_signature,year,artist_genres,artist_popularity,track_popularity
0,0.904,0.813,4,-7.105,0,0.121,0.0311,0.00697,0.0471,0.81,...,0UaMYEvWZi0ZqiDOoHU3YI,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4,2005,"[dance pop, hip hop, hip pop, neo soul, pop ra...",73,69
1,0.774,0.838,5,-3.914,0,0.114,0.0249,0.025,0.242,0.924,...,6I9VzXrHxO9rA9A5euc8Ak,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,https://api.spotify.com/v1/audio-analysis/6I9V...,198800,4,2003,"[dance pop, pop, post-teen pop]",81,83
2,0.664,0.758,2,-6.583,0,0.21,0.00238,0.0,0.0598,0.701,...,0WqIKmW4BTrj3eJFmnCKMv,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,https://api.spotify.com/v1/audio-analysis/0WqI...,235933,4,2003,"[dance pop, pop, r&b]",87,21
3,0.892,0.714,4,-6.055,0,0.141,0.201,0.000234,0.0521,0.817,...,1AWQoqb9bSvzTjaLralEkT,spotify:track:1AWQoqb9bSvzTjaLralEkT,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,https://api.spotify.com/v1/audio-analysis/1AWQ...,267267,4,2002,"[dance pop, pop]",80,78
4,0.853,0.606,0,-4.596,1,0.0713,0.0561,0.0,0.313,0.654,...,1lzr43nnXAijIGYnCT8M8H,spotify:track:1lzr43nnXAijIGYnCT8M8H,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,https://api.spotify.com/v1/audio-analysis/1lzr...,227600,4,2000,"[dance pop, pop rap, reggae fusion]",74,0


In [10]:
# Remove duplicated `duration_ms` column prior to merge

tracks_df = tracks_df.drop(columns = ["duration_ms"])
tracks_feature_df = tracks_feature_df.drop(columns = ["id"])
tracks_complete_df = tracks_df.set_index("track_uri").join(tracks_feature_df.set_index("uri"))
tracks_complete_df = tracks_complete_df.drop(columns = ['pos', 'artist_name', 'artist_uri', 'track_name', 'album_uri', 'album_name', 'name', 'track_href', 'analysis_url', 'duration_ms', 'type'])
tracks_complete_df.head()

Unnamed: 0_level_0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,year,artist_genres,artist_popularity,track_popularity
track_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,0UaMYEvWZi0ZqiDOoHU3YI,0.904,0.813,4,-7.105,0,0.121,0.0311,0.00697,0.0471,0.81,125.461,4,2005,"[dance pop, hip hop, hip pop, neo soul, pop ra...",73,69
spotify:track:6I9VzXrHxO9rA9A5euc8Ak,6I9VzXrHxO9rA9A5euc8Ak,0.774,0.838,5,-3.914,0,0.114,0.0249,0.025,0.242,0.924,143.04,4,2003,"[dance pop, pop, post-teen pop]",81,83
spotify:track:0WqIKmW4BTrj3eJFmnCKMv,0WqIKmW4BTrj3eJFmnCKMv,0.664,0.758,2,-6.583,0,0.21,0.00238,0.0,0.0598,0.701,99.259,4,2003,"[dance pop, pop, r&b]",87,21
spotify:track:1AWQoqb9bSvzTjaLralEkT,1AWQoqb9bSvzTjaLralEkT,0.892,0.714,4,-6.055,0,0.141,0.201,0.000234,0.0521,0.817,100.972,4,2002,"[dance pop, pop]",80,78
spotify:track:1lzr43nnXAijIGYnCT8M8H,1lzr43nnXAijIGYnCT8M8H,0.853,0.606,0,-4.596,1,0.0713,0.0561,0.0,0.313,0.654,94.759,4,2000,"[dance pop, pop rap, reggae fusion]",74,0


In [11]:
def feature_engineering(df):
    
    final = pd.get_dummies(df, columns = ["key", "mode"])
    final.reset_index(drop = True, inplace = True)

    scaler = StandardScaler()
    norm_cols = [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "time_signature",
        "year",
        "artist_popularity",
        "track_popularity"
    ]
    scaled_df = final[norm_cols].reset_index(drop = True)
    scaled_df = pd.DataFrame(scaler.fit_transform(scaled_df), columns = norm_cols)
    final.drop(columns = norm_cols, inplace = True)
    final = pd.concat([scaled_df, final], axis = 1)

    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(final["artist_genres"].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.reset_index(drop = True, inplace = True)
    final = pd.concat([final, genre_df], axis = 1)

    final["id"] = df["id"].values

    return final

In [12]:
# def feature_engineering(df):
#     tfidf = TfidfVectorizer()
#     tfidf_matrix = tfidf.fit_transform(df["artist_genres"].apply(lambda x: " ".join(x)))
#     genre_df = pd.DataFrame(tfidf_matrix.toarray())
#     genre_df.reset_index(drop = True, inplace = True)

#     encoded_df = pd.get_dummies(df, columns = ["key", "mode"])
#     encoded_df.reset_index(drop = True, inplace = True)

#     scaler = StandardScaler()
#     norm_cols = [
#         "danceability",
#         "energy",
#         "loudness",
#         "speechiness",
#         "acousticness",
#         "instrumentalness",
#         "liveness",
#         "valence",
#         "tempo",
#         "time_signature",
#         "year",
#         "artist_popularity",
#         "track_popularity"
#     ]
#     scale_df = df[norm_cols].reset_index(drop = True)
#     scaled_df = pd.DataFrame(scaler.fit_transform(scale_df), columns = norm_cols)

#     final_df = pd.concat([genre_df, encoded_df, scaled_df], axis = 1)
#     final_df["id"] = df["id"].values

#     return final_df

In [13]:
# Testing
tracks_complete_engineered = feature_engineering(tracks_complete_df)
tracks_complete_engineered.head()

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,...,78,79,80,81,82,83,84,85,86,87
0,1.857381,0.445566,-0.374324,0.231849,-0.577368,-0.323988,-0.952974,1.134649,0.136098,0.200297,...,0.0,0.0,0.0,0.0,0.0,0.186353,0.276896,0.0,0.0,0.0
1,1.05647,0.58664,0.861188,0.161616,-0.611398,-0.251889,0.489188,1.660334,0.713508,0.200297,...,0.0,0.457313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.378776,0.135205,-0.172213,1.124809,-0.735002,-0.351859,-0.859,0.632021,-0.724549,0.200297,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.78345,-0.113084,0.032222,0.432514,0.355149,-0.350924,-0.915976,1.166928,-0.668282,0.200297,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.543177,-0.722521,0.597127,-0.266804,-0.440153,-0.351859,1.014552,0.415291,-0.872358,0.200297,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
def get_user_playlist(playlist_uri):

    auth_manager = SpotifyClientCredentials(
        client_id = "048ca22982da402e81d73f56c5b62c8f",
        client_secret = "b710d3163f0747908258356f7f4324eb"
    )
    sp = spotipy.Spotify(auth_manager = auth_manager)

    results = sp.user_playlist(None, playlist_uri, 'tracks')
    playlist_tracks_data = results['tracks']
    user_playlist = []

    for song in playlist_tracks_data['items']:
        track = {}
        track["track_uri"] = song["track"]["uri"]
        track["id"] = re.findall(r'\w+$', track["track_uri"])[0]
        
        user_playlist.append(track)

    user_playlist = pd.DataFrame(user_playlist)

    return user_playlist

# Testing
get_user_playlist('https://open.spotify.com/playlist/7i7K7r0IEs3H2zKKMFds2M?si=80d9e9d775fc4386')

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,album_name,name,id
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook,Throwbacks,0UaMYEvWZi0ZqiDOoHU3YI
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,In The Zone,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,0WqIKmW4BTrj3eJFmnCKMv
3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,Justified,Throwbacks,1AWQoqb9bSvzTjaLralEkT
4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,Hot Shot,Throwbacks,1lzr43nnXAijIGYnCT8M8H


In [15]:
def vectorise_playlist(library, playlist):
    # Songs from the library which are also within the playlist
    playlist_with_feature = library[library["id"].isin(playlist["id"].values)]
    playlist_with_feature.drop(columns = ["id", "artist_genres"], axis = 1, inplace = True)

    # Songs from the library which are not found in the playlist
    nonplaylist_with_feature = library[~library["id"].index.isin(playlist["id"].values)]
    
    return playlist_with_feature.sum(axis = 0), nonplaylist_with_feature

In [16]:
# Testing
playlist, nonplaylist = vectorise_playlist(tracks_complete_engineered, test_df)
playlist

danceability    20.826960
energy          12.639948
loudness        24.283658
speechiness      3.374266
acousticness   -14.279096
                  ...    
83               3.209424
84               0.276896
85               0.000000
86               0.991920
87               0.000000
Length: 115, dtype: float64

In [17]:
def recommend_songs(library, playlist, nonplaylist, num):
    nonplaylist_df = library[library["id"].isin(nonplaylist["id"].values)]
    a = nonplaylist.drop(columns = ["id", "artist_genres"], axis = 1).values
    b = playlist.values.reshape(1, -1)
    nonplaylist_df["similarity_score"] = cosine_similarity(a, b)[:,0]
    recommended_playlist = nonplaylist_df.sort_values("similarity_score", ascending = False).head(num)

    return recommended_playlist

In [18]:
# Testing
new_playlist = recommend_songs(tracks_df, playlist, nonplaylist, 5)
new_playlist

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,album_name,name,id,similarity_score
22,22,Bowling For Soup,spotify:track:19Js5ypV6JKn4DMExHQbGc,spotify:artist:5ND0mGcL9SKSjWIjPd0xIb,Stacy's Mom,spotify:album:3Q7xpHmP8k3HryE0LQdIk0,I've Never Done Anything Like This,Throwbacks,19Js5ypV6JKn4DMExHQbGc,0.68486
28,28,Jason Derulo,spotify:track:67T6l4q3zVjC5nZZPXByU8,spotify:artist:07YZf4WDAMNwqr4jfgOZ8y,Whatcha Say,spotify:album:0aVJmVAeEx78nAA1rAKYf7,Jason Derulo,Throwbacks,67T6l4q3zVjC5nZZPXByU8,0.678353
46,46,Miley Cyrus,spotify:track:3utIAb67sOu0QHxBE88P1M,spotify:artist:5YGY8feqx7naU7z4HrwZM6,See You Again,spotify:album:6SkirMQoL4QhnXOM5MH5El,See You Again,Throwbacks,3utIAb67sOu0QHxBE88P1M,0.669167
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,In The Zone,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak,0.667828
21,21,The Black Eyed Peas,spotify:track:0uqPG793dkDDN7sCUJJIVC,spotify:artist:1yxSLGMDHlW21z4YXirZDS,Where Is The Love?,spotify:album:1bNyYpkDRovmErm4QeDrpJ,Elephunk,Throwbacks,0uqPG793dkDDN7sCUJJIVC,0.664575


In [19]:
# holder = []
# index = 0

# input_playlist = playlist.drop(columns = ["id", "artist_genres", "artist_uri"], axis = 1).values

# for i in input_playlist:
#     noncurr = nonplaylist.drop(columns = ["id", "artist_genres", "artist_uri"], axis = 1).iloc[:1].values[0].reshape(1,-1)
#     curr = i.reshape(1,-1)
#     holder.append([cosine_similarity(curr, noncurr), index])
#     index += 1