In [24]:
import pandas as pd
import time
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials


In [32]:
# Concatenates playlist csvs and then concatenates that with the big csv we downloaded

def csvCondensor(directory):
    """Merge the playlist CSVs and the large downloaded CSV into one track table.

    Every file in ``./<directory>`` whose name starts with ``"good"`` is read,
    its first column is discarded, and the frames are concatenated. The file
    ``downloadedData.csv`` from the same directory is then appended, restricted
    to the columns the playlist frames have.

    Args:
        directory: Folder name, relative to the current working directory,
            that holds the ``good*`` CSVs and ``downloadedData.csv``.

    Returns:
        A DataFrame with lower-cased ``name`` / ``artist`` values, duplicates
        removed first by ``url`` and then by ``(name, artist)`` (first
        occurrence kept), and a fresh 0..n-1 index.

    Raises:
        ValueError: If the directory contains no file starting with ``"good"``.
    """
    dataDir = os.path.join(".", directory)

    # Reads in and concatenates data that is from Ben and friends.
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order -- and therefore which duplicate keep='first' retains -- stable
    # from run to run.
    playlistFrames = []
    for file in sorted(os.listdir(dataDir)):
        if file.startswith("good"):
            # Read from the directory that was listed (previously hard-coded
            # to "./data" regardless of the argument).
            csv = pd.read_csv(os.path.join(dataDir, file))
            # Presumably the first column is the index saved by to_csv -- it
            # is dropped unconditionally.
            playlistFrames.append(csv.drop(columns=csv.columns[0]))
    if not playlistFrames:
        raise ValueError(f"No CSV files starting with 'good' found in {dataDir!r}")
    result = pd.concat(playlistFrames)

    # Reads huge song csv and appends the two dfs
    bigCsv = pd.read_csv(os.path.join(dataDir, 'downloadedData.csv'))
    # The big CSV's url column is prefixed so it matches the full track links
    # used for de-duplication below (assumes it stores bare track ids).
    bigCsv["url"] = "https://open.spotify.com/track/" + bigCsv["url"]
    listDifference = [
        element for element in bigCsv.columns if element not in result.columns
    ]

    # Data processing
    bigCsv = bigCsv.drop(columns=listDifference)
    result = pd.concat([result, bigCsv])
    # Lower-case before de-duplicating so differently-cased copies collapse.
    result["artist"] = result["artist"].str.lower()
    result["name"] = result["name"].str.lower()
    result = result.drop_duplicates(subset='url', keep='first')
    result = result.drop_duplicates(subset=['name', 'artist'], keep='first')
    result = result.reset_index(drop=True)
    return result


# Build the combined track catalogue from ./data once; the evaluation cell
# below reads this `condensed` frame.
condensed = csvCondensor('data')
# Bare last expression so the notebook renders the (truncated) frame.
condensed

 

Unnamed: 0,name,artist,url,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,perth - dusky remix,kink,https://open.spotify.com/track/38mFPR6JTtMywd4feWQJKL,0.564,0.834,10,-10.098,1,0.0571,0.000403,0.903,0.706,0.375,124.996
1,fashion (with boys noize),kungs,https://open.spotify.com/track/7E5YYuHcdlIIGMqkOBiBh0,0.806,0.885,11,-6.19,1,0.039,0.00003,0.814,0.0779,0.171,121.995
2,dedication to house music - extended mix,dennis quin,https://open.spotify.com/track/2Lux0SyVTI1N2lAdUB5UpD,0.796,0.647,6,-5.882,1,0.0505,0.000874,0.89,0.029,0.322,129.975
3,frankenstein (feat. a$ap rocky),swedish house mafia,https://open.spotify.com/track/3gDRZkL8YfzpZg1N1YsW5x,0.531,0.612,1,-7.966,1,0.306,0.0119,0.000098,0.311,0.226,105.997
4,burnin',daft punk,https://open.spotify.com/track/6nSOhDfZOulWp0pnEHFrnn,0.87,0.875,6,-8.501,0,0.205,0.192,0.357,0.0804,0.715,124.298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134894,come back home,calum scott,https://open.spotify.com/track/0cvfSKcm9VeduwyYPrxtLx,0.601,0.801,11,-5.174,1,0.0323,0.00678,0.0,0.0991,0.289,131.049
134895,enough for you,saint claire,https://open.spotify.com/track/43MP9F7UzvfilSrw2SqZGJ,0.387,0.249,9,-13.233,1,0.0437,0.918,0.0,0.103,0.346,94.039
134896,do it,mike stud,https://open.spotify.com/track/4TWlUuFk81NGUNKwndyS5Q,0.717,0.532,8,-8.351,0,0.206,0.33,0.0,0.0997,0.546,156.977
134897,no smoke,d savage,https://open.spotify.com/track/5iGBXzOoRo4sBTy8wdzMyK,0.772,0.51,4,-9.67,0,0.12,0.0079,0.0,0.131,0.0755,120.049


In [39]:
# Evaluation


# playlists to retrieve similar songs to
testUpbeatTrap = pd.read_csv('./data/goodplaylist47.csv')
testSadTrap = pd.read_csv('./data/goodplaylist2.csv')
testHouse = pd.read_csv('./data/goodplaylist38.csv')

allTests = [testUpbeatTrap, testSadTrap, testHouse]
scalable_columns = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo'
]
pd.set_option('display.max_colwidth', None)

# The scaler is fitted on the catalogue only, so it is identical for every
# test playlist: fit it once instead of once per loop iteration.
scaler = StandardScaler()
dataScaled = scaler.fit_transform(condensed[scalable_columns])

for test in allTests:
    # Standardize the playlist with the catalogue's mean / variance
    playlistScaled = scaler.transform(test[scalable_columns])
    # Cosine similarity of every playlist track (rows) against every
    # catalogue track (columns)
    similarity_matrix = cosine_similarity(playlistScaled, dataScaled)
    # Average over the playlist's tracks -> one score per catalogue track
    similarity_scores = similarity_matrix.mean(axis=0)
    # Attach the scores to a copy of the very frame they were computed from.
    # Rebuilding the catalogue with csvCondensor here re-read every CSV per
    # playlist and relied on the rebuilt rows coming back in the same order.
    conSim = condensed.copy()
    conSim['similarity_score'] = similarity_scores
    recommendations = conSim.sort_values(by='similarity_score', ascending=False)
    # Get the top N recommendations (here: top 15)
    top_recommendations = recommendations.head(15)
    print(top_recommendations[["name","artist","url"]])



                                                   name           artist  \
1252                                           mistress            gunna   
390     xotic (feat. future, rich the kid & young thug)          dj esco   
11255                                         no defeat            preme   
94473                                         two packs     boyz on marz   
76687                                           i.e.d.s          kanardo   
48978                                      i can’t wait       coca vango   
124522                                el anillo - remix   jennifer lopez   
771                                           my legacy           future   
302                         undefeated (feat. lil keed)           future   
20975           flo rida (feat. ski mask the slump god)  higher brothers   
38822                                    fuckin shit up     lilcj kasino   
122711                                   do not disturb      trill sammy   
108335      

# User made genre & Upbeat vs. Downbeat distinctions
### Trap: Modern Rap/Hip-Hop that's typically bassier & uses a variety of synthesized drums such as 808s
# Upbeat Trap (used a party/gym playlist)
## - Energy accuracy: 14/15
### Incorrect placements: i.e.d.s by kanardo
## - Genre accuracy: 14/15     
### Incorrect placements: el anillo - remix by jennifer lopez
## - Overall accuracy: 13/15
# Downbeat Trap (used a trap "feels" playlist)
## First track's album name was "pain", which was funny & a positive indicator
## - Energy accuracy: 12/15
### Incorrect placements : Peacoat by Future, We Alive by Kris Wu & Audemars by young igi
## - Genre accuracy: 15/15     
## - Overall accuracy: 13/15
# House music (more melodic, chill subgenre of EDM)
## - Genre Accuracy: 15/15 were under the umbrella of EDM
## - Overall Accuracy: 12/15 
### Incorrect placements : Don't Give Up by Ghastly, Gas by Top Shelf & Look At Me by krnfx


In [12]:


# Spotify API authentication (client-credentials flow).
# The client id / secret are read from the environment instead of being
# hard-coded, so they are not committed or shared with the notebook.
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
clientID = os.environ.get("SPOTIPY_CLIENT_ID")
ClientSecret = os.environ.get("SPOTIPY_CLIENT_SECRET")
if not clientID or not ClientSecret:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables to your Spotify app credentials before running this cell."
    )

credentials = SpotifyClientCredentials(client_id=clientID, client_secret=ClientSecret)
sp = spotipy.Spotify(client_credentials_manager=credentials)

# INPUT LINK IN FOLLOWING FORMAT: https://open.spotify.com/playlist/33PIJ4bhVgkOzyW5cKepM6


def usingYourPlaylist(link,offset=0):
    """Recommend catalogue tracks that are similar to a Spotify playlist.

    The playlist is fetched page by page through the module-level ``sp``
    client, audio features are requested for each page of tracks, and the
    catalogue built by ``csvCondensor('data')`` is ranked by the mean cosine
    similarity between each catalogue track and the playlist's tracks
    (computed on standardized audio features).

    Args:
        link: Playlist link such as
            ``https://open.spotify.com/playlist/<id>`` (a trailing ``?si=...``
            query string is ignored).
        offset: Index of the first playlist track to fetch.

    Returns:
        The 20 catalogue rows with the highest ``similarity_score``.
        If Spotify answers with HTTP 429 (rate limited) while audio features
        are being fetched, the response headers are printed and the
        partially collected playlist table (no ``similarity_score`` column)
        is returned instead.

    Raises:
        ValueError: If no playlist track with audio features could be fetched.
    """
    # "https://open.spotify.com/playlist/<id>?si=..." -> "<id>"
    playlistUri = link.split("/")[-1].split("?")[0]
    # Page size for playlist_tracks; each page is also sent to audio_features
    # as one batch (that endpoint accepts at most 100 ids per call).
    limit = 100

    scalable_columns = [
        'danceability',
        'energy',
        'key',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo'
    ]
    column_names = ['name', 'artist', 'url'] + scalable_columns

    # Rows are collected in a list and turned into a DataFrame once, instead
    # of growing a DataFrame one row at a time.
    rows = []
    while True:
        # Fetching a batch of tracks from the playlist
        response = sp.playlist_tracks(playlistUri, offset=offset, limit=limit, fields='items.track(name,artists,name,album,name,external_urls(spotify)),next')

        # Keep only entries that carry a track object with a Spotify URL.
        pageTracks = []
        for item in response['items']:
            track = item.get('track')
            if not track:
                continue
            externalUrls = track.get('external_urls') or {}
            if 'spotify' not in externalUrls:
                continue
            pageTracks.append(
                (track['name'], track['artists'][0]['name'], externalUrls['spotify'])
            )

        if pageTracks:
            try:
                # One request for the whole page rather than one per track.
                pageFeatures = sp.audio_features([url for _, _, url in pageTracks])
                time.sleep(.5)
            except spotipy.SpotifyException as e:
                if e.http_status == 429:
                    # Rate limited: show the headers and hand back what was
                    # collected so far.
                    print(e.headers)
                    return pd.DataFrame(rows, columns=column_names)
                # Other API errors are reported rather than silently ignored;
                # the affected page is skipped.
                print(f"Skipping {len(pageTracks)} track(s) at offset {offset}: {e}")
                pageFeatures = []

            for (trackName, artistName, url), audioFeatures in zip(pageTracks, pageFeatures or []):
                # The API yields None for tracks without audio features.
                if audioFeatures is None:
                    continue
                rows.append(
                    [trackName, artistName, url]
                    + [audioFeatures[column] for column in scalable_columns]
                )

        if response['next'] is not None:
            offset += limit
        else:
            break

    playlistDf = pd.DataFrame(rows, columns=column_names)
    if playlistDf.empty:
        raise ValueError(f"No tracks with audio features could be fetched from {link!r}")

    condensed = csvCondensor('data')
    # Standardize with the catalogue's statistics, then apply them to the playlist
    scaler = StandardScaler()
    dataScaled = scaler.fit_transform(condensed[scalable_columns])
    playlistScaled = scaler.transform(playlistDf[scalable_columns])
    # Cosine similarity of every playlist track (rows) against every
    # catalogue track (columns)
    similarity_matrix = cosine_similarity(playlistScaled,dataScaled)
    # Average over the playlist's tracks -> one score per catalogue track
    similarity_scores = similarity_matrix.mean(axis=0)

    yourSimilarityMatrix = condensed.copy()
    yourSimilarityMatrix['similarity_score'] = similarity_scores
    recommendations = yourSimilarityMatrix.sort_values(by='similarity_score', ascending=False)
    # Show full track names / URLs when the result is displayed
    pd.set_option('display.max_colwidth', None)
    top_recommendations = recommendations.head(20)
    return top_recommendations

# Replace the link below with your own playlist link; the "?si=..." suffix
# may be left in, since usingYourPlaylist drops everything after the "?".

usingYourPlaylist("https://open.spotify.com/playlist/6ZDVwQTn9IO38eORFDfnWC?si=J3jp0aOWTZCOh7O8-wBIdA")

Unnamed: 0,name,artist,url,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,similarity_score
811,Fly Rich,Rich Gang,https://open.spotify.com/track/5TEJdGAiRJgAbIwFzhZnZT,0.753,0.816,7,-4.603,1,0.329,0.0468,0.0,0.244,0.607,143.066,0.648363
67575,Rubberbandz,Tray Loop,https://open.spotify.com/track/0I5Uf88cwQCP98BVwpQbar,0.755,0.756,7,-5.702,1,0.374,0.0605,0.0,0.312,0.67,156.077,0.646891
77897,Cherry,cute girls doing cute things,https://open.spotify.com/track/7Izt8YhdGjJYFoKxJqQnlm,0.742,0.911,6,-2.2,1,0.284,0.0701,0.000876,0.324,0.538,172.082,0.645042
8597,Take Off (feat. Slim Dunkin & Wooh Da Kid),Waka Flocka Flame,https://open.spotify.com/track/18zTEdaEMjZKxPFYCNienp,0.659,0.817,5,-7.114,1,0.306,0.0429,0.0,0.233,0.564,155.962,0.641308
35631,Fish Outta Water,Froggy Fresh,https://open.spotify.com/track/7rAeTBkeWUigkMbNnIbzaz,0.797,0.871,7,-3.292,1,0.315,0.106,0.0,0.331,0.73,159.997,0.64058
52043,Gnarly,Sauve,https://open.spotify.com/track/2wV9a6E4x3yAUBZB1A785K,0.731,0.733,5,-5.614,1,0.278,0.177,3e-06,0.273,0.575,160.108,0.639357
19147,BLOCK ST HOOD,Blu,https://open.spotify.com/track/5qk9zJjTrVW3vu6Jd5uIRv,0.747,0.934,7,-5.56,1,0.367,0.187,0.0,0.245,0.717,188.085,0.638369
110987,Hava Tequila,Andre Xcellence!,https://open.spotify.com/track/5dDH2DoVcBw1Ldn1pSqIlx,0.682,0.817,6,-4.002,1,0.366,0.119,0.0,0.313,0.734,158.41,0.63641
178,Stick Talk,Future,https://open.spotify.com/track/20fAoPjfYltmd3K3bO7gbt,0.729,0.672,7,-6.749,1,0.282,0.0265,0.0,0.244,0.595,159.903,0.634372
104334,Roof,Rico Nasty,https://open.spotify.com/track/1uX18gfpzbP8iLaOjy6p3V,0.807,0.861,10,-3.164,0,0.421,0.0469,0.0,0.344,0.715,175.983,0.634232
