#  Top 10 tracks
Takes a list of pids and returns the top 10 tracks (defined as the most frequently occurring tracks across that list of playlists) by querying the SQL database.

In [1]:
import sqlalchemy as db
import pandas as pd

In [2]:
# This code assumes the following: 1) the variable pid_nns already contains a list of pid
# numbers (described here as the result of the playlist2vec algorithm), and 2) the SQL
# database has already been created and can be queried.

# Create the engine for the SQLite database file spotify_mpd.db and open the connection
# that the query cells below execute against.
# NOTE(review): no cell visible here closes conn or disposes engine -- confirm cleanup elsewhere.
engine = db.create_engine("sqlite:///spotify_mpd.db")
conn = engine.connect()

In [3]:
# Turn the list of pids in pid_nns into a comma-separated string so it can be embedded
# in a SQL query.

# remove hashes to use test set of pids
#pid_nns = [97228, 616461, 459916, 238861, 328167, 415900, 18732, 24285, 273824, 415108, 553672, 38225, 232498, 508452, 149232,
           #659219, 282275, 415234, 648796, 858713, 217067, 508233, 543295, 38064, 259834, 508893, 245491, 508668, 114795, 
           #616712, 580005, 38649, 218295, 508249, 139097, 659081, 631583, 858442, 132771, 616984]

# Each pid goes through int() before being rendered, so only integer literals can end up
# in the SQL text; an entry that is not a number raises here (ValueError/TypeError)
# instead of producing a malformed or unintended query later.
pid_nns_str = ",".join(str(int(pid)) for pid in pid_nns)

In [4]:
# Count how often each track occurs across the playlists listed in pid_nns.
# The query selects the rows of the playlists table whose pid occurs in pid_nns, joins them
# to the pairings table on the pid key, groups the result by track_uri, counts the rows in
# each group, and orders by descending count so the most frequent track_uris appear first.
#
# The SQL is wrapped in db.text() because SQLAlchemy 2.x no longer accepts a plain string in
# Connection.execute(). The pids are supplied as an "expanding" bound parameter (rendered as
# one placeholder per list element) instead of being pasted into the SQL text, and are
# coerced with int() so that integer-like values such as NumPy integers bind cleanly.
top_tracks_query = db.text(
    "SELECT pairings.track_uri, COUNT(*) as frequency FROM pairings "
    "INNER JOIN playlists on playlists.pid = pairings.pid "
    "WHERE playlists.pid IN :pids "
    "GROUP BY pairings.track_uri ORDER BY frequency DESC"
).bindparams(db.bindparam("pids", expanding=True))
ordered_tracks = conn.execute(top_tracks_query, {"pids": [int(pid) for pid in pid_nns]})

# Materialize every (track_uri, frequency) row; later cells slice the top entries from this list.
results = ordered_tracks.fetchall()

In [5]:
# Keep the track_uri (first column) of the first 10 rows of the frequency-ordered results,
# then render those URIs as a comma-separated list of single-quoted strings that can be
# embedded in the next SQL query.
top_10 = [record[0] for record in results[:10]]
top_10_str = ",".join(map("'{}'".format, top_10))

In [6]:
# Look up track_uri, track_name and artist_name in the tracks table for the top 10 track_uris.
#
# The SQL is wrapped in db.text() because SQLAlchemy 2.x no longer accepts a plain string in
# Connection.execute(). The URIs are supplied as an "expanding" bound parameter (one
# placeholder per list element) rather than being quoted by hand and pasted into the SQL
# text, so a URI containing a quote character cannot break the statement.
#
# NOTE: the query has no ORDER BY, so the rows are not guaranteed to come back in the
# frequency order of top_10.
track_info_query = db.text(
    "SELECT track_uri, track_name, artist_name FROM tracks where track_uri IN :uris"
).bindparams(db.bindparam("uris", expanding=True))
top_10_info = conn.execute(track_info_query, {"uris": list(top_10)})
top_10_info.fetchall()

[('spotify:track:0XUfyU2QviPAs6bxSpXYG4', 'Yeah!', 'Usher'),
 ('spotify:track:1lzr43nnXAijIGYnCT8M8H', "It Wasn't Me", 'Shaggy'),
 ('spotify:track:3Bjr9MzHM7KHk6zq7KvJRN', 'Love Myself', 'Hailee Steinfeld'),
 ('spotify:track:3hB5DgAiMAQ4DzYbsMq1IT', 'Love Yourself', 'Justin Bieber'),
 ('spotify:track:4Pwjz3DfvfQWV0rO2V8jyh', 'Bitch, Don’t Kill My Vibe', 'Kendrick Lamar'),
 ('spotify:track:5dNfHmqgr128gMY2tc5CeJ', 'Ignition - Remix', 'R. Kelly'),
 ('spotify:track:66hayvUbTotekKU3H4ta1f', 'Where Are Ü Now (with Justin Bieber)', 'Jack Ü'),
 ('spotify:track:6Knv6wdA0luoMUuuoYi2i1', 'My House', 'Flo Rida'),
 ('spotify:track:7KXjTSCq5nL1LoYtL7XAwS', 'HUMBLE.', 'Kendrick Lamar'),
 ('spotify:track:7vFoFDWqTX0mHzLfrF1Cfy', 'Cheerleader - Felix Jaehn Remix Radio Edit', 'OMI')]